From: Wang Yinfeng wangyinfeng@phytium.com.cn
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I41AUQ CVE: NA
-------------------------------------------------
Phytium S2500 adjusts the GIC's implementation to support multi-socket system designs. This patch adds the driver of this new implementation while keeping the kernel binary compatible with other ARM servers in the ecosystem.
Signed-off-by: Wang Yinfeng wangyinfeng@phytium.com.cn Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Xiongfeng Wang wangxiongfeng2@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/Kconfig.platforms | 6 + drivers/irqchip/Kconfig | 9 + drivers/irqchip/Makefile | 1 + drivers/irqchip/irq-gic-phytium-2500-its.c | 5524 ++++++++++++++++++ drivers/irqchip/irq-gic-phytium-2500.c | 2503 ++++++++ include/acpi/actbl2.h | 3 +- include/linux/irqchip/arm-gic-phytium-2500.h | 725 +++ 7 files changed, 8770 insertions(+), 1 deletion(-) create mode 100644 drivers/irqchip/irq-gic-phytium-2500-its.c create mode 100644 drivers/irqchip/irq-gic-phytium-2500.c create mode 100644 include/linux/irqchip/arm-gic-phytium-2500.h
diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 5c4ac1c9f4e0..dd6b7466fe28 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -199,6 +199,12 @@ config ARCH_MXC This enables support for the ARMv8 based SoCs in the NXP i.MX family.
+config ARCH_PHYTIUM + bool "Phytium SoC Family" + help + This enables support for Phytium ARMv8 SoC family. + select ARM_GIC_PHYTIUM_2500 + config ARCH_QCOM bool "Qualcomm Platforms" select GPIOLIB diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index 6156a065681b..8bee8d6d7a42 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -56,6 +56,15 @@ config ARM_GIC_V3_ITS_FSL_MC depends on FSL_MC_BUS default ARM_GIC_V3_ITS
+config ARM_GIC_PHYTIUM_2500 + bool + select IRQ_DOMAIN + select GENERIC_IRQ_MULTI_HANDLER + select IRQ_DOMAIN_HIERARCHY + select PARTITION_PERCPU + select GENERIC_IRQ_EFFECTIVE_AFF_MASK + select GENERIC_MSI_IRQ_DOMAIN + config ARM_NVIC bool select IRQ_DOMAIN_HIERARCHY diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 94c2885882ee..ed171c68cc6d 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-v3-mbi.o irq-gic-common.o obj-$(CONFIG_ARM_GIC_V3_ITS) += irq-gic-v3-its.o irq-gic-v3-its-platform-msi.o irq-gic-v4.o obj-$(CONFIG_ARM_GIC_V3_ITS_PCI) += irq-gic-v3-its-pci-msi.o obj-$(CONFIG_ARM_GIC_V3_ITS_FSL_MC) += irq-gic-v3-its-fsl-mc-msi.o +obj-$(CONFIG_ARM_GIC_PHYTIUM_2500) += irq-gic-phytium-2500.o irq-gic-phytium-2500-its.o obj-$(CONFIG_PARTITION_PERCPU) += irq-partition-percpu.o obj-$(CONFIG_HISILICON_IRQ_MBIGEN) += irq-mbigen.o obj-$(CONFIG_ARM_NVIC) += irq-nvic.o diff --git a/drivers/irqchip/irq-gic-phytium-2500-its.c b/drivers/irqchip/irq-gic-phytium-2500-its.c new file mode 100644 index 000000000000..227fbc00c3da --- /dev/null +++ b/drivers/irqchip/irq-gic-phytium-2500-its.c @@ -0,0 +1,5524 @@ +/* + * Copyright (C) 2020 Phytium Corporation. + * Author: Wang Yinfeng wangyinfeng@phytium.com.cn + * Chen Baozi chenbaozi@phytium.com.cn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + +#include <linux/acpi.h> +#include <linux/acpi_iort.h> +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/cpu.h> +#include <linux/crash_dump.h> +#include <linux/delay.h> +#include <linux/dma-iommu.h> +#include <linux/efi.h> +#include <linux/interrupt.h> +#include <linux/iopoll.h> +#include <linux/irqdomain.h> +#include <linux/list.h> +#include <linux/log2.h> +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/msi.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/of_pci.h> +#include <linux/of_platform.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/syscore_ops.h> + +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic-phytium-2500.h> +#include <linux/irqchip/arm-gic-v4.h> + +#include <asm/cputype.h> +#include <asm/exception.h> + +#include "irq-gic-common.h" + +#define ITS_FLAGS_CMDQ_NEEDS_FLUSHING (1ULL << 0) +#define ITS_FLAGS_WORKAROUND_CAVIUM_22375 (1ULL << 1) +#define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2) + +#define RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING (1 << 0) +#define RDIST_FLAGS_RD_TABLES_PREALLOCATED (1 << 1) + +static u32 lpi_id_bits; + +/* + * We allocate memory for PROPBASE to cover 2 ^ lpi_id_bits LPIs to + * deal with (one configuration byte per interrupt). PENDBASE has to + * be 64kB aligned (one bit per LPI, plus 8192 bits for SPI/PPI/SGI). + */ +#define LPI_NRBITS lpi_id_bits +#define LPI_PROPBASE_SZ ALIGN(BIT(LPI_NRBITS), SZ_64K) +#define LPI_PENDBASE_SZ ALIGN(BIT(LPI_NRBITS) / 8, SZ_64K) + +#define LPI_PROP_DEFAULT_PRIO GICD_INT_DEF_PRI + +/* + * Collection structure - just an ID, and a redistributor address to + * ping. We use one per CPU as a bag of interrupts assigned to this + * CPU. + */ +struct its_collection { + u64 target_address; + u16 col_id; +}; + +/* + * The ITS_BASER structure - contains memory information, cached + * value of BASER register configuration and ITS page size. + */ +struct its_baser { + void *base; + u64 val; + u32 order; + u32 psz; +}; + +struct its_device; + +/* + * The ITS structure - contains most of the infrastructure, with the + * top-level MSI domain, the command queue, the collections, and the + * list of devices writing to it. + * + * dev_alloc_lock has to be taken for device allocations, while the + * spinlock must be taken to parse data structures such as the device + * list. + */ +struct its_node { + raw_spinlock_t lock; + struct mutex dev_alloc_lock; + struct list_head entry; + void __iomem *base; + void __iomem *sgir_base; + phys_addr_t phys_base; + struct its_cmd_block *cmd_base; + struct its_cmd_block *cmd_write; + struct its_baser tables[GITS_BASER_NR_REGS]; + struct its_collection *collections; + struct fwnode_handle *fwnode_handle; + u64 (*get_msi_base)(struct its_device *its_dev); + u64 typer; + u64 cbaser_save; + u32 ctlr_save; + u32 mpidr; + struct list_head its_device_list; + u64 flags; + unsigned long list_nr; + int numa_node; + unsigned int msi_domain_flags; + u32 pre_its_base; /* for Socionext Synquacer */ + int vlpi_redist_offset; +}; + +#define is_v4(its) (!!((its)->typer & GITS_TYPER_VLPIS)) +#define is_v4_1(its) (!!((its)->typer & GITS_TYPER_VMAPP)) +#define device_ids(its) (FIELD_GET(GITS_TYPER_DEVBITS, (its)->typer) + 1) + +#define ITS_ITT_ALIGN SZ_256 + +/* The maximum number of VPEID bits supported by VLPI commands */ +#define ITS_MAX_VPEID_BITS \ + ({ \ + int nvpeid = 16; \ + if (gic_rdists->has_rvpeid && \ + gic_rdists->gicd_typer2 & GICD_TYPER2_VIL) \ + nvpeid = 1 + (gic_rdists->gicd_typer2 & \ + GICD_TYPER2_VID); \ + \ + nvpeid; \ + }) +#define ITS_MAX_VPEID (1 << (ITS_MAX_VPEID_BITS)) + +/* Convert page order to size in bytes */ +#define PAGE_ORDER_TO_SIZE(o) (PAGE_SIZE << (o)) + +struct event_lpi_map { + unsigned long *lpi_map; + u16 *col_map; + irq_hw_number_t lpi_base; + int nr_lpis; + raw_spinlock_t vlpi_lock; + struct its_vm *vm; + struct its_vlpi_map *vlpi_maps; + int nr_vlpis; +}; + +/* + * The ITS view of a device - belongs to an ITS, owns an interrupt + * translation table, and a list of interrupts. If it some of its + * LPIs are injected into a guest (GICv4), the event_map.vm field + * indicates which one. + */ +struct its_device { + struct list_head entry; + struct its_node *its; + struct event_lpi_map event_map; + void *itt; + u32 nr_ites; + u32 device_id; + bool shared; +}; + +static struct { + raw_spinlock_t lock; + struct its_device *dev; + struct its_vpe **vpes; + int next_victim; +} vpe_proxy; + +struct cpu_lpi_count { + atomic_t managed; + atomic_t unmanaged; +}; + +static DEFINE_PER_CPU(struct cpu_lpi_count, cpu_lpi_count_ft2500); + +static LIST_HEAD(its_nodes); +static DEFINE_RAW_SPINLOCK(its_lock); +static struct rdists *gic_rdists; +static struct irq_domain *its_parent; + +static unsigned long its_list_map; +static u16 vmovp_seq_num; +static DEFINE_RAW_SPINLOCK(vmovp_lock); + +static DEFINE_IDA(its_vpeid_ida); + +#define gic_data_rdist() (raw_cpu_ptr(gic_rdists->rdist)) +#define gic_data_rdist_cpu(cpu) (per_cpu_ptr(gic_rdists->rdist, cpu)) +#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) +#define gic_data_rdist_vlpi_base() (gic_data_rdist_rd_base() + SZ_128K) + +/* + * Skip ITSs that have no vLPIs mapped, unless we're on GICv4.1, as we + * always have vSGIs mapped. + */ +static bool require_its_list_vmovp(struct its_vm *vm, struct its_node *its) +{ + return (gic_rdists->has_rvpeid || vm->vlpi_count[its->list_nr]); +} + +static u16 get_its_list(struct its_vm *vm) +{ + struct its_node *its; + unsigned long its_list = 0; + + list_for_each_entry(its, &its_nodes, entry) { + if (!is_v4(its)) + continue; + + if (require_its_list_vmovp(vm, its)) + __set_bit(its->list_nr, &its_list); + } + + return (u16)its_list; +} + +static inline u32 its_get_event_id(struct irq_data *d) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + return d->hwirq - its_dev->event_map.lpi_base; +} + +static struct its_collection *dev_event_to_col(struct its_device *its_dev, + u32 event) +{ + struct its_node *its = its_dev->its; + + return its->collections + its_dev->event_map.col_map[event]; +} + +static struct its_vlpi_map *dev_event_to_vlpi_map(struct its_device *its_dev, + u32 event) +{ + if (WARN_ON_ONCE(event >= its_dev->event_map.nr_lpis)) + return NULL; + + return &its_dev->event_map.vlpi_maps[event]; +} + +static struct its_vlpi_map *get_vlpi_map(struct irq_data *d) +{ + if (irqd_is_forwarded_to_vcpu(d)) { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + + return dev_event_to_vlpi_map(its_dev, event); + } + + return NULL; +} + +static int vpe_to_cpuid_lock(struct its_vpe *vpe, unsigned long *flags) +{ + raw_spin_lock_irqsave(&vpe->vpe_lock, *flags); + return vpe->col_idx; +} + +static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags) +{ + raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags); +} + +static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags) +{ + struct its_vlpi_map *map = get_vlpi_map(d); + int cpu; + + if (map) { + cpu = vpe_to_cpuid_lock(map->vpe, flags); + } else { + /* Physical LPIs are already locked via the irq_desc lock */ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + cpu = its_dev->event_map.col_map[its_get_event_id(d)]; + /* Keep GCC quiet... */ + *flags = 0; + } + + return cpu; +} + +static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags) +{ + struct its_vlpi_map *map = get_vlpi_map(d); + + if (map) + vpe_to_cpuid_unlock(map->vpe, flags); +} + +static struct its_collection *valid_col(struct its_collection *col) +{ + if (WARN_ON_ONCE(col->target_address & GENMASK_ULL(15, 0))) + return NULL; + + return col; +} + +static struct its_vpe *valid_vpe(struct its_node *its, struct its_vpe *vpe) +{ + if (valid_col(its->collections + vpe->col_idx)) + return vpe; + + return NULL; +} + +/* + * ITS command descriptors - parameters to be encoded in a command + * block. + */ +struct its_cmd_desc { + union { + struct { + struct its_device *dev; + u32 event_id; + } its_inv_cmd; + + struct { + struct its_device *dev; + u32 event_id; + } its_clear_cmd; + + struct { + struct its_device *dev; + u32 event_id; + } its_int_cmd; + + struct { + struct its_device *dev; + int valid; + } its_mapd_cmd; + + struct { + struct its_collection *col; + int valid; + } its_mapc_cmd; + + struct { + struct its_device *dev; + u32 phys_id; + u32 event_id; + } its_mapti_cmd; + + struct { + struct its_device *dev; + struct its_collection *col; + u32 event_id; + } its_movi_cmd; + + struct { + struct its_device *dev; + u32 event_id; + } its_discard_cmd; + + struct { + struct its_collection *col; + } its_invall_cmd; + + struct { + struct its_vpe *vpe; + } its_vinvall_cmd; + + struct { + struct its_vpe *vpe; + struct its_collection *col; + bool valid; + } its_vmapp_cmd; + + struct { + struct its_vpe *vpe; + struct its_device *dev; + u32 virt_id; + u32 event_id; + bool db_enabled; + } its_vmapti_cmd; + + struct { + struct its_vpe *vpe; + struct its_device *dev; + u32 event_id; + bool db_enabled; + } its_vmovi_cmd; + + struct { + struct its_vpe *vpe; + struct its_collection *col; + u16 seq_num; + u16 its_list; + } its_vmovp_cmd; + + struct { + struct its_vpe *vpe; + } its_invdb_cmd; + + struct { + struct its_vpe *vpe; + u8 sgi; + u8 priority; + bool enable; + bool group; + bool clear; + } its_vsgi_cmd; + }; +}; + +/* + * The ITS command block, which is what the ITS actually parses. + */ +struct its_cmd_block { + union { + u64 raw_cmd[4]; + __le64 raw_cmd_le[4]; + }; +}; + +#define ITS_CMD_QUEUE_SZ SZ_64K +#define ITS_CMD_QUEUE_NR_ENTRIES (ITS_CMD_QUEUE_SZ / sizeof(struct its_cmd_block)) + +typedef struct its_collection *(*its_cmd_builder_t)(struct its_node *, + struct its_cmd_block *, + struct its_cmd_desc *); + +typedef struct its_vpe *(*its_cmd_vbuilder_t)(struct its_node *, + struct its_cmd_block *, + struct its_cmd_desc *); + +static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l) +{ + u64 mask = GENMASK_ULL(h, l); + *raw_cmd &= ~mask; + *raw_cmd |= (val << l) & mask; +} + +static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr) +{ + its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0); +} + +static void its_encode_devid(struct its_cmd_block *cmd, u32 devid) +{ + its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32); +} + +static void its_encode_event_id(struct its_cmd_block *cmd, u32 id) +{ + its_mask_encode(&cmd->raw_cmd[1], id, 31, 0); +} + +static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id) +{ + its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32); +} + +static void its_encode_size(struct its_cmd_block *cmd, u8 size) +{ + its_mask_encode(&cmd->raw_cmd[1], size, 4, 0); +} + +static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8); +} + +static void its_encode_valid(struct its_cmd_block *cmd, int valid) +{ + its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63); +} + +static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr) +{ + its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16); +} + +static void its_encode_collection(struct its_cmd_block *cmd, u16 col) +{ + its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); +} + +static void its_encode_vpeid(struct its_cmd_block *cmd, u16 vpeid) +{ + its_mask_encode(&cmd->raw_cmd[1], vpeid, 47, 32); +} + +static void its_encode_virt_id(struct its_cmd_block *cmd, u32 virt_id) +{ + its_mask_encode(&cmd->raw_cmd[2], virt_id, 31, 0); +} + +static void its_encode_db_phys_id(struct its_cmd_block *cmd, u32 db_phys_id) +{ + its_mask_encode(&cmd->raw_cmd[2], db_phys_id, 63, 32); +} + +static void its_encode_db_valid(struct its_cmd_block *cmd, bool db_valid) +{ + its_mask_encode(&cmd->raw_cmd[2], db_valid, 0, 0); +} + +static void its_encode_seq_num(struct its_cmd_block *cmd, u16 seq_num) +{ + its_mask_encode(&cmd->raw_cmd[0], seq_num, 47, 32); +} + +static void its_encode_its_list(struct its_cmd_block *cmd, u16 its_list) +{ + its_mask_encode(&cmd->raw_cmd[1], its_list, 15, 0); +} + +static void its_encode_vpt_addr(struct its_cmd_block *cmd, u64 vpt_pa) +{ + its_mask_encode(&cmd->raw_cmd[3], vpt_pa >> 16, 51, 16); +} + +static void its_encode_vpt_size(struct its_cmd_block *cmd, u8 vpt_size) +{ + its_mask_encode(&cmd->raw_cmd[3], vpt_size, 4, 0); +} + +static void its_encode_vconf_addr(struct its_cmd_block *cmd, u64 vconf_pa) +{ + its_mask_encode(&cmd->raw_cmd[0], vconf_pa >> 16, 51, 16); +} + +static void its_encode_alloc(struct its_cmd_block *cmd, bool alloc) +{ + its_mask_encode(&cmd->raw_cmd[0], alloc, 8, 8); +} + +static void its_encode_ptz(struct its_cmd_block *cmd, bool ptz) +{ + its_mask_encode(&cmd->raw_cmd[0], ptz, 9, 9); +} + +static void its_encode_vmapp_default_db(struct its_cmd_block *cmd, + u32 vpe_db_lpi) +{ + its_mask_encode(&cmd->raw_cmd[1], vpe_db_lpi, 31, 0); +} + +static void its_encode_vmovp_default_db(struct its_cmd_block *cmd, + u32 vpe_db_lpi) +{ + its_mask_encode(&cmd->raw_cmd[3], vpe_db_lpi, 31, 0); +} + +static void its_encode_db(struct its_cmd_block *cmd, bool db) +{ + its_mask_encode(&cmd->raw_cmd[2], db, 63, 63); +} + +static void its_encode_sgi_intid(struct its_cmd_block *cmd, u8 sgi) +{ + its_mask_encode(&cmd->raw_cmd[0], sgi, 35, 32); +} + +static void its_encode_sgi_priority(struct its_cmd_block *cmd, u8 prio) +{ + its_mask_encode(&cmd->raw_cmd[0], prio >> 4, 23, 20); +} + +static void its_encode_sgi_group(struct its_cmd_block *cmd, bool grp) +{ + its_mask_encode(&cmd->raw_cmd[0], grp, 10, 10); +} + +static void its_encode_sgi_clear(struct its_cmd_block *cmd, bool clr) +{ + its_mask_encode(&cmd->raw_cmd[0], clr, 9, 9); +} + +static void its_encode_sgi_enable(struct its_cmd_block *cmd, bool en) +{ + its_mask_encode(&cmd->raw_cmd[0], en, 8, 8); +} + +static inline void its_fixup_cmd(struct its_cmd_block *cmd) +{ + /* Let's fixup BE commands */ + cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]); + cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]); + cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]); + cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]); +} + +static struct its_collection *its_build_mapd_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + unsigned long itt_addr; + u8 size = ilog2(desc->its_mapd_cmd.dev->nr_ites); + + itt_addr = virt_to_phys(desc->its_mapd_cmd.dev->itt); + itt_addr = ALIGN(itt_addr, ITS_ITT_ALIGN); + + its_encode_cmd(cmd, GITS_CMD_MAPD); + its_encode_devid(cmd, desc->its_mapd_cmd.dev->device_id); + its_encode_size(cmd, size - 1); + its_encode_itt(cmd, itt_addr); + its_encode_valid(cmd, desc->its_mapd_cmd.valid); + + its_fixup_cmd(cmd); + + return NULL; +} + +static struct its_collection *its_build_mapc_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + its_encode_cmd(cmd, GITS_CMD_MAPC); + its_encode_collection(cmd, desc->its_mapc_cmd.col->col_id); + its_encode_target(cmd, desc->its_mapc_cmd.col->target_address); + its_encode_valid(cmd, desc->its_mapc_cmd.valid); + + its_fixup_cmd(cmd); + + return desc->its_mapc_cmd.col; +} + +static struct its_collection *its_build_mapti_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_mapti_cmd.dev, + desc->its_mapti_cmd.event_id); + col->col_id = col->col_id % 64; + + its_encode_cmd(cmd, GITS_CMD_MAPTI); + its_encode_devid(cmd, desc->its_mapti_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_mapti_cmd.event_id); + its_encode_phys_id(cmd, desc->its_mapti_cmd.phys_id); + its_encode_collection(cmd, col->col_id); + + its_fixup_cmd(cmd); + + return valid_col(col); +} + +static struct its_collection *its_build_movi_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_movi_cmd.dev, + desc->its_movi_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_MOVI); + its_encode_devid(cmd, desc->its_movi_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_movi_cmd.event_id); + its_encode_collection(cmd, desc->its_movi_cmd.col->col_id); + + its_fixup_cmd(cmd); + + return valid_col(col); +} + +static struct its_collection *its_build_discard_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_discard_cmd.dev, + desc->its_discard_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_DISCARD); + its_encode_devid(cmd, desc->its_discard_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_discard_cmd.event_id); + + its_fixup_cmd(cmd); + + return valid_col(col); +} + +static struct its_collection *its_build_inv_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_inv_cmd.dev, + desc->its_inv_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_INV); + its_encode_devid(cmd, desc->its_inv_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_inv_cmd.event_id); + + its_fixup_cmd(cmd); + + return valid_col(col); +} + +static struct its_collection *its_build_int_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_int_cmd.dev, + desc->its_int_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_INT); + its_encode_devid(cmd, desc->its_int_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_int_cmd.event_id); + + its_fixup_cmd(cmd); + + return valid_col(col); +} + +static struct its_collection *its_build_clear_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_collection *col; + + col = dev_event_to_col(desc->its_clear_cmd.dev, + desc->its_clear_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_CLEAR); + its_encode_devid(cmd, desc->its_clear_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_clear_cmd.event_id); + + its_fixup_cmd(cmd); + + return valid_col(col); +} + +static struct its_collection *its_build_invall_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + its_encode_cmd(cmd, GITS_CMD_INVALL); + its_encode_collection(cmd, desc->its_invall_cmd.col->col_id); + + its_fixup_cmd(cmd); + + return NULL; +} + +static struct its_vpe *its_build_vinvall_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + its_encode_cmd(cmd, GITS_CMD_VINVALL); + its_encode_vpeid(cmd, desc->its_vinvall_cmd.vpe->vpe_id); + + its_fixup_cmd(cmd); + + return valid_vpe(its, desc->its_vinvall_cmd.vpe); +} + +static struct its_vpe *its_build_vmapp_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + unsigned long vpt_addr, vconf_addr; + u64 target; + bool alloc; + + its_encode_cmd(cmd, GITS_CMD_VMAPP); + its_encode_vpeid(cmd, desc->its_vmapp_cmd.vpe->vpe_id); + its_encode_valid(cmd, desc->its_vmapp_cmd.valid); + + if (!desc->its_vmapp_cmd.valid) { + if (is_v4_1(its)) { + alloc = !atomic_dec_return(&desc->its_vmapp_cmd.vpe->vmapp_count); + its_encode_alloc(cmd, alloc); + } + + goto out; + } + + vpt_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->vpt_page)); + target = desc->its_vmapp_cmd.col->target_address + its->vlpi_redist_offset; + + its_encode_target(cmd, target); + its_encode_vpt_addr(cmd, vpt_addr); + its_encode_vpt_size(cmd, LPI_NRBITS - 1); + + if (!is_v4_1(its)) + goto out; + + vconf_addr = virt_to_phys(page_address(desc->its_vmapp_cmd.vpe->its_vm->vprop_page)); + + alloc = !atomic_fetch_inc(&desc->its_vmapp_cmd.vpe->vmapp_count); + + its_encode_alloc(cmd, alloc); + + /* We can only signal PTZ when alloc==1. Why do we have two bits? */ + its_encode_ptz(cmd, alloc); + its_encode_vconf_addr(cmd, vconf_addr); + its_encode_vmapp_default_db(cmd, desc->its_vmapp_cmd.vpe->vpe_db_lpi); + +out: + its_fixup_cmd(cmd); + + return valid_vpe(its, desc->its_vmapp_cmd.vpe); +} + +static struct its_vpe *its_build_vmapti_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + u32 db; + + if (!is_v4_1(its) && desc->its_vmapti_cmd.db_enabled) + db = desc->its_vmapti_cmd.vpe->vpe_db_lpi; + else + db = 1023; + + its_encode_cmd(cmd, GITS_CMD_VMAPTI); + its_encode_devid(cmd, desc->its_vmapti_cmd.dev->device_id); + its_encode_vpeid(cmd, desc->its_vmapti_cmd.vpe->vpe_id); + its_encode_event_id(cmd, desc->its_vmapti_cmd.event_id); + its_encode_db_phys_id(cmd, db); + its_encode_virt_id(cmd, desc->its_vmapti_cmd.virt_id); + + its_fixup_cmd(cmd); + + return valid_vpe(its, desc->its_vmapti_cmd.vpe); +} + +static struct its_vpe *its_build_vmovi_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + u32 db; + + if (!is_v4_1(its) && desc->its_vmovi_cmd.db_enabled) + db = desc->its_vmovi_cmd.vpe->vpe_db_lpi; + else + db = 1023; + + its_encode_cmd(cmd, GITS_CMD_VMOVI); + its_encode_devid(cmd, desc->its_vmovi_cmd.dev->device_id); + its_encode_vpeid(cmd, desc->its_vmovi_cmd.vpe->vpe_id); + its_encode_event_id(cmd, desc->its_vmovi_cmd.event_id); + its_encode_db_phys_id(cmd, db); + its_encode_db_valid(cmd, true); + + its_fixup_cmd(cmd); + + return valid_vpe(its, desc->its_vmovi_cmd.vpe); +} + +static struct its_vpe *its_build_vmovp_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + u64 target; + + target = desc->its_vmovp_cmd.col->target_address + its->vlpi_redist_offset; + its_encode_cmd(cmd, GITS_CMD_VMOVP); + its_encode_seq_num(cmd, desc->its_vmovp_cmd.seq_num); + its_encode_its_list(cmd, desc->its_vmovp_cmd.its_list); + its_encode_vpeid(cmd, desc->its_vmovp_cmd.vpe->vpe_id); + its_encode_target(cmd, target); + + if (is_v4_1(its)) { + its_encode_db(cmd, true); + its_encode_vmovp_default_db(cmd, desc->its_vmovp_cmd.vpe->vpe_db_lpi); + } + + its_fixup_cmd(cmd); + + return valid_vpe(its, desc->its_vmovp_cmd.vpe); +} + +static struct its_vpe *its_build_vinv_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_vlpi_map *map; + + map = dev_event_to_vlpi_map(desc->its_inv_cmd.dev, + desc->its_inv_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_INV); + its_encode_devid(cmd, desc->its_inv_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_inv_cmd.event_id); + + its_fixup_cmd(cmd); + + return valid_vpe(its, map->vpe); +} + +static struct its_vpe *its_build_vint_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_vlpi_map *map; + + map = dev_event_to_vlpi_map(desc->its_int_cmd.dev, + desc->its_int_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_INT); + its_encode_devid(cmd, desc->its_int_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_int_cmd.event_id); + + its_fixup_cmd(cmd); + + return valid_vpe(its, map->vpe); +} + +static struct its_vpe *its_build_vclear_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + struct its_vlpi_map *map; + + map = dev_event_to_vlpi_map(desc->its_clear_cmd.dev, + desc->its_clear_cmd.event_id); + + its_encode_cmd(cmd, GITS_CMD_CLEAR); + its_encode_devid(cmd, desc->its_clear_cmd.dev->device_id); + its_encode_event_id(cmd, desc->its_clear_cmd.event_id); + + its_fixup_cmd(cmd); + + return valid_vpe(its, map->vpe); +} + +static struct its_vpe *its_build_invdb_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + if (WARN_ON(!is_v4_1(its))) + return NULL; + + its_encode_cmd(cmd, GITS_CMD_INVDB); + its_encode_vpeid(cmd, desc->its_invdb_cmd.vpe->vpe_id); + + its_fixup_cmd(cmd); + + return valid_vpe(its, desc->its_invdb_cmd.vpe); +} + +static struct its_vpe *its_build_vsgi_cmd(struct its_node *its, + struct its_cmd_block *cmd, + struct its_cmd_desc *desc) +{ + if (WARN_ON(!is_v4_1(its))) + return NULL; + + its_encode_cmd(cmd, GITS_CMD_VSGI); + its_encode_vpeid(cmd, desc->its_vsgi_cmd.vpe->vpe_id); + its_encode_sgi_intid(cmd, desc->its_vsgi_cmd.sgi); + its_encode_sgi_priority(cmd, desc->its_vsgi_cmd.priority); + its_encode_sgi_group(cmd, desc->its_vsgi_cmd.group); + its_encode_sgi_clear(cmd, desc->its_vsgi_cmd.clear); + its_encode_sgi_enable(cmd, desc->its_vsgi_cmd.enable); + + its_fixup_cmd(cmd); + + return valid_vpe(its, desc->its_vsgi_cmd.vpe); +} + +static u64 its_cmd_ptr_to_offset(struct its_node *its, + struct its_cmd_block *ptr) +{ + return (ptr - its->cmd_base) * sizeof(*ptr); +} + +static int its_queue_full(struct its_node *its) +{ + int widx; + int ridx; + + widx = its->cmd_write - its->cmd_base; + ridx = readl_relaxed(its->base + GITS_CREADR) / sizeof(struct its_cmd_block); + + /* This is incredibly unlikely to happen, unless the ITS locks up. */ + if (((widx + 1) % ITS_CMD_QUEUE_NR_ENTRIES) == ridx) + return 1; + + return 0; +} + +static struct its_cmd_block *its_allocate_entry(struct its_node *its) +{ + struct its_cmd_block *cmd; + u32 count = 1000000; /* 1s! */ + + while (its_queue_full(its)) { + count--; + if (!count) { + pr_err_ratelimited("ITS queue not draining\n"); + return NULL; + } + cpu_relax(); + udelay(1); + } + + cmd = its->cmd_write++; + + /* Handle queue wrapping */ + if (its->cmd_write == (its->cmd_base + ITS_CMD_QUEUE_NR_ENTRIES)) + its->cmd_write = its->cmd_base; + + /* Clear command */ + cmd->raw_cmd[0] = 0; + cmd->raw_cmd[1] = 0; + cmd->raw_cmd[2] = 0; + cmd->raw_cmd[3] = 0; + + return cmd; +} + +static struct its_cmd_block *its_post_commands(struct its_node *its) +{ + u64 wr = its_cmd_ptr_to_offset(its, its->cmd_write); + + writel_relaxed(wr, its->base + GITS_CWRITER); + + return its->cmd_write; +} + +static void its_flush_cmd(struct its_node *its, struct its_cmd_block *cmd) +{ + /* + * Make sure the commands written to memory are observable by + * the ITS. + */ + if (its->flags & ITS_FLAGS_CMDQ_NEEDS_FLUSHING) + gic_flush_dcache_to_poc(cmd, sizeof(*cmd)); + else + dsb(ishst); +} + +static int its_wait_for_range_completion(struct its_node *its, + u64 prev_idx, + struct its_cmd_block *to) +{ + u64 rd_idx, to_idx, linear_idx; + u32 count = 1000000; /* 1s! */ + + /* Linearize to_idx if the command set has wrapped around */ + to_idx = its_cmd_ptr_to_offset(its, to); + if (to_idx < prev_idx) + to_idx += ITS_CMD_QUEUE_SZ; + + linear_idx = prev_idx; + + while (1) { + s64 delta; + + rd_idx = readl_relaxed(its->base + GITS_CREADR); + + /* + * Compute the read pointer progress, taking the + * potential wrap-around into account. + */ + delta = rd_idx - prev_idx; + if (rd_idx < prev_idx) + delta += ITS_CMD_QUEUE_SZ; + + linear_idx += delta; + if (linear_idx >= to_idx) + break; + + count--; + if (!count) { + pr_err_ratelimited("ITS queue timeout (%llu %llu)\n", + to_idx, linear_idx); + return -1; + } + prev_idx = rd_idx; + cpu_relax(); + udelay(1); + } + + return 0; +} + +/* Warning, macro hell follows */ +#define BUILD_SINGLE_CMD_FUNC(name, buildtype, synctype, buildfn) \ +void name(struct its_node *its, \ + buildtype builder, \ + struct its_cmd_desc *desc) \ +{ \ + struct its_cmd_block *cmd, *sync_cmd, *next_cmd; \ + synctype *sync_obj; \ + unsigned long flags; \ + u64 rd_idx; \ + \ + raw_spin_lock_irqsave(&its->lock, flags); \ + \ + cmd = its_allocate_entry(its); \ + if (!cmd) { /* We're soooooo screewed... */ \ + raw_spin_unlock_irqrestore(&its->lock, flags); \ + return; \ + } \ + sync_obj = builder(its, cmd, desc); \ + its_flush_cmd(its, cmd); \ + \ + if (sync_obj) { \ + sync_cmd = its_allocate_entry(its); \ + if (!sync_cmd) \ + goto post; \ + \ + buildfn(its, sync_cmd, sync_obj); \ + its_flush_cmd(its, sync_cmd); \ + } \ + \ +post: \ + rd_idx = readl_relaxed(its->base + GITS_CREADR); \ + next_cmd = its_post_commands(its); \ + raw_spin_unlock_irqrestore(&its->lock, flags); \ + \ + if (its_wait_for_range_completion(its, rd_idx, next_cmd)) \ + pr_err_ratelimited("ITS cmd %ps failed\n", builder); \ +} + +static void its_build_sync_cmd(struct its_node *its, + struct its_cmd_block *sync_cmd, + struct its_collection *sync_col) +{ + its_encode_cmd(sync_cmd, GITS_CMD_SYNC); + its_encode_target(sync_cmd, sync_col->target_address); + + its_fixup_cmd(sync_cmd); +} + +static BUILD_SINGLE_CMD_FUNC(its_send_single_command, its_cmd_builder_t, + struct its_collection, its_build_sync_cmd) + +static void its_build_vsync_cmd(struct its_node *its, + struct its_cmd_block *sync_cmd, + struct its_vpe *sync_vpe) +{ + its_encode_cmd(sync_cmd, GITS_CMD_VSYNC); + its_encode_vpeid(sync_cmd, sync_vpe->vpe_id); + + its_fixup_cmd(sync_cmd); +} + +static BUILD_SINGLE_CMD_FUNC(its_send_single_vcommand, its_cmd_vbuilder_t, + struct its_vpe, its_build_vsync_cmd) + +static void its_send_int(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + desc.its_int_cmd.dev = dev; + desc.its_int_cmd.event_id = event_id; + + its_send_single_command(dev->its, its_build_int_cmd, &desc); +} + +static void its_send_clear(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + desc.its_clear_cmd.dev = dev; + desc.its_clear_cmd.event_id = event_id; + + its_send_single_command(dev->its, its_build_clear_cmd, &desc); +} + +static void its_send_inv(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + desc.its_inv_cmd.dev = dev; + desc.its_inv_cmd.event_id = event_id; + + its_send_single_command(dev->its, its_build_inv_cmd, &desc); +} + +static void its_send_mapd(struct its_device *dev, int valid) +{ + struct its_cmd_desc desc; + + desc.its_mapd_cmd.dev = dev; + desc.its_mapd_cmd.valid = !!valid; + + its_send_single_command(dev->its, its_build_mapd_cmd, &desc); +} + +static void its_send_mapc(struct its_node *its, struct its_collection *col, + int valid) +{ + struct its_cmd_desc desc; + + desc.its_mapc_cmd.col = col; + desc.its_mapc_cmd.valid = !!valid; + + its_send_single_command(its, its_build_mapc_cmd, &desc); +} + +static void its_send_mapti(struct its_device *dev, u32 irq_id, u32 id) +{ + struct its_cmd_desc desc; + + desc.its_mapti_cmd.dev = dev; + desc.its_mapti_cmd.phys_id = irq_id; + desc.its_mapti_cmd.event_id = id; + + its_send_single_command(dev->its, its_build_mapti_cmd, &desc); +} + +static void its_send_movi(struct its_device *dev, + struct its_collection *col, u32 id) +{ + struct its_cmd_desc desc; + + desc.its_movi_cmd.dev = dev; + desc.its_movi_cmd.col = col; + desc.its_movi_cmd.event_id = id; + + its_send_single_command(dev->its, its_build_movi_cmd, &desc); +} + +static void its_send_discard(struct its_device *dev, u32 id) +{ + struct its_cmd_desc desc; + + desc.its_discard_cmd.dev = dev; + desc.its_discard_cmd.event_id = id; + + its_send_single_command(dev->its, its_build_discard_cmd, &desc); +} + +static void its_send_invall(struct its_node *its, struct its_collection *col) +{ + struct its_cmd_desc desc; + + desc.its_invall_cmd.col = col; + + its_send_single_command(its, its_build_invall_cmd, &desc); +} + +static void its_send_vmapti(struct its_device *dev, u32 id) +{ + struct its_vlpi_map *map = dev_event_to_vlpi_map(dev, id); + struct its_cmd_desc desc; + + desc.its_vmapti_cmd.vpe = map->vpe; + desc.its_vmapti_cmd.dev = dev; + desc.its_vmapti_cmd.virt_id = map->vintid; + desc.its_vmapti_cmd.event_id = id; + desc.its_vmapti_cmd.db_enabled = map->db_enabled; + + its_send_single_vcommand(dev->its, its_build_vmapti_cmd, &desc); +} + +static void its_send_vmovi(struct its_device *dev, u32 id) +{ + struct its_vlpi_map *map = dev_event_to_vlpi_map(dev, id); + struct its_cmd_desc desc; + + desc.its_vmovi_cmd.vpe = map->vpe; + desc.its_vmovi_cmd.dev = dev; + desc.its_vmovi_cmd.event_id = id; + desc.its_vmovi_cmd.db_enabled = map->db_enabled; + + its_send_single_vcommand(dev->its, its_build_vmovi_cmd, &desc); +} + +static void its_send_vmapp(struct its_node *its, + struct its_vpe *vpe, bool valid) +{ + struct its_cmd_desc desc; + + desc.its_vmapp_cmd.vpe = vpe; + desc.its_vmapp_cmd.valid = valid; + desc.its_vmapp_cmd.col = &its->collections[vpe->col_idx]; + + its_send_single_vcommand(its, its_build_vmapp_cmd, &desc); +} + +static void its_send_vmovp(struct its_vpe *vpe) +{ + struct its_cmd_desc desc = {}; + struct its_node *its; + unsigned long flags; + int col_id = vpe->col_idx; + + desc.its_vmovp_cmd.vpe = vpe; + + if (!its_list_map) { + its = list_first_entry(&its_nodes, struct its_node, entry); + desc.its_vmovp_cmd.col = &its->collections[col_id]; + its_send_single_vcommand(its, its_build_vmovp_cmd, &desc); + return; + } + + /* + * Yet another marvel of the architecture. If using the + * its_list "feature", we need to make sure that all ITSs + * receive all VMOVP commands in the same order. The only way + * to guarantee this is to make vmovp a serialization point. + * + * Wall <-- Head. + */ + raw_spin_lock_irqsave(&vmovp_lock, flags); + + desc.its_vmovp_cmd.seq_num = vmovp_seq_num++; + desc.its_vmovp_cmd.its_list = get_its_list(vpe->its_vm); + + /* Emit VMOVPs */ + list_for_each_entry(its, &its_nodes, entry) { + if (!is_v4(its)) + continue; + + if (!require_its_list_vmovp(vpe->its_vm, its)) + continue; + + desc.its_vmovp_cmd.col = &its->collections[col_id]; + its_send_single_vcommand(its, its_build_vmovp_cmd, &desc); + } + + raw_spin_unlock_irqrestore(&vmovp_lock, flags); +} + +static void its_send_vinvall(struct its_node *its, struct its_vpe *vpe) +{ + struct its_cmd_desc desc; + + desc.its_vinvall_cmd.vpe = vpe; + its_send_single_vcommand(its, its_build_vinvall_cmd, &desc); +} + +static void its_send_vinv(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + /* + * There is no real VINV command. This is just a normal INV, + * with a VSYNC instead of a SYNC. + */ + desc.its_inv_cmd.dev = dev; + desc.its_inv_cmd.event_id = event_id; + + its_send_single_vcommand(dev->its, its_build_vinv_cmd, &desc); +} + +static void its_send_vint(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + /* + * There is no real VINT command. This is just a normal INT, + * with a VSYNC instead of a SYNC. + */ + desc.its_int_cmd.dev = dev; + desc.its_int_cmd.event_id = event_id; + + its_send_single_vcommand(dev->its, its_build_vint_cmd, &desc); +} + +static void its_send_vclear(struct its_device *dev, u32 event_id) +{ + struct its_cmd_desc desc; + + /* + * There is no real VCLEAR command. This is just a normal CLEAR, + * with a VSYNC instead of a SYNC. + */ + desc.its_clear_cmd.dev = dev; + desc.its_clear_cmd.event_id = event_id; + + its_send_single_vcommand(dev->its, its_build_vclear_cmd, &desc); +} + +static void its_send_invdb(struct its_node *its, struct its_vpe *vpe) +{ + struct its_cmd_desc desc; + + desc.its_invdb_cmd.vpe = vpe; + its_send_single_vcommand(its, its_build_invdb_cmd, &desc); +} + +/* + * irqchip functions - assumes MSI, mostly. + */ +static void lpi_write_config(struct irq_data *d, u8 clr, u8 set) +{ + struct its_vlpi_map *map = get_vlpi_map(d); + irq_hw_number_t hwirq; + void *va; + u8 *cfg; + + if (map) { + va = page_address(map->vm->vprop_page); + hwirq = map->vintid; + + /* Remember the updated property */ + map->properties &= ~clr; + map->properties |= set | LPI_PROP_GROUP1; + } else { + va = gic_rdists->prop_table_va; + hwirq = d->hwirq; + } + + cfg = va + hwirq - 8192; + *cfg &= ~clr; + *cfg |= set | LPI_PROP_GROUP1; + + /* + * Make the above write visible to the redistributors. + * And yes, we're flushing exactly: One. Single. Byte. + * Humpf... + */ + if (gic_rdists->flags & RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING) + gic_flush_dcache_to_poc(cfg, sizeof(*cfg)); + else + dsb(ishst); +} + +static void wait_for_syncr(void __iomem *rdbase) +{ + while (readl_relaxed(rdbase + GICR_SYNCR) & 1) + cpu_relax(); +} + +static void direct_lpi_inv(struct irq_data *d) +{ + struct its_vlpi_map *map = get_vlpi_map(d); + void __iomem *rdbase; + unsigned long flags; + u64 val; + int cpu; + + if (map) { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + + WARN_ON(!is_v4_1(its_dev->its)); + + val = GICR_INVLPIR_V; + val |= FIELD_PREP(GICR_INVLPIR_VPEID, map->vpe->vpe_id); + val |= FIELD_PREP(GICR_INVLPIR_INTID, map->vintid); + } else { + val = d->hwirq; + } + + /* Target the redistributor this LPI is currently routed to */ + cpu = irq_to_cpuid_lock(d, &flags); + raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); + rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; + gic_write_lpir(val, rdbase + GICR_INVLPIR); + + wait_for_syncr(rdbase); + raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); + irq_to_cpuid_unlock(d, flags); +} + +static void lpi_update_config(struct irq_data *d, u8 clr, u8 set) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + + lpi_write_config(d, clr, set); + if (gic_rdists->has_direct_lpi && + (is_v4_1(its_dev->its) || !irqd_is_forwarded_to_vcpu(d))) + direct_lpi_inv(d); + else if (!irqd_is_forwarded_to_vcpu(d)) + its_send_inv(its_dev, its_get_event_id(d)); + else + its_send_vinv(its_dev, its_get_event_id(d)); +} + +static void its_vlpi_set_doorbell(struct irq_data *d, bool enable) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + struct its_vlpi_map *map; + + /* + * GICv4.1 does away with the per-LPI nonsense, nothing to do + * here. + */ + if (is_v4_1(its_dev->its)) + return; + + map = dev_event_to_vlpi_map(its_dev, event); + + if (map->db_enabled == enable) + return; + + map->db_enabled = enable; + + /* + * More fun with the architecture: + * + * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI + * value or to 1023, depending on the enable bit. But that + * would be issueing a mapping for an /existing/ DevID+EventID + * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI + * to the /same/ vPE, using this opportunity to adjust the + * doorbell. Mouahahahaha. We loves it, Precious. + */ + its_send_vmovi(its_dev, event); +} + +static void its_mask_irq(struct irq_data *d) +{ + if (irqd_is_forwarded_to_vcpu(d)) + its_vlpi_set_doorbell(d, false); + + lpi_update_config(d, LPI_PROP_ENABLED, 0); +} + +static void its_unmask_irq(struct irq_data *d) +{ + if (irqd_is_forwarded_to_vcpu(d)) + its_vlpi_set_doorbell(d, true); + + lpi_update_config(d, 0, LPI_PROP_ENABLED); +} + +static __maybe_unused u32 its_read_lpi_count(struct irq_data *d, int cpu) +{ + if (irqd_affinity_is_managed(d)) + return atomic_read(&per_cpu_ptr(&cpu_lpi_count_ft2500, cpu)->managed); + + return atomic_read(&per_cpu_ptr(&cpu_lpi_count_ft2500, cpu)->unmanaged); +} + +static void its_inc_lpi_count(struct irq_data *d, int cpu) +{ + if (irqd_affinity_is_managed(d)) + atomic_inc(&per_cpu_ptr(&cpu_lpi_count_ft2500, cpu)->managed); + else + atomic_inc(&per_cpu_ptr(&cpu_lpi_count_ft2500, cpu)->unmanaged); +} + +static void its_dec_lpi_count(struct irq_data *d, int cpu) +{ + if (irqd_affinity_is_managed(d)) + atomic_dec(&per_cpu_ptr(&cpu_lpi_count_ft2500, cpu)->managed); + else + atomic_dec(&per_cpu_ptr(&cpu_lpi_count_ft2500, cpu)->unmanaged); +} + +static unsigned int cpumask_pick_least_loaded(struct irq_data *d, + const struct cpumask *cpu_mask) +{ + unsigned int cpu = nr_cpu_ids, tmp; + int count = S32_MAX; + + for_each_cpu(tmp, cpu_mask) { + int this_count = its_read_lpi_count(d, tmp); + if (this_count < count) { + cpu = tmp; + count = this_count; + } + } + + return cpu; +} + +/* + * As suggested by Thomas Gleixner in: + * https://lore.kernel.org/r/87h80q2aoc.fsf@nanos.tec.linutronix.de + */ +static int its_select_cpu(struct irq_data *d, + const struct cpumask *aff_mask) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + cpumask_var_t tmpmask; + int cpu, node; + + if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC)) + return -ENOMEM; + + node = its_dev->its->numa_node; + + if (!irqd_affinity_is_managed(d)) { + /* First try the NUMA node */ + if (node != NUMA_NO_NODE) { + /* + * Try the intersection of the affinity mask and the + * node mask (and the online mask, just to be safe). + */ + cpumask_and(tmpmask, cpumask_of_node(node), aff_mask); + cpumask_and(tmpmask, tmpmask, cpu_online_mask); + + /* + * Ideally, we would check if the mask is empty, and + * try again on the full node here. + * + * But it turns out that the way ACPI describes the + * affinity for ITSs only deals about memory, and + * not target CPUs, so it cannot describe a single + * ITS placed next to two NUMA nodes. + * + * Instead, just fallback on the online mask. This + * diverges from Thomas' suggestion above. + */ + cpu = cpumask_pick_least_loaded(d, tmpmask); + if (cpu < nr_cpu_ids) + goto out; + + /* If we can't cross sockets, give up */ + if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)) + goto out; + + /* If the above failed, expand the search */ + } + + /* Try the intersection of the affinity and online masks */ + cpumask_and(tmpmask, aff_mask, cpu_online_mask); + + /* If that doesn't fly, the online mask is the last resort */ + if (cpumask_empty(tmpmask)) + cpumask_copy(tmpmask, cpu_online_mask); + + cpu = cpumask_pick_least_loaded(d, tmpmask); + } else { + cpumask_and(tmpmask, irq_data_get_affinity_mask(d), cpu_online_mask); + + /* If we cannot cross sockets, limit the search to that node */ + if ((its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) && + node != NUMA_NO_NODE) + cpumask_and(tmpmask, tmpmask, cpumask_of_node(node)); + + cpu = cpumask_pick_least_loaded(d, tmpmask); + } +out: + free_cpumask_var(tmpmask); + + pr_debug("IRQ%d -> %*pbl CPU%d\n", d->irq, cpumask_pr_args(aff_mask), cpu); + return cpu; +} + +#define MAX_MARS3_SKT_COUNT 8 + +static int its_cpumask_select(struct its_device *its_dev, + const struct cpumask *mask_val, + const struct cpumask *cpu_mask) +{ + unsigned int skt, skt_id, i; + phys_addr_t its_phys_base; + unsigned int cpu, cpus = 0; + + unsigned int skt_cpu_cnt[MAX_MARS3_SKT_COUNT] = {0}; + + for (i = 0; i < nr_cpu_ids; i++) { + skt = (cpu_logical_map(i) >> 16) & 0xff; + if ((skt >= 0) && (skt < MAX_MARS3_SKT_COUNT)) { + skt_cpu_cnt[skt]++; + } else if (0xff != skt) { + pr_err("socket address: %d is out of range.", skt); + } + } + + its_phys_base = its_dev->its->phys_base; + skt_id = (its_phys_base >> 41) & 0x7; + + if (0 != skt_id) { + for (i = 0; i < skt_id; i++) { + cpus += skt_cpu_cnt[i]; + } + } + + cpu = cpumask_any_and(mask_val, cpu_mask); + if ((cpu > cpus) && (cpu < (cpus + skt_cpu_cnt[skt_id]))) { + cpus = cpu; + } + + return cpus; +} + +static int its_set_affinity(struct irq_data *d, const struct cpumask *mask_val, + bool force) +{ + unsigned int cpu; + const struct cpumask *cpu_mask = cpu_online_mask; + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_collection *target_col; + u32 id = its_get_event_id(d); + int prev_cpu; + unsigned int skt_t1, skt_t2, cpu_idx; + + /* A forwarded interrupt should use irq_set_vcpu_affinity */ + if (irqd_is_forwarded_to_vcpu(d)) + return -EINVAL; + + prev_cpu = its_dev->event_map.col_map[id]; + its_dec_lpi_count(d, prev_cpu); + + cpu_idx = its_cpumask_select(its_dev, mask_val, cpu_mask); + skt_t1 = (cpu_logical_map(cpu_idx) >> 16) & 0xff; + if (!force) + cpu = its_select_cpu(d, mask_val); + else + cpu = cpumask_pick_least_loaded(d, mask_val); + skt_t2 = (cpu_logical_map(cpu) >> 16) & 0xff; + if (skt_t1 != skt_t2) + cpu = cpu_idx; + + if (cpu < 0 || cpu >= nr_cpu_ids) + goto err; + + /* don't set the affinity when the target cpu is same as current one */ + if (cpu != prev_cpu) { + target_col = &its_dev->its->collections[cpu]; + its_send_movi(its_dev, target_col, id); + its_dev->event_map.col_map[id] = cpu; + irq_data_update_effective_affinity(d, cpumask_of(cpu)); + } + + its_inc_lpi_count(d, cpu); + + return IRQ_SET_MASK_OK_DONE; + +err: + its_inc_lpi_count(d, prev_cpu); + return -EINVAL; +} + +static u64 its_irq_get_msi_base(struct its_device *its_dev) +{ + struct its_node *its = its_dev->its; + + return its->phys_base + GITS_TRANSLATER; +} + +static void its_irq_compose_msi_msg(struct irq_data *d, struct msi_msg *msg) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_node *its; + u64 addr; + + its = its_dev->its; + addr = its->get_msi_base(its_dev); + + msg->address_lo = lower_32_bits(addr); + msg->address_hi = upper_32_bits(addr); + msg->data = its_get_event_id(d); +} + +static int its_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + + if (which != IRQCHIP_STATE_PENDING) + return -EINVAL; + + if (irqd_is_forwarded_to_vcpu(d)) { + if (state) + its_send_vint(its_dev, event); + else + its_send_vclear(its_dev, event); + } else { + if (state) + its_send_int(its_dev, event); + else + its_send_clear(its_dev, event); + } + + return 0; +} + +static int its_irq_retrigger(struct irq_data *d) +{ + return !its_irq_set_irqchip_state(d, IRQCHIP_STATE_PENDING, true); +} + +/* + * Two favourable cases: + * + * (a) Either we have a GICv4.1, and all vPEs have to be mapped at all times + * for vSGI delivery + * + * (b) Or the ITSs do not use a list map, meaning that VMOVP is cheap enough + * and we're better off mapping all VPEs always + * + * If neither (a) nor (b) is true, then we map vPEs on demand. + * + */ +static bool gic_requires_eager_mapping(void) +{ + if (!its_list_map || gic_rdists->has_rvpeid) + return true; + + return false; +} + +static void its_map_vm(struct its_node *its, struct its_vm *vm) +{ + unsigned long flags; + + if (gic_requires_eager_mapping()) + return; + + raw_spin_lock_irqsave(&vmovp_lock, flags); + + /* + * If the VM wasn't mapped yet, iterate over the vpes and get + * them mapped now. + */ + vm->vlpi_count[its->list_nr]++; + + if (vm->vlpi_count[its->list_nr] == 1) { + int i; + + for (i = 0; i < vm->nr_vpes; i++) { + struct its_vpe *vpe = vm->vpes[i]; + struct irq_data *d = irq_get_irq_data(vpe->irq); + + /* Map the VPE to the first possible CPU */ + vpe->col_idx = cpumask_first(cpu_online_mask); + its_send_vmapp(its, vpe, true); + its_send_vinvall(its, vpe); + irq_data_update_effective_affinity(d, cpumask_of(vpe->col_idx)); + } + } + + raw_spin_unlock_irqrestore(&vmovp_lock, flags); +} + +static void its_unmap_vm(struct its_node *its, struct its_vm *vm) +{ + unsigned long flags; + + /* Not using the ITS list? Everything is always mapped. */ + if (gic_requires_eager_mapping()) + return; + + raw_spin_lock_irqsave(&vmovp_lock, flags); + + if (!--vm->vlpi_count[its->list_nr]) { + int i; + + for (i = 0; i < vm->nr_vpes; i++) + its_send_vmapp(its, vm->vpes[i], false); + } + + raw_spin_unlock_irqrestore(&vmovp_lock, flags); +} + +static int its_vlpi_map(struct irq_data *d, struct its_cmd_info *info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + int ret = 0; + + if (!info->map) + return -EINVAL; + + raw_spin_lock(&its_dev->event_map.vlpi_lock); + + if (!its_dev->event_map.vm) { + struct its_vlpi_map *maps; + + maps = kcalloc(its_dev->event_map.nr_lpis, sizeof(*maps), + GFP_ATOMIC); + if (!maps) { + ret = -ENOMEM; + goto out; + } + + its_dev->event_map.vm = info->map->vm; + its_dev->event_map.vlpi_maps = maps; + } else if (its_dev->event_map.vm != info->map->vm) { + ret = -EINVAL; + goto out; + } + + /* Get our private copy of the mapping information */ + its_dev->event_map.vlpi_maps[event] = *info->map; + + if (irqd_is_forwarded_to_vcpu(d)) { + /* Already mapped, move it around */ + its_send_vmovi(its_dev, event); + } else { + /* Ensure all the VPEs are mapped on this ITS */ + its_map_vm(its_dev->its, info->map->vm); + + /* + * Flag the interrupt as forwarded so that we can + * start poking the virtual property table. + */ + irqd_set_forwarded_to_vcpu(d); + + /* Write out the property to the prop table */ + lpi_write_config(d, 0xff, info->map->properties); + + /* Drop the physical mapping */ + its_send_discard(its_dev, event); + + /* and install the virtual one */ + its_send_vmapti(its_dev, event); + + /* Increment the number of VLPIs */ + its_dev->event_map.nr_vlpis++; + } + +out: + raw_spin_unlock(&its_dev->event_map.vlpi_lock); + return ret; +} + +static int its_vlpi_get(struct irq_data *d, struct its_cmd_info *info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_vlpi_map *map; + int ret = 0; + + raw_spin_lock(&its_dev->event_map.vlpi_lock); + + map = get_vlpi_map(d); + + if (!its_dev->event_map.vm || !map) { + ret = -EINVAL; + goto out; + } + + /* Copy our mapping information to the incoming request */ + *info->map = *map; + +out: + raw_spin_unlock(&its_dev->event_map.vlpi_lock); + return ret; +} + +static int its_vlpi_unmap(struct irq_data *d) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + int ret = 0; + + raw_spin_lock(&its_dev->event_map.vlpi_lock); + + if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) { + ret = -EINVAL; + goto out; + } + + /* Drop the virtual mapping */ + its_send_discard(its_dev, event); + + /* and restore the physical one */ + irqd_clr_forwarded_to_vcpu(d); + its_send_mapti(its_dev, d->hwirq, event); + lpi_update_config(d, 0xff, (LPI_PROP_DEFAULT_PRIO | + LPI_PROP_ENABLED | + LPI_PROP_GROUP1)); + + /* Potentially unmap the VM from this ITS */ + its_unmap_vm(its_dev->its, its_dev->event_map.vm); + + /* + * Drop the refcount and make the device available again if + * this was the last VLPI. + */ + if (!--its_dev->event_map.nr_vlpis) { + its_dev->event_map.vm = NULL; + kfree(its_dev->event_map.vlpi_maps); + } + +out: + raw_spin_unlock(&its_dev->event_map.vlpi_lock); + return ret; +} + +static int its_vlpi_prop_update(struct irq_data *d, struct its_cmd_info *info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + + if (!its_dev->event_map.vm || !irqd_is_forwarded_to_vcpu(d)) + return -EINVAL; + + if (info->cmd_type == PROP_UPDATE_AND_INV_VLPI) + lpi_update_config(d, 0xff, info->config); + else + lpi_write_config(d, 0xff, info->config); + its_vlpi_set_doorbell(d, !!(info->config & LPI_PROP_ENABLED)); + + return 0; +} + +static int its_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_cmd_info *info = vcpu_info; + + /* Need a v4 ITS */ + if (!is_v4(its_dev->its)) + return -EINVAL; + + /* Unmap request? */ + if (!info) + return its_vlpi_unmap(d); + + switch (info->cmd_type) { + case MAP_VLPI: + return its_vlpi_map(d, info); + + case GET_VLPI: + return its_vlpi_get(d, info); + + case PROP_UPDATE_VLPI: + case PROP_UPDATE_AND_INV_VLPI: + return its_vlpi_prop_update(d, info); + + default: + return -EINVAL; + } +} + +static struct irq_chip its_irq_chip = { + .name = "ITS", + .irq_mask = its_mask_irq, + .irq_unmask = its_unmask_irq, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = its_set_affinity, + .irq_compose_msi_msg = its_irq_compose_msi_msg, + .irq_set_irqchip_state = its_irq_set_irqchip_state, + .irq_retrigger = its_irq_retrigger, + .irq_set_vcpu_affinity = its_irq_set_vcpu_affinity, +}; + + +/* + * How we allocate LPIs: + * + * lpi_range_list contains ranges of LPIs that are to available to + * allocate from. To allocate LPIs, just pick the first range that + * fits the required allocation, and reduce it by the required + * amount. Once empty, remove the range from the list. + * + * To free a range of LPIs, add a free range to the list, sort it and + * merge the result if the new range happens to be adjacent to an + * already free block. + * + * The consequence of the above is that allocation is cost is low, but + * freeing is expensive. We assumes that freeing rarely occurs. + */ +#define ITS_MAX_LPI_NRBITS 16 /* 64K LPIs */ + +static DEFINE_MUTEX(lpi_range_lock); +static LIST_HEAD(lpi_range_list); + +struct lpi_range { + struct list_head entry; + u32 base_id; + u32 span; +}; + +static struct lpi_range *mk_lpi_range(u32 base, u32 span) +{ + struct lpi_range *range; + + range = kmalloc(sizeof(*range), GFP_KERNEL); + if (range) { + range->base_id = base; + range->span = span; + } + + return range; +} + +static int alloc_lpi_range(u32 nr_lpis, u32 *base) +{ + struct lpi_range *range, *tmp; + int err = -ENOSPC; + + mutex_lock(&lpi_range_lock); + + list_for_each_entry_safe(range, tmp, &lpi_range_list, entry) { + if (range->span >= nr_lpis) { + *base = range->base_id; + range->base_id += nr_lpis; + range->span -= nr_lpis; + + if (range->span == 0) { + list_del(&range->entry); + kfree(range); + } + + err = 0; + break; + } + } + + mutex_unlock(&lpi_range_lock); + + pr_debug("ITS: alloc %u:%u\n", *base, nr_lpis); + return err; +} + +static void merge_lpi_ranges(struct lpi_range *a, struct lpi_range *b) +{ + if (&a->entry == &lpi_range_list || &b->entry == &lpi_range_list) + return; + if (a->base_id + a->span != b->base_id) + return; + b->base_id = a->base_id; + b->span += a->span; + list_del(&a->entry); + kfree(a); +} + +static int free_lpi_range(u32 base, u32 nr_lpis) +{ + struct lpi_range *new, *old; + + new = mk_lpi_range(base, nr_lpis); + if (!new) + return -ENOMEM; + + mutex_lock(&lpi_range_lock); + + list_for_each_entry_reverse(old, &lpi_range_list, entry) { + if (old->base_id < base) + break; + } + /* + * old is the last element with ->base_id smaller than base, + * so new goes right after it. If there are no elements with + * ->base_id smaller than base, &old->entry ends up pointing + * at the head of the list, and inserting new it the start of + * the list is the right thing to do in that case as well. + */ + list_add(&new->entry, &old->entry); + /* + * Now check if we can merge with the preceding and/or + * following ranges. + */ + merge_lpi_ranges(old, new); + merge_lpi_ranges(new, list_next_entry(new, entry)); + + mutex_unlock(&lpi_range_lock); + return 0; +} + +static int __init its_lpi_init(u32 id_bits) +{ + u32 lpis = (1UL << id_bits) - 8192; + u32 numlpis; + int err; + + numlpis = 1UL << GICD_TYPER_NUM_LPIS(gic_rdists->gicd_typer); + + if (numlpis > 2 && !WARN_ON(numlpis > lpis)) { + lpis = numlpis; + pr_info("ITS: Using hypervisor restricted LPI range [%u]\n", + lpis); + } + + /* + * Initializing the allocator is just the same as freeing the + * full range of LPIs. + */ + err = free_lpi_range(8192, lpis); + pr_debug("ITS: Allocator initialized for %u LPIs\n", lpis); + return err; +} + +static unsigned long *its_lpi_alloc(int nr_irqs, u32 *base, int *nr_ids) +{ + unsigned long *bitmap = NULL; + int err = 0; + + do { + err = alloc_lpi_range(nr_irqs, base); + if (!err) + break; + + nr_irqs /= 2; + } while (nr_irqs > 0); + + if (!nr_irqs) + err = -ENOSPC; + + if (err) + goto out; + + bitmap = kcalloc(BITS_TO_LONGS(nr_irqs), sizeof (long), GFP_ATOMIC); + if (!bitmap) + goto out; + + *nr_ids = nr_irqs; + +out: + if (!bitmap) + *base = *nr_ids = 0; + + return bitmap; +} + +static void its_lpi_free(unsigned long *bitmap, u32 base, u32 nr_ids) +{ + WARN_ON(free_lpi_range(base, nr_ids)); + kfree(bitmap); +} + +static void gic_reset_prop_table(void *va) +{ + /* Priority 0xa0, Group-1, disabled */ + memset(va, LPI_PROP_DEFAULT_PRIO | LPI_PROP_GROUP1, LPI_PROPBASE_SZ); + + /* Make sure the GIC will observe the written configuration */ + gic_flush_dcache_to_poc(va, LPI_PROPBASE_SZ); +} + +static struct page *its_allocate_prop_table(gfp_t gfp_flags) +{ + struct page *prop_page; + + prop_page = alloc_pages(gfp_flags, get_order(LPI_PROPBASE_SZ)); + if (!prop_page) + return NULL; + + gic_reset_prop_table(page_address(prop_page)); + + return prop_page; +} + +static void its_free_prop_table(struct page *prop_page) +{ + free_pages((unsigned long)page_address(prop_page), + get_order(LPI_PROPBASE_SZ)); +} + +static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size) +{ + phys_addr_t start, end, addr_end; + u64 i; + + /* + * We don't bother checking for a kdump kernel as by + * construction, the LPI tables are out of this kernel's + * memory map. + */ + if (is_kdump_kernel()) + return true; + + addr_end = addr + size - 1; + + for_each_reserved_mem_range(i, &start, &end) { + if (addr >= start && addr_end <= end) + return true; + } + + /* Not found, not a good sign... */ + pr_warn("GIC-2500: Expected reserved range [%pa:%pa], not found\n", + &addr, &addr_end); + add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); + return false; +} + +static int gic_reserve_range(phys_addr_t addr, unsigned long size) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + return efi_mem_reserve_persistent(addr, size); + + return 0; +} + +static int __init its_setup_lpi_prop_table(void) +{ + if (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) { + u64 val; + + val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER); + lpi_id_bits = (val & GICR_PROPBASER_IDBITS_MASK) + 1; + + gic_rdists->prop_table_pa = val & GENMASK_ULL(51, 12); + gic_rdists->prop_table_va = memremap(gic_rdists->prop_table_pa, + LPI_PROPBASE_SZ, + MEMREMAP_WB); + gic_reset_prop_table(gic_rdists->prop_table_va); + } else { + struct page *page; + + lpi_id_bits = min_t(u32, + GICD_TYPER_ID_BITS(gic_rdists->gicd_typer), + ITS_MAX_LPI_NRBITS); + page = its_allocate_prop_table(GFP_NOWAIT); + if (!page) { + pr_err("Failed to allocate PROPBASE\n"); + return -ENOMEM; + } + + gic_rdists->prop_table_pa = page_to_phys(page); + gic_rdists->prop_table_va = page_address(page); + WARN_ON(gic_reserve_range(gic_rdists->prop_table_pa, + LPI_PROPBASE_SZ)); + } + + pr_info("GIC-2500: using LPI property table @%pa\n", + &gic_rdists->prop_table_pa); + + return its_lpi_init(lpi_id_bits); +} + +static const char *its_base_type_string[] = { + [GITS_BASER_TYPE_DEVICE] = "Devices", + [GITS_BASER_TYPE_VCPU] = "Virtual CPUs", + [GITS_BASER_TYPE_RESERVED3] = "Reserved (3)", + [GITS_BASER_TYPE_COLLECTION] = "Interrupt Collections", + [GITS_BASER_TYPE_RESERVED5] = "Reserved (5)", + [GITS_BASER_TYPE_RESERVED6] = "Reserved (6)", + [GITS_BASER_TYPE_RESERVED7] = "Reserved (7)", +}; + +static u64 its_read_baser(struct its_node *its, struct its_baser *baser) +{ + u32 idx = baser - its->tables; + + return gits_read_baser(its->base + GITS_BASER + (idx << 3)); +} + +static void its_write_baser(struct its_node *its, struct its_baser *baser, + u64 val) +{ + u32 idx = baser - its->tables; + + gits_write_baser(val, its->base + GITS_BASER + (idx << 3)); + baser->val = its_read_baser(its, baser); +} + +static int its_setup_baser(struct its_node *its, struct its_baser *baser, + u64 cache, u64 shr, u32 order, bool indirect) +{ + u64 val = its_read_baser(its, baser); + u64 esz = GITS_BASER_ENTRY_SIZE(val); + u64 type = GITS_BASER_TYPE(val); + u64 baser_phys, tmp; + u32 alloc_pages, psz; + struct page *page; + void *base; + + psz = baser->psz; + alloc_pages = (PAGE_ORDER_TO_SIZE(order) / psz); + if (alloc_pages > GITS_BASER_PAGES_MAX) { + pr_warn("ITS@%pa: %s too large, reduce ITS pages %u->%u\n", + &its->phys_base, its_base_type_string[type], + alloc_pages, GITS_BASER_PAGES_MAX); + alloc_pages = GITS_BASER_PAGES_MAX; + order = get_order(GITS_BASER_PAGES_MAX * psz); + } + + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, order); + if (!page) + return -ENOMEM; + + base = (void *)page_address(page); + baser_phys = virt_to_phys(base); + + /* Check if the physical address of the memory is above 48bits */ + if (IS_ENABLED(CONFIG_ARM64_64K_PAGES) && (baser_phys >> 48)) { + + /* 52bit PA is supported only when PageSize=64K */ + if (psz != SZ_64K) { + pr_err("ITS: no 52bit PA support when psz=%d\n", psz); + free_pages((unsigned long)base, order); + return -ENXIO; + } + + /* Convert 52bit PA to 48bit field */ + baser_phys = GITS_BASER_PHYS_52_to_48(baser_phys); + } + +retry_baser: + val = (baser_phys | + (type << GITS_BASER_TYPE_SHIFT) | + ((esz - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | + ((alloc_pages - 1) << GITS_BASER_PAGES_SHIFT) | + cache | + shr | + GITS_BASER_VALID); + + val |= indirect ? GITS_BASER_INDIRECT : 0x0; + + switch (psz) { + case SZ_4K: + val |= GITS_BASER_PAGE_SIZE_4K; + break; + case SZ_16K: + val |= GITS_BASER_PAGE_SIZE_16K; + break; + case SZ_64K: + val |= GITS_BASER_PAGE_SIZE_64K; + break; + } + + its_write_baser(its, baser, val); + tmp = baser->val; + + if ((val ^ tmp) & GITS_BASER_SHAREABILITY_MASK) { + /* + * Shareability didn't stick. Just use + * whatever the read reported, which is likely + * to be the only thing this redistributor + * supports. If that's zero, make it + * non-cacheable as well. + */ + shr = tmp & GITS_BASER_SHAREABILITY_MASK; + if (!shr) { + cache = GITS_BASER_nC; + gic_flush_dcache_to_poc(base, PAGE_ORDER_TO_SIZE(order)); + } + goto retry_baser; + } + + if (val != tmp) { + pr_err("ITS@%pa: %s doesn't stick: %llx %llx\n", + &its->phys_base, its_base_type_string[type], + val, tmp); + free_pages((unsigned long)base, order); + return -ENXIO; + } + + baser->order = order; + baser->base = base; + baser->psz = psz; + tmp = indirect ? GITS_LVL1_ENTRY_SIZE : esz; + + pr_info("ITS@%pa: allocated %d %s @%lx (%s, esz %d, psz %dK, shr %d)\n", + &its->phys_base, (int)(PAGE_ORDER_TO_SIZE(order) / (int)tmp), + its_base_type_string[type], + (unsigned long)virt_to_phys(base), + indirect ? "indirect" : "flat", (int)esz, + psz / SZ_1K, (int)shr >> GITS_BASER_SHAREABILITY_SHIFT); + + return 0; +} + +static bool its_parse_indirect_baser(struct its_node *its, + struct its_baser *baser, + u32 *order, u32 ids) +{ + u64 tmp = its_read_baser(its, baser); + u64 type = GITS_BASER_TYPE(tmp); + u64 esz = GITS_BASER_ENTRY_SIZE(tmp); + u64 val = GITS_BASER_InnerShareable | GITS_BASER_RaWaWb; + u32 new_order = *order; + u32 psz = baser->psz; + bool indirect = false; + + /* No need to enable Indirection if memory requirement < (psz*2)bytes */ + if ((esz << ids) > (psz * 2)) { + /* + * Find out whether hw supports a single or two-level table by + * table by reading bit at offset '62' after writing '1' to it. + */ + its_write_baser(its, baser, val | GITS_BASER_INDIRECT); + indirect = !!(baser->val & GITS_BASER_INDIRECT); + + if (indirect) { + /* + * The size of the lvl2 table is equal to ITS page size + * which is 'psz'. For computing lvl1 table size, + * subtract ID bits that sparse lvl2 table from 'ids' + * which is reported by ITS hardware times lvl1 table + * entry size. + */ + ids -= ilog2(psz / (int)esz); + esz = GITS_LVL1_ENTRY_SIZE; + } + } + + /* + * Allocate as many entries as required to fit the + * range of device IDs that the ITS can grok... The ID + * space being incredibly sparse, this results in a + * massive waste of memory if two-level device table + * feature is not supported by hardware. + */ + new_order = max_t(u32, get_order(esz << ids), new_order); + if (new_order >= MAX_ORDER) { + new_order = MAX_ORDER - 1; + ids = ilog2(PAGE_ORDER_TO_SIZE(new_order) / (int)esz); + pr_warn("ITS@%pa: %s Table too large, reduce ids %llu->%u\n", + &its->phys_base, its_base_type_string[type], + device_ids(its), ids); + } + + *order = new_order; + + return indirect; +} + +static u32 compute_common_aff(u64 val) +{ + u32 aff, clpiaff; + + aff = FIELD_GET(GICR_TYPER_AFFINITY, val); + clpiaff = FIELD_GET(GICR_TYPER_COMMON_LPI_AFF, val); + + return aff & ~(GENMASK(31, 0) >> (clpiaff * 8)); +} + +static u32 compute_its_aff(struct its_node *its) +{ + u64 val; + u32 svpet; + + /* + * Reencode the ITS SVPET and MPIDR as a GICR_TYPER, and compute + * the resulting affinity. We then use that to see if this match + * our own affinity. + */ + svpet = FIELD_GET(GITS_TYPER_SVPET, its->typer); + val = FIELD_PREP(GICR_TYPER_COMMON_LPI_AFF, svpet); + val |= FIELD_PREP(GICR_TYPER_AFFINITY, its->mpidr); + return compute_common_aff(val); +} + +static struct its_node *find_sibling_its(struct its_node *cur_its) +{ + struct its_node *its; + u32 aff; + + if (!FIELD_GET(GITS_TYPER_SVPET, cur_its->typer)) + return NULL; + + aff = compute_its_aff(cur_its); + + list_for_each_entry(its, &its_nodes, entry) { + u64 baser; + + if (!is_v4_1(its) || its == cur_its) + continue; + + if (!FIELD_GET(GITS_TYPER_SVPET, its->typer)) + continue; + + if (aff != compute_its_aff(its)) + continue; + + /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */ + baser = its->tables[2].val; + if (!(baser & GITS_BASER_VALID)) + continue; + + return its; + } + + return NULL; +} + +static void its_free_tables(struct its_node *its) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + if (its->tables[i].base) { + free_pages((unsigned long)its->tables[i].base, + its->tables[i].order); + its->tables[i].base = NULL; + } + } +} + +static int its_probe_baser_psz(struct its_node *its, struct its_baser *baser) +{ + u64 psz = SZ_64K; + + while (psz) { + u64 val, gpsz; + + val = its_read_baser(its, baser); + val &= ~GITS_BASER_PAGE_SIZE_MASK; + + switch (psz) { + case SZ_64K: + gpsz = GITS_BASER_PAGE_SIZE_64K; + break; + case SZ_16K: + gpsz = GITS_BASER_PAGE_SIZE_16K; + break; + case SZ_4K: + default: + gpsz = GITS_BASER_PAGE_SIZE_4K; + break; + } + + gpsz >>= GITS_BASER_PAGE_SIZE_SHIFT; + + val |= FIELD_PREP(GITS_BASER_PAGE_SIZE_MASK, gpsz); + its_write_baser(its, baser, val); + + if (FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser->val) == gpsz) + break; + + switch (psz) { + case SZ_64K: + psz = SZ_16K; + break; + case SZ_16K: + psz = SZ_4K; + break; + case SZ_4K: + default: + return -1; + } + } + + baser->psz = psz; + return 0; +} + +static int its_alloc_tables(struct its_node *its) +{ + u64 shr = GITS_BASER_InnerShareable; + u64 cache = GITS_BASER_RaWaWb; + int err, i; + + if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_22375) + /* erratum 24313: ignore memory access type */ + cache = GITS_BASER_nCnB; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + struct its_baser *baser = its->tables + i; + u64 val = its_read_baser(its, baser); + u64 type = GITS_BASER_TYPE(val); + bool indirect = false; + u32 order; + + if (type == GITS_BASER_TYPE_NONE) + continue; + + if (its_probe_baser_psz(its, baser)) { + its_free_tables(its); + return -ENXIO; + } + + order = get_order(baser->psz); + + switch (type) { + case GITS_BASER_TYPE_DEVICE: + indirect = its_parse_indirect_baser(its, baser, &order, + device_ids(its)); + break; + + case GITS_BASER_TYPE_VCPU: + if (is_v4_1(its)) { + struct its_node *sibling; + + WARN_ON(i != 2); + if ((sibling = find_sibling_its(its))) { + *baser = sibling->tables[2]; + its_write_baser(its, baser, baser->val); + continue; + } + } + + indirect = its_parse_indirect_baser(its, baser, &order, + ITS_MAX_VPEID_BITS); + break; + } + + err = its_setup_baser(its, baser, cache, shr, order, indirect); + if (err < 0) { + its_free_tables(its); + return err; + } + + /* Update settings which will be used for next BASERn */ + cache = baser->val & GITS_BASER_CACHEABILITY_MASK; + shr = baser->val & GITS_BASER_SHAREABILITY_MASK; + } + + return 0; +} + +static u64 inherit_vpe_l1_table_from_its(void) +{ + struct its_node *its; + u64 val; + u32 aff; + + val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER); + aff = compute_common_aff(val); + + list_for_each_entry(its, &its_nodes, entry) { + u64 baser, addr; + + if (!is_v4_1(its)) + continue; + + if (!FIELD_GET(GITS_TYPER_SVPET, its->typer)) + continue; + + if (aff != compute_its_aff(its)) + continue; + + /* GICv4.1 guarantees that the vPE table is GITS_BASER2 */ + baser = its->tables[2].val; + if (!(baser & GITS_BASER_VALID)) + continue; + + /* We have a winner! */ + gic_data_rdist()->vpe_l1_base = its->tables[2].base; + + val = GICR_VPROPBASER_4_1_VALID; + if (baser & GITS_BASER_INDIRECT) + val |= GICR_VPROPBASER_4_1_INDIRECT; + val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, + FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser)); + switch (FIELD_GET(GITS_BASER_PAGE_SIZE_MASK, baser)) { + case GIC_PAGE_SIZE_64K: + addr = GITS_BASER_ADDR_48_to_52(baser); + break; + default: + addr = baser & GENMASK_ULL(47, 12); + break; + } + val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, addr >> 12); + val |= FIELD_PREP(GICR_VPROPBASER_SHAREABILITY_MASK, + FIELD_GET(GITS_BASER_SHAREABILITY_MASK, baser)); + val |= FIELD_PREP(GICR_VPROPBASER_INNER_CACHEABILITY_MASK, + FIELD_GET(GITS_BASER_INNER_CACHEABILITY_MASK, baser)); + val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, GITS_BASER_NR_PAGES(baser) - 1); + + return val; + } + + return 0; +} + +static u64 inherit_vpe_l1_table_from_rd(cpumask_t **mask) +{ + u32 aff; + u64 val; + int cpu; + + val = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER); + aff = compute_common_aff(val); + + for_each_possible_cpu(cpu) { + void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base; + + if (!base || cpu == smp_processor_id()) + continue; + + val = gic_read_typer(base + GICR_TYPER); + if (aff != compute_common_aff(val)) + continue; + + /* + * At this point, we have a victim. This particular CPU + * has already booted, and has an affinity that matches + * ours wrt CommonLPIAff. Let's use its own VPROPBASER. + * Make sure we don't write the Z bit in that case. + */ + val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER); + val &= ~GICR_VPROPBASER_4_1_Z; + + gic_data_rdist()->vpe_l1_base = gic_data_rdist_cpu(cpu)->vpe_l1_base; + *mask = gic_data_rdist_cpu(cpu)->vpe_table_mask; + + return val; + } + + return 0; +} + +static bool allocate_vpe_l2_table(int cpu, u32 id) +{ + void __iomem *base = gic_data_rdist_cpu(cpu)->rd_base; + unsigned int psz, esz, idx, npg, gpsz; + u64 val; + struct page *page; + __le64 *table; + + if (!gic_rdists->has_rvpeid) + return true; + + /* Skip non-present CPUs */ + if (!base) + return true; + + val = gicr_read_vpropbaser(base + SZ_128K + GICR_VPROPBASER); + + esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val) + 1; + gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val); + npg = FIELD_GET(GICR_VPROPBASER_4_1_SIZE, val) + 1; + + switch (gpsz) { + default: + WARN_ON(1); + fallthrough; + case GIC_PAGE_SIZE_4K: + psz = SZ_4K; + break; + case GIC_PAGE_SIZE_16K: + psz = SZ_16K; + break; + case GIC_PAGE_SIZE_64K: + psz = SZ_64K; + break; + } + + /* Don't allow vpe_id that exceeds single, flat table limit */ + if (!(val & GICR_VPROPBASER_4_1_INDIRECT)) + return (id < (npg * psz / (esz * SZ_8))); + + /* Compute 1st level table index & check if that exceeds table limit */ + idx = id >> ilog2(psz / (esz * SZ_8)); + if (idx >= (npg * psz / GITS_LVL1_ENTRY_SIZE)) + return false; + + table = gic_data_rdist_cpu(cpu)->vpe_l1_base; + + /* Allocate memory for 2nd level table */ + if (!table[idx]) { + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, get_order(psz)); + if (!page) + return false; + + /* Flush Lvl2 table to PoC if hw doesn't support coherency */ + if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK)) + gic_flush_dcache_to_poc(page_address(page), psz); + + table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID); + + /* Flush Lvl1 entry to PoC if hw doesn't support coherency */ + if (!(val & GICR_VPROPBASER_SHAREABILITY_MASK)) + gic_flush_dcache_to_poc(table + idx, GITS_LVL1_ENTRY_SIZE); + + /* Ensure updated table contents are visible to RD hardware */ + dsb(sy); + } + + return true; +} + +static int allocate_vpe_l1_table(void) +{ + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + u64 val, gpsz, npg, pa; + unsigned int psz = SZ_64K; + unsigned int np, epp, esz; + struct page *page; + + if (!gic_rdists->has_rvpeid) + return 0; + + /* + * if VPENDBASER.Valid is set, disable any previously programmed + * VPE by setting PendingLast while clearing Valid. This has the + * effect of making sure no doorbell will be generated and we can + * then safely clear VPROPBASER.Valid. + */ + if (gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER) & GICR_VPENDBASER_Valid) + gicr_write_vpendbaser(GICR_VPENDBASER_PendingLast, + vlpi_base + GICR_VPENDBASER); + + /* + * If we can inherit the configuration from another RD, let's do + * so. Otherwise, we have to go through the allocation process. We + * assume that all RDs have the exact same requirements, as + * nothing will work otherwise. + */ + val = inherit_vpe_l1_table_from_rd(&gic_data_rdist()->vpe_table_mask); + if (val & GICR_VPROPBASER_4_1_VALID) + goto out; + + gic_data_rdist()->vpe_table_mask = kzalloc(sizeof(cpumask_t), GFP_ATOMIC); + if (!gic_data_rdist()->vpe_table_mask) + return -ENOMEM; + + val = inherit_vpe_l1_table_from_its(); + if (val & GICR_VPROPBASER_4_1_VALID) + goto out; + + /* First probe the page size */ + val = FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, GIC_PAGE_SIZE_64K); + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + val = gicr_read_vpropbaser(vlpi_base + GICR_VPROPBASER); + gpsz = FIELD_GET(GICR_VPROPBASER_4_1_PAGE_SIZE, val); + esz = FIELD_GET(GICR_VPROPBASER_4_1_ENTRY_SIZE, val); + + switch (gpsz) { + default: + gpsz = GIC_PAGE_SIZE_4K; + fallthrough; + case GIC_PAGE_SIZE_4K: + psz = SZ_4K; + break; + case GIC_PAGE_SIZE_16K: + psz = SZ_16K; + break; + case GIC_PAGE_SIZE_64K: + psz = SZ_64K; + break; + } + + /* + * Start populating the register from scratch, including RO fields + * (which we want to print in debug cases...) + */ + val = 0; + val |= FIELD_PREP(GICR_VPROPBASER_4_1_PAGE_SIZE, gpsz); + val |= FIELD_PREP(GICR_VPROPBASER_4_1_ENTRY_SIZE, esz); + + /* How many entries per GIC page? */ + esz++; + epp = psz / (esz * SZ_8); + + /* + * If we need more than just a single L1 page, flag the table + * as indirect and compute the number of required L1 pages. + */ + if (epp < ITS_MAX_VPEID) { + int nl2; + + val |= GICR_VPROPBASER_4_1_INDIRECT; + + /* Number of L2 pages required to cover the VPEID space */ + nl2 = DIV_ROUND_UP(ITS_MAX_VPEID, epp); + + /* Number of L1 pages to point to the L2 pages */ + npg = DIV_ROUND_UP(nl2 * SZ_8, psz); + } else { + npg = 1; + } + + val |= FIELD_PREP(GICR_VPROPBASER_4_1_SIZE, npg - 1); + + /* Right, that's the number of CPU pages we need for L1 */ + np = DIV_ROUND_UP(npg * psz, PAGE_SIZE); + + pr_debug("np = %d, npg = %lld, psz = %d, epp = %d, esz = %d\n", + np, npg, psz, epp, esz); + page = alloc_pages(GFP_ATOMIC | __GFP_ZERO, get_order(np * PAGE_SIZE)); + if (!page) + return -ENOMEM; + + gic_data_rdist()->vpe_l1_base = page_address(page); + pa = virt_to_phys(page_address(page)); + WARN_ON(!IS_ALIGNED(pa, psz)); + + val |= FIELD_PREP(GICR_VPROPBASER_4_1_ADDR, pa >> 12); + val |= GICR_VPROPBASER_RaWb; + val |= GICR_VPROPBASER_InnerShareable; + val |= GICR_VPROPBASER_4_1_Z; + val |= GICR_VPROPBASER_4_1_VALID; + +out: + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + cpumask_set_cpu(smp_processor_id(), gic_data_rdist()->vpe_table_mask); + + pr_debug("CPU%d: VPROPBASER = %llx %*pbl\n", + smp_processor_id(), val, + cpumask_pr_args(gic_data_rdist()->vpe_table_mask)); + + return 0; +} + +static int its_alloc_collections(struct its_node *its) +{ + int i; + + its->collections = kcalloc(nr_cpu_ids, sizeof(*its->collections), + GFP_KERNEL); + if (!its->collections) + return -ENOMEM; + + for (i = 0; i < nr_cpu_ids; i++) + its->collections[i].target_address = ~0ULL; + + return 0; +} + +static struct page *its_allocate_pending_table(gfp_t gfp_flags) +{ + struct page *pend_page; + + pend_page = alloc_pages(gfp_flags | __GFP_ZERO, + get_order(LPI_PENDBASE_SZ)); + if (!pend_page) + return NULL; + + /* Make sure the GIC will observe the zero-ed page */ + gic_flush_dcache_to_poc(page_address(pend_page), LPI_PENDBASE_SZ); + + return pend_page; +} + +static void its_free_pending_table(struct page *pt) +{ + free_pages((unsigned long)page_address(pt), get_order(LPI_PENDBASE_SZ)); +} + +/* + * Booting with kdump and LPIs enabled is generally fine. Any other + * case is wrong in the absence of firmware/EFI support. + */ +static bool enabled_lpis_allowed(void) +{ + phys_addr_t addr; + u64 val; + + /* Check whether the property table is in a reserved region */ + val = gicr_read_propbaser(gic_data_rdist_rd_base() + GICR_PROPBASER); + addr = val & GENMASK_ULL(51, 12); + + return gic_check_reserved_range(addr, LPI_PROPBASE_SZ); +} + +static int __init allocate_lpi_tables(void) +{ + u64 val; + int err, cpu; + + /* + * If LPIs are enabled while we run this from the boot CPU, + * flag the RD tables as pre-allocated if the stars do align. + */ + val = readl_relaxed(gic_data_rdist_rd_base() + GICR_CTLR); + if ((val & GICR_CTLR_ENABLE_LPIS) && enabled_lpis_allowed()) { + gic_rdists->flags |= (RDIST_FLAGS_RD_TABLES_PREALLOCATED | + RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING); + pr_info("GIC-2500: Using preallocated redistributor tables\n"); + } + + err = its_setup_lpi_prop_table(); + if (err) + return err; + + /* + * We allocate all the pending tables anyway, as we may have a + * mix of RDs that have had LPIs enabled, and some that + * don't. We'll free the unused ones as each CPU comes online. + */ + for_each_possible_cpu(cpu) { + struct page *pend_page; + + pend_page = its_allocate_pending_table(GFP_NOWAIT); + if (!pend_page) { + pr_err("Failed to allocate PENDBASE for CPU%d\n", cpu); + return -ENOMEM; + } + + gic_data_rdist_cpu(cpu)->pend_page = pend_page; + } + + return 0; +} + +static u64 its_clear_vpend_valid(void __iomem *vlpi_base, u64 clr, u64 set) +{ + u32 count = 1000000; /* 1s! */ + bool clean; + u64 val; + + val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + val &= ~GICR_VPENDBASER_Valid; + val &= ~clr; + val |= set; + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + do { + val = gicr_read_vpendbaser(vlpi_base + GICR_VPENDBASER); + clean = !(val & GICR_VPENDBASER_Dirty); + if (!clean) { + count--; + cpu_relax(); + udelay(1); + } + } while (!clean && count); + + if (unlikely(val & GICR_VPENDBASER_Dirty)) { + pr_err_ratelimited("ITS virtual pending table not cleaning\n"); + val |= GICR_VPENDBASER_PendingLast; + } + + return val; +} + +static void its_cpu_init_lpis(void) +{ + void __iomem *rbase = gic_data_rdist_rd_base(); + struct page *pend_page; + phys_addr_t paddr; + u64 val, tmp; + + if (gic_data_rdist()->lpi_enabled) + return; + + val = readl_relaxed(rbase + GICR_CTLR); + if ((gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED) && + (val & GICR_CTLR_ENABLE_LPIS)) { + /* + * Check that we get the same property table on all + * RDs. If we don't, this is hopeless. + */ + paddr = gicr_read_propbaser(rbase + GICR_PROPBASER); + paddr &= GENMASK_ULL(51, 12); + if (WARN_ON(gic_rdists->prop_table_pa != paddr)) + add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); + + paddr = gicr_read_pendbaser(rbase + GICR_PENDBASER); + paddr &= GENMASK_ULL(51, 16); + + WARN_ON(!gic_check_reserved_range(paddr, LPI_PENDBASE_SZ)); + its_free_pending_table(gic_data_rdist()->pend_page); + gic_data_rdist()->pend_page = NULL; + + goto out; + } + + pend_page = gic_data_rdist()->pend_page; + paddr = page_to_phys(pend_page); + WARN_ON(gic_reserve_range(paddr, LPI_PENDBASE_SZ)); + + /* set PROPBASE */ + val = (gic_rdists->prop_table_pa | + GICR_PROPBASER_InnerShareable | + GICR_PROPBASER_RaWaWb | + ((LPI_NRBITS - 1) & GICR_PROPBASER_IDBITS_MASK)); + + gicr_write_propbaser(val, rbase + GICR_PROPBASER); + tmp = gicr_read_propbaser(rbase + GICR_PROPBASER); + + if ((tmp ^ val) & GICR_PROPBASER_SHAREABILITY_MASK) { + if (!(tmp & GICR_PROPBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must + * remove the cacheability attributes as + * well. + */ + val &= ~(GICR_PROPBASER_SHAREABILITY_MASK | + GICR_PROPBASER_CACHEABILITY_MASK); + val |= GICR_PROPBASER_nC; + gicr_write_propbaser(val, rbase + GICR_PROPBASER); + } + pr_info_once("GIC: using cache flushing for LPI property table\n"); + gic_rdists->flags |= RDIST_FLAGS_PROPBASE_NEEDS_FLUSHING; + } + + /* set PENDBASE */ + val = (page_to_phys(pend_page) | + GICR_PENDBASER_InnerShareable | + GICR_PENDBASER_RaWaWb); + + gicr_write_pendbaser(val, rbase + GICR_PENDBASER); + tmp = gicr_read_pendbaser(rbase + GICR_PENDBASER); + + if (!(tmp & GICR_PENDBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must remove the + * cacheability attributes as well. + */ + val &= ~(GICR_PENDBASER_SHAREABILITY_MASK | + GICR_PENDBASER_CACHEABILITY_MASK); + val |= GICR_PENDBASER_nC; + gicr_write_pendbaser(val, rbase + GICR_PENDBASER); + } + + /* Enable LPIs */ + val = readl_relaxed(rbase + GICR_CTLR); + val |= GICR_CTLR_ENABLE_LPIS; + writel_relaxed(val, rbase + GICR_CTLR); + + if (gic_rdists->has_vlpis && !gic_rdists->has_rvpeid) { + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + + /* + * It's possible for CPU to receive VLPIs before it is + * sheduled as a vPE, especially for the first CPU, and the + * VLPI with INTID larger than 2^(IDbits+1) will be considered + * as out of range and dropped by GIC. + * So we initialize IDbits to known value to avoid VLPI drop. + */ + val = (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; + pr_debug("GICv4: CPU%d: Init IDbits to 0x%llx for GICR_VPROPBASER\n", + smp_processor_id(), val); + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + + /* + * Also clear Valid bit of GICR_VPENDBASER, in case some + * ancient programming gets left in and has possibility of + * corrupting memory. + */ + val = its_clear_vpend_valid(vlpi_base, 0, 0); + } + + if (allocate_vpe_l1_table()) { + /* + * If the allocation has failed, we're in massive trouble. + * Disable direct injection, and pray that no VM was + * already running... + */ + gic_rdists->has_rvpeid = false; + gic_rdists->has_vlpis = false; + } + + /* Make sure the GIC has seen the above */ + dsb(sy); +out: + gic_data_rdist()->lpi_enabled = true; + pr_info("GIC-2500: CPU%d: using %s LPI pending table @%pa\n", + smp_processor_id(), + gic_data_rdist()->pend_page ? "allocated" : "reserved", + &paddr); +} + +static void its_cpu_init_collection(struct its_node *its) +{ + int cpu = smp_processor_id(); + u64 target; + unsigned long mpid; + phys_addr_t its_phys_base; + unsigned long skt_id; + + /* avoid cross node collections and its mapping */ + if (its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144) { + struct device_node *cpu_node; + + cpu_node = of_get_cpu_node(cpu, NULL); + if (its->numa_node != NUMA_NO_NODE && + its->numa_node != of_node_to_nid(cpu_node)) + return; + } + + mpid = cpu_logical_map(cpu); + its_phys_base = its->phys_base; + skt_id = (its_phys_base >> 41) & 0x7; + + /* + * We now have to bind each collection to its target + * redistributor. + */ + if (gic_read_typer(its->base + GITS_TYPER) & GITS_TYPER_PTA) { + /* + * This ITS wants the physical address of the + * redistributor. + */ + target = gic_data_rdist()->phys_base; + } else { + /* This ITS wants a linear CPU number. */ + target = gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER); + target = GICR_TYPER_CPU_NUMBER(target) << 16; + } + + /* Perform collection mapping */ + its->collections[cpu].target_address = target; + its->collections[cpu].col_id = cpu % 64; + + its_send_mapc(its, &its->collections[cpu], 1); + its_send_invall(its, &its->collections[cpu]); +} + +static void its_cpu_init_collections(void) +{ + struct its_node *its; + + raw_spin_lock(&its_lock); + + list_for_each_entry(its, &its_nodes, entry) + its_cpu_init_collection(its); + + raw_spin_unlock(&its_lock); +} + +static struct its_device *its_find_device(struct its_node *its, u32 dev_id) +{ + struct its_device *its_dev = NULL, *tmp; + unsigned long flags; + + raw_spin_lock_irqsave(&its->lock, flags); + + list_for_each_entry(tmp, &its->its_device_list, entry) { + if (tmp->device_id == dev_id) { + its_dev = tmp; + break; + } + } + + raw_spin_unlock_irqrestore(&its->lock, flags); + + return its_dev; +} + +static struct its_baser *its_get_baser(struct its_node *its, u32 type) +{ + int i; + + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + if (GITS_BASER_TYPE(its->tables[i].val) == type) + return &its->tables[i]; + } + + return NULL; +} + +static bool its_alloc_table_entry(struct its_node *its, + struct its_baser *baser, u32 id) +{ + struct page *page; + u32 esz, idx; + __le64 *table; + + /* Don't allow device id that exceeds single, flat table limit */ + esz = GITS_BASER_ENTRY_SIZE(baser->val); + if (!(baser->val & GITS_BASER_INDIRECT)) + return (id < (PAGE_ORDER_TO_SIZE(baser->order) / esz)); + + /* Compute 1st level table index & check if that exceeds table limit */ + idx = id >> ilog2(baser->psz / esz); + if (idx >= (PAGE_ORDER_TO_SIZE(baser->order) / GITS_LVL1_ENTRY_SIZE)) + return false; + + table = baser->base; + + /* Allocate memory for 2nd level table */ + if (!table[idx]) { + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + get_order(baser->psz)); + if (!page) + return false; + + /* Flush Lvl2 table to PoC if hw doesn't support coherency */ + if (!(baser->val & GITS_BASER_SHAREABILITY_MASK)) + gic_flush_dcache_to_poc(page_address(page), baser->psz); + + table[idx] = cpu_to_le64(page_to_phys(page) | GITS_BASER_VALID); + + /* Flush Lvl1 entry to PoC if hw doesn't support coherency */ + if (!(baser->val & GITS_BASER_SHAREABILITY_MASK)) + gic_flush_dcache_to_poc(table + idx, GITS_LVL1_ENTRY_SIZE); + + /* Ensure updated table contents are visible to ITS hardware */ + dsb(sy); + } + + return true; +} + +static bool its_alloc_device_table(struct its_node *its, u32 dev_id) +{ + struct its_baser *baser; + + baser = its_get_baser(its, GITS_BASER_TYPE_DEVICE); + + /* Don't allow device id that exceeds ITS hardware limit */ + if (!baser) + return (ilog2(dev_id) < device_ids(its)); + + return its_alloc_table_entry(its, baser, dev_id); +} + +static bool its_alloc_vpe_table(u32 vpe_id) +{ + struct its_node *its; + int cpu; + + /* + * Make sure the L2 tables are allocated on *all* v4 ITSs. We + * could try and only do it on ITSs corresponding to devices + * that have interrupts targeted at this VPE, but the + * complexity becomes crazy (and you have tons of memory + * anyway, right?). + */ + list_for_each_entry(its, &its_nodes, entry) { + struct its_baser *baser; + + if (!is_v4(its)) + continue; + + baser = its_get_baser(its, GITS_BASER_TYPE_VCPU); + if (!baser) + return false; + + if (!its_alloc_table_entry(its, baser, vpe_id)) + return false; + } + + /* Non v4.1? No need to iterate RDs and go back early. */ + if (!gic_rdists->has_rvpeid) + return true; + + /* + * Make sure the L2 tables are allocated for all copies of + * the L1 table on *all* v4.1 RDs. + */ + for_each_possible_cpu(cpu) { + if (!allocate_vpe_l2_table(cpu, vpe_id)) + return false; + } + + return true; +} + +static struct its_device *its_create_device(struct its_node *its, u32 dev_id, + int nvecs, bool alloc_lpis) +{ + struct its_device *dev; + unsigned long *lpi_map = NULL; + unsigned long flags; + u16 *col_map = NULL; + void *itt; + int lpi_base; + int nr_lpis; + int nr_ites; + int sz; + + if (!its_alloc_device_table(its, dev_id)) + return NULL; + + if (WARN_ON(!is_power_of_2(nvecs))) + nvecs = roundup_pow_of_two(nvecs); + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + /* + * Even if the device wants a single LPI, the ITT must be + * sized as a power of two (and you need at least one bit...). + */ + nr_ites = max(2, nvecs); + sz = nr_ites * (FIELD_GET(GITS_TYPER_ITT_ENTRY_SIZE, its->typer) + 1); + sz = max(sz, ITS_ITT_ALIGN) + ITS_ITT_ALIGN - 1; + itt = kzalloc_node(sz, GFP_KERNEL, its->numa_node); + if (alloc_lpis) { + lpi_map = its_lpi_alloc(nvecs, &lpi_base, &nr_lpis); + if (lpi_map) + col_map = kcalloc(nr_lpis, sizeof(*col_map), + GFP_KERNEL); + } else { + col_map = kcalloc(nr_ites, sizeof(*col_map), GFP_KERNEL); + nr_lpis = 0; + lpi_base = 0; + } + + if (!dev || !itt || !col_map || (!lpi_map && alloc_lpis)) { + kfree(dev); + kfree(itt); + kfree(lpi_map); + kfree(col_map); + return NULL; + } + + gic_flush_dcache_to_poc(itt, sz); + + dev->its = its; + dev->itt = itt; + dev->nr_ites = nr_ites; + dev->event_map.lpi_map = lpi_map; + dev->event_map.col_map = col_map; + dev->event_map.lpi_base = lpi_base; + dev->event_map.nr_lpis = nr_lpis; + raw_spin_lock_init(&dev->event_map.vlpi_lock); + dev->device_id = dev_id; + INIT_LIST_HEAD(&dev->entry); + + raw_spin_lock_irqsave(&its->lock, flags); + list_add(&dev->entry, &its->its_device_list); + raw_spin_unlock_irqrestore(&its->lock, flags); + + /* Map device to its ITT */ + its_send_mapd(dev, 1); + + return dev; +} + +static void its_free_device(struct its_device *its_dev) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&its_dev->its->lock, flags); + list_del(&its_dev->entry); + raw_spin_unlock_irqrestore(&its_dev->its->lock, flags); + kfree(its_dev->event_map.col_map); + kfree(its_dev->itt); + kfree(its_dev); +} + +static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq) +{ + int idx; + + /* Find a free LPI region in lpi_map and allocate them. */ + idx = bitmap_find_free_region(dev->event_map.lpi_map, + dev->event_map.nr_lpis, + get_count_order(nvecs)); + if (idx < 0) + return -ENOSPC; + + *hwirq = dev->event_map.lpi_base + idx; + + return 0; +} + +static int its_msi_prepare(struct irq_domain *domain, struct device *dev, + int nvec, msi_alloc_info_t *info) +{ + struct its_node *its; + struct its_device *its_dev; + struct msi_domain_info *msi_info; + u32 dev_id; + int err = 0; + + /* + * We ignore "dev" entirely, and rely on the dev_id that has + * been passed via the scratchpad. This limits this domain's + * usefulness to upper layers that definitely know that they + * are built on top of the ITS. + */ + dev_id = info->scratchpad[0].ul; + + msi_info = msi_get_domain_info(domain); + its = msi_info->data; + + if (!gic_rdists->has_direct_lpi && + vpe_proxy.dev && + vpe_proxy.dev->its == its && + dev_id == vpe_proxy.dev->device_id) { + /* Bad luck. Get yourself a better implementation */ + WARN_ONCE(1, "DevId %x clashes with GICv4 VPE proxy device\n", + dev_id); + return -EINVAL; + } + + mutex_lock(&its->dev_alloc_lock); + its_dev = its_find_device(its, dev_id); + if (its_dev) { + /* + * We already have seen this ID, probably through + * another alias (PCI bridge of some sort). No need to + * create the device. + */ + its_dev->shared = true; + pr_debug("Reusing ITT for devID %x\n", dev_id); + goto out; + } + + its_dev = its_create_device(its, dev_id, nvec, true); + if (!its_dev) { + err = -ENOMEM; + goto out; + } + + pr_debug("ITT %d entries, %d bits\n", nvec, ilog2(nvec)); +out: + mutex_unlock(&its->dev_alloc_lock); + info->scratchpad[0].ptr = its_dev; + return err; +} + +static struct msi_domain_ops its_msi_domain_ops = { + .msi_prepare = its_msi_prepare, +}; + +static int its_irq_gic_domain_alloc(struct irq_domain *domain, + unsigned int virq, + irq_hw_number_t hwirq) +{ + struct irq_fwspec fwspec; + + if (irq_domain_get_of_node(domain->parent)) { + fwspec.fwnode = domain->parent->fwnode; + fwspec.param_count = 3; + fwspec.param[0] = GIC_IRQ_TYPE_LPI; + fwspec.param[1] = hwirq; + fwspec.param[2] = IRQ_TYPE_EDGE_RISING; + } else if (is_fwnode_irqchip(domain->parent->fwnode)) { + fwspec.fwnode = domain->parent->fwnode; + fwspec.param_count = 2; + fwspec.param[0] = hwirq; + fwspec.param[1] = IRQ_TYPE_EDGE_RISING; + } else { + return -EINVAL; + } + + return irq_domain_alloc_irqs_parent(domain, virq, 1, &fwspec); +} + +static int its_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + msi_alloc_info_t *info = args; + struct its_device *its_dev = info->scratchpad[0].ptr; + struct its_node *its = its_dev->its; + struct irq_data *irqd; + irq_hw_number_t hwirq; + int err; + int i; + + err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq); + if (err) + return err; + + err = iommu_dma_prepare_msi(info->desc, its->get_msi_base(its_dev)); + if (err) + return err; + + for (i = 0; i < nr_irqs; i++) { + err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i); + if (err) + return err; + + irq_domain_set_hwirq_and_chip(domain, virq + i, + hwirq + i, &its_irq_chip, its_dev); + irqd = irq_get_irq_data(virq + i); + irqd_set_single_target(irqd); + irqd_set_affinity_on_activate(irqd); + pr_debug("ID:%d pID:%d vID:%d\n", + (int)(hwirq + i - its_dev->event_map.lpi_base), + (int)(hwirq + i), virq + i); + } + + return 0; +} + +static int its_cpumask_first(struct its_device *its_dev, + const struct cpumask *cpu_mask) +{ + unsigned int skt, skt_id, i; + phys_addr_t its_phys_base; + unsigned int cpu, cpus = 0; + + unsigned int skt_cpu_cnt[MAX_MARS3_SKT_COUNT] = {0}; + + for (i = 0; i < nr_cpu_ids; i++) { + skt = (cpu_logical_map(i) >> 16) & 0xff; + if ((skt >= 0) && (skt < MAX_MARS3_SKT_COUNT)) { + skt_cpu_cnt[skt]++; + } else if (0xff != skt) { + pr_err("socket address: %d is out of range.", skt); + } + } + + its_phys_base = its_dev->its->phys_base; + skt_id = (its_phys_base >> 41) & 0x7; + + if (0 != skt_id) { + for (i = 0; i < skt_id; i++) { + cpus += skt_cpu_cnt[i]; + } + } + + cpu = cpumask_first(cpu_mask); + if ((cpu > cpus) && (cpu < (cpus + skt_cpu_cnt[skt_id]))) { + cpus = cpu; + } + + return cpus; +} + +static int its_irq_domain_activate(struct irq_domain *domain, + struct irq_data *d, bool reserve) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + const struct cpumask *cpu_mask = cpu_online_mask; + int cpu; + + cpu = its_cpumask_first(its_dev, cpu_mask); + + if (cpu < 0 || cpu >= nr_cpu_ids) + return -EINVAL; + + its_inc_lpi_count(d, cpu); + its_dev->event_map.col_map[event] = cpu; + irq_data_update_effective_affinity(d, cpumask_of(cpu)); + + /* Map the GIC IRQ and event to the device */ + its_send_mapti(its_dev, d->hwirq, event); + return 0; +} + +static void its_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *d) +{ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + u32 event = its_get_event_id(d); + + its_dec_lpi_count(d, its_dev->event_map.col_map[event]); + /* Stop the delivery of interrupts */ + its_send_discard(its_dev, event); +} + +static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + struct irq_data *d = irq_domain_get_irq_data(domain, virq); + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + struct its_node *its = its_dev->its; + int i; + + bitmap_release_region(its_dev->event_map.lpi_map, + its_get_event_id(irq_domain_get_irq_data(domain, virq)), + get_count_order(nr_irqs)); + + for (i = 0; i < nr_irqs; i++) { + struct irq_data *data = irq_domain_get_irq_data(domain, + virq + i); + /* Nuke the entry in the domain */ + irq_domain_reset_irq_data(data); + } + + mutex_lock(&its->dev_alloc_lock); + + /* + * If all interrupts have been freed, start mopping the + * floor. This is conditionned on the device not being shared. + */ + if (!its_dev->shared && + bitmap_empty(its_dev->event_map.lpi_map, + its_dev->event_map.nr_lpis)) { + its_lpi_free(its_dev->event_map.lpi_map, + its_dev->event_map.lpi_base, + its_dev->event_map.nr_lpis); + + /* Unmap device/itt */ + its_send_mapd(its_dev, 0); + its_free_device(its_dev); + } + + mutex_unlock(&its->dev_alloc_lock); + + irq_domain_free_irqs_parent(domain, virq, nr_irqs); +} + +static const struct irq_domain_ops its_domain_ops = { + .alloc = its_irq_domain_alloc, + .free = its_irq_domain_free, + .activate = its_irq_domain_activate, + .deactivate = its_irq_domain_deactivate, +}; + +/* + * This is insane. + * + * If a GICv4.0 doesn't implement Direct LPIs (which is extremely + * likely), the only way to perform an invalidate is to use a fake + * device to issue an INV command, implying that the LPI has first + * been mapped to some event on that device. Since this is not exactly + * cheap, we try to keep that mapping around as long as possible, and + * only issue an UNMAP if we're short on available slots. + * + * Broken by design(tm). + * + * GICv4.1, on the other hand, mandates that we're able to invalidate + * by writing to a MMIO register. It doesn't implement the whole of + * DirectLPI, but that's good enough. And most of the time, we don't + * even have to invalidate anything, as the redistributor can be told + * whether to generate a doorbell or not (we thus leave it enabled, + * always). + */ +static void its_vpe_db_proxy_unmap_locked(struct its_vpe *vpe) +{ + /* GICv4.1 doesn't use a proxy, so nothing to do here */ + if (gic_rdists->has_rvpeid) + return; + + /* Already unmapped? */ + if (vpe->vpe_proxy_event == -1) + return; + + its_send_discard(vpe_proxy.dev, vpe->vpe_proxy_event); + vpe_proxy.vpes[vpe->vpe_proxy_event] = NULL; + + /* + * We don't track empty slots at all, so let's move the + * next_victim pointer if we can quickly reuse that slot + * instead of nuking an existing entry. Not clear that this is + * always a win though, and this might just generate a ripple + * effect... Let's just hope VPEs don't migrate too often. + */ + if (vpe_proxy.vpes[vpe_proxy.next_victim]) + vpe_proxy.next_victim = vpe->vpe_proxy_event; + + vpe->vpe_proxy_event = -1; +} + +static void its_vpe_db_proxy_unmap(struct its_vpe *vpe) +{ + /* GICv4.1 doesn't use a proxy, so nothing to do here */ + if (gic_rdists->has_rvpeid) + return; + + if (!gic_rdists->has_direct_lpi) { + unsigned long flags; + + raw_spin_lock_irqsave(&vpe_proxy.lock, flags); + its_vpe_db_proxy_unmap_locked(vpe); + raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags); + } +} + +static void its_vpe_db_proxy_map_locked(struct its_vpe *vpe) +{ + /* GICv4.1 doesn't use a proxy, so nothing to do here */ + if (gic_rdists->has_rvpeid) + return; + + /* Already mapped? */ + if (vpe->vpe_proxy_event != -1) + return; + + /* This slot was already allocated. Kick the other VPE out. */ + if (vpe_proxy.vpes[vpe_proxy.next_victim]) + its_vpe_db_proxy_unmap_locked(vpe_proxy.vpes[vpe_proxy.next_victim]); + + /* Map the new VPE instead */ + vpe_proxy.vpes[vpe_proxy.next_victim] = vpe; + vpe->vpe_proxy_event = vpe_proxy.next_victim; + vpe_proxy.next_victim = (vpe_proxy.next_victim + 1) % vpe_proxy.dev->nr_ites; + + vpe_proxy.dev->event_map.col_map[vpe->vpe_proxy_event] = vpe->col_idx; + its_send_mapti(vpe_proxy.dev, vpe->vpe_db_lpi, vpe->vpe_proxy_event); +} + +static void its_vpe_db_proxy_move(struct its_vpe *vpe, int from, int to) +{ + unsigned long flags; + struct its_collection *target_col; + + /* GICv4.1 doesn't use a proxy, so nothing to do here */ + if (gic_rdists->has_rvpeid) + return; + + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, from)->rd_base; + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR); + wait_for_syncr(rdbase); + + return; + } + + raw_spin_lock_irqsave(&vpe_proxy.lock, flags); + + its_vpe_db_proxy_map_locked(vpe); + + target_col = &vpe_proxy.dev->its->collections[to]; + its_send_movi(vpe_proxy.dev, target_col, vpe->vpe_proxy_event); + vpe_proxy.dev->event_map.col_map[vpe->vpe_proxy_event] = to; + + raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags); +} + +static int its_vpe_set_affinity(struct irq_data *d, + const struct cpumask *mask_val, + bool force) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + int from, cpu = cpumask_first(mask_val); + unsigned long flags; + + /* + * Changing affinity is mega expensive, so let's be as lazy as + * we can and only do it if we really have to. Also, if mapped + * into the proxy device, we need to move the doorbell + * interrupt to its new location. + * + * Another thing is that changing the affinity of a vPE affects + * *other interrupts* such as all the vLPIs that are routed to + * this vPE. This means that the irq_desc lock is not enough to + * protect us, and that we must ensure nobody samples vpe->col_idx + * during the update, hence the lock below which must also be + * taken on any vLPI handling path that evaluates vpe->col_idx. + */ + from = vpe_to_cpuid_lock(vpe, &flags); + if (from == cpu) + goto out; + + vpe->col_idx = cpu; + + /* + * GICv4.1 allows us to skip VMOVP if moving to a cpu whose RD + * is sharing its VPE table with the current one. + */ + if (gic_data_rdist_cpu(cpu)->vpe_table_mask && + cpumask_test_cpu(from, gic_data_rdist_cpu(cpu)->vpe_table_mask)) + goto out; + + its_send_vmovp(vpe); + its_vpe_db_proxy_move(vpe, from, cpu); + +out: + irq_data_update_effective_affinity(d, cpumask_of(cpu)); + vpe_to_cpuid_unlock(vpe, flags); + + return IRQ_SET_MASK_OK_DONE; +} + +static void its_wait_vpt_parse_complete(void) +{ + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + u64 val; + + if (!gic_rdists->has_vpend_valid_dirty) + return; + + WARN_ON_ONCE(readq_relaxed_poll_timeout_atomic(vlpi_base + GICR_VPENDBASER, + val, + !(val & GICR_VPENDBASER_Dirty), + 10, 500)); +} + +static void its_vpe_schedule(struct its_vpe *vpe) +{ + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + u64 val; + + /* Schedule the VPE */ + val = virt_to_phys(page_address(vpe->its_vm->vprop_page)) & + GENMASK_ULL(51, 12); + val |= (LPI_NRBITS - 1) & GICR_VPROPBASER_IDBITS_MASK; + val |= GICR_VPROPBASER_RaWb; + val |= GICR_VPROPBASER_InnerShareable; + gicr_write_vpropbaser(val, vlpi_base + GICR_VPROPBASER); + + val = virt_to_phys(page_address(vpe->vpt_page)) & + GENMASK_ULL(51, 16); + val |= GICR_VPENDBASER_RaWaWb; + val |= GICR_VPENDBASER_InnerShareable; + /* + * There is no good way of finding out if the pending table is + * empty as we can race against the doorbell interrupt very + * easily. So in the end, vpe->pending_last is only an + * indication that the vcpu has something pending, not one + * that the pending table is empty. A good implementation + * would be able to read its coarse map pretty quickly anyway, + * making this a tolerable issue. + */ + val |= GICR_VPENDBASER_PendingLast; + val |= vpe->idai ? GICR_VPENDBASER_IDAI : 0; + val |= GICR_VPENDBASER_Valid; + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + its_wait_vpt_parse_complete(); +} + +static void its_vpe_deschedule(struct its_vpe *vpe) +{ + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + u64 val; + + val = its_clear_vpend_valid(vlpi_base, 0, 0); + + vpe->idai = !!(val & GICR_VPENDBASER_IDAI); + vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); +} + +static void its_vpe_invall(struct its_vpe *vpe) +{ + struct its_node *its; + + list_for_each_entry(its, &its_nodes, entry) { + if (!is_v4(its)) + continue; + + if (its_list_map && !vpe->its_vm->vlpi_count[its->list_nr]) + continue; + + /* + * Sending a VINVALL to a single ITS is enough, as all + * we need is to reach the redistributors. + */ + its_send_vinvall(its, vpe); + return; + } +} + +static int its_vpe_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_cmd_info *info = vcpu_info; + + switch (info->cmd_type) { + case SCHEDULE_VPE: + its_vpe_schedule(vpe); + return 0; + + case DESCHEDULE_VPE: + its_vpe_deschedule(vpe); + return 0; + + case INVALL_VPE: + its_vpe_invall(vpe); + return 0; + + default: + return -EINVAL; + } +} + +static void its_vpe_send_cmd(struct its_vpe *vpe, + void (*cmd)(struct its_device *, u32)) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&vpe_proxy.lock, flags); + + its_vpe_db_proxy_map_locked(vpe); + cmd(vpe_proxy.dev, vpe->vpe_proxy_event); + + raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags); +} + +static void its_vpe_send_inv(struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + /* Target the redistributor this VPE is currently known on */ + raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock); + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR); + wait_for_syncr(rdbase); + raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock); + } else { + its_vpe_send_cmd(vpe, its_send_inv); + } +} + +static void its_vpe_mask_irq(struct irq_data *d) +{ + /* + * We need to unmask the LPI, which is described by the parent + * irq_data. Instead of calling into the parent (which won't + * exactly do the right thing, let's simply use the + * parent_data pointer. Yes, I'm naughty. + */ + lpi_write_config(d->parent_data, LPI_PROP_ENABLED, 0); + its_vpe_send_inv(d); +} + +static void its_vpe_unmask_irq(struct irq_data *d) +{ + /* Same hack as above... */ + lpi_write_config(d->parent_data, 0, LPI_PROP_ENABLED); + its_vpe_send_inv(d); +} + +static int its_vpe_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + if (which != IRQCHIP_STATE_PENDING) + return -EINVAL; + + if (gic_rdists->has_direct_lpi) { + void __iomem *rdbase; + + rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; + if (state) { + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_SETLPIR); + } else { + gic_write_lpir(vpe->vpe_db_lpi, rdbase + GICR_CLRLPIR); + wait_for_syncr(rdbase); + } + } else { + if (state) + its_vpe_send_cmd(vpe, its_send_int); + else + its_vpe_send_cmd(vpe, its_send_clear); + } + + return 0; +} + +static int its_vpe_retrigger(struct irq_data *d) +{ + return !its_vpe_set_irqchip_state(d, IRQCHIP_STATE_PENDING, true); +} + +static struct irq_chip its_vpe_irq_chip = { + .name = "GICv4-vpe", + .irq_mask = its_vpe_mask_irq, + .irq_unmask = its_vpe_unmask_irq, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = its_vpe_set_affinity, + .irq_retrigger = its_vpe_retrigger, + .irq_set_irqchip_state = its_vpe_set_irqchip_state, + .irq_set_vcpu_affinity = its_vpe_set_vcpu_affinity, +}; + +static struct its_node *find_4_1_its(void) +{ + static struct its_node *its = NULL; + + if (!its) { + list_for_each_entry(its, &its_nodes, entry) { + if (is_v4_1(its)) + return its; + } + + /* Oops? */ + its = NULL; + } + + return its; +} + +static void its_vpe_4_1_send_inv(struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_node *its; + + /* + * GICv4.1 wants doorbells to be invalidated using the + * INVDB command in order to be broadcast to all RDs. Send + * it to the first valid ITS, and let the HW do its magic. + */ + its = find_4_1_its(); + if (its) + its_send_invdb(its, vpe); +} + +static void its_vpe_4_1_mask_irq(struct irq_data *d) +{ + lpi_write_config(d->parent_data, LPI_PROP_ENABLED, 0); + its_vpe_4_1_send_inv(d); +} + +static void its_vpe_4_1_unmask_irq(struct irq_data *d) +{ + lpi_write_config(d->parent_data, 0, LPI_PROP_ENABLED); + its_vpe_4_1_send_inv(d); +} + +static void its_vpe_4_1_schedule(struct its_vpe *vpe, + struct its_cmd_info *info) +{ + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + u64 val = 0; + + /* Schedule the VPE */ + val |= GICR_VPENDBASER_Valid; + val |= info->g0en ? GICR_VPENDBASER_4_1_VGRP0EN : 0; + val |= info->g1en ? GICR_VPENDBASER_4_1_VGRP1EN : 0; + val |= FIELD_PREP(GICR_VPENDBASER_4_1_VPEID, vpe->vpe_id); + + gicr_write_vpendbaser(val, vlpi_base + GICR_VPENDBASER); + + its_wait_vpt_parse_complete(); +} + +static void its_vpe_4_1_deschedule(struct its_vpe *vpe, + struct its_cmd_info *info) +{ + void __iomem *vlpi_base = gic_data_rdist_vlpi_base(); + u64 val; + + if (info->req_db) { + unsigned long flags; + + /* + * vPE is going to block: make the vPE non-resident with + * PendingLast clear and DB set. The GIC guarantees that if + * we read-back PendingLast clear, then a doorbell will be + * delivered when an interrupt comes. + * + * Note the locking to deal with the concurrent update of + * pending_last from the doorbell interrupt handler that can + * run concurrently. + */ + raw_spin_lock_irqsave(&vpe->vpe_lock, flags); + val = its_clear_vpend_valid(vlpi_base, + GICR_VPENDBASER_PendingLast, + GICR_VPENDBASER_4_1_DB); + vpe->pending_last = !!(val & GICR_VPENDBASER_PendingLast); + raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags); + } else { + /* + * We're not blocking, so just make the vPE non-resident + * with PendingLast set, indicating that we'll be back. + */ + val = its_clear_vpend_valid(vlpi_base, + 0, + GICR_VPENDBASER_PendingLast); + vpe->pending_last = true; + } +} + +static void its_vpe_4_1_invall(struct its_vpe *vpe) +{ + void __iomem *rdbase; + unsigned long flags; + u64 val; + int cpu; + + val = GICR_INVALLR_V; + val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id); + + /* Target the redistributor this vPE is currently known on */ + cpu = vpe_to_cpuid_lock(vpe, &flags); + raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); + rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; + gic_write_lpir(val, rdbase + GICR_INVALLR); + + wait_for_syncr(rdbase); + raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); + vpe_to_cpuid_unlock(vpe, flags); +} + +static int its_vpe_4_1_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_cmd_info *info = vcpu_info; + + switch (info->cmd_type) { + case SCHEDULE_VPE: + its_vpe_4_1_schedule(vpe, info); + return 0; + + case DESCHEDULE_VPE: + its_vpe_4_1_deschedule(vpe, info); + return 0; + + case INVALL_VPE: + its_vpe_4_1_invall(vpe); + return 0; + + default: + return -EINVAL; + } +} + +static struct irq_chip its_vpe_4_1_irq_chip = { + .name = "GICv4.1-vpe", + .irq_mask = its_vpe_4_1_mask_irq, + .irq_unmask = its_vpe_4_1_unmask_irq, + .irq_eoi = irq_chip_eoi_parent, + .irq_set_affinity = its_vpe_set_affinity, + .irq_set_vcpu_affinity = its_vpe_4_1_set_vcpu_affinity, +}; + +static void its_configure_sgi(struct irq_data *d, bool clear) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_cmd_desc desc; + + desc.its_vsgi_cmd.vpe = vpe; + desc.its_vsgi_cmd.sgi = d->hwirq; + desc.its_vsgi_cmd.priority = vpe->sgi_config[d->hwirq].priority; + desc.its_vsgi_cmd.enable = vpe->sgi_config[d->hwirq].enabled; + desc.its_vsgi_cmd.group = vpe->sgi_config[d->hwirq].group; + desc.its_vsgi_cmd.clear = clear; + + /* + * GICv4.1 allows us to send VSGI commands to any ITS as long as the + * destination VPE is mapped there. Since we map them eagerly at + * activation time, we're pretty sure the first GICv4.1 ITS will do. + */ + its_send_single_vcommand(find_4_1_its(), its_build_vsgi_cmd, &desc); +} + +static void its_sgi_mask_irq(struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + vpe->sgi_config[d->hwirq].enabled = false; + its_configure_sgi(d, false); +} + +static void its_sgi_unmask_irq(struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + vpe->sgi_config[d->hwirq].enabled = true; + its_configure_sgi(d, false); +} + +static int its_sgi_set_affinity(struct irq_data *d, + const struct cpumask *mask_val, + bool force) +{ + /* + * There is no notion of affinity for virtual SGIs, at least + * not on the host (since they can only be targetting a vPE). + * Tell the kernel we've done whatever it asked for. + */ + irq_data_update_effective_affinity(d, mask_val); + return IRQ_SET_MASK_OK; +} + +static int its_sgi_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, + bool state) +{ + if (which != IRQCHIP_STATE_PENDING) + return -EINVAL; + + if (state) { + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_node *its = find_4_1_its(); + u64 val; + + val = FIELD_PREP(GITS_SGIR_VPEID, vpe->vpe_id); + val |= FIELD_PREP(GITS_SGIR_VINTID, d->hwirq); + writeq_relaxed(val, its->sgir_base + GITS_SGIR - SZ_128K); + } else { + its_configure_sgi(d, true); + } + + return 0; +} + +static int its_sgi_get_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool *val) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + void __iomem *base; + unsigned long flags; + u32 count = 1000000; /* 1s! */ + u32 status; + int cpu; + + if (which != IRQCHIP_STATE_PENDING) + return -EINVAL; + + /* + * Locking galore! We can race against two different events: + * + * - Concurent vPE affinity change: we must make sure it cannot + * happen, or we'll talk to the wrong redistributor. This is + * identical to what happens with vLPIs. + * + * - Concurrent VSGIPENDR access: As it involves accessing two + * MMIO registers, this must be made atomic one way or another. + */ + cpu = vpe_to_cpuid_lock(vpe, &flags); + raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); + base = gic_data_rdist_cpu(cpu)->rd_base + SZ_128K; + writel_relaxed(vpe->vpe_id, base + GICR_VSGIR); + do { + status = readl_relaxed(base + GICR_VSGIPENDR); + if (!(status & GICR_VSGIPENDR_BUSY)) + goto out; + + count--; + if (!count) { + pr_err_ratelimited("Unable to get SGI status\n"); + goto out; + } + cpu_relax(); + udelay(1); + } while (count); + +out: + raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); + vpe_to_cpuid_unlock(vpe, flags); + + if (!count) + return -ENXIO; + + *val = !!(status & (1 << d->hwirq)); + + return 0; +} + +static int its_sgi_set_vcpu_affinity(struct irq_data *d, void *vcpu_info) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_cmd_info *info = vcpu_info; + + switch (info->cmd_type) { + case PROP_UPDATE_VSGI: + vpe->sgi_config[d->hwirq].priority = info->priority; + vpe->sgi_config[d->hwirq].group = info->group; + its_configure_sgi(d, false); + return 0; + + default: + return -EINVAL; + } +} + +static struct irq_chip its_sgi_irq_chip = { + .name = "GICv4.1-sgi", + .irq_mask = its_sgi_mask_irq, + .irq_unmask = its_sgi_unmask_irq, + .irq_set_affinity = its_sgi_set_affinity, + .irq_set_irqchip_state = its_sgi_set_irqchip_state, + .irq_get_irqchip_state = its_sgi_get_irqchip_state, + .irq_set_vcpu_affinity = its_sgi_set_vcpu_affinity, +}; + +static int its_sgi_irq_domain_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *args) +{ + struct its_vpe *vpe = args; + int i; + + /* Yes, we do want 16 SGIs */ + WARN_ON(nr_irqs != 16); + + for (i = 0; i < 16; i++) { + vpe->sgi_config[i].priority = 0; + vpe->sgi_config[i].enabled = false; + vpe->sgi_config[i].group = false; + + irq_domain_set_hwirq_and_chip(domain, virq + i, i, + &its_sgi_irq_chip, vpe); + irq_set_status_flags(virq + i, IRQ_DISABLE_UNLAZY); + } + + return 0; +} + +static void its_sgi_irq_domain_free(struct irq_domain *domain, + unsigned int virq, + unsigned int nr_irqs) +{ + /* Nothing to do */ +} + +static int its_sgi_irq_domain_activate(struct irq_domain *domain, + struct irq_data *d, bool reserve) +{ + /* Write out the initial SGI configuration */ + its_configure_sgi(d, false); + return 0; +} + +static void its_sgi_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + + /* + * The VSGI command is awkward: + * + * - To change the configuration, CLEAR must be set to false, + * leaving the pending bit unchanged. + * - To clear the pending bit, CLEAR must be set to true, leaving + * the configuration unchanged. + * + * You just can't do both at once, hence the two commands below. + */ + vpe->sgi_config[d->hwirq].enabled = false; + its_configure_sgi(d, false); + its_configure_sgi(d, true); +} + +static const struct irq_domain_ops its_sgi_domain_ops = { + .alloc = its_sgi_irq_domain_alloc, + .free = its_sgi_irq_domain_free, + .activate = its_sgi_irq_domain_activate, + .deactivate = its_sgi_irq_domain_deactivate, +}; + +static int its_vpe_id_alloc(void) +{ + return ida_simple_get(&its_vpeid_ida, 0, ITS_MAX_VPEID, GFP_KERNEL); +} + +static void its_vpe_id_free(u16 id) +{ + ida_simple_remove(&its_vpeid_ida, id); +} + +static int its_vpe_init(struct its_vpe *vpe) +{ + struct page *vpt_page; + int vpe_id; + + /* Allocate vpe_id */ + vpe_id = its_vpe_id_alloc(); + if (vpe_id < 0) + return vpe_id; + + /* Allocate VPT */ + vpt_page = its_allocate_pending_table(GFP_KERNEL); + if (!vpt_page) { + its_vpe_id_free(vpe_id); + return -ENOMEM; + } + + if (!its_alloc_vpe_table(vpe_id)) { + its_vpe_id_free(vpe_id); + its_free_pending_table(vpt_page); + return -ENOMEM; + } + + raw_spin_lock_init(&vpe->vpe_lock); + vpe->vpe_id = vpe_id; + vpe->vpt_page = vpt_page; + if (gic_rdists->has_rvpeid) + atomic_set(&vpe->vmapp_count, 0); + else + vpe->vpe_proxy_event = -1; + + return 0; +} + +static void its_vpe_teardown(struct its_vpe *vpe) +{ + its_vpe_db_proxy_unmap(vpe); + its_vpe_id_free(vpe->vpe_id); + its_free_pending_table(vpe->vpt_page); +} + +static void its_vpe_irq_domain_free(struct irq_domain *domain, + unsigned int virq, + unsigned int nr_irqs) +{ + struct its_vm *vm = domain->host_data; + int i; + + irq_domain_free_irqs_parent(domain, virq, nr_irqs); + + for (i = 0; i < nr_irqs; i++) { + struct irq_data *data = irq_domain_get_irq_data(domain, + virq + i); + struct its_vpe *vpe = irq_data_get_irq_chip_data(data); + + BUG_ON(vm != vpe->its_vm); + + clear_bit(data->hwirq, vm->db_bitmap); + its_vpe_teardown(vpe); + irq_domain_reset_irq_data(data); + } + + if (bitmap_empty(vm->db_bitmap, vm->nr_db_lpis)) { + its_lpi_free(vm->db_bitmap, vm->db_lpi_base, vm->nr_db_lpis); + its_free_prop_table(vm->vprop_page); + } +} + +static int its_vpe_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *args) +{ + struct irq_chip *irqchip = &its_vpe_irq_chip; + struct its_vm *vm = args; + unsigned long *bitmap; + struct page *vprop_page; + int base, nr_ids, i, err = 0; + + BUG_ON(!vm); + + bitmap = its_lpi_alloc(roundup_pow_of_two(nr_irqs), &base, &nr_ids); + if (!bitmap) + return -ENOMEM; + + if (nr_ids < nr_irqs) { + its_lpi_free(bitmap, base, nr_ids); + return -ENOMEM; + } + + vprop_page = its_allocate_prop_table(GFP_KERNEL); + if (!vprop_page) { + its_lpi_free(bitmap, base, nr_ids); + return -ENOMEM; + } + + vm->db_bitmap = bitmap; + vm->db_lpi_base = base; + vm->nr_db_lpis = nr_ids; + vm->vprop_page = vprop_page; + + if (gic_rdists->has_rvpeid) + irqchip = &its_vpe_4_1_irq_chip; + + for (i = 0; i < nr_irqs; i++) { + vm->vpes[i]->vpe_db_lpi = base + i; + err = its_vpe_init(vm->vpes[i]); + if (err) + break; + err = its_irq_gic_domain_alloc(domain, virq + i, + vm->vpes[i]->vpe_db_lpi); + if (err) + break; + irq_domain_set_hwirq_and_chip(domain, virq + i, i, + irqchip, vm->vpes[i]); + set_bit(i, bitmap); + } + + if (err) { + if (i > 0) + its_vpe_irq_domain_free(domain, virq, i - 1); + + its_lpi_free(bitmap, base, nr_ids); + its_free_prop_table(vprop_page); + } + + return err; +} + +static int its_vpe_irq_domain_activate(struct irq_domain *domain, + struct irq_data *d, bool reserve) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_node *its; + + /* + * If we use the list map, we issue VMAPP on demand... Unless + * we're on a GICv4.1 and we eagerly map the VPE on all ITSs + * so that VSGIs can work. + */ + if (!gic_requires_eager_mapping()) + return 0; + + /* Map the VPE to the first possible CPU */ + vpe->col_idx = cpumask_first(cpu_online_mask); + + list_for_each_entry(its, &its_nodes, entry) { + if (!is_v4(its)) + continue; + + its_send_vmapp(its, vpe, true); + its_send_vinvall(its, vpe); + } + + irq_data_update_effective_affinity(d, cpumask_of(vpe->col_idx)); + + return 0; +} + +static void its_vpe_irq_domain_deactivate(struct irq_domain *domain, + struct irq_data *d) +{ + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + struct its_node *its; + + /* + * If we use the list map on GICv4.0, we unmap the VPE once no + * VLPIs are associated with the VM. + */ + if (!gic_requires_eager_mapping()) + return; + + list_for_each_entry(its, &its_nodes, entry) { + if (!is_v4(its)) + continue; + + its_send_vmapp(its, vpe, false); + } +} + +static const struct irq_domain_ops its_vpe_domain_ops = { + .alloc = its_vpe_irq_domain_alloc, + .free = its_vpe_irq_domain_free, + .activate = its_vpe_irq_domain_activate, + .deactivate = its_vpe_irq_domain_deactivate, +}; + +static int its_force_quiescent(void __iomem *base) +{ + u32 count = 1000000; /* 1s */ + u32 val; + + val = readl_relaxed(base + GITS_CTLR); + /* + * GIC architecture specification requires the ITS to be both + * disabled and quiescent for writes to GITS_BASER<n> or + * GITS_CBASER to not have UNPREDICTABLE results. + */ + if ((val & GITS_CTLR_QUIESCENT) && !(val & GITS_CTLR_ENABLE)) + return 0; + + /* Disable the generation of all interrupts to this ITS */ + val &= ~(GITS_CTLR_ENABLE | GITS_CTLR_ImDe); + writel_relaxed(val, base + GITS_CTLR); + + /* Poll GITS_CTLR and wait until ITS becomes quiescent */ + while (1) { + val = readl_relaxed(base + GITS_CTLR); + if (val & GITS_CTLR_QUIESCENT) + return 0; + + count--; + if (!count) + return -EBUSY; + + cpu_relax(); + udelay(1); + } +} + +static bool __maybe_unused its_enable_quirk_cavium_22375(void *data) +{ + struct its_node *its = data; + + /* erratum 22375: only alloc 8MB table size (20 bits) */ + its->typer &= ~GITS_TYPER_DEVBITS; + its->typer |= FIELD_PREP(GITS_TYPER_DEVBITS, 20 - 1); + its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_22375; + + return true; +} + +static bool __maybe_unused its_enable_quirk_cavium_23144(void *data) +{ + struct its_node *its = data; + + its->flags |= ITS_FLAGS_WORKAROUND_CAVIUM_23144; + + return true; +} + +static bool __maybe_unused its_enable_quirk_qdf2400_e0065(void *data) +{ + struct its_node *its = data; + + /* On QDF2400, the size of the ITE is 16Bytes */ + its->typer &= ~GITS_TYPER_ITT_ENTRY_SIZE; + its->typer |= FIELD_PREP(GITS_TYPER_ITT_ENTRY_SIZE, 16 - 1); + + return true; +} + +static u64 its_irq_get_msi_base_pre_its(struct its_device *its_dev) +{ + struct its_node *its = its_dev->its; + + /* + * The Socionext Synquacer SoC has a so-called 'pre-ITS', + * which maps 32-bit writes targeted at a separate window of + * size '4 << device_id_bits' onto writes to GITS_TRANSLATER + * with device ID taken from bits [device_id_bits + 1:2] of + * the window offset. + */ + return its->pre_its_base + (its_dev->device_id << 2); +} + +static bool __maybe_unused its_enable_quirk_socionext_synquacer(void *data) +{ + struct its_node *its = data; + u32 pre_its_window[2]; + u32 ids; + + if (!fwnode_property_read_u32_array(its->fwnode_handle, + "socionext,synquacer-pre-its", + pre_its_window, + ARRAY_SIZE(pre_its_window))) { + + its->pre_its_base = pre_its_window[0]; + its->get_msi_base = its_irq_get_msi_base_pre_its; + + ids = ilog2(pre_its_window[1]) - 2; + if (device_ids(its) > ids) { + its->typer &= ~GITS_TYPER_DEVBITS; + its->typer |= FIELD_PREP(GITS_TYPER_DEVBITS, ids - 1); + } + + /* the pre-ITS breaks isolation, so disable MSI remapping */ + its->msi_domain_flags &= ~IRQ_DOMAIN_FLAG_MSI_REMAP; + return true; + } + return false; +} + +static bool __maybe_unused its_enable_quirk_hip07_161600802(void *data) +{ + struct its_node *its = data; + + /* + * Hip07 insists on using the wrong address for the VLPI + * page. Trick it into doing the right thing... + */ + its->vlpi_redist_offset = SZ_128K; + return true; +} + +static const struct gic_quirk its_quirks[] = { +#ifdef CONFIG_CAVIUM_ERRATUM_22375 + { + .desc = "ITS: Cavium errata 22375, 24313", + .iidr = 0xa100034c, /* ThunderX pass 1.x */ + .mask = 0xffff0fff, + .init = its_enable_quirk_cavium_22375, + }, +#endif +#ifdef CONFIG_CAVIUM_ERRATUM_23144 + { + .desc = "ITS: Cavium erratum 23144", + .iidr = 0xa100034c, /* ThunderX pass 1.x */ + .mask = 0xffff0fff, + .init = its_enable_quirk_cavium_23144, + }, +#endif +#ifdef CONFIG_QCOM_QDF2400_ERRATUM_0065 + { + .desc = "ITS: QDF2400 erratum 0065", + .iidr = 0x00001070, /* QDF2400 ITS rev 1.x */ + .mask = 0xffffffff, + .init = its_enable_quirk_qdf2400_e0065, + }, +#endif +#ifdef CONFIG_SOCIONEXT_SYNQUACER_PREITS + { + /* + * The Socionext Synquacer SoC incorporates ARM's own GIC-500 + * implementation, but with a 'pre-ITS' added that requires + * special handling in software. + */ + .desc = "ITS: Socionext Synquacer pre-ITS", + .iidr = 0x0001143b, + .mask = 0xffffffff, + .init = its_enable_quirk_socionext_synquacer, + }, +#endif +#ifdef CONFIG_HISILICON_ERRATUM_161600802 + { + .desc = "ITS: Hip07 erratum 161600802", + .iidr = 0x00000004, + .mask = 0xffffffff, + .init = its_enable_quirk_hip07_161600802, + }, +#endif + { + } +}; + +static void its_enable_quirks(struct its_node *its) +{ + u32 iidr = readl_relaxed(its->base + GITS_IIDR); + + gic_enable_quirks(iidr, its_quirks, its); +} + +static int its_save_disable(void) +{ + struct its_node *its; + int err = 0; + + raw_spin_lock(&its_lock); + list_for_each_entry(its, &its_nodes, entry) { + void __iomem *base; + + base = its->base; + its->ctlr_save = readl_relaxed(base + GITS_CTLR); + err = its_force_quiescent(base); + if (err) { + pr_err("ITS@%pa: failed to quiesce: %d\n", + &its->phys_base, err); + writel_relaxed(its->ctlr_save, base + GITS_CTLR); + goto err; + } + + its->cbaser_save = gits_read_cbaser(base + GITS_CBASER); + } + +err: + if (err) { + list_for_each_entry_continue_reverse(its, &its_nodes, entry) { + void __iomem *base; + + base = its->base; + writel_relaxed(its->ctlr_save, base + GITS_CTLR); + } + } + raw_spin_unlock(&its_lock); + + return err; +} + +static void its_restore_enable(void) +{ + struct its_node *its; + int ret; + + raw_spin_lock(&its_lock); + list_for_each_entry(its, &its_nodes, entry) { + void __iomem *base; + int i; + + base = its->base; + + /* + * Make sure that the ITS is disabled. If it fails to quiesce, + * don't restore it since writing to CBASER or BASER<n> + * registers is undefined according to the GIC v3 ITS + * Specification. + * + * Firmware resuming with the ITS enabled is terminally broken. + */ + WARN_ON(readl_relaxed(base + GITS_CTLR) & GITS_CTLR_ENABLE); + ret = its_force_quiescent(base); + if (ret) { + pr_err("ITS@%pa: failed to quiesce on resume: %d\n", + &its->phys_base, ret); + continue; + } + + gits_write_cbaser(its->cbaser_save, base + GITS_CBASER); + + /* + * Writing CBASER resets CREADR to 0, so make CWRITER and + * cmd_write line up with it. + */ + its->cmd_write = its->cmd_base; + gits_write_cwriter(0, base + GITS_CWRITER); + + /* Restore GITS_BASER from the value cache. */ + for (i = 0; i < GITS_BASER_NR_REGS; i++) { + struct its_baser *baser = &its->tables[i]; + + if (!(baser->val & GITS_BASER_VALID)) + continue; + + its_write_baser(its, baser, baser->val); + } + writel_relaxed(its->ctlr_save, base + GITS_CTLR); + + /* + * Reinit the collection if it's stored in the ITS. This is + * indicated by the col_id being less than the HCC field. + * CID < HCC as specified in the GIC v3 Documentation. + */ + if (its->collections[smp_processor_id()].col_id < + GITS_TYPER_HCC(gic_read_typer(base + GITS_TYPER))) + its_cpu_init_collection(its); + } + raw_spin_unlock(&its_lock); +} + +static struct syscore_ops its_syscore_ops = { + .suspend = its_save_disable, + .resume = its_restore_enable, +}; + +static int its_init_domain(struct fwnode_handle *handle, struct its_node *its) +{ + struct irq_domain *inner_domain; + struct msi_domain_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) + return -ENOMEM; + + inner_domain = irq_domain_create_tree(handle, &its_domain_ops, its); + if (!inner_domain) { + kfree(info); + return -ENOMEM; + } + + inner_domain->parent = its_parent; + irq_domain_update_bus_token(inner_domain, DOMAIN_BUS_NEXUS); + inner_domain->flags |= its->msi_domain_flags; + info->ops = &its_msi_domain_ops; + info->data = its; + inner_domain->host_data = info; + + return 0; +} + +static int its_init_vpe_domain(void) +{ + struct its_node *its; + u32 devid; + int entries; + + if (gic_rdists->has_direct_lpi) { + pr_info("ITS: Using DirectLPI for VPE invalidation\n"); + return 0; + } + + /* Any ITS will do, even if not v4 */ + its = list_first_entry(&its_nodes, struct its_node, entry); + + entries = roundup_pow_of_two(nr_cpu_ids); + vpe_proxy.vpes = kcalloc(entries, sizeof(*vpe_proxy.vpes), + GFP_KERNEL); + if (!vpe_proxy.vpes) { + pr_err("ITS: Can't allocate GICv4 proxy device array\n"); + return -ENOMEM; + } + + /* Use the last possible DevID */ + devid = GENMASK(device_ids(its) - 1, 0); + vpe_proxy.dev = its_create_device(its, devid, entries, false); + if (!vpe_proxy.dev) { + kfree(vpe_proxy.vpes); + pr_err("ITS: Can't allocate GICv4 proxy device\n"); + return -ENOMEM; + } + + BUG_ON(entries > vpe_proxy.dev->nr_ites); + + raw_spin_lock_init(&vpe_proxy.lock); + vpe_proxy.next_victim = 0; + pr_info("ITS: Allocated DevID %x as GICv4 proxy device (%d slots)\n", + devid, vpe_proxy.dev->nr_ites); + + return 0; +} + +static int __init its_compute_its_list_map(struct resource *res, + void __iomem *its_base) +{ + int its_number; + u32 ctlr; + + /* + * This is assumed to be done early enough that we're + * guaranteed to be single-threaded, hence no + * locking. Should this change, we should address + * this. + */ + its_number = find_first_zero_bit(&its_list_map, GICv4_ITS_LIST_MAX); + if (its_number >= GICv4_ITS_LIST_MAX) { + pr_err("ITS@%pa: No ITSList entry available!\n", + &res->start); + return -EINVAL; + } + + ctlr = readl_relaxed(its_base + GITS_CTLR); + ctlr &= ~GITS_CTLR_ITS_NUMBER; + ctlr |= its_number << GITS_CTLR_ITS_NUMBER_SHIFT; + writel_relaxed(ctlr, its_base + GITS_CTLR); + ctlr = readl_relaxed(its_base + GITS_CTLR); + if ((ctlr & GITS_CTLR_ITS_NUMBER) != (its_number << GITS_CTLR_ITS_NUMBER_SHIFT)) { + its_number = ctlr & GITS_CTLR_ITS_NUMBER; + its_number >>= GITS_CTLR_ITS_NUMBER_SHIFT; + } + + if (test_and_set_bit(its_number, &its_list_map)) { + pr_err("ITS@%pa: Duplicate ITSList entry %d\n", + &res->start, its_number); + return -EINVAL; + } + + return its_number; +} + +static int __init its_probe_one(struct resource *res, + struct fwnode_handle *handle, int numa_node) +{ + struct its_node *its; + void __iomem *its_base; + u32 val, ctlr; + u64 baser, tmp, typer; + struct page *page; + int err; + + its_base = ioremap(res->start, SZ_64K); + if (!its_base) { + pr_warn("ITS@%pa: Unable to map ITS registers\n", &res->start); + return -ENOMEM; + } + + val = readl_relaxed(its_base + GITS_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (val != 0x30 && val != 0x40) { + pr_warn("ITS@%pa: No ITS detected, giving up\n", &res->start); + err = -ENODEV; + goto out_unmap; + } + + err = its_force_quiescent(its_base); + if (err) { + pr_warn("ITS@%pa: Failed to quiesce, giving up\n", &res->start); + goto out_unmap; + } + + pr_info("ITS %pR\n", res); + + its = kzalloc(sizeof(*its), GFP_KERNEL); + if (!its) { + err = -ENOMEM; + goto out_unmap; + } + + raw_spin_lock_init(&its->lock); + mutex_init(&its->dev_alloc_lock); + INIT_LIST_HEAD(&its->entry); + INIT_LIST_HEAD(&its->its_device_list); + typer = gic_read_typer(its_base + GITS_TYPER); + its->typer = typer; + its->base = its_base; + its->phys_base = res->start; + if (is_v4(its)) { + if (!(typer & GITS_TYPER_VMOVP)) { + err = its_compute_its_list_map(res, its_base); + if (err < 0) + goto out_free_its; + + its->list_nr = err; + + pr_info("ITS@%pa: Using ITS number %d\n", + &res->start, err); + } else { + pr_info("ITS@%pa: Single VMOVP capable\n", &res->start); + } + + if (is_v4_1(its)) { + u32 svpet = FIELD_GET(GITS_TYPER_SVPET, typer); + + its->sgir_base = ioremap(res->start + SZ_128K, SZ_64K); + if (!its->sgir_base) { + err = -ENOMEM; + goto out_free_its; + } + + its->mpidr = readl_relaxed(its_base + GITS_MPIDR); + + pr_info("ITS@%pa: Using GICv4.1 mode %08x %08x\n", + &res->start, its->mpidr, svpet); + } + } + + its->numa_node = numa_node; + + page = alloc_pages_node(its->numa_node, GFP_KERNEL | __GFP_ZERO, + get_order(ITS_CMD_QUEUE_SZ)); + if (!page) { + err = -ENOMEM; + goto out_unmap_sgir; + } + its->cmd_base = (void *)page_address(page); + its->cmd_write = its->cmd_base; + its->fwnode_handle = handle; + its->get_msi_base = its_irq_get_msi_base; + its->msi_domain_flags = IRQ_DOMAIN_FLAG_MSI_REMAP; + + its_enable_quirks(its); + + err = its_alloc_tables(its); + if (err) + goto out_free_cmd; + + err = its_alloc_collections(its); + if (err) + goto out_free_tables; + + baser = (virt_to_phys(its->cmd_base) | + GITS_CBASER_RaWaWb | + GITS_CBASER_InnerShareable | + (ITS_CMD_QUEUE_SZ / SZ_4K - 1) | + GITS_CBASER_VALID); + + gits_write_cbaser(baser, its->base + GITS_CBASER); + tmp = gits_read_cbaser(its->base + GITS_CBASER); + + if ((tmp ^ baser) & GITS_CBASER_SHAREABILITY_MASK) { + if (!(tmp & GITS_CBASER_SHAREABILITY_MASK)) { + /* + * The HW reports non-shareable, we must + * remove the cacheability attributes as + * well. + */ + baser &= ~(GITS_CBASER_SHAREABILITY_MASK | + GITS_CBASER_CACHEABILITY_MASK); + baser |= GITS_CBASER_nC; + gits_write_cbaser(baser, its->base + GITS_CBASER); + } + pr_info("ITS: using cache flushing for cmd queue\n"); + its->flags |= ITS_FLAGS_CMDQ_NEEDS_FLUSHING; + } + + gits_write_cwriter(0, its->base + GITS_CWRITER); + ctlr = readl_relaxed(its->base + GITS_CTLR); + ctlr |= GITS_CTLR_ENABLE; + if (is_v4(its)) + ctlr |= GITS_CTLR_ImDe; + writel_relaxed(ctlr, its->base + GITS_CTLR); + + err = its_init_domain(handle, its); + if (err) + goto out_free_tables; + + raw_spin_lock(&its_lock); + list_add(&its->entry, &its_nodes); + raw_spin_unlock(&its_lock); + + return 0; + +out_free_tables: + its_free_tables(its); +out_free_cmd: + free_pages((unsigned long)its->cmd_base, get_order(ITS_CMD_QUEUE_SZ)); +out_unmap_sgir: + if (its->sgir_base) + iounmap(its->sgir_base); +out_free_its: + kfree(its); +out_unmap: + iounmap(its_base); + pr_err("ITS@%pa: failed probing (%d)\n", &res->start, err); + return err; +} + +static bool gic_rdists_supports_plpis(void) +{ + return !!(gic_read_typer(gic_data_rdist_rd_base() + GICR_TYPER) & GICR_TYPER_PLPIS); +} + +static int redist_disable_lpis(void) +{ + void __iomem *rbase = gic_data_rdist_rd_base(); + u64 timeout = USEC_PER_SEC; + u64 val; + + if (!gic_rdists_supports_plpis()) { + pr_info("CPU%d: LPIs not supported\n", smp_processor_id()); + return -ENXIO; + } + + val = readl_relaxed(rbase + GICR_CTLR); + if (!(val & GICR_CTLR_ENABLE_LPIS)) + return 0; + + /* + * If coming via a CPU hotplug event, we don't need to disable + * LPIs before trying to re-enable them. They are already + * configured and all is well in the world. + * + * If running with preallocated tables, there is nothing to do. + */ + if (gic_data_rdist()->lpi_enabled || + (gic_rdists->flags & RDIST_FLAGS_RD_TABLES_PREALLOCATED)) + return 0; + + /* + * From that point on, we only try to do some damage control. + */ + pr_warn("GIC-2500: CPU%d: Booted with LPIs enabled, memory probably corrupted\n", + smp_processor_id()); + add_taint(TAINT_CRAP, LOCKDEP_STILL_OK); + + /* Disable LPIs */ + val &= ~GICR_CTLR_ENABLE_LPIS; + writel_relaxed(val, rbase + GICR_CTLR); + + /* Make sure any change to GICR_CTLR is observable by the GIC */ + dsb(sy); + + /* + * Software must observe RWP==0 after clearing GICR_CTLR.EnableLPIs + * from 1 to 0 before programming GICR_PEND{PROP}BASER registers. + * Error out if we time out waiting for RWP to clear. + */ + while (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_RWP) { + if (!timeout) { + pr_err("CPU%d: Timeout while disabling LPIs\n", + smp_processor_id()); + return -ETIMEDOUT; + } + udelay(1); + timeout--; + } + + /* + * After it has been written to 1, it is IMPLEMENTATION + * DEFINED whether GICR_CTLR.EnableLPI becomes RES1 or can be + * cleared to 0. Error out if clearing the bit failed. + */ + if (readl_relaxed(rbase + GICR_CTLR) & GICR_CTLR_ENABLE_LPIS) { + pr_err("CPU%d: Failed to disable LPIs\n", smp_processor_id()); + return -EBUSY; + } + + return 0; +} + +int phytium_its_cpu_init(void) +{ + if (!list_empty(&its_nodes)) { + int ret; + + ret = redist_disable_lpis(); + if (ret) + return ret; + + its_cpu_init_lpis(); + its_cpu_init_collections(); + } + + return 0; +} + +static const struct of_device_id its_device_id[] = { + { .compatible = "arm,gic-phytium-2500-its", }, + {}, +}; + +static int __init its_of_probe(struct device_node *node) +{ + struct device_node *np; + struct resource res; + + for (np = of_find_matching_node(node, its_device_id); np; + np = of_find_matching_node(np, its_device_id)) { + if (!of_device_is_available(np)) + continue; + if (!of_property_read_bool(np, "msi-controller")) { + pr_warn("%pOF: no msi-controller property, ITS ignored\n", + np); + continue; + } + + if (of_address_to_resource(np, 0, &res)) { + pr_warn("%pOF: no regs?\n", np); + continue; + } + + its_probe_one(&res, &np->fwnode, of_node_to_nid(np)); + } + return 0; +} + +#ifdef CONFIG_ACPI + +#define ACPI_GICV3_ITS_MEM_SIZE (SZ_128K) + +#ifdef CONFIG_ACPI_NUMA +struct its_srat_map { + /* numa node id */ + u32 numa_node; + /* GIC ITS ID */ + u32 its_id; +}; + +static struct its_srat_map *its_srat_maps __initdata; +static int its_in_srat __initdata; + +static int __init acpi_get_its_numa_node(u32 its_id) +{ + int i; + + for (i = 0; i < its_in_srat; i++) { + if (its_id == its_srat_maps[i].its_id) + return its_srat_maps[i].numa_node; + } + return NUMA_NO_NODE; +} + +static int __init gic_acpi_match_srat_its(union acpi_subtable_headers *header, + const unsigned long end) +{ + return 0; +} + +static int __init gic_acpi_parse_srat_its(union acpi_subtable_headers *header, + const unsigned long end) +{ + int node; + struct acpi_srat_gic_its_affinity *its_affinity; + + its_affinity = (struct acpi_srat_gic_its_affinity *)header; + if (!its_affinity) + return -EINVAL; + + if (its_affinity->header.length < sizeof(*its_affinity)) { + pr_err("SRAT: Invalid header length %d in ITS affinity\n", + its_affinity->header.length); + return -EINVAL; + } + + /* + * Note that in theory a new proximity node could be created by this + * entry as it is an SRAT resource allocation structure. + * We do not currently support doing so. + */ + node = pxm_to_node(its_affinity->proximity_domain); + + if (node == NUMA_NO_NODE || node >= MAX_NUMNODES) { + pr_err("SRAT: Invalid NUMA node %d in ITS affinity\n", node); + return 0; + } + + its_srat_maps[its_in_srat].numa_node = node; + its_srat_maps[its_in_srat].its_id = its_affinity->its_id; + its_in_srat++; + pr_info("SRAT: PXM %d -> ITS %d -> Node %d\n", + its_affinity->proximity_domain, its_affinity->its_id, node); + + return 0; +} + +static void __init acpi_table_parse_srat_its(void) +{ + int count; + + count = acpi_table_parse_entries(ACPI_SIG_SRAT, + sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GIC_ITS_AFFINITY, + gic_acpi_match_srat_its, 0); + if (count <= 0) + return; + + its_srat_maps = kmalloc_array(count, sizeof(struct its_srat_map), + GFP_KERNEL); + if (!its_srat_maps) { + pr_warn("SRAT: Failed to allocate memory for its_srat_maps!\n"); + return; + } + + acpi_table_parse_entries(ACPI_SIG_SRAT, + sizeof(struct acpi_table_srat), + ACPI_SRAT_TYPE_GIC_ITS_AFFINITY, + gic_acpi_parse_srat_its, 0); +} + +/* free the its_srat_maps after ITS probing */ +static void __init acpi_its_srat_maps_free(void) +{ + kfree(its_srat_maps); +} +#else +static void __init acpi_table_parse_srat_its(void) { } +static int __init acpi_get_its_numa_node(u32 its_id) { return NUMA_NO_NODE; } +static void __init acpi_its_srat_maps_free(void) { } +#endif + +static int __init gic_acpi_parse_madt_its(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_generic_translator *its_entry; + struct fwnode_handle *dom_handle; + struct resource res; + int err; + + its_entry = (struct acpi_madt_generic_translator *)header; + memset(&res, 0, sizeof(res)); + res.start = its_entry->base_address; + res.end = its_entry->base_address + ACPI_GICV3_ITS_MEM_SIZE - 1; + res.flags = IORESOURCE_MEM; + + dom_handle = irq_domain_alloc_fwnode(&res.start); + if (!dom_handle) { + pr_err("ITS@%pa: Unable to allocate GIC-phytium-2500 ITS domain token\n", + &res.start); + return -ENOMEM; + } + + err = iort_register_domain_token(its_entry->translation_id, res.start, + dom_handle); + if (err) { + pr_err("ITS@%pa: Unable to register GIC-phytium-2500 ITS domain token (ITS ID %d) to IORT\n", + &res.start, its_entry->translation_id); + goto dom_err; + } + + err = its_probe_one(&res, dom_handle, + acpi_get_its_numa_node(its_entry->translation_id)); + if (!err) + return 0; + + iort_deregister_domain_token(its_entry->translation_id); +dom_err: + irq_domain_free_fwnode(dom_handle); + return err; +} + +static void __init its_acpi_probe(void) +{ + acpi_table_parse_srat_its(); + acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_TRANSLATOR, + gic_acpi_parse_madt_its, 0); + acpi_its_srat_maps_free(); +} +#else +static void __init its_acpi_probe(void) { } +#endif + +int __init phytium_its_init(struct fwnode_handle *handle, struct rdists *rdists, + struct irq_domain *parent_domain) +{ + struct device_node *of_node; + struct its_node *its; + bool has_v4 = false; + bool has_v4_1 = false; + int err; + + gic_rdists = rdists; + + its_parent = parent_domain; + of_node = to_of_node(handle); + if (of_node) + its_of_probe(of_node); + else + its_acpi_probe(); + + if (list_empty(&its_nodes)) { + pr_warn("ITS: No ITS available, not enabling LPIs\n"); + return -ENXIO; + } + + err = allocate_lpi_tables(); + if (err) + return err; + + list_for_each_entry(its, &its_nodes, entry) { + has_v4 |= is_v4(its); + has_v4_1 |= is_v4_1(its); + } + + /* Don't bother with inconsistent systems */ + if (WARN_ON(!has_v4_1 && rdists->has_rvpeid)) + rdists->has_rvpeid = false; + + if (has_v4 & rdists->has_vlpis) { + const struct irq_domain_ops *sgi_ops; + + if (has_v4_1) + sgi_ops = &its_sgi_domain_ops; + else + sgi_ops = NULL; + + if (its_init_vpe_domain() || + its_init_v4(parent_domain, &its_vpe_domain_ops, sgi_ops)) { + rdists->has_vlpis = false; + pr_err("ITS: Disabling GICv4 support\n"); + } + } + + register_syscore_ops(&its_syscore_ops); + + return 0; +} diff --git a/drivers/irqchip/irq-gic-phytium-2500.c b/drivers/irqchip/irq-gic-phytium-2500.c new file mode 100644 index 000000000000..ba0545fcee56 --- /dev/null +++ b/drivers/irqchip/irq-gic-phytium-2500.c @@ -0,0 +1,2503 @@ +/* + * Copyright (C) 2020 Phytium Corporation. + * Author: Wang Yinfeng wangyinfeng@phytium.com.cn + * Chen Baozi chenbaozi@phytium.com.cn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ + + +#define pr_fmt(fmt) "GIC-2500: " fmt + +#include <linux/acpi.h> +#include <linux/cpu.h> +#include <linux/cpu_pm.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/irqdomain.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_irq.h> +#include <linux/percpu.h> +#include <linux/refcount.h> +#include <linux/slab.h> + +#include <linux/irqchip.h> +#include <linux/irqchip/arm-gic-common.h> +#include <linux/irqchip/arm-gic-phytium-2500.h> +#include <linux/irqchip/irq-partition-percpu.h> + +#include <asm/cputype.h> +#include <asm/exception.h> +#include <asm/smp_plat.h> +#include <asm/virt.h> + +#include "irq-gic-common.h" + +#define MAX_MARS3_SOC_COUNT 8 +#define MARS3_ADDR_SKTID_SHIFT 41 + +struct gic_dist_desc { + void __iomem *dist_base; + phys_addr_t phys_base; + unsigned long size; + +}; + +static struct gic_dist_desc mars3_gic_dists[MAX_MARS3_SOC_COUNT] __read_mostly; + +static unsigned int mars3_sockets_bitmap = 0x1; + +#define mars3_irq_to_skt(hwirq) (((hwirq) - 32) % 8) + +#define GICD_INT_NMI_PRI (GICD_INT_DEF_PRI & ~0x80) + +#define FLAGS_WORKAROUND_GICR_WAKER_MSM8996 (1ULL << 0) +#define FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539 (1ULL << 1) + +#define GIC_IRQ_TYPE_PARTITION (GIC_IRQ_TYPE_LPI + 1) + +struct redist_region { + void __iomem *redist_base; + phys_addr_t phys_base; + bool single_redist; +}; + +struct gic_chip_data { + struct fwnode_handle *fwnode; + void __iomem *dist_base; + struct redist_region *redist_regions; + struct rdists rdists; + struct irq_domain *domain; + u64 redist_stride; + u32 nr_redist_regions; + u64 flags; + bool has_rss; + unsigned int ppi_nr; + struct partition_desc **ppi_descs; +}; + +static struct gic_chip_data gic_data __read_mostly; +static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); + +#define GIC_ID_NR (1U << GICD_TYPER_ID_BITS(gic_data.rdists.gicd_typer)) +#define GIC_LINE_NR min(GICD_TYPER_SPIS(gic_data.rdists.gicd_typer), 1020U) +#define GIC_ESPI_NR GICD_TYPER_ESPIS(gic_data.rdists.gicd_typer) + +/* + * The behaviours of RPR and PMR registers differ depending on the value of + * SCR_EL3.FIQ, and the behaviour of non-secure priority registers of the + * distributor and redistributors depends on whether security is enabled in the + * GIC. + * + * When security is enabled, non-secure priority values from the (re)distributor + * are presented to the GIC CPUIF as follow: + * (GIC_(R)DIST_PRI[irq] >> 1) | 0x80; + * + * If SCR_EL3.FIQ == 1, the values writen to/read from PMR and RPR at non-secure + * EL1 are subject to a similar operation thus matching the priorities presented + * from the (re)distributor when security is enabled. When SCR_EL3.FIQ == 0, + * these values are unchanched by the GIC. + * + * see GICv3/GICv4 Architecture Specification (IHI0069D): + * - section 4.8.1 Non-secure accesses to register fields for Secure interrupt + * priorities. + * - Figure 4-7 Secure read of the priority field for a Non-secure Group 1 + * interrupt. + */ +static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis_ft2500); + +#ifndef CONFIG_ARM_GIC_V3 +/* + * Global static key controlling whether an update to PMR allowing more + * interrupts requires to be propagated to the redistributor (DSB SY). + * And this needs to be exported for modules to be able to enable + * interrupts... + */ +DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); +EXPORT_SYMBOL(gic_pmr_sync); + +DEFINE_STATIC_KEY_FALSE(gic_nonsecure_priorities); +EXPORT_SYMBOL(gic_nonsecure_priorities); +#else +extern struct static_key_false gic_pmr_sync; +extern struct static_key_false gic_nonsecure_priorities; +#endif + +/* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ +static refcount_t *ppi_nmi_refs; + +static struct gic_kvm_info gic_v3_kvm_info; +static DEFINE_PER_CPU(bool, has_rss_ft2500); + +#define MPIDR_RS(mpidr) (((mpidr) & 0xF0UL) >> 4) +#define gic_data_rdist() (this_cpu_ptr(gic_data.rdists.rdist)) +#define gic_data_rdist_rd_base() (gic_data_rdist()->rd_base) +#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K) + +/* Our default, arbitrary priority value. Linux only uses one anyway. */ +#define DEFAULT_PMR_VALUE 0xf0 + +enum gic_intid_range { + SGI_RANGE, + PPI_RANGE, + SPI_RANGE, + EPPI_RANGE, + ESPI_RANGE, + LPI_RANGE, + __INVALID_RANGE__ +}; + +static enum gic_intid_range __get_intid_range(irq_hw_number_t hwirq) +{ + switch (hwirq) { + case 0 ... 15: + return SGI_RANGE; + case 16 ... 31: + return PPI_RANGE; + case 32 ... 1019: + return SPI_RANGE; + case EPPI_BASE_INTID ... (EPPI_BASE_INTID + 63): + return EPPI_RANGE; + case ESPI_BASE_INTID ... (ESPI_BASE_INTID + 1023): + return ESPI_RANGE; + case 8192 ... GENMASK(23, 0): + return LPI_RANGE; + default: + return __INVALID_RANGE__; + } +} + +static enum gic_intid_range get_intid_range(struct irq_data *d) +{ + return __get_intid_range(d->hwirq); +} + +static inline unsigned int gic_irq(struct irq_data *d) +{ + return d->hwirq; +} + +static inline bool gic_irq_in_rdist(struct irq_data *d) +{ + switch (get_intid_range(d)) { + case SGI_RANGE: + case PPI_RANGE: + case EPPI_RANGE: + return true; + default: + return false; + } +} + +static inline void __iomem *gic_dist_base(struct irq_data *d) +{ + switch (get_intid_range(d)) { + case SGI_RANGE: + case PPI_RANGE: + case EPPI_RANGE: + /* SGI+PPI -> SGI_base for this CPU */ + return gic_data_rdist_sgi_base(); + + case SPI_RANGE: + case ESPI_RANGE: + /* SPI -> dist_base */ + return gic_data.dist_base; + + default: + return NULL; + } +} + +static void gic_do_wait_for_rwp(void __iomem *base) +{ + u32 count = 1000000; /* 1s! */ + + while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + count--; + if (!count) { + pr_err_ratelimited("RWP timeout, gone fishing\n"); + return; + } + cpu_relax(); + udelay(1); + } +} + +/* Wait for completion of a distributor change */ +static void gic_dist_wait_for_rwp(void) +{ + gic_do_wait_for_rwp(gic_data.dist_base); +} + +/* Wait for completion of a redistributor change */ +static void gic_redist_wait_for_rwp(void) +{ + gic_do_wait_for_rwp(gic_data_rdist_rd_base()); +} + +#ifdef CONFIG_ARM64 + +static u64 __maybe_unused gic_read_iar(void) +{ + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_23154)) + return gic_read_iar_cavium_thunderx(); + else + return gic_read_iar_common(); +} +#endif + +static void gic_enable_redist(bool enable) +{ + void __iomem *rbase; + u32 count = 1000000; /* 1s! */ + u32 val; + unsigned long mpidr; + int i; + + if (gic_data.flags & FLAGS_WORKAROUND_GICR_WAKER_MSM8996) + return; + + rbase = gic_data_rdist_rd_base(); + + val = readl_relaxed(rbase + GICR_WAKER); + if (enable) + /* Wake up this CPU redistributor */ + val &= ~GICR_WAKER_ProcessorSleep; + else + val |= GICR_WAKER_ProcessorSleep; + writel_relaxed(val, rbase + GICR_WAKER); + + if (!enable) { /* Check that GICR_WAKER is writeable */ + val = readl_relaxed(rbase + GICR_WAKER); + if (!(val & GICR_WAKER_ProcessorSleep)) + return; /* No PM support in this redistributor */ + } + + while (--count) { + val = readl_relaxed(rbase + GICR_WAKER); + if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) + break; + cpu_relax(); + udelay(1); + } + if (!count) + pr_err_ratelimited("redistributor failed to %s...\n", + enable ? "wakeup" : "sleep"); + + mpidr = (unsigned long)cpu_logical_map(smp_processor_id()); + + if (mpidr & 0xFFFF) // either Aff1 or Aff0 is not zero + return; + + rbase = rbase + 64*SZ_128K; // skip 64 Redistributors + + for (i = 0; i < 4; i++) { + val = readl_relaxed(rbase + GICR_WAKER); + if (enable) + /* Wake up this CPU redistributor */ + val &= ~GICR_WAKER_ProcessorSleep; + else + val |= GICR_WAKER_ProcessorSleep; + writel_relaxed(val, rbase + GICR_WAKER); + + if (!enable) { /* Check that GICR_WAKER is writeable */ + val = readl_relaxed(rbase + GICR_WAKER); + if (!(val & GICR_WAKER_ProcessorSleep)) + return; /* No PM support in this redistributor */ + } + + count = 1000000; /* 1s! */ + while (--count) { + val = readl_relaxed(rbase + GICR_WAKER); + if (enable ^ (bool)(val & GICR_WAKER_ChildrenAsleep)) + break; + cpu_relax(); + udelay(1); + }; + if (!count) + pr_err_ratelimited("CPU MPIDR 0x%lx: redistributor %d failed to %s...\n", mpidr, 64 + i, + enable ? "wakeup" : "sleep"); + + rbase = rbase + SZ_128K; // next redistributor + } + +} + +/* + * Routines to disable, enable, EOI and route interrupts + */ +static u32 convert_offset_index(struct irq_data *d, u32 offset, u32 *index) +{ + switch (get_intid_range(d)) { + case SGI_RANGE: + case PPI_RANGE: + case SPI_RANGE: + *index = d->hwirq; + return offset; + case EPPI_RANGE: + /* + * Contrary to the ESPI range, the EPPI range is contiguous + * to the PPI range in the registers, so let's adjust the + * displacement accordingly. Consistency is overrated. + */ + *index = d->hwirq - EPPI_BASE_INTID + 32; + return offset; + case ESPI_RANGE: + *index = d->hwirq - ESPI_BASE_INTID; + switch (offset) { + case GICD_ISENABLER: + return GICD_ISENABLERnE; + case GICD_ICENABLER: + return GICD_ICENABLERnE; + case GICD_ISPENDR: + return GICD_ISPENDRnE; + case GICD_ICPENDR: + return GICD_ICPENDRnE; + case GICD_ISACTIVER: + return GICD_ISACTIVERnE; + case GICD_ICACTIVER: + return GICD_ICACTIVERnE; + case GICD_IPRIORITYR: + return GICD_IPRIORITYRnE; + case GICD_ICFGR: + return GICD_ICFGRnE; + case GICD_IROUTER: + return GICD_IROUTERnE; + default: + break; + } + break; + default: + break; + } + + WARN_ON(1); + *index = d->hwirq; + return offset; +} + +static int gic_peek_irq(struct irq_data *d, u32 offset) +{ + void __iomem *base; + u32 index, mask; + + offset = convert_offset_index(d, offset, &index); + mask = 1 << (index % 32); + + if (gic_irq_in_rdist(d)) + base = gic_data_rdist_sgi_base(); + else { + unsigned int skt; + + skt = mars3_irq_to_skt(gic_irq(d)); + base = mars3_gic_dists[skt].dist_base; + } + + return !!(readl_relaxed(base + offset + (index / 32) * 4) & mask); +} + +static void gic_poke_irq(struct irq_data *d, u32 offset) +{ + void __iomem *base; + + unsigned long mpidr; + void __iomem *rbase; + int i; + unsigned int skt; + u32 index, mask; + + offset = convert_offset_index(d, offset, &index); + mask = 1 << (index % 32); + + if (gic_irq_in_rdist(d)) { + base = gic_data_rdist_sgi_base(); + + writel_relaxed(mask, base + offset + (index / 32) * 4); + gic_redist_wait_for_rwp(); + + mpidr = (unsigned long)cpu_logical_map(smp_processor_id()); + + if ((mpidr & 0xFFFF) == 0) { // both Aff1 and Aff0 are zero + rbase = base + 64*SZ_128K; // skip 64 Redistributors + + for (i = 0; i < 4; i++) { + writel_relaxed(mask, rbase + offset + (index / 32) * 4); + gic_do_wait_for_rwp(rbase - SZ_64K); // RD from SGI base + rbase = rbase + SZ_128K; + } + } // core 0 of each socket + } else { + skt = mars3_irq_to_skt(gic_irq(d)); + base = mars3_gic_dists[skt].dist_base; + writel_relaxed(mask, base + offset + (index / 32) * 4); + gic_do_wait_for_rwp(base); + } +} + +static void gic_mask_irq(struct irq_data *d) +{ + gic_poke_irq(d, GICD_ICENABLER); +} + +static void gic_eoimode1_mask_irq(struct irq_data *d) +{ + gic_mask_irq(d); + /* + * When masking a forwarded interrupt, make sure it is + * deactivated as well. + * + * This ensures that an interrupt that is getting + * disabled/masked will not get "stuck", because there is + * noone to deactivate it (guest is being terminated). + */ + if (irqd_is_forwarded_to_vcpu(d)) + gic_poke_irq(d, GICD_ICACTIVER); +} + +static void gic_unmask_irq(struct irq_data *d) +{ + gic_poke_irq(d, GICD_ISENABLER); +} + +static inline bool gic_supports_nmi_ft2500(void) +{ + return IS_ENABLED(CONFIG_ARM64_PSEUDO_NMI) && + static_branch_likely(&supports_pseudo_nmis_ft2500); +} + +static int gic_irq_set_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool val) +{ + u32 reg; + + if (d->hwirq >= 8192) /* SGI/PPI/SPI only */ + return -EINVAL; + + switch (which) { + case IRQCHIP_STATE_PENDING: + reg = val ? GICD_ISPENDR : GICD_ICPENDR; + break; + + case IRQCHIP_STATE_ACTIVE: + reg = val ? GICD_ISACTIVER : GICD_ICACTIVER; + break; + + case IRQCHIP_STATE_MASKED: + reg = val ? GICD_ICENABLER : GICD_ISENABLER; + break; + + default: + return -EINVAL; + } + + gic_poke_irq(d, reg); + return 0; +} + +static int gic_irq_get_irqchip_state(struct irq_data *d, + enum irqchip_irq_state which, bool *val) +{ + if (d->hwirq >= 8192) /* PPI/SPI only */ + return -EINVAL; + + switch (which) { + case IRQCHIP_STATE_PENDING: + *val = gic_peek_irq(d, GICD_ISPENDR); + break; + + case IRQCHIP_STATE_ACTIVE: + *val = gic_peek_irq(d, GICD_ISACTIVER); + break; + + case IRQCHIP_STATE_MASKED: + *val = !gic_peek_irq(d, GICD_ISENABLER); + break; + + default: + return -EINVAL; + } + + return 0; +} + +static void gic_irq_set_prio(struct irq_data *d, u8 prio) +{ + void __iomem *base = gic_dist_base(d); + u32 offset, index; + + offset = convert_offset_index(d, GICD_IPRIORITYR, &index); + + writeb_relaxed(prio, base + offset + index); +} + +static u32 gic_get_ppi_index(struct irq_data *d) +{ + switch (get_intid_range(d)) { + case PPI_RANGE: + return d->hwirq - 16; + case EPPI_RANGE: + return d->hwirq - EPPI_BASE_INTID + 16; + default: + unreachable(); + } +} + +static int gic_irq_nmi_setup(struct irq_data *d) +{ + struct irq_desc *desc = irq_to_desc(d->irq); + + if (!gic_supports_nmi_ft2500()) + return -EINVAL; + + if (gic_peek_irq(d, GICD_ISENABLER)) { + pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); + return -EINVAL; + } + + /* + * A secondary irq_chip should be in charge of LPI request, + * it should not be possible to get there + */ + if (WARN_ON(gic_irq(d) >= 8192)) + return -EINVAL; + + /* desc lock should already be held */ + if (gic_irq_in_rdist(d)) { + u32 idx = gic_get_ppi_index(d); + + /* Setting up PPI as NMI, only switch handler for first NMI */ + if (!refcount_inc_not_zero(&ppi_nmi_refs[idx])) { + refcount_set(&ppi_nmi_refs[idx], 1); + desc->handle_irq = handle_percpu_devid_fasteoi_nmi; + } + } else { + desc->handle_irq = handle_fasteoi_nmi; + } + + gic_irq_set_prio(d, GICD_INT_NMI_PRI); + + return 0; +} + +static void gic_irq_nmi_teardown(struct irq_data *d) +{ + struct irq_desc *desc = irq_to_desc(d->irq); + + if (WARN_ON(!gic_supports_nmi_ft2500())) + return; + + if (gic_peek_irq(d, GICD_ISENABLER)) { + pr_err("Cannot set NMI property of enabled IRQ %u\n", d->irq); + return; + } + + /* + * A secondary irq_chip should be in charge of LPI request, + * it should not be possible to get there + */ + if (WARN_ON(gic_irq(d) >= 8192)) + return; + + /* desc lock should already be held */ + if (gic_irq_in_rdist(d)) { + u32 idx = gic_get_ppi_index(d); + + /* Tearing down NMI, only switch handler for last NMI */ + if (refcount_dec_and_test(&ppi_nmi_refs[idx])) + desc->handle_irq = handle_percpu_devid_irq; + } else { + desc->handle_irq = handle_fasteoi_irq; + } + + gic_irq_set_prio(d, GICD_INT_DEF_PRI); +} + +static void gic_eoi_irq(struct irq_data *d) +{ + gic_write_eoir(gic_irq(d)); +} + +static void gic_eoimode1_eoi_irq(struct irq_data *d) +{ + /* + * No need to deactivate an LPI, or an interrupt that + * is is getting forwarded to a vcpu. + */ + if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d)) + return; + gic_write_dir(gic_irq(d)); +} + +static int gic_set_type(struct irq_data *d, unsigned int type) +{ + enum gic_intid_range range; + unsigned int irq = gic_irq(d); + void __iomem *base; + u32 offset, index; + int ret; + + unsigned long mpidr; + int i; + void __iomem *rbase; + unsigned int skt; + range = get_intid_range(d); + /* Interrupt configuration for SGIs can't be changed */ + if (range == SGI_RANGE) + return type != IRQ_TYPE_EDGE_RISING ? -EINVAL : 0; + + /* SPIs have restrictions on the supported types */ + if ((range == SPI_RANGE || range == ESPI_RANGE) && + type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING) + return -EINVAL; + + offset = convert_offset_index(d, GICD_ICFGR, &index); + + if (gic_irq_in_rdist(d)) { + base = gic_data_rdist_sgi_base(); + ret = gic_configure_irq(index, type, base + offset, gic_redist_wait_for_rwp); + + mpidr = (unsigned long)cpu_logical_map(smp_processor_id()); + + if ((mpidr & 0xFFFF) == 0) { // both Aff1 and Aff0 are zero + rbase = base + 64*SZ_128K; // skip 64 Redistributors + + for (i = 0; i < 4; i++) { + ret = gic_configure_irq(index, type, rbase + offset, NULL); + gic_do_wait_for_rwp(rbase - SZ_64K); + rbase = rbase + SZ_128K; + } + } + } else { + skt = mars3_irq_to_skt(gic_irq(d)); + base = mars3_gic_dists[skt].dist_base; + ret = gic_configure_irq(index, type, base + offset, NULL); + gic_do_wait_for_rwp(base); + } + + + if (ret && (range == PPI_RANGE || range == EPPI_RANGE)) { + /* Misconfigured PPIs are usually not fatal */ + pr_warn("GIC: PPI INTID%d is secure or misconfigured\n", irq); + ret = 0; + } + + return ret; +} + +static int gic_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu) +{ + if (get_intid_range(d) == SGI_RANGE) + return -EINVAL; + + if (vcpu) + irqd_set_forwarded_to_vcpu(d); + else + irqd_clr_forwarded_to_vcpu(d); + return 0; +} + +static u64 gic_mpidr_to_affinity(unsigned long mpidr) +{ + u64 aff; + + aff = ((u64)MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + return aff; +} + +static void gic_deactivate_unhandled(u32 irqnr) +{ + if (static_branch_likely(&supports_deactivate_key)) { + if (irqnr < 8192) + gic_write_dir(irqnr); + } else { + gic_write_eoir(irqnr); + } +} + +static inline void gic_handle_nmi(u32 irqnr, struct pt_regs *regs) +{ + bool irqs_enabled = interrupts_enabled(regs); + int err; + + if (irqs_enabled) + nmi_enter(); + + if (static_branch_likely(&supports_deactivate_key)) + gic_write_eoir(irqnr); + /* + * Leave the PSR.I bit set to prevent other NMIs to be + * received while handling this one. + * PSR.I will be restored when we ERET to the + * interrupted context. + */ + err = handle_domain_nmi(gic_data.domain, irqnr, regs); + if (err) + gic_deactivate_unhandled(irqnr); + + if (irqs_enabled) + nmi_exit(); +} + +static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ + u32 irqnr; + + irqnr = gic_read_iar(); + + if (gic_supports_nmi_ft2500() && + unlikely(gic_read_rpr() == GICD_INT_NMI_PRI)) { + gic_handle_nmi(irqnr, regs); + return; + } + + if (gic_prio_masking_enabled()) { + gic_pmr_mask_irqs(); + gic_arch_enable_irqs(); + } + + /* Check for special IDs first */ + if ((irqnr >= 1020 && irqnr <= 1023)) + return; + + if (static_branch_likely(&supports_deactivate_key)) + gic_write_eoir(irqnr); + else + isb(); + + if (handle_domain_irq(gic_data.domain, irqnr, regs)) { + WARN_ONCE(true, "Unexpected interrupt received!\n"); + gic_deactivate_unhandled(irqnr); + } +} + +static u32 gic_get_pribits(void) +{ + u32 pribits; + + pribits = gic_read_ctlr(); + pribits &= ICC_CTLR_EL1_PRI_BITS_MASK; + pribits >>= ICC_CTLR_EL1_PRI_BITS_SHIFT; + pribits++; + + return pribits; +} + +static bool gic_has_group0(void) +{ + u32 val; + u32 old_pmr; + + old_pmr = gic_read_pmr(); + + /* + * Let's find out if Group0 is under control of EL3 or not by + * setting the highest possible, non-zero priority in PMR. + * + * If SCR_EL3.FIQ is set, the priority gets shifted down in + * order for the CPU interface to set bit 7, and keep the + * actual priority in the non-secure range. In the process, it + * looses the least significant bit and the actual priority + * becomes 0x80. Reading it back returns 0, indicating that + * we're don't have access to Group0. + */ + gic_write_pmr(BIT(8 - gic_get_pribits())); + val = gic_read_pmr(); + + gic_write_pmr(old_pmr); + + return val != 0; +} + +static void __init gic_dist_init(void) +{ + unsigned int i; + u64 affinity; + void __iomem *base = gic_data.dist_base; + u32 val; + + unsigned int skt; + + for (skt = 0; skt < MAX_MARS3_SOC_COUNT; skt++) { + + if ((((unsigned int)1 << skt) & mars3_sockets_bitmap) == 0) + continue; + + base = mars3_gic_dists[skt].dist_base; + + /* Disable the distributor */ + writel_relaxed(0, base + GICD_CTLR); + gic_do_wait_for_rwp(base); + + /* + * Configure SPIs as non-secure Group-1. This will only matter + * if the GIC only has a single security state. This will not + * do the right thing if the kernel is running in secure mode, + * but that's not the intended use case anyway. + */ + for (i = 32; i < GIC_LINE_NR; i += 32) + writel_relaxed(~0, base + GICD_IGROUPR + i / 8); + + /* Extended SPI range, not handled by the GICv2/GICv3 common code */ + for (i = 0; i < GIC_ESPI_NR; i += 32) { + writel_relaxed(~0U, base + GICD_ICENABLERnE + i / 8); + writel_relaxed(~0U, base + GICD_ICACTIVERnE + i / 8); + } + + for (i = 0; i < GIC_ESPI_NR; i += 32) + writel_relaxed(~0U, base + GICD_IGROUPRnE + i / 8); + + for (i = 0; i < GIC_ESPI_NR; i += 16) + writel_relaxed(0, base + GICD_ICFGRnE + i / 4); + + for (i = 0; i < GIC_ESPI_NR; i += 4) + writel_relaxed(GICD_INT_DEF_PRI_X4, base + GICD_IPRIORITYRnE + i); + + /* Now do the common stuff, and wait for the distributor to drain */ + gic_dist_config(base, GIC_LINE_NR, NULL); + gic_do_wait_for_rwp(base); // do sync outside of gic_dist_config + + val = GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1; + if (gic_data.rdists.gicd_typer2 & GICD_TYPER2_nASSGIcap) { + pr_info("Enabling SGIs without active state\n"); + val |= GICD_CTLR_nASSGIreq; + } + + /* Enable distributor with ARE, Group1 */ + writel_relaxed(val, base + GICD_CTLR); + + /* + * Set all global interrupts to the boot CPU only. ARE must be + * enabled. + */ + affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id())); + for (i = 32; i < GIC_LINE_NR; i++) + gic_write_irouter(affinity, base + GICD_IROUTER + i * 8); + + for (i = 0; i < GIC_ESPI_NR; i++) + gic_write_irouter(affinity, base + GICD_IROUTERnE + i * 8); + } +} + +static int gic_iterate_rdists(int (*fn)(struct redist_region *, void __iomem *)) +{ + int ret = -ENODEV; + int i; + + for (i = 0; i < gic_data.nr_redist_regions; i++) { + void __iomem *ptr = gic_data.redist_regions[i].redist_base; + u64 typer; + u32 reg; + + reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (reg != GIC_PIDR2_ARCH_GICv3 && + reg != GIC_PIDR2_ARCH_GICv4) { /* We're in trouble... */ + pr_warn("No redistributor present @%p\n", ptr); + break; + } + + do { + typer = gic_read_typer(ptr + GICR_TYPER); + ret = fn(gic_data.redist_regions + i, ptr); + if (!ret) + return 0; + + if (gic_data.redist_regions[i].single_redist) + break; + + if (gic_data.redist_stride) { + ptr += gic_data.redist_stride; + } else { + ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */ + if (typer & GICR_TYPER_VLPIS) + ptr += SZ_64K * 2; /* Skip VLPI_base + reserved page */ + } + } while (!(typer & GICR_TYPER_LAST)); + } + + return ret ? -ENODEV : 0; +} + +static int __gic_populate_rdist(struct redist_region *region, void __iomem *ptr) +{ + unsigned long mpidr = cpu_logical_map(smp_processor_id()); + u64 typer; + u32 aff; + u32 aff2_skt; + u32 redist_skt; + + /* + * Convert affinity to a 32bit value that can be matched to + * GICR_TYPER bits [63:32]. + */ + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + aff2_skt = MPIDR_AFFINITY_LEVEL(mpidr, 2) & 0x7; + redist_skt = (((u64)region->phys_base >> MARS3_ADDR_SKTID_SHIFT) & 0x7); + + if (aff2_skt != redist_skt) { + return 1; + } + + typer = gic_read_typer(ptr + GICR_TYPER); + if ((typer >> 32) == aff) { + u64 offset = ptr - region->redist_base; + raw_spin_lock_init(&gic_data_rdist()->rd_lock); + gic_data_rdist_rd_base() = ptr; + gic_data_rdist()->phys_base = region->phys_base + offset; + + pr_info("CPU%d: found redistributor %lx region %d:%pa\n", + smp_processor_id(), mpidr, + (int)(region - gic_data.redist_regions), + &gic_data_rdist()->phys_base); + return 0; + } + + /* Try next one */ + return 1; +} + +static int gic_populate_rdist(void) +{ + if (gic_iterate_rdists(__gic_populate_rdist) == 0) + return 0; + + /* We couldn't even deal with ourselves... */ + WARN(true, "CPU%d: mpidr %lx has no re-distributor!\n", + smp_processor_id(), + (unsigned long)cpu_logical_map(smp_processor_id())); + return -ENODEV; +} + +static int __gic_update_rdist_properties(struct redist_region *region, + void __iomem *ptr) +{ + u64 typer = gic_read_typer(ptr + GICR_TYPER); + + gic_data.rdists.has_vlpis &= !!(typer & GICR_TYPER_VLPIS); + + /* RVPEID implies some form of DirectLPI, no matter what the doc says... :-/ */ + gic_data.rdists.has_rvpeid &= !!(typer & GICR_TYPER_RVPEID); + gic_data.rdists.has_direct_lpi &= (!!(typer & GICR_TYPER_DirectLPIS) | + gic_data.rdists.has_rvpeid); + gic_data.rdists.has_vpend_valid_dirty &= !!(typer & GICR_TYPER_DIRTY); + + /* Detect non-sensical configurations */ + if (WARN_ON_ONCE(gic_data.rdists.has_rvpeid && !gic_data.rdists.has_vlpis)) { + gic_data.rdists.has_direct_lpi = false; + gic_data.rdists.has_vlpis = false; + gic_data.rdists.has_rvpeid = false; + } + + gic_data.ppi_nr = min(GICR_TYPER_NR_PPIS(typer), gic_data.ppi_nr); + + return 1; +} + +static void gic_update_rdist_properties(void) +{ + gic_data.ppi_nr = UINT_MAX; + gic_iterate_rdists(__gic_update_rdist_properties); + if (WARN_ON(gic_data.ppi_nr == UINT_MAX)) + gic_data.ppi_nr = 0; + pr_info("%d PPIs implemented\n", gic_data.ppi_nr); + if (gic_data.rdists.has_vlpis) + pr_info("GICv4 features: %s%s%s\n", + gic_data.rdists.has_direct_lpi ? "DirectLPI " : "", + gic_data.rdists.has_rvpeid ? "RVPEID " : "", + gic_data.rdists.has_vpend_valid_dirty ? "Valid+Dirty " : ""); +} + +/* Check whether it's single security state view */ +static inline bool gic_dist_security_disabled(void) +{ + return readl_relaxed(gic_data.dist_base + GICD_CTLR) & GICD_CTLR_DS; +} + +static void gic_cpu_sys_reg_init(void) +{ + int i, cpu = smp_processor_id(); + u64 mpidr = cpu_logical_map(cpu); + u64 need_rss = MPIDR_RS(mpidr); + bool group0; + u32 pribits; + + /* + * Need to check that the SRE bit has actually been set. If + * not, it means that SRE is disabled at EL2. We're going to + * die painfully, and there is nothing we can do about it. + * + * Kindly inform the luser. + */ + if (!gic_enable_sre()) + pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); + + pribits = gic_get_pribits(); + + group0 = gic_has_group0(); + + /* Set priority mask register */ + if (!gic_prio_masking_enabled()) { + write_gicreg(DEFAULT_PMR_VALUE, ICC_PMR_EL1); + } else if (gic_supports_nmi_ft2500()) { + /* + * Mismatch configuration with boot CPU, the system is likely + * to die as interrupt masking will not work properly on all + * CPUs + * + * The boot CPU calls this function before enabling NMI support, + * and as a result we'll never see this warning in the boot path + * for that CPU. + */ + if (static_branch_unlikely(&gic_nonsecure_priorities)) + WARN_ON(!group0 || gic_dist_security_disabled()); + else + WARN_ON(group0 && !gic_dist_security_disabled()); + } + + /* + * Some firmwares hand over to the kernel with the BPR changed from + * its reset value (and with a value large enough to prevent + * any pre-emptive interrupts from working at all). Writing a zero + * to BPR restores is reset value. + */ + gic_write_bpr1(0); + + if (static_branch_likely(&supports_deactivate_key)) { + /* EOI drops priority only (mode 1) */ + gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop); + } else { + /* EOI deactivates interrupt too (mode 0) */ + gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir); + } + + /* Always whack Group0 before Group1 */ + if (group0) { + switch (pribits) { + case 8: + case 7: + write_gicreg(0, ICC_AP0R3_EL1); + write_gicreg(0, ICC_AP0R2_EL1); + fallthrough; + case 6: + write_gicreg(0, ICC_AP0R1_EL1); + fallthrough; + case 5: + case 4: + write_gicreg(0, ICC_AP0R0_EL1); + } + + isb(); + } + + switch (pribits) { + case 8: + case 7: + write_gicreg(0, ICC_AP1R3_EL1); + write_gicreg(0, ICC_AP1R2_EL1); + fallthrough; + case 6: + write_gicreg(0, ICC_AP1R1_EL1); + fallthrough; + case 5: + case 4: + write_gicreg(0, ICC_AP1R0_EL1); + } + + isb(); + + /* ... and let's hit the road... */ + gic_write_grpen1(1); + + /* Keep the RSS capability status in per_cpu variable */ + per_cpu(has_rss_ft2500, cpu) = !!(gic_read_ctlr() & ICC_CTLR_EL1_RSS); + + /* Check all the CPUs have capable of sending SGIs to other CPUs */ + for_each_online_cpu(i) { + bool have_rss = per_cpu(has_rss_ft2500, i) && per_cpu(has_rss_ft2500, cpu); + + need_rss |= MPIDR_RS(cpu_logical_map(i)); + if (need_rss && (!have_rss)) + pr_crit("CPU%d (%lx) can't SGI CPU%d (%lx), no RSS\n", + cpu, (unsigned long)mpidr, + i, (unsigned long)cpu_logical_map(i)); + } + + /** + * GIC spec says, when ICC_CTLR_EL1.RSS==1 and GICD_TYPER.RSS==0, + * writing ICC_ASGI1R_EL1 register with RS != 0 is a CONSTRAINED + * UNPREDICTABLE choice of : + * - The write is ignored. + * - The RS field is treated as 0. + */ + if (need_rss && (!gic_data.has_rss)) + pr_crit_once("RSS is required but GICD doesn't support it\n"); +} + +static bool gicv3_nolpi; + +static int __init gicv3_nolpi_cfg(char *buf) +{ + return strtobool(buf, &gicv3_nolpi); +} +early_param("irqchip.gicv3_nolpi", gicv3_nolpi_cfg); + +static int gic_dist_supports_lpis(void) +{ + return (IS_ENABLED(CONFIG_ARM_GIC_V3_ITS) && + !!(readl_relaxed(gic_data.dist_base + GICD_TYPER) & GICD_TYPER_LPIS) && + !gicv3_nolpi); +} + +static void gic_cpu_init(void) +{ + void __iomem *rbase; + int i; + unsigned long mpidr; + + /* Register ourselves with the rest of the world */ + if (gic_populate_rdist()) + return; + + gic_enable_redist(true); + + WARN((gic_data.ppi_nr > 16 || GIC_ESPI_NR != 0) && + !(gic_read_ctlr() & ICC_CTLR_EL1_ExtRange), + "Distributor has extended ranges, but CPU%d doesn't\n", + smp_processor_id()); + + rbase = gic_data_rdist_sgi_base(); + + /* Configure SGIs/PPIs as non-secure Group-1 */ + for (i = 0; i < gic_data.ppi_nr + 16; i += 32) + writel_relaxed(~0, rbase + GICR_IGROUPR0 + i / 8); + + gic_cpu_config(rbase, gic_data.ppi_nr + 16, gic_redist_wait_for_rwp); + + mpidr = (unsigned long)cpu_logical_map(smp_processor_id()); + + if ((mpidr & 0xFFFF) == 0) { // both Aff1 and Aff0 is zero + rbase = rbase + 64*SZ_128K; // skip 64 Redistributors + + for (i = 0; i < 4; i++) { + /* Configure SGIs/PPIs as non-secure Group-1 */ + writel_relaxed(~0, rbase + GICR_IGROUPR0); + + gic_cpu_config(rbase, gic_data.ppi_nr + 16, NULL); + gic_do_wait_for_rwp(rbase - SZ_64K); + + rbase = rbase + SZ_128K; + + } + + } + + /* initialise system registers */ + gic_cpu_sys_reg_init(); +} + +#ifdef CONFIG_SMP + +#define MPIDR_TO_SGI_RS(mpidr) (MPIDR_RS(mpidr) << ICC_SGI1R_RS_SHIFT) +#define MPIDR_TO_SGI_CLUSTER_ID(mpidr) ((mpidr) & ~0xFUL) + +static int gic_starting_cpu(unsigned int cpu) +{ + gic_cpu_init(); + + if (gic_dist_supports_lpis()) + phytium_its_cpu_init(); + + return 0; +} + +static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, + unsigned long cluster_id) +{ + int next_cpu, cpu = *base_cpu; + unsigned long mpidr = cpu_logical_map(cpu); + u16 tlist = 0; + + while (cpu < nr_cpu_ids) { + tlist |= 1 << (mpidr & 0xf); + + next_cpu = cpumask_next(cpu, mask); + if (next_cpu >= nr_cpu_ids) + goto out; + cpu = next_cpu; + + mpidr = cpu_logical_map(cpu); + + if (cluster_id != MPIDR_TO_SGI_CLUSTER_ID(mpidr)) { + cpu--; + goto out; + } + } +out: + *base_cpu = cpu; + return tlist; +} + +#define MPIDR_TO_SGI_AFFINITY(cluster_id, level) \ + (MPIDR_AFFINITY_LEVEL(cluster_id, level) \ + << ICC_SGI1R_AFFINITY_## level ##_SHIFT) + +static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq) +{ + u64 val; + + val = (MPIDR_TO_SGI_AFFINITY(cluster_id, 3) | + MPIDR_TO_SGI_AFFINITY(cluster_id, 2) | + irq << ICC_SGI1R_SGI_ID_SHIFT | + MPIDR_TO_SGI_AFFINITY(cluster_id, 1) | + MPIDR_TO_SGI_RS(cluster_id) | + tlist << ICC_SGI1R_TARGET_LIST_SHIFT); + + pr_devel("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val); + gic_write_sgi1r(val); +} + +static void gic_ipi_send_mask(struct irq_data *d, const struct cpumask *mask) +{ + int cpu; + + if (WARN_ON(d->hwirq >= 16)) + return; + + /* + * Ensure that stores to Normal memory are visible to the + * other CPUs before issuing the IPI. + */ + wmb(); + + for_each_cpu(cpu, mask) { + u64 cluster_id = MPIDR_TO_SGI_CLUSTER_ID(cpu_logical_map(cpu)); + u16 tlist; + + tlist = gic_compute_target_list(&cpu, mask, cluster_id); + gic_send_sgi(cluster_id, tlist, d->hwirq); + } + + /* Force the above writes to ICC_SGI1R_EL1 to be executed */ + isb(); +} + +static void __init gic_smp_init(void) +{ + struct irq_fwspec sgi_fwspec = { + .fwnode = gic_data.fwnode, + .param_count = 1, + }; + int base_sgi; + + cpuhp_setup_state_nocalls(CPUHP_AP_IRQ_GIC_STARTING, + "irqchip/arm/gicv3:starting", + gic_starting_cpu, NULL); + + /* Register all 8 non-secure SGIs */ + base_sgi = __irq_domain_alloc_irqs(gic_data.domain, -1, 8, + NUMA_NO_NODE, &sgi_fwspec, + false, NULL); + if (WARN_ON(base_sgi <= 0)) + return; + + set_smp_ipi_range(base_sgi, 8); +} + +static int gic_cpumask_select(struct irq_data *d, const struct cpumask *mask_val) +{ + unsigned int skt, irq_skt, i; + unsigned int cpu, cpus = 0; + + unsigned int skt_cpu_cnt[MAX_MARS3_SOC_COUNT] = {0}; + + for (i = 0; i < nr_cpu_ids; i++) { + skt = (cpu_logical_map(i) >> 16) & 0xff; + if ((skt >= 0) && (skt < MAX_MARS3_SOC_COUNT)) { + skt_cpu_cnt[skt]++; + } else if (0xff != skt) { + pr_err("socket address: %d is out of range.", skt); + } + } + + irq_skt = mars3_irq_to_skt(gic_irq(d)); + + if (0 != irq_skt) { + for (i = 0; i < irq_skt; i++) { + cpus += skt_cpu_cnt[i]; + } + } + + cpu = cpumask_any_and(mask_val, cpu_online_mask); + if ((cpu > cpus) && (cpu < (cpus + skt_cpu_cnt[irq_skt]))) { + cpus = cpu; + } + + return cpus; +} + +static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val, + bool force) +{ + unsigned int cpu; + u32 offset, index; + void __iomem *reg; + int enabled; + u64 val; + unsigned int skt; + + if (force) + cpu = cpumask_first(mask_val); + else + cpu = gic_cpumask_select(d, mask_val); + + if (cpu >= nr_cpu_ids) + return -EINVAL; + + if (gic_irq_in_rdist(d)) + return -EINVAL; + + /* If interrupt was enabled, disable it first */ + enabled = gic_peek_irq(d, GICD_ISENABLER); + if (enabled) + gic_mask_irq(d); + + offset = convert_offset_index(d, GICD_IROUTER, &index); + + skt = mars3_irq_to_skt(gic_irq(d)); + reg = mars3_gic_dists[skt].dist_base + offset + GICD_IROUTER + (index * 8); + val = gic_mpidr_to_affinity(cpu_logical_map(cpu)); + + gic_write_irouter(val, reg); + + /* + * If the interrupt was enabled, enabled it again. Otherwise, + * just wait for the distributor to have digested our changes. + */ + if (enabled) + gic_unmask_irq(d); + else + gic_dist_wait_for_rwp(); + + irq_data_update_effective_affinity(d, cpumask_of(cpu)); + + return IRQ_SET_MASK_OK_DONE; +} +#else +#define gic_set_affinity NULL +#define gic_ipi_send_mask NULL +#define gic_smp_init() do { } while (0) +#endif + +static int gic_retrigger(struct irq_data *data) +{ + return !gic_irq_set_irqchip_state(data, IRQCHIP_STATE_PENDING, true); +} + +#ifdef CONFIG_CPU_PM +static int gic_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + if (cmd == CPU_PM_EXIT) { + if (gic_dist_security_disabled()) + gic_enable_redist(true); + gic_cpu_sys_reg_init(); + } else if (cmd == CPU_PM_ENTER && gic_dist_security_disabled()) { + gic_write_grpen1(0); + gic_enable_redist(false); + } + return NOTIFY_OK; +} + +static struct notifier_block gic_cpu_pm_notifier_block = { + .notifier_call = gic_cpu_pm_notifier, +}; + +static void gic_cpu_pm_init(void) +{ + cpu_pm_register_notifier(&gic_cpu_pm_notifier_block); +} + +#else +static inline void gic_cpu_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + +static struct irq_chip gic_chip = { + .name = "GIC-phytium-2500", + .irq_mask = gic_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, + .irq_set_affinity = gic_set_affinity, + .irq_retrigger = gic_retrigger, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_nmi_setup = gic_irq_nmi_setup, + .irq_nmi_teardown = gic_irq_nmi_teardown, + .ipi_send_mask = gic_ipi_send_mask, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static struct irq_chip gic_eoimode1_chip = { + .name = "GICv3-phytium-2500", + .irq_mask = gic_eoimode1_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoimode1_eoi_irq, + .irq_set_type = gic_set_type, + .irq_set_affinity = gic_set_affinity, + .irq_retrigger = gic_retrigger, + .irq_get_irqchip_state = gic_irq_get_irqchip_state, + .irq_set_irqchip_state = gic_irq_set_irqchip_state, + .irq_set_vcpu_affinity = gic_irq_set_vcpu_affinity, + .irq_nmi_setup = gic_irq_nmi_setup, + .irq_nmi_teardown = gic_irq_nmi_teardown, + .ipi_send_mask = gic_ipi_send_mask, + .flags = IRQCHIP_SET_TYPE_MASKED | + IRQCHIP_SKIP_SET_WAKE | + IRQCHIP_MASK_ON_SUSPEND, +}; + +static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + struct irq_chip *chip = &gic_chip; + struct irq_data *irqd = irq_desc_get_irq_data(irq_to_desc(irq)); + + if (static_branch_likely(&supports_deactivate_key)) + chip = &gic_eoimode1_chip; + + switch (__get_intid_range(hw)) { + case SGI_RANGE: + irq_set_percpu_devid(irq); + irq_domain_set_info(d, irq, hw, chip, d->host_data, + handle_percpu_devid_fasteoi_ipi, + NULL, NULL); + break; + + case PPI_RANGE: + case EPPI_RANGE: + irq_set_percpu_devid(irq); + irq_domain_set_info(d, irq, hw, chip, d->host_data, + handle_percpu_devid_irq, NULL, NULL); + break; + + case SPI_RANGE: + case ESPI_RANGE: + irq_domain_set_info(d, irq, hw, chip, d->host_data, + handle_fasteoi_irq, NULL, NULL); + irq_set_probe(irq); + irqd_set_single_target(irqd); + break; + + case LPI_RANGE: + if (!gic_dist_supports_lpis()) + return -EPERM; + irq_domain_set_info(d, irq, hw, chip, d->host_data, + handle_fasteoi_irq, NULL, NULL); + break; + + default: + return -EPERM; + } + + /* Prevents SW retriggers which mess up the ACK/EOI ordering */ + irqd_set_handle_enforce_irqctx(irqd); + return 0; +} + +static int gic_irq_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) +{ + if (fwspec->param_count == 1 && fwspec->param[0] < 16) { + *hwirq = fwspec->param[0]; + *type = IRQ_TYPE_EDGE_RISING; + return 0; + } + + if (is_of_node(fwspec->fwnode)) { + if (fwspec->param_count < 3) + return -EINVAL; + + switch (fwspec->param[0]) { + case 0: /* SPI */ + *hwirq = fwspec->param[1] + 32; + break; + case 1: /* PPI */ + *hwirq = fwspec->param[1] + 16; + break; + case 2: /* ESPI */ + *hwirq = fwspec->param[1] + ESPI_BASE_INTID; + break; + case 3: /* EPPI */ + *hwirq = fwspec->param[1] + EPPI_BASE_INTID; + break; + case GIC_IRQ_TYPE_LPI: /* LPI */ + *hwirq = fwspec->param[1]; + break; + case GIC_IRQ_TYPE_PARTITION: + *hwirq = fwspec->param[1]; + if (fwspec->param[1] >= 16) + *hwirq += EPPI_BASE_INTID - 16; + else + *hwirq += 16; + break; + default: + return -EINVAL; + } + + *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + + /* + * Make it clear that broken DTs are... broken. + * Partitionned PPIs are an unfortunate exception. + */ + WARN_ON(*type == IRQ_TYPE_NONE && + fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); + return 0; + } + + if (is_fwnode_irqchip(fwspec->fwnode)) { + if (fwspec->param_count != 2) + return -EINVAL; + + *hwirq = fwspec->param[0]; + *type = fwspec->param[1]; + + WARN_ON(*type == IRQ_TYPE_NONE); + return 0; + } + + return -EINVAL; +} + +static int gic_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + int i, ret; + irq_hw_number_t hwirq; + unsigned int type = IRQ_TYPE_NONE; + struct irq_fwspec *fwspec = arg; + + ret = gic_irq_domain_translate(domain, fwspec, &hwirq, &type); + if (ret) + return ret; + + for (i = 0; i < nr_irqs; i++) { + ret = gic_irq_domain_map(domain, virq + i, hwirq + i); + if (ret) + return ret; + } + + return 0; +} + +static void gic_irq_domain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) +{ + int i; + + for (i = 0; i < nr_irqs; i++) { + struct irq_data *d = irq_domain_get_irq_data(domain, virq + i); + irq_set_handler(virq + i, NULL); + irq_domain_reset_irq_data(d); + } +} + +static int gic_irq_domain_select(struct irq_domain *d, + struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + /* Not for us */ + if (fwspec->fwnode != d->fwnode) + return 0; + + /* If this is not DT, then we have a single domain */ + if (!is_of_node(fwspec->fwnode)) + return 1; + + /* + * If this is a PPI and we have a 4th (non-null) parameter, + * then we need to match the partition domain. + */ + if (fwspec->param_count >= 4 && + fwspec->param[0] == 1 && fwspec->param[3] != 0 && + gic_data.ppi_descs) + return d == partition_get_domain(gic_data.ppi_descs[fwspec->param[1]]); + + return d == gic_data.domain; +} + +static const struct irq_domain_ops gic_irq_domain_ops = { + .translate = gic_irq_domain_translate, + .alloc = gic_irq_domain_alloc, + .free = gic_irq_domain_free, + .select = gic_irq_domain_select, +}; + +static int partition_domain_translate(struct irq_domain *d, + struct irq_fwspec *fwspec, + unsigned long *hwirq, + unsigned int *type) +{ + struct device_node *np; + int ret; + + if (!gic_data.ppi_descs) + return -ENOMEM; + + np = of_find_node_by_phandle(fwspec->param[3]); + if (WARN_ON(!np)) + return -EINVAL; + + ret = partition_translate_id(gic_data.ppi_descs[fwspec->param[1]], + of_node_to_fwnode(np)); + if (ret < 0) + return ret; + + *hwirq = ret; + *type = fwspec->param[2] & IRQ_TYPE_SENSE_MASK; + + return 0; +} + +static const struct irq_domain_ops partition_domain_ops = { + .translate = partition_domain_translate, + .select = gic_irq_domain_select, +}; + +static bool gic_enable_quirk_msm8996(void *data) +{ + struct gic_chip_data *d = data; + + d->flags |= FLAGS_WORKAROUND_GICR_WAKER_MSM8996; + + return true; +} + +static bool gic_enable_quirk_cavium_38539(void *data) +{ + struct gic_chip_data *d = data; + + d->flags |= FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539; + + return true; +} + +static bool gic_enable_quirk_hip06_07(void *data) +{ + struct gic_chip_data *d = data; + + /* + * HIP06 GICD_IIDR clashes with GIC-600 product number (despite + * not being an actual ARM implementation). The saving grace is + * that GIC-600 doesn't have ESPI, so nothing to do in that case. + * HIP07 doesn't even have a proper IIDR, and still pretends to + * have ESPI. In both cases, put them right. + */ + if (d->rdists.gicd_typer & GICD_TYPER_ESPI) { + /* Zero both ESPI and the RES0 field next to it... */ + d->rdists.gicd_typer &= ~GENMASK(9, 8); + return true; + } + + return false; +} + +static const struct gic_quirk gic_quirks[] = { + { + .desc = "GICv3: Qualcomm MSM8996 broken firmware", + .compatible = "qcom,msm8996-gic-v3", + .init = gic_enable_quirk_msm8996, + }, + { + .desc = "GICv3: HIP06 erratum 161010803", + .iidr = 0x0204043b, + .mask = 0xffffffff, + .init = gic_enable_quirk_hip06_07, + }, + { + .desc = "GICv3: HIP07 erratum 161010803", + .iidr = 0x00000000, + .mask = 0xffffffff, + .init = gic_enable_quirk_hip06_07, + }, + { + /* + * Reserved register accesses generate a Synchronous + * External Abort. This erratum applies to: + * - ThunderX: CN88xx + * - OCTEON TX: CN83xx, CN81xx + * - OCTEON TX2: CN93xx, CN96xx, CN98xx, CNF95xx* + */ + .desc = "GICv3: Cavium erratum 38539", + .iidr = 0xa000034c, + .mask = 0xe8f00fff, + .init = gic_enable_quirk_cavium_38539, + }, + { + } +}; + +static void gic_enable_nmi_support(void) +{ + int i; + + if (!gic_prio_masking_enabled()) + return; + + ppi_nmi_refs = kcalloc(gic_data.ppi_nr, sizeof(*ppi_nmi_refs), GFP_KERNEL); + if (!ppi_nmi_refs) + return; + + for (i = 0; i < gic_data.ppi_nr; i++) + refcount_set(&ppi_nmi_refs[i], 0); + + /* + * Linux itself doesn't use 1:N distribution, so has no need to + * set PMHE. The only reason to have it set is if EL3 requires it + * (and we can't change it). + */ + if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) + static_branch_enable(&gic_pmr_sync); + + pr_info("Pseudo-NMIs enabled using %s ICC_PMR_EL1 synchronisation\n", + static_branch_unlikely(&gic_pmr_sync) ? "forced" : "relaxed"); + + /* + * How priority values are used by the GIC depends on two things: + * the security state of the GIC (controlled by the GICD_CTRL.DS bit) + * and if Group 0 interrupts can be delivered to Linux in the non-secure + * world as FIQs (controlled by the SCR_EL3.FIQ bit). These affect the + * the ICC_PMR_EL1 register and the priority that software assigns to + * interrupts: + * + * GICD_CTRL.DS | SCR_EL3.FIQ | ICC_PMR_EL1 | Group 1 priority + * ----------------------------------------------------------- + * 1 | - | unchanged | unchanged + * ----------------------------------------------------------- + * 0 | 1 | non-secure | non-secure + * ----------------------------------------------------------- + * 0 | 0 | unchanged | non-secure + * + * where non-secure means that the value is right-shifted by one and the + * MSB bit set, to make it fit in the non-secure priority range. + * + * In the first two cases, where ICC_PMR_EL1 and the interrupt priority + * are both either modified or unchanged, we can use the same set of + * priorities. + * + * In the last case, where only the interrupt priorities are modified to + * be in the non-secure range, we use a different PMR value to mask IRQs + * and the rest of the values that we use remain unchanged. + */ + if (gic_has_group0() && !gic_dist_security_disabled()) + static_branch_enable(&gic_nonsecure_priorities); + + static_branch_enable(&supports_pseudo_nmis_ft2500); + + if (static_branch_likely(&supports_deactivate_key)) + gic_eoimode1_chip.flags |= IRQCHIP_SUPPORTS_NMI; + else + gic_chip.flags |= IRQCHIP_SUPPORTS_NMI; +} + +static int __init gic_init_bases(void __iomem *dist_base, + struct redist_region *rdist_regs, + u32 nr_redist_regions, + u64 redist_stride, + struct fwnode_handle *handle) +{ + u32 typer; + int err; + + if (!is_hyp_mode_available()) + static_branch_disable(&supports_deactivate_key); + + if (static_branch_likely(&supports_deactivate_key)) + pr_info("GIC: Using split EOI/Deactivate mode\n"); + + gic_data.fwnode = handle; + gic_data.dist_base = dist_base; + gic_data.redist_regions = rdist_regs; + gic_data.nr_redist_regions = nr_redist_regions; + gic_data.redist_stride = redist_stride; + + /* + * Find out how many interrupts are supported. + */ + typer = readl_relaxed(gic_data.dist_base + GICD_TYPER); + gic_data.rdists.gicd_typer = typer; + + gic_enable_quirks(readl_relaxed(gic_data.dist_base + GICD_IIDR), + gic_quirks, &gic_data); + + pr_info("%d SPIs implemented\n", GIC_LINE_NR - 32); + pr_info("%d Extended SPIs implemented\n", GIC_ESPI_NR); + + /* + * ThunderX1 explodes on reading GICD_TYPER2, in violation of the + * architecture spec (which says that reserved registers are RES0). + */ + if (!(gic_data.flags & FLAGS_WORKAROUND_CAVIUM_ERRATUM_38539)) + gic_data.rdists.gicd_typer2 = readl_relaxed(gic_data.dist_base + GICD_TYPER2); + + gic_data.domain = irq_domain_create_tree(handle, &gic_irq_domain_ops, + &gic_data); + gic_data.rdists.rdist = alloc_percpu(typeof(*gic_data.rdists.rdist)); + gic_data.rdists.has_rvpeid = true; + gic_data.rdists.has_vlpis = true; + gic_data.rdists.has_direct_lpi = true; + gic_data.rdists.has_vpend_valid_dirty = true; + + if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdists.rdist)) { + err = -ENOMEM; + goto out_free; + } + + irq_domain_update_bus_token(gic_data.domain, DOMAIN_BUS_WIRED); + + gic_data.has_rss = !!(typer & GICD_TYPER_RSS); + pr_info("Distributor has %sRange Selector support\n", + gic_data.has_rss ? "" : "no "); + + if (typer & GICD_TYPER_MBIS) { + err = mbi_init(handle, gic_data.domain); + if (err) + pr_err("Failed to initialize MBIs\n"); + } + + set_handle_irq(gic_handle_irq); + + gic_update_rdist_properties(); + + gic_dist_init(); + gic_cpu_init(); + gic_smp_init(); + gic_cpu_pm_init(); + + if (gic_dist_supports_lpis()) { + phytium_its_init(handle, &gic_data.rdists, gic_data.domain); + phytium_its_cpu_init(); + } else { + if (IS_ENABLED(CONFIG_ARM_GIC_V2M)) + gicv2m_init(handle, gic_data.domain); + } + + gic_enable_nmi_support(); + + return 0; + +out_free: + if (gic_data.domain) + irq_domain_remove(gic_data.domain); + free_percpu(gic_data.rdists.rdist); + return err; +} + +static int __init gic_validate_dist_version(void __iomem *dist_base) +{ + u32 reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; + + if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) + return -ENODEV; + + return 0; +} + +/* Create all possible partitions at boot time */ +static void __init gic_populate_ppi_partitions(struct device_node *gic_node) +{ + struct device_node *parts_node, *child_part; + int part_idx = 0, i; + int nr_parts; + struct partition_affinity *parts; + + parts_node = of_get_child_by_name(gic_node, "ppi-partitions"); + if (!parts_node) + return; + + gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL); + if (!gic_data.ppi_descs) + return; + + nr_parts = of_get_child_count(parts_node); + + if (!nr_parts) + goto out_put_node; + + parts = kcalloc(nr_parts, sizeof(*parts), GFP_KERNEL); + if (WARN_ON(!parts)) + goto out_put_node; + + for_each_child_of_node(parts_node, child_part) { + struct partition_affinity *part; + int n; + + part = &parts[part_idx]; + + part->partition_id = of_node_to_fwnode(child_part); + + pr_info("GIC: PPI partition %pOFn[%d] { ", + child_part, part_idx); + + n = of_property_count_elems_of_size(child_part, "affinity", + sizeof(u32)); + WARN_ON(n <= 0); + + for (i = 0; i < n; i++) { + int err, cpu; + u32 cpu_phandle; + struct device_node *cpu_node; + + err = of_property_read_u32_index(child_part, "affinity", + i, &cpu_phandle); + if (WARN_ON(err)) + continue; + + cpu_node = of_find_node_by_phandle(cpu_phandle); + if (WARN_ON(!cpu_node)) + continue; + + cpu = of_cpu_node_to_id(cpu_node); + if (WARN_ON(cpu < 0)) + continue; + + pr_cont("%pOF[%d] ", cpu_node, cpu); + + cpumask_set_cpu(cpu, &part->mask); + } + + pr_cont("}\n"); + part_idx++; + } + + for (i = 0; i < gic_data.ppi_nr; i++) { + unsigned int irq; + struct partition_desc *desc; + struct irq_fwspec ppi_fwspec = { + .fwnode = gic_data.fwnode, + .param_count = 3, + .param = { + [0] = GIC_IRQ_TYPE_PARTITION, + [1] = i, + [2] = IRQ_TYPE_NONE, + }, + }; + + irq = irq_create_fwspec_mapping(&ppi_fwspec); + if (WARN_ON(!irq)) + continue; + desc = partition_create_desc(gic_data.fwnode, parts, nr_parts, + irq, &partition_domain_ops); + if (WARN_ON(!desc)) + continue; + + gic_data.ppi_descs[i] = desc; + } + +out_put_node: + of_node_put(parts_node); +} + +static void __init gic_of_setup_kvm_info(struct device_node *node) +{ + int ret; + struct resource r; + u32 gicv_idx; + + gic_v3_kvm_info.type = GIC_V3; + + gic_v3_kvm_info.maint_irq = irq_of_parse_and_map(node, 0); + if (!gic_v3_kvm_info.maint_irq) + return; + + if (of_property_read_u32(node, "#redistributor-regions", + &gicv_idx)) + gicv_idx = 1; + + gicv_idx += 3; /* Also skip GICD, GICC, GICH */ + ret = of_address_to_resource(node, gicv_idx, &r); + if (!ret) + gic_v3_kvm_info.vcpu = r; + + gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; + gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; + gic_set_kvm_info(&gic_v3_kvm_info); +} + +static int __init gic_of_init(struct device_node *node, struct device_node *parent) +{ + void __iomem *dist_base; + struct redist_region *rdist_regs; + u64 redist_stride; + u32 nr_redist_regions; + int err, i; + struct resource res; + unsigned long skt; + + dist_base = of_iomap(node, 0); + if (!dist_base) { + pr_err("%pOF: unable to map gic dist registers\n", node); + return -ENXIO; + } + + err = gic_validate_dist_version(dist_base); + if (err) { + pr_err("%pOF: no distributor detected, giving up\n", node); + goto out_unmap_dist; + } + + if (of_address_to_resource(node, 0, &res)) { + printk("Error: No GIC Distributor in FDT\n"); + goto out_unmap_dist; + } + + mars3_gic_dists[0].phys_base = res.start; + mars3_gic_dists[0].size = resource_size(&res); + mars3_gic_dists[0].dist_base = dist_base; + + if (of_property_read_u32(node, "#mars3_soc_bitmap", &mars3_sockets_bitmap)) + mars3_sockets_bitmap = 0x1; + + + for (skt = 1; skt < MAX_MARS3_SOC_COUNT; skt++) { + if ((((unsigned int)1 << skt) & mars3_sockets_bitmap) == 0) + continue; + + mars3_gic_dists[skt].phys_base = ((unsigned long)skt << MARS3_ADDR_SKTID_SHIFT) | mars3_gic_dists[0].phys_base; + mars3_gic_dists[skt].size = mars3_gic_dists[0].size; + mars3_gic_dists[skt].dist_base = ioremap(mars3_gic_dists[skt].phys_base, mars3_gic_dists[skt].size); + + } + + if (of_property_read_u32(node, "#redistributor-regions", &nr_redist_regions)) + nr_redist_regions = 1; + + rdist_regs = kcalloc(nr_redist_regions, sizeof(*rdist_regs), + GFP_KERNEL); + if (!rdist_regs) { + err = -ENOMEM; + goto out_unmap_dist; + } + + for (i = 0; i < nr_redist_regions; i++) { + struct resource res; + int ret; + + ret = of_address_to_resource(node, 1 + i, &res); + rdist_regs[i].redist_base = of_iomap(node, 1 + i); + if (ret || !rdist_regs[i].redist_base) { + pr_err("%pOF: couldn't map region %d\n", node, i); + err = -ENODEV; + goto out_unmap_rdist; + } + rdist_regs[i].phys_base = res.start; + } + + if (of_property_read_u64(node, "redistributor-stride", &redist_stride)) + redist_stride = 0; + + gic_enable_of_quirks(node, gic_quirks, &gic_data); + + err = gic_init_bases(dist_base, rdist_regs, nr_redist_regions, + redist_stride, &node->fwnode); + if (err) + goto out_unmap_rdist; + + gic_populate_ppi_partitions(node); + + if (static_branch_likely(&supports_deactivate_key)) + gic_of_setup_kvm_info(node); + return 0; + +out_unmap_rdist: + for (i = 0; i < nr_redist_regions; i++) + if (rdist_regs[i].redist_base) + iounmap(rdist_regs[i].redist_base); + kfree(rdist_regs); +out_unmap_dist: + iounmap(dist_base); + return err; +} + +IRQCHIP_DECLARE(gic_phyt_2500, "arm,gic-phytium-2500", gic_of_init); + +#ifdef CONFIG_ACPI +static struct +{ + void __iomem *dist_base; + struct redist_region *redist_regs; + u32 nr_redist_regions; + bool single_redist; + int enabled_rdists; + u32 maint_irq; + int maint_irq_mode; + phys_addr_t vcpu_base; +} acpi_data __initdata; + +static int gic_mars3_sockets_bitmap(void) +{ + unsigned int skt, i; + int skt_bitmap = 0; + + unsigned int skt_cpu_cnt[MAX_MARS3_SOC_COUNT] = {0}; + + for (i = 0; i < nr_cpu_ids; i++) { + skt = (cpu_logical_map(i) >> 16) & 0xff; + if ((skt >= 0) && (skt < MAX_MARS3_SOC_COUNT)) { + skt_cpu_cnt[skt]++; + } else if (0xff != skt) { + pr_err("socket address: %d is out of range.", skt); + } + } + + for (i = 0; i < MAX_MARS3_SOC_COUNT; i++) { + if (skt_cpu_cnt[i] > 0) + skt_bitmap |= (1 << i); + } + + return skt_bitmap; +} + +static void __init +gic_acpi_register_redist(phys_addr_t phys_base, void __iomem *redist_base) +{ + static int count = 0; + + acpi_data.redist_regs[count].phys_base = phys_base; + acpi_data.redist_regs[count].redist_base = redist_base; + acpi_data.redist_regs[count].single_redist = acpi_data.single_redist; + count++; +} + +static int __init +gic_acpi_parse_madt_redist(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_generic_redistributor *redist = + (struct acpi_madt_generic_redistributor *)header; + void __iomem *redist_base; + + redist_base = ioremap(redist->base_address, redist->length); + if (!redist_base) { + pr_err("Couldn't map GICR region @%llx\n", redist->base_address); + return -ENOMEM; + } + + gic_acpi_register_redist(redist->base_address, redist_base); + return 0; +} + +static int __init +gic_acpi_parse_madt_gicc(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_generic_interrupt *gicc = + (struct acpi_madt_generic_interrupt *)header; + u32 reg = readl_relaxed(acpi_data.dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; + u32 size = reg == GIC_PIDR2_ARCH_GICv4 ? SZ_64K * 4 : SZ_64K * 2; + void __iomem *redist_base; + + /* GICC entry which has !ACPI_MADT_ENABLED is not unusable so skip */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) + return 0; + + redist_base = ioremap(gicc->gicr_base_address, size); + if (!redist_base) + return -ENOMEM; + + gic_acpi_register_redist(gicc->gicr_base_address, redist_base); + return 0; +} + +static int __init gic_acpi_collect_gicr_base(void) +{ + acpi_tbl_entry_handler redist_parser; + enum acpi_madt_type type; + + if (acpi_data.single_redist) { + type = ACPI_MADT_TYPE_GENERIC_INTERRUPT; + redist_parser = gic_acpi_parse_madt_gicc; + } else { + type = ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR; + redist_parser = gic_acpi_parse_madt_redist; + } + + /* Collect redistributor base addresses in GICR entries */ + if (acpi_table_parse_madt(type, redist_parser, 0) > 0) + return 0; + + pr_info("No valid GICR entries exist\n"); + return -ENODEV; +} + +static int __init gic_acpi_match_gicr(union acpi_subtable_headers *header, + const unsigned long end) +{ + /* Subtable presence means that redist exists, that's it */ + return 0; +} + +static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_generic_interrupt *gicc = + (struct acpi_madt_generic_interrupt *)header; + + /* + * If GICC is enabled and has valid gicr base address, then it means + * GICR base is presented via GICC + */ + if ((gicc->flags & ACPI_MADT_ENABLED) && gicc->gicr_base_address) { + acpi_data.enabled_rdists++; + return 0; + } + + /* + * It's perfectly valid firmware can pass disabled GICC entry, driver + * should not treat as errors, skip the entry instead of probe fail. + */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) + return 0; + + return -ENODEV; +} + +static int __init gic_acpi_count_gicr_regions(void) +{ + int count; + + /* + * Count how many redistributor regions we have. It is not allowed + * to mix redistributor description, GICR and GICC subtables have to be + * mutually exclusive. + */ + count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR, + gic_acpi_match_gicr, 0); + if (count > 0) { + acpi_data.single_redist = false; + return count; + } + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, + gic_acpi_match_gicc, 0); + if (count > 0) { + acpi_data.single_redist = true; + count = acpi_data.enabled_rdists; + } + + return count; +} + +static bool __init acpi_validate_gic_table(struct acpi_subtable_header *header, + struct acpi_probe_entry *ape) +{ + struct acpi_madt_generic_distributor *dist; + int count; + + dist = (struct acpi_madt_generic_distributor *)header; + if (dist->version != ape->driver_data) + return false; + + /* We need to do that exercise anyway, the sooner the better */ + count = gic_acpi_count_gicr_regions(); + if (count <= 0) + return false; + + acpi_data.nr_redist_regions = count; + return true; +} + +static int __init gic_acpi_parse_virt_madt_gicc(union acpi_subtable_headers *header, + const unsigned long end) +{ + struct acpi_madt_generic_interrupt *gicc = + (struct acpi_madt_generic_interrupt *)header; + int maint_irq_mode; + static int first_madt = true; + + /* Skip unusable CPUs */ + if (!(gicc->flags & ACPI_MADT_ENABLED)) + return 0; + + maint_irq_mode = (gicc->flags & ACPI_MADT_VGIC_IRQ_MODE) ? + ACPI_EDGE_SENSITIVE : ACPI_LEVEL_SENSITIVE; + + if (first_madt) { + first_madt = false; + + acpi_data.maint_irq = gicc->vgic_interrupt; + acpi_data.maint_irq_mode = maint_irq_mode; + acpi_data.vcpu_base = gicc->gicv_base_address; + + return 0; + } + + /* + * The maintenance interrupt and GICV should be the same for every CPU + */ + if ((acpi_data.maint_irq != gicc->vgic_interrupt) || + (acpi_data.maint_irq_mode != maint_irq_mode) || + (acpi_data.vcpu_base != gicc->gicv_base_address)) + return -EINVAL; + + return 0; +} + +static bool __init gic_acpi_collect_virt_info(void) +{ + int count; + + count = acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT, + gic_acpi_parse_virt_madt_gicc, 0); + + return (count > 0); +} + +#define ACPI_GICV3_DIST_MEM_SIZE (SZ_64K) +#define ACPI_GICV2_VCTRL_MEM_SIZE (SZ_4K) +#define ACPI_GICV2_VCPU_MEM_SIZE (SZ_8K) + +static void __init gic_acpi_setup_kvm_info(void) +{ + int irq; + + if (!gic_acpi_collect_virt_info()) { + pr_warn("Unable to get hardware information used for virtualization\n"); + return; + } + + gic_v3_kvm_info.type = GIC_V3; + + irq = acpi_register_gsi(NULL, acpi_data.maint_irq, + acpi_data.maint_irq_mode, + ACPI_ACTIVE_HIGH); + if (irq <= 0) + return; + + gic_v3_kvm_info.maint_irq = irq; + + if (acpi_data.vcpu_base) { + struct resource *vcpu = &gic_v3_kvm_info.vcpu; + + vcpu->flags = IORESOURCE_MEM; + vcpu->start = acpi_data.vcpu_base; + vcpu->end = vcpu->start + ACPI_GICV2_VCPU_MEM_SIZE - 1; + } + + gic_v3_kvm_info.has_v4 = gic_data.rdists.has_vlpis; + gic_v3_kvm_info.has_v4_1 = gic_data.rdists.has_rvpeid; + gic_set_kvm_info(&gic_v3_kvm_info); +} + +static int __init +gic_acpi_init(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_generic_distributor *dist; + struct fwnode_handle *domain_handle; + size_t size; + int i, err; + int skt; + + /* Get distributor base address */ + dist = (struct acpi_madt_generic_distributor *)header; + acpi_data.dist_base = ioremap(dist->base_address, + ACPI_GICV3_DIST_MEM_SIZE); + if (!acpi_data.dist_base) { + pr_err("Unable to map GICD registers\n"); + return -ENOMEM; + } + + err = gic_validate_dist_version(acpi_data.dist_base); + if (err) { + pr_err("No distributor detected at @%p, giving up\n", + acpi_data.dist_base); + goto out_dist_unmap; + } + + mars3_gic_dists[0].phys_base = dist->base_address; + mars3_gic_dists[0].size = ACPI_GICV3_DIST_MEM_SIZE; + mars3_gic_dists[0].dist_base = acpi_data.dist_base; + +#ifdef CONFIG_ACPI + mars3_sockets_bitmap = gic_mars3_sockets_bitmap(); + if (mars3_sockets_bitmap == 0) { + mars3_sockets_bitmap = 0x1; + pr_err("No socket, please check cpus MPIDR_AFFINITY_LEVEL!!!"); + } else + printk("mars3_sockets_bitmap = 0x%x\n", mars3_sockets_bitmap); +#endif + + for (skt = 1; skt < MAX_MARS3_SOC_COUNT; skt++) { + if ((((unsigned int)1 << skt) & mars3_sockets_bitmap) == 0) + continue; + + mars3_gic_dists[skt].phys_base = ((unsigned long)skt << MARS3_ADDR_SKTID_SHIFT) | mars3_gic_dists[0].phys_base; + mars3_gic_dists[skt].size = mars3_gic_dists[0].size; + mars3_gic_dists[skt].dist_base = ioremap(mars3_gic_dists[skt].phys_base, mars3_gic_dists[skt].size); + + } + + size = sizeof(*acpi_data.redist_regs) * acpi_data.nr_redist_regions; + acpi_data.redist_regs = kzalloc(size, GFP_KERNEL); + if (!acpi_data.redist_regs) { + err = -ENOMEM; + goto out_dist_unmap; + } + + err = gic_acpi_collect_gicr_base(); + if (err) + goto out_redist_unmap; + + domain_handle = irq_domain_alloc_fwnode(&dist->base_address); + if (!domain_handle) { + err = -ENOMEM; + goto out_redist_unmap; + } + + err = gic_init_bases(acpi_data.dist_base, acpi_data.redist_regs, + acpi_data.nr_redist_regions, 0, domain_handle); + if (err) + goto out_fwhandle_free; + + acpi_set_irq_model(ACPI_IRQ_MODEL_GIC, domain_handle); + + if (static_branch_likely(&supports_deactivate_key)) + gic_acpi_setup_kvm_info(); + + return 0; + +out_fwhandle_free: + irq_domain_free_fwnode(domain_handle); +out_redist_unmap: + for (i = 0; i < acpi_data.nr_redist_regions; i++) + if (acpi_data.redist_regs[i].redist_base) + iounmap(acpi_data.redist_regs[i].redist_base); + kfree(acpi_data.redist_regs); +out_dist_unmap: + iounmap(acpi_data.dist_base); + return err; +} +IRQCHIP_ACPI_DECLARE(gic_phyt_2500, ACPI_MADT_TYPE_PHYTIUM_2500, + acpi_validate_gic_table, ACPI_MADT_GIC_VERSION_V3, + gic_acpi_init); +#endif diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index e44230e4aff9..a1f33b091801 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -518,7 +518,8 @@ enum acpi_madt_type { ACPI_MADT_TYPE_GENERIC_MSI_FRAME = 13, ACPI_MADT_TYPE_GENERIC_REDISTRIBUTOR = 14, ACPI_MADT_TYPE_GENERIC_TRANSLATOR = 15, - ACPI_MADT_TYPE_RESERVED = 16 /* 16 and greater are reserved */ + ACPI_MADT_TYPE_RESERVED = 16, /* 16 and greater are reserved */ + ACPI_MADT_TYPE_PHYTIUM_2500 = 128 };
/* diff --git a/include/linux/irqchip/arm-gic-phytium-2500.h b/include/linux/irqchip/arm-gic-phytium-2500.h new file mode 100644 index 000000000000..96c5ca83f44d --- /dev/null +++ b/include/linux/irqchip/arm-gic-phytium-2500.h @@ -0,0 +1,725 @@ +/* + * Copyright (C) 2020 Phytium Corporation. + * Author: Wang Yinfeng wangyinfeng@phytium.com.cn + * Chen Baozi chenbaozi@phytium.com.cn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. + */ +#ifndef __LINUX_IRQCHIP_ARM_GIC_PHYTIUM_2500_H +#define __LINUX_IRQCHIP_ARM_GIC_PHYTIUM_2500_H + +/* + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. + */ +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_TYPER2 0x000C +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_IGROUPR 0x0080 +#define GICD_ISENABLER 0x0100 +#define GICD_ICENABLER 0x0180 +#define GICD_ISPENDR 0x0200 +#define GICD_ICPENDR 0x0280 +#define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 +#define GICD_IPRIORITYR 0x0400 +#define GICD_ICFGR 0x0C00 +#define GICD_IGRPMODR 0x0D00 +#define GICD_NSACR 0x0E00 +#define GICD_IGROUPRnE 0x1000 +#define GICD_ISENABLERnE 0x1200 +#define GICD_ICENABLERnE 0x1400 +#define GICD_ISPENDRnE 0x1600 +#define GICD_ICPENDRnE 0x1800 +#define GICD_ISACTIVERnE 0x1A00 +#define GICD_ICACTIVERnE 0x1C00 +#define GICD_IPRIORITYRnE 0x2000 +#define GICD_ICFGRnE 0x3000 +#define GICD_IROUTER 0x6000 +#define GICD_IROUTERnE 0x8000 +#define GICD_IDREGS 0xFFD0 +#define GICD_PIDR2 0xFFE8 + +#define ESPI_BASE_INTID 4096 + +/* + * Those registers are actually from GICv2, but the spec demands that they + * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3). + */ +#define GICD_ITARGETSR 0x0800 +#define GICD_SGIR 0x0F00 +#define GICD_CPENDSGIR 0x0F10 +#define GICD_SPENDSGIR 0x0F20 + +#define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_nASSGIreq (1U << 8) +#define GICD_CTLR_DS (1U << 6) +#define GICD_CTLR_ARE_NS (1U << 4) +#define GICD_CTLR_ENABLE_G1A (1U << 1) +#define GICD_CTLR_ENABLE_G1 (1U << 0) + +#define GICD_IIDR_IMPLEMENTER_SHIFT 0 +#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT) +#define GICD_IIDR_REVISION_SHIFT 12 +#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT) +#define GICD_IIDR_VARIANT_SHIFT 16 +#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT) +#define GICD_IIDR_PRODUCT_ID_SHIFT 24 +#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT) + + +/* + * In systems with a single security state (what we emulate in KVM) + * the meaning of the interrupt group enable bits is slightly different + */ +#define GICD_CTLR_ENABLE_SS_G1 (1U << 1) +#define GICD_CTLR_ENABLE_SS_G0 (1U << 0) + +#define GICD_TYPER_RSS (1U << 26) +#define GICD_TYPER_LPIS (1U << 17) +#define GICD_TYPER_MBIS (1U << 16) +#define GICD_TYPER_ESPI (1U << 8) + +#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1) +#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1) +#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32) +#define GICD_TYPER_ESPIS(typer) \ + (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0) + +#define GICD_TYPER2_nASSGIcap (1U << 8) +#define GICD_TYPER2_VIL (1U << 7) +#define GICD_TYPER2_VID GENMASK(4, 0) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +#define GIC_V3_DIST_SIZE 0x10000 + +#define GIC_PAGE_SIZE_4K 0ULL +#define GIC_PAGE_SIZE_16K 1ULL +#define GIC_PAGE_SIZE_64K 2ULL +#define GIC_PAGE_SIZE_MASK 3ULL + +/* + * Re-Distributor registers, offsets from RD_base + */ +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_IDREGS GICD_IDREGS +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_CTLR_ENABLE_LPIS (1UL << 0) +#define GICR_CTLR_RWP (1UL << 3) + +#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff) + +#define EPPI_BASE_INTID 1056 + +#define GICR_TYPER_NR_PPIS(r) \ + ({ \ + unsigned int __ppinum = ((r) >> 27) & 0x1f; \ + unsigned int __nr_ppis = 16; \ + if (__ppinum == 1 || __ppinum == 2) \ + __nr_ppis += __ppinum * 32; \ + \ + __nr_ppis; \ + }) + +#define GICR_WAKER_ProcessorSleep (1U << 1) +#define GICR_WAKER_ChildrenAsleep (1U << 2) + +#define GIC_BASER_CACHE_nCnB 0ULL +#define GIC_BASER_CACHE_SameAsInner 0ULL +#define GIC_BASER_CACHE_nC 1ULL +#define GIC_BASER_CACHE_RaWt 2ULL +#define GIC_BASER_CACHE_RaWb 3ULL +#define GIC_BASER_CACHE_WaWt 4ULL +#define GIC_BASER_CACHE_WaWb 5ULL +#define GIC_BASER_CACHE_RaWaWt 6ULL +#define GIC_BASER_CACHE_RaWaWb 7ULL +#define GIC_BASER_CACHE_MASK 7ULL +#define GIC_BASER_NonShareable 0ULL +#define GIC_BASER_InnerShareable 1ULL +#define GIC_BASER_OuterShareable 2ULL +#define GIC_BASER_SHAREABILITY_MASK 3ULL + +#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \ + (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT) + +#define GIC_BASER_SHAREABILITY(reg, type) \ + (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) + +/* encode a size field of width @w containing @n - 1 units */ +#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) + +#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK) +#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK) +#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK) +#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_PROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable) + +#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) +#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) +#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) +#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) +#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) +#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) +#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) + +#define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) +#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) + +#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK) +#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK) +#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK) +#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_PENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable) + +#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) +#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) +#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) +#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) +#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) +#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) +#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb) + +#define GICR_PENDBASER_PTZ BIT_ULL(62) + +/* + * Re-Distributor registers, offsets from SGI_base + */ +#define GICR_IGROUPR0 GICD_IGROUPR +#define GICR_ISENABLER0 GICD_ISENABLER +#define GICR_ICENABLER0 GICD_ICENABLER +#define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR +#define GICR_ISACTIVER0 GICD_ISACTIVER +#define GICR_ICACTIVER0 GICD_ICACTIVER +#define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR +#define GICR_IGRPMODR0 GICD_IGRPMODR +#define GICR_NSACR GICD_NSACR + +#define GICR_TYPER_PLPIS (1U << 0) +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_DIRTY (1U << 2) +#define GICR_TYPER_DirectLPIS (1U << 3) +#define GICR_TYPER_LAST (1U << 4) +#define GICR_TYPER_RVPEID (1U << 7) +#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24) +#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32) + +#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0) +#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32) +#define GICR_INVLPIR_V GENMASK_ULL(63, 63) + +#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID +#define GICR_INVALLR_V GICR_INVLPIR_V + +#define GIC_V3_REDIST_SIZE 0x20000 + +#define LPI_PROP_GROUP1 (1 << 1) +#define LPI_PROP_ENABLED (1 << 0) + +/* + * Re-Distributor registers, offsets from VLPI_base + */ +#define GICR_VPROPBASER 0x0070 + +#define GICR_VPROPBASER_IDBITS_MASK 0x1f + +#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56) + +#define GICR_VPROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK) +#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK) +#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK) +#define GICR_VPROPBASER_CACHEABILITY_MASK \ + GICR_VPROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable) + +#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB) +#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC) +#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt) +#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb) +#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt) +#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb) +#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt) +#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb) + +/* + * GICv4.1 VPROPBASER reinvention. A subtle mix between the old + * VPROPBASER and ITS_BASER. Just not quite any of the two. + */ +#define GICR_VPROPBASER_4_1_VALID (1ULL << 63) +#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59) +#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55) +#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53) +#define GICR_VPROPBASER_4_1_Z (1ULL << 52) +#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12) +#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0) + +#define GICR_VPENDBASER 0x0078 + +#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_VPENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK) +#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK) +#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK) +#define GICR_VPENDBASER_CACHEABILITY_MASK \ + GICR_VPENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_VPENDBASER_NonShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable) + +#define GICR_VPENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable) + +#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB) +#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC) +#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt) +#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb) +#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt) +#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb) +#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt) +#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb) + +#define GICR_VPENDBASER_Dirty (1ULL << 60) +#define GICR_VPENDBASER_PendingLast (1ULL << 61) +#define GICR_VPENDBASER_IDAI (1ULL << 62) +#define GICR_VPENDBASER_Valid (1ULL << 63) + +/* + * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields, + * also use the above Valid, PendingLast and Dirty. + */ +#define GICR_VPENDBASER_4_1_DB (1ULL << 62) +#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59) +#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58) +#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0) + +#define GICR_VSGIR 0x0080 + +#define GICR_VSGIR_VPEID GENMASK(15, 0) + +#define GICR_VSGIPENDR 0x0088 + +#define GICR_VSGIPENDR_BUSY (1U << 31) +#define GICR_VSGIPENDR_PENDING GENMASK(15, 0) + +/* + * ITS registers, offsets from ITS_base + */ +#define GITS_CTLR 0x0000 +#define GITS_IIDR 0x0004 +#define GITS_TYPER 0x0008 +#define GITS_MPIDR 0x0018 +#define GITS_CBASER 0x0080 +#define GITS_CWRITER 0x0088 +#define GITS_CREADR 0x0090 +#define GITS_BASER 0x0100 +#define GITS_IDREGS_BASE 0xffd0 +#define GITS_PIDR0 0xffe0 +#define GITS_PIDR1 0xffe4 +#define GITS_PIDR2 GICR_PIDR2 +#define GITS_PIDR4 0xffd0 +#define GITS_CIDR0 0xfff0 +#define GITS_CIDR1 0xfff4 +#define GITS_CIDR2 0xfff8 +#define GITS_CIDR3 0xfffc + +#define GITS_TRANSLATER 0x10040 + +#define GITS_SGIR 0x20020 + +#define GITS_SGIR_VPEID GENMASK_ULL(47, 32) +#define GITS_SGIR_VINTID GENMASK_ULL(3, 0) + +#define GITS_CTLR_ENABLE (1U << 0) +#define GITS_CTLR_ImDe (1U << 1) +#define GITS_CTLR_ITS_NUMBER_SHIFT 4 +#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT) +#define GITS_CTLR_QUIESCENT (1U << 31) + +#define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_VLPIS (1UL << 1) +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 +#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4) +#define GITS_TYPER_IDBITS_SHIFT 8 +#define GITS_TYPER_DEVBITS_SHIFT 13 +#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13) +#define GITS_TYPER_PTA (1UL << 19) +#define GITS_TYPER_HCC_SHIFT 24 +#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff) +#define GITS_TYPER_VMOVP (1ULL << 37) +#define GITS_TYPER_VMAPP (1ULL << 40) +#define GITS_TYPER_SVPET GENMASK_ULL(42, 41) + +#define GITS_IIDR_REV_SHIFT 12 +#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) +#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) +#define GITS_IIDR_PRODUCTID_SHIFT 24 + +#define GITS_CBASER_VALID (1ULL << 63) +#define GITS_CBASER_SHAREABILITY_SHIFT (10) +#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_CBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK) +#define GITS_CBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK) +#define GITS_CBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK) +#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK + +#define GITS_CBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable) + +#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) +#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) +#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb) +#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) +#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) +#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) +#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) + +#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12)) + +#define GITS_BASER_NR_REGS 8 + +#define GITS_BASER_VALID (1ULL << 63) +#define GITS_BASER_INDIRECT (1ULL << 62) + +#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_BASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK) +#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK +#define GITS_BASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK) +#define GITS_BASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK) + +#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) +#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) +#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) +#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) +#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) +#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) +#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb) + +#define GITS_BASER_TYPE_SHIFT (56) +#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) +#define GITS_BASER_ENTRY_SIZE_SHIFT (48) +#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) +#define GITS_BASER_PHYS_52_to_48(phys) \ + (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12) +#define GITS_BASER_ADDR_48_to_52(baser) \ + (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48) + +#define GITS_BASER_SHAREABILITY_SHIFT (10) +#define GITS_BASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) +#define GITS_BASER_PAGE_SIZE_SHIFT (8) +#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K) +#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K) +#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K) +#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK) +#define GITS_BASER_PAGES_MAX 256 +#define GITS_BASER_PAGES_SHIFT (0) +#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) + +#define GITS_BASER_TYPE_NONE 0 +#define GITS_BASER_TYPE_DEVICE 1 +#define GITS_BASER_TYPE_VCPU 2 +#define GITS_BASER_TYPE_RESERVED3 3 +#define GITS_BASER_TYPE_COLLECTION 4 +#define GITS_BASER_TYPE_RESERVED5 5 +#define GITS_BASER_TYPE_RESERVED6 6 +#define GITS_BASER_TYPE_RESERVED7 7 + +#define GITS_LVL1_ENTRY_SIZE (8UL) + +/* + * ITS commands + */ +#define GITS_CMD_MAPD 0x08 +#define GITS_CMD_MAPC 0x09 +#define GITS_CMD_MAPTI 0x0a +#define GITS_CMD_MAPI 0x0b +#define GITS_CMD_MOVI 0x01 +#define GITS_CMD_DISCARD 0x0f +#define GITS_CMD_INV 0x0c +#define GITS_CMD_MOVALL 0x0e +#define GITS_CMD_INVALL 0x0d +#define GITS_CMD_INT 0x03 +#define GITS_CMD_CLEAR 0x04 +#define GITS_CMD_SYNC 0x05 + +/* + * GICv4 ITS specific commands + */ +#define GITS_CMD_GICv4(x) ((x) | 0x20) +#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL) +#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC) +#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI) +#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI) +#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC) +/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */ +#define GITS_CMD_VMOVP GITS_CMD_GICv4(2) +#define GITS_CMD_VSGI GITS_CMD_GICv4(3) +#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe) + +/* + * ITS error numbers + */ +#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 +#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 +#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 +#define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 +#define E_ITS_MAPC_PROCNUM_OOR 0x010902 +#define E_ITS_MAPC_COLLECTION_OOR 0x010903 +#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_ID_OOR 0x010a05 +#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 +#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 +#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 +#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01 +#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07 + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_SHIFT (1) +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) +#define ICC_CTLR_EL1_CBPR_SHIFT 0 +#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) +#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 +#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) +#define ICC_CTLR_EL1_ID_BITS_SHIFT 11 +#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT) +#define ICC_CTLR_EL1_SEIS_SHIFT 14 +#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) +#define ICC_CTLR_EL1_A3V_SHIFT 15 +#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_CTLR_EL1_RSS (0x1 << 18) +#define ICC_CTLR_EL1_ExtRange (0x1 << 19) +#define ICC_PMR_EL1_SHIFT 0 +#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) +#define ICC_BPR0_EL1_SHIFT 0 +#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT) +#define ICC_BPR1_EL1_SHIFT 0 +#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT) +#define ICC_IGRPEN0_EL1_SHIFT 0 +#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT) +#define ICC_IGRPEN1_EL1_SHIFT 0 +#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT) +#define ICC_SRE_EL1_DIB (1U << 2) +#define ICC_SRE_EL1_DFB (1U << 1) +#define ICC_SRE_EL1_SRE (1U << 0) + +/* + * Hypervisor interface registers (SRE only) + */ +#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) + +#define ICH_LR_EOI (1ULL << 41) +#define ICH_LR_GROUP (1ULL << 60) +#define ICH_LR_HW (1ULL << 61) +#define ICH_LR_STATE (3ULL << 62) +#define ICH_LR_PENDING_BIT (1ULL << 62) +#define ICH_LR_ACTIVE_BIT (1ULL << 63) +#define ICH_LR_PHYS_ID_SHIFT 32 +#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) + +/* These are for GICv2 emulation only */ +#define GICH_LR_VIRTUALID (0x3ffUL << 0) +#define GICH_LR_PHYSID_CPUID_SHIFT (10) +#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT) + +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_NPIE (1 << 3) +#define ICH_HCR_TC (1 << 10) +#define ICH_HCR_TALL0 (1 << 11) +#define ICH_HCR_TALL1 (1 << 12) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) + +#define ICH_VMCR_ACK_CTL_SHIFT 2 +#define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) +#define ICH_VMCR_FIQ_EN_SHIFT 3 +#define ICH_VMCR_FIQ_EN_MASK (1 << ICH_VMCR_FIQ_EN_SHIFT) +#define ICH_VMCR_CBPR_SHIFT 4 +#define ICH_VMCR_CBPR_MASK (1 << ICH_VMCR_CBPR_SHIFT) +#define ICH_VMCR_EOIM_SHIFT 9 +#define ICH_VMCR_EOIM_MASK (1 << ICH_VMCR_EOIM_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) +#define ICH_VMCR_ENG0_SHIFT 0 +#define ICH_VMCR_ENG0_MASK (1 << ICH_VMCR_ENG0_SHIFT) +#define ICH_VMCR_ENG1_SHIFT 1 +#define ICH_VMCR_ENG1_MASK (1 << ICH_VMCR_ENG1_SHIFT) + +#define ICH_VTR_PRI_BITS_SHIFT 29 +#define ICH_VTR_PRI_BITS_MASK (7 << ICH_VTR_PRI_BITS_SHIFT) +#define ICH_VTR_ID_BITS_SHIFT 23 +#define ICH_VTR_ID_BITS_MASK (7 << ICH_VTR_ID_BITS_SHIFT) +#define ICH_VTR_SEIS_SHIFT 22 +#define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) +#define ICH_VTR_A3V_SHIFT 21 +#define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) + +#define ICC_SGI1R_TARGET_LIST_SHIFT 0 +#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT) +#define ICC_SGI1R_AFFINITY_1_SHIFT 16 +#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT) +#define ICC_SGI1R_SGI_ID_SHIFT 24 +#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT) +#define ICC_SGI1R_AFFINITY_2_SHIFT 32 +#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) +#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_RS_SHIFT 44 +#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT) +#define ICC_SGI1R_AFFINITY_3_SHIFT 48 +#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) + +#include <asm/arch_gicv3.h> + +#ifndef __ASSEMBLY__ + +/* + * We need a value to serve as a irq-type for LPIs. Choose one that will + * hopefully pique the interest of the reviewer. + */ +#define GIC_IRQ_TYPE_LPI 0xa110c8ed + +struct rdists { + struct { + raw_spinlock_t rd_lock; + void __iomem *rd_base; + struct page *pend_page; + phys_addr_t phys_base; + bool lpi_enabled; + cpumask_t *vpe_table_mask; + void *vpe_l1_base; + } __percpu *rdist; + phys_addr_t prop_table_pa; + void *prop_table_va; + u64 flags; + u32 gicd_typer; + u32 gicd_typer2; + bool has_vlpis; + bool has_rvpeid; + bool has_direct_lpi; + bool has_vpend_valid_dirty; +}; + +struct irq_domain; +struct fwnode_handle; +int phytium_its_cpu_init(void); +int phytium_its_init(struct fwnode_handle *handle, struct rdists *rdists, + struct irq_domain *domain); +int mbi_init(struct fwnode_handle *fwnode, struct irq_domain *parent); + +static inline bool gic_enable_sre(void) +{ + u32 val; + + val = gic_read_sre(); + if (val & ICC_SRE_EL1_SRE) + return true; + + val |= ICC_SRE_EL1_SRE; + gic_write_sre(val); + val = gic_read_sre(); + + return !!(val & ICC_SRE_EL1_SRE); +} + +#endif + +#endif
hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I41AUQ CVE: NA
-------------------------------------------------
While compiling with openeuler_defconfig, configs CONFIG_ARCH_PHYTIUM and CONFIG_ARM_GIC_PHYTIUM_2500 should be set to enable support for Phytium ARMv8 S2500/FT-2500 SoC and 2-Processor server.
Enable these two configs for arch/arm64/configs/openeuler_defconfig by default.
Note: As FT-2500 2-Processor server has 16 NUMA nodes, CONFIG_NODES_SHIFT should be 4.
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Xiongfeng Wang wangxiongfeng2@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 2 ++ 1 file changed, 2 insertions(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 0ff8e6ce6b78..248ceeefec7f 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -299,6 +299,7 @@ CONFIG_ARCH_HISI=y # CONFIG_ARCH_MESON is not set # CONFIG_ARCH_MVEBU is not set # CONFIG_ARCH_MXC is not set +CONFIG_ARCH_PHYTIUM=y CONFIG_ARCH_QCOM=y # CONFIG_ARCH_REALTEK is not set # CONFIG_ARCH_RENESAS is not set @@ -5912,6 +5913,7 @@ CONFIG_ARM_GIC_V2M=y CONFIG_ARM_GIC_V3=y CONFIG_ARM_GIC_V3_ITS=y CONFIG_ARM_GIC_V3_ITS_PCI=y +CONFIG_ARM_GIC_PHYTIUM_2500=y # CONFIG_AL_FIC is not set CONFIG_HISILICON_IRQ_MBIGEN=y CONFIG_PARTITION_PERCPU=y
From: Peter Zijlstra peterz@infradead.org
mainline inclusion from mainline-5.12-rc1 commit 3f2a8fc4b15de18644e8a80a09edda168676e22c category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Provide a stub function that return 0 and wire up the static call site patching to replace the CALL with a single 5 byte instruction that clears %RAX, the return value register.
The function can be cast to any function pointer type that has a single %RAX return (including pointers). Also provide a version that returns an int for convenience. We are clearing the entire %RAX register in any case, whether the return value is 32 or 64 bits, since %RAX is always a scratch register anyway.
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Frederic Weisbecker frederic@kernel.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/20210118141223.123667-2-frederic@kernel.org Signed-off-by: Ma Junhai majunhai2@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/x86/kernel/static_call.c | 17 +++++++++++++++-- include/linux/static_call.h | 12 ++++++++++++ kernel/static_call.c | 5 +++++ 3 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index ca9a380d9c0b..9442c4136c38 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -11,14 +11,26 @@ enum insn_type { RET = 3, /* tramp / site cond-tail-call */ };
+/* + * data16 data16 xorq %rax, %rax - a single 5 byte instruction that clears %rax + * The REX.W cancels the effect of any data16. + */ +static const u8 xor5rax[] = { 0x66, 0x66, 0x48, 0x31, 0xc0 }; + static void __ref __static_call_transform(void *insn, enum insn_type type, void *func) { + const void *emulate = NULL; int size = CALL_INSN_SIZE; const void *code;
switch (type) { case CALL: code = text_gen_insn(CALL_INSN_OPCODE, insn, func); + if (func == &__static_call_return0) { + emulate = code; + code = &xor5rax; + } + break;
case NOP: @@ -41,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, void if (unlikely(system_state == SYSTEM_BOOTING)) return text_poke_early(insn, code, size);
- text_poke_bp(insn, code, size, NULL); + text_poke_bp(insn, code, size, emulate); }
static void __static_call_validate(void *insn, bool tail) @@ -54,7 +66,8 @@ static void __static_call_validate(void *insn, bool tail) return; } else { if (opcode == CALL_INSN_OPCODE || - !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5)) + !memcmp(insn, ideal_nops[NOP_ATOMIC5], 5) || + !memcmp(insn, xor5rax, 5)) return; }
diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 04e6042d252d..6550902c8b59 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -148,6 +148,8 @@ extern void __static_call_update(struct static_call_key *key, void *tramp, void extern int static_call_mod_init(struct module *mod); extern int static_call_text_reserved(void *start, void *end);
+extern long __static_call_return0(void); + #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ @@ -219,6 +221,11 @@ static inline int static_call_text_reserved(void *start, void *end) return 0; }
+static inline long __static_call_return0(void) +{ + return 0; +} + #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) @@ -240,6 +247,11 @@ struct static_call_key { void *func; };
+static inline long __static_call_return0(void) +{ + return 0; +} + #define DEFINE_STATIC_CALL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ diff --git a/kernel/static_call.c b/kernel/static_call.c index f59089a12231..2c5950b0b90e 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -496,6 +496,11 @@ int __init static_call_init(void) } early_initcall(static_call_init);
+long __static_call_return0(void) +{ + return 0; +} + #ifdef CONFIG_STATIC_CALL_SELFTEST
static int func_a(int x)
From: Frederic Weisbecker frederic@kernel.org
mainline inclusion from mainline-5.12-rc1 commit 29fd01944b7273bb630c649a2104b7f9e4ef3fa6 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
DECLARE_STATIC_CALL() must pass the original function targeted for a given static call. But DEFINE_STATIC_CALL() may want to initialize it as off. In this case we can't pass NULL (for functions without return value) or __static_call_return0 (for functions returning a value) directly to DEFINE_STATIC_CALL() as that may trigger a static call redeclaration with a different function prototype. Type casts neither can work around that as they don't get along with typeof().
The proper way to do that for functions that don't return a value is to use DEFINE_STATIC_CALL_NULL(). But functions returning a actual value don't have an equivalent yet.
Provide DEFINE_STATIC_CALL_RET0() to solve this situation.
Signed-off-by: Frederic Weisbecker frederic@kernel.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/20210118141223.123667-3-frederic@kernel.org Signed-off-by: Ma Junhai majunhai2@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/static_call.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 6550902c8b59..85ecc789f4ff 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -150,13 +150,13 @@ extern int static_call_text_reserved(void *start, void *end);
extern long __static_call_return0(void);
-#define DEFINE_STATIC_CALL(name, _func) \ +#define __DEFINE_STATIC_CALL(name, _func, _func_init) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = _func, \ + .func = _func_init, \ .type = 1, \ }; \ - ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init)
#define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -191,12 +191,12 @@ struct static_call_key { void *func; };
-#define DEFINE_STATIC_CALL(name, _func) \ +#define __DEFINE_STATIC_CALL(name, _func, _func_init) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = _func, \ + .func = _func_init, \ }; \ - ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func) + ARCH_DEFINE_STATIC_CALL_TRAMP(name, _func_init)
#define DEFINE_STATIC_CALL_NULL(name, _func) \ DECLARE_STATIC_CALL(name, _func); \ @@ -252,10 +252,10 @@ static inline long __static_call_return0(void) return 0; }
-#define DEFINE_STATIC_CALL(name, _func) \ +#define __DEFINE_STATIC_CALL(name, _func, _func_init) \ DECLARE_STATIC_CALL(name, _func); \ struct static_call_key STATIC_CALL_KEY(name) = { \ - .func = _func, \ + .func = _func_init, \ }
#define DEFINE_STATIC_CALL_NULL(name, _func) \ @@ -304,4 +304,10 @@ static inline int static_call_text_reserved(void *start, void *end)
#endif /* CONFIG_HAVE_STATIC_CALL */
+#define DEFINE_STATIC_CALL(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, _func) + +#define DEFINE_STATIC_CALL_RET0(name, _func) \ + __DEFINE_STATIC_CALL(name, _func, __static_call_return0) + #endif /* _LINUX_STATIC_CALL_H */
From: Michal Hocko mhocko@kernel.org
mainline inclusion from mainline-5.12-rc1 commit 6ef869e0647439af0fc28dde162d33320d4e1dd7 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Preemption mode selection is currently hardcoded on Kconfig choices. Introduce a dedicated option to tune preemption flavour at boot time,
This will be only available on architectures efficiently supporting static calls in order not to tempt with the feature against additional overhead that might be prohibitive or undesirable.
CONFIG_PREEMPT_DYNAMIC is automatically selected by CONFIG_PREEMPT if the architecture provides the necessary support (CONFIG_STATIC_CALL_INLINE, CONFIG_GENERIC_ENTRY, and provide with __preempt_schedule_function() / __preempt_schedule_notrace_function()).
Suggested-by: Peter Zijlstra peterz@infradead.org Signed-off-by: Michal Hocko mhocko@suse.com Signed-off-by: Frederic Weisbecker frederic@kernel.org [peterz: relax requirement to HAVE_STATIC_CALL] Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/20210118141223.123667-5-frederic@kernel.org Signed-off-by: Ma Junhai majunhai2@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- .../admin-guide/kernel-parameters.txt | 7 +++++++ arch/Kconfig | 9 +++++++++ arch/x86/Kconfig | 1 + kernel/Kconfig.preempt | 19 +++++++++++++++++++ 4 files changed, 36 insertions(+)
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c125a678fcf3..e732fe5d4e42 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3997,6 +3997,13 @@ Format: {"off"} Disable Hardware Transactional Memory
+ preempt= [KNL] + Select preemption mode if you have CONFIG_PREEMPT_DYNAMIC + none - Limited to cond_resched() calls + voluntary - Limited to cond_resched() and might_sleep() calls + full - Any section that isn't explicitly preempt disabled + can be preempted anytime. + print-fatal-signals= [KNL] debug: print fatal signals
diff --git a/arch/Kconfig b/arch/Kconfig index 3f9f9739655f..f2d65d38f119 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1082,6 +1082,15 @@ config HAVE_STATIC_CALL_INLINE bool depends on HAVE_STATIC_CALL
+config HAVE_PREEMPT_DYNAMIC + bool + depends on HAVE_STATIC_CALL + depends on GENERIC_ENTRY + help + Select this if the architecture support boot time preempt setting + on top of static calls. It is strongly advised to support inline + static call to avoid any overhead. + config ARCH_WANT_LD_ORPHAN_WARN bool help diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9bf0136e8bff..af7123b5b493 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -223,6 +223,7 @@ config X86 select HAVE_STACK_VALIDATION if X86_64 select HAVE_STATIC_CALL select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION + select HAVE_PREEMPT_DYNAMIC select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index bf82259cff96..416017301660 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -40,6 +40,7 @@ config PREEMPT depends on !ARCH_NO_PREEMPT select PREEMPTION select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK + select PREEMPT_DYNAMIC if HAVE_PREEMPT_DYNAMIC help This option reduces the latency of the kernel by making all kernel code (that is not executing in a critical section) @@ -80,3 +81,21 @@ config PREEMPT_COUNT config PREEMPTION bool select PREEMPT_COUNT + +config PREEMPT_DYNAMIC + bool + help + This option allows to define the preemption model on the kernel + command line parameter and thus override the default preemption + model defined during compile time. + + The feature is primarily interesting for Linux distributions which + provide a pre-built kernel binary to reduce the number of kernel + flavors they offer while still offering different usecases. + + The runtime overhead is negligible with HAVE_STATIC_CALL_INLINE enabled + but if runtime patching is not available for the specific architecture + then the potential overhead should be considered. + + Interesting if you want the same pre-built kernel should be used for + both Server and Desktop workloads.
From: "Peter Zijlstra (Intel)" peterz@infradead.org
mainline inclusion from mainline-5.12-rc1 commit b965f1ddb47daa5b8b2e2bc9c921431236830367 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Provide static calls to control cond_resched() (called in !CONFIG_PREEMPT) and might_resched() (called in CONFIG_PREEMPT_VOLUNTARY) to that we can override their behaviour when preempt= is overriden.
Since the default behaviour is full preemption, both their calls are ignored when preempt= isn't passed.
[fweisbec: branch might_resched() directly to __cond_resched(), only define static calls when PREEMPT_DYNAMIC]
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Frederic Weisbecker frederic@kernel.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/20210118141223.123667-6-frederic@kernel.org Signed-off-by: Ma Junhai majunhai2@huawei.com
Conflicts: include/linux/kernel.h Reviewed-by: Chen Hui judy.chenhui@huawei.com
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/kernel.h | 22 +++++++++++++++++++--- include/linux/sched.h | 27 ++++++++++++++++++++++++--- kernel/sched/core.c | 16 +++++++++++++--- 3 files changed, 56 insertions(+), 9 deletions(-)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2f05e9128201..0b809c92d817 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -15,6 +15,7 @@ #include <linux/typecheck.h> #include <linux/printk.h> #include <linux/build_bug.h> +#include <linux/static_call_types.h> #include <asm/byteorder.h> #include <asm/div64.h> #include <uapi/linux/kernel.h> @@ -194,11 +195,26 @@ struct pt_regs; struct user;
#ifdef CONFIG_PREEMPT_VOLUNTARY -extern int _cond_resched(void); -# define might_resched() _cond_resched() + +extern int __cond_resched(void); +# define might_resched() __cond_resched() + +#elif defined(CONFIG_PREEMPT_DYNAMIC) + +extern int __cond_resched(void); + +DECLARE_STATIC_CALL(might_resched, __cond_resched); + +static __always_inline void might_resched(void) +{ + static_call(might_resched)(); +} + #else + # define might_resched() do { } while (0) -#endif + +#endif /* CONFIG_PREEMPT_* */
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP extern void ___might_sleep(const char *file, int line, int preempt_offset); diff --git a/include/linux/sched.h b/include/linux/sched.h index 8408622c53ee..befd7b3fd4e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1860,11 +1860,32 @@ static inline int test_tsk_need_resched(struct task_struct *tsk) * value indicates whether a reschedule was done in fact. * cond_resched_lock() will drop the spinlock before scheduling, */ -#ifndef CONFIG_PREEMPTION -extern int _cond_resched(void); +#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) +extern int __cond_resched(void); + +#ifdef CONFIG_PREEMPT_DYNAMIC + +DECLARE_STATIC_CALL(cond_resched, __cond_resched); + +static __always_inline int _cond_resched(void) +{ + return static_call(cond_resched)(); +} + #else + +static inline int _cond_resched(void) +{ + return __cond_resched(); +} + +#endif /* CONFIG_PREEMPT_DYNAMIC */ + +#else + static inline int _cond_resched(void) { return 0; } -#endif + +#endif /* !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) */
#define cond_resched() ({ \ ___might_sleep(__FILE__, __LINE__, 0); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a3bd9985d801..03aca2e905a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6115,17 +6115,27 @@ SYSCALL_DEFINE0(sched_yield) return 0; }
-#ifndef CONFIG_PREEMPTION -int __sched _cond_resched(void) +#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC) +int __sched __cond_resched(void) { if (should_resched(0)) { preempt_schedule_common(); return 1; } +#ifndef CONFIG_PREEMPT_RCU rcu_all_qs(); +#endif return 0; } -EXPORT_SYMBOL(_cond_resched); +EXPORT_SYMBOL(__cond_resched); +#endif + +#ifdef CONFIG_PREEMPT_DYNAMIC +DEFINE_STATIC_CALL_RET0(cond_resched, __cond_resched); +EXPORT_STATIC_CALL(cond_resched); + +DEFINE_STATIC_CALL_RET0(might_resched, __cond_resched); +EXPORT_STATIC_CALL(might_resched); #endif
/*
From: "Peter Zijlstra (Intel)" peterz@infradead.org
mainline inclusion from mainline-5.12-rc1 commit 2c9a98d3bc808717ab63ad928a2b568967775388 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Provide static calls to control preempt_schedule[_notrace]() (called in CONFIG_PREEMPT) so that we can override their behaviour when preempt= is overriden.
Since the default behaviour is full preemption, both their calls are initialized to the arch provided wrapper, if any.
[fweisbec: only define static calls when PREEMPT_DYNAMIC, make it less dependent on x86 with __preempt_schedule_func] Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Frederic Weisbecker frederic@kernel.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/20210118141223.123667-7-frederic@kernel.org Signed-off-by: Ma Junhai majunhai2@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/x86/include/asm/preempt.h | 34 ++++++++++++++++++++++++++-------- kernel/sched/core.c | 12 ++++++++++++ 2 files changed, 38 insertions(+), 8 deletions(-)
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 69485ca13665..9b12dce9bda5 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -5,6 +5,7 @@ #include <asm/rmwcc.h> #include <asm/percpu.h> #include <linux/thread_info.h> +#include <linux/static_call_types.h>
DECLARE_PER_CPU(int, __preempt_count);
@@ -103,16 +104,33 @@ static __always_inline bool should_resched(int preempt_offset) }
#ifdef CONFIG_PREEMPTION - extern asmlinkage void preempt_schedule_thunk(void); -# define __preempt_schedule() \ - asm volatile ("call preempt_schedule_thunk" : ASM_CALL_CONSTRAINT)
- extern asmlinkage void preempt_schedule(void); - extern asmlinkage void preempt_schedule_notrace_thunk(void); -# define __preempt_schedule_notrace() \ - asm volatile ("call preempt_schedule_notrace_thunk" : ASM_CALL_CONSTRAINT) +extern asmlinkage void preempt_schedule(void); +extern asmlinkage void preempt_schedule_thunk(void); + +#define __preempt_schedule_func preempt_schedule_thunk + +DECLARE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); + +#define __preempt_schedule() \ +do { \ + __ADDRESSABLE(STATIC_CALL_KEY(preempt_schedule)); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \ +} while (0) + +extern asmlinkage void preempt_schedule_notrace(void); +extern asmlinkage void preempt_schedule_notrace_thunk(void); + +#define __preempt_schedule_notrace_func preempt_schedule_notrace_thunk + +DECLARE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); + +#define __preempt_schedule_notrace() \ +do { \ + __ADDRESSABLE(STATIC_CALL_KEY(preempt_schedule_notrace)); \ + asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule_notrace) : ASM_CALL_CONSTRAINT); \ +} while (0)
- extern asmlinkage void preempt_schedule_notrace(void); #endif
#endif /* __ASM_PREEMPT_H */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 03aca2e905a7..18699725eaab 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4703,6 +4703,12 @@ asmlinkage __visible void __sched notrace preempt_schedule(void) NOKPROBE_SYMBOL(preempt_schedule); EXPORT_SYMBOL(preempt_schedule);
+#ifdef CONFIG_PREEMPT_DYNAMIC +DEFINE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); +EXPORT_STATIC_CALL(preempt_schedule); +#endif + + /** * preempt_schedule_notrace - preempt_schedule called by tracing * @@ -4755,6 +4761,12 @@ asmlinkage __visible void __sched notrace preempt_schedule_notrace(void) } EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
+#ifdef CONFIG_PREEMPT_DYNAMIC +DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); +EXPORT_STATIC_CALL(preempt_schedule_notrace); +#endif + + #endif /* CONFIG_PREEMPTION */
/*
From: "Peter Zijlstra (Intel)" peterz@infradead.org
mainline inclusion from mainline-5.12-rc1 commit 40607ee97e4eec5655cc0f76a720bdc4c63a6434 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Provide static call to control IRQ preemption (called in CONFIG_PREEMPT) so that we can override its behaviour when preempt= is overriden.
Since the default behaviour is full preemption, its call is initialized to provide IRQ preemption when preempt= isn't passed.
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Frederic Weisbecker frederic@kernel.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/20210118141223.123667-8-frederic@kernel.org Signed-off-by: Ma Junhai majunhai2@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- include/linux/entry-common.h | 4 ++++ kernel/entry/common.c | 10 +++++++++- 2 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index 7dff07713a07..d8e1c798dc9d 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -2,6 +2,7 @@ #ifndef __LINUX_ENTRYCOMMON_H #define __LINUX_ENTRYCOMMON_H
+#include <linux/static_call_types.h> #include <linux/tracehook.h> #include <linux/syscalls.h> #include <linux/seccomp.h> @@ -403,6 +404,9 @@ irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs); * Conditional reschedule with additional sanity checks. */ void irqentry_exit_cond_resched(void); +#ifdef CONFIG_PREEMPT_DYNAMIC +DECLARE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +#endif
/** * irqentry_exit - Handle return from exception that used irqentry_enter() diff --git a/kernel/entry/common.c b/kernel/entry/common.c index e289e6773292..8a4dd7027e90 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -357,6 +357,9 @@ void irqentry_exit_cond_resched(void) preempt_schedule_irq(); } } +#ifdef CONFIG_PREEMPT_DYNAMIC +DEFINE_STATIC_CALL(irqentry_exit_cond_resched, irqentry_exit_cond_resched); +#endif
noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) { @@ -383,8 +386,13 @@ noinstr void irqentry_exit(struct pt_regs *regs, irqentry_state_t state) }
instrumentation_begin(); - if (IS_ENABLED(CONFIG_PREEMPTION)) + if (IS_ENABLED(CONFIG_PREEMPTION)) { +#ifdef CONFIG_PREEMT_DYNAMIC + static_call(irqentry_exit_cond_resched)(); +#else irqentry_exit_cond_resched(); +#endif + } /* Covers both tracing and lockdep */ trace_hardirqs_on(); instrumentation_end();
From: "Peter Zijlstra (Intel)" peterz@infradead.org
mainline inclusion from mainline-5.12-rc1 commit 826bfeb37bb4302ee6042f330c4c0c757152bdb8 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Support the preempt= boot option and patch the static call sites accordingly.
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Frederic Weisbecker frederic@kernel.org Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/20210118141223.123667-9-frederic@kernel.org Signed-off-by: Ma Junhai majunhai2@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/sched/core.c | 68 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 67 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 18699725eaab..ff0e21e8fb82 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4766,9 +4766,75 @@ DEFINE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); EXPORT_STATIC_CALL(preempt_schedule_notrace); #endif
- #endif /* CONFIG_PREEMPTION */
+#ifdef CONFIG_PREEMPT_DYNAMIC + +#include <linux/entry-common.h> + +/* + * SC:cond_resched + * SC:might_resched + * SC:preempt_schedule + * SC:preempt_schedule_notrace + * SC:irqentry_exit_cond_resched + * + * + * NONE: + * cond_resched <- __cond_resched + * might_resched <- RET0 + * preempt_schedule <- NOP + * preempt_schedule_notrace <- NOP + * irqentry_exit_cond_resched <- NOP + * + * VOLUNTARY: + * cond_resched <- __cond_resched + * might_resched <- __cond_resched + * preempt_schedule <- NOP + * preempt_schedule_notrace <- NOP + * irqentry_exit_cond_resched <- NOP + * + * FULL: + * cond_resched <- RET0 + * might_resched <- RET0 + * preempt_schedule <- preempt_schedule + * preempt_schedule_notrace <- preempt_schedule_notrace + * irqentry_exit_cond_resched <- irqentry_exit_cond_resched + */ +static int __init setup_preempt_mode(char *str) +{ + if (!strcmp(str, "none")) { + static_call_update(cond_resched, __cond_resched); + static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); + static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); + static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); + static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); + pr_info("Dynamic Preempt: %s\n", str); + } else if (!strcmp(str, "voluntary")) { + static_call_update(cond_resched, __cond_resched); + static_call_update(might_resched, __cond_resched); + static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); + static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); + static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); + pr_info("Dynamic Preempt: %s\n", str); + } else if (!strcmp(str, "full")) { + static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); + static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); + static_call_update(preempt_schedule, __preempt_schedule_func); + static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); + static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); + pr_info("Dynamic Preempt: %s\n", str); + } else { + pr_warn("Dynamic Preempt: Unsupported preempt mode %s, default to full\n", str); + return 1; + } + return 0; +} +__setup("preempt=", setup_preempt_mode); + +#endif /* CONFIG_PREEMPT_DYNAMIC */ + + /* * This is the entry point to schedule() from kernel preemption * off of irq context.
From: Peter Zijlstra peterz@infradead.org
mainline inclusion from mainline-5.12-rc1 commit e59e10f8ef63d42fbb99776a5a112841e798b3b5 category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I410UT CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
---------------------------
Add a debugfs file to muck about with the preempt mode at runtime.
Signed-off-by: Peter Zijlstra (Intel) peterz@infradead.org Signed-off-by: Ingo Molnar mingo@kernel.org Link: https://lkml.kernel.org/r/YAsGiUYf6NyaTplX@hirez.programming.kicks-ass.net Signed-off-by: Ma Junhai majunhai2@huawei.com Reviewed-by: Chen Hui judy.chenhui@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- kernel/sched/core.c | 135 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 126 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ff0e21e8fb82..e41cbae19a83 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4801,37 +4801,154 @@ EXPORT_STATIC_CALL(preempt_schedule_notrace); * preempt_schedule_notrace <- preempt_schedule_notrace * irqentry_exit_cond_resched <- irqentry_exit_cond_resched */ -static int __init setup_preempt_mode(char *str) + +enum { + preempt_dynamic_none = 0, + preempt_dynamic_voluntary, + preempt_dynamic_full, +}; + +static int preempt_dynamic_mode = preempt_dynamic_full; + +static int sched_dynamic_mode(const char *str) { - if (!strcmp(str, "none")) { + if (!strcmp(str, "none")) + return 0; + + if (!strcmp(str, "voluntary")) + return 1; + + if (!strcmp(str, "full")) + return 2; + + return -1; +} + +static void sched_dynamic_update(int mode) +{ + /* + * Avoid {NONE,VOLUNTARY} -> FULL transitions from ever ending up in + * the ZERO state, which is invalid. + */ + static_call_update(cond_resched, __cond_resched); + static_call_update(might_resched, __cond_resched); + static_call_update(preempt_schedule, __preempt_schedule_func); + static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); + static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); + + switch (mode) { + case preempt_dynamic_none: static_call_update(cond_resched, __cond_resched); static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); - pr_info("Dynamic Preempt: %s\n", str); - } else if (!strcmp(str, "voluntary")) { + pr_info("Dynamic Preempt: none\n"); + break; + + case preempt_dynamic_voluntary: static_call_update(cond_resched, __cond_resched); static_call_update(might_resched, __cond_resched); static_call_update(preempt_schedule, (typeof(&preempt_schedule)) NULL); static_call_update(preempt_schedule_notrace, (typeof(&preempt_schedule_notrace)) NULL); static_call_update(irqentry_exit_cond_resched, (typeof(&irqentry_exit_cond_resched)) NULL); - pr_info("Dynamic Preempt: %s\n", str); - } else if (!strcmp(str, "full")) { + pr_info("Dynamic Preempt: voluntary\n"); + break; + + case preempt_dynamic_full: static_call_update(cond_resched, (typeof(&__cond_resched)) __static_call_return0); static_call_update(might_resched, (typeof(&__cond_resched)) __static_call_return0); static_call_update(preempt_schedule, __preempt_schedule_func); static_call_update(preempt_schedule_notrace, __preempt_schedule_notrace_func); static_call_update(irqentry_exit_cond_resched, irqentry_exit_cond_resched); - pr_info("Dynamic Preempt: %s\n", str); - } else { - pr_warn("Dynamic Preempt: Unsupported preempt mode %s, default to full\n", str); + pr_info("Dynamic Preempt: full\n"); + break; + } + + preempt_dynamic_mode = mode; +} + +static int __init setup_preempt_mode(char *str) +{ + int mode = sched_dynamic_mode(str); + if (mode < 0) { + pr_warn("Dynamic Preempt: unsupported mode: %s\n", str); return 1; } + + sched_dynamic_update(mode); return 0; } __setup("preempt=", setup_preempt_mode);
+#ifdef CONFIG_SCHED_DEBUG + +static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + char buf[16]; + int mode; + + if (cnt > 15) + cnt = 15; + + if (copy_from_user(&buf, ubuf, cnt)) + return -EFAULT; + + buf[cnt] = 0; + mode = sched_dynamic_mode(strstrip(buf)); + if (mode < 0) + return mode; + + sched_dynamic_update(mode); + + *ppos += cnt; + + return cnt; +} + +static int sched_dynamic_show(struct seq_file *m, void *v) +{ + static const char * preempt_modes[] = { + "none", "voluntary", "full" + }; + int i; + + for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) { + if (preempt_dynamic_mode == i) + seq_puts(m, "("); + seq_puts(m, preempt_modes[i]); + if (preempt_dynamic_mode == i) + seq_puts(m, ")"); + + seq_puts(m, " "); + } + + seq_puts(m, "\n"); + return 0; +} + +static int sched_dynamic_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, sched_dynamic_show, NULL); +} + +static const struct file_operations sched_dynamic_fops = { + .open = sched_dynamic_open, + .write = sched_dynamic_write, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static __init int sched_init_debug_dynamic(void) +{ + debugfs_create_file("sched_preempt", 0644, NULL, NULL, &sched_dynamic_fops); + return 0; +} +late_initcall(sched_init_debug_dynamic); + +#endif /* CONFIG_SCHED_DEBUG */ #endif /* CONFIG_PREEMPT_DYNAMIC */
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
The obvious key to the performance optimization of commit 587e6c10a7ce ("iommu/arm-smmu-v3: Reduce contention during command-queue insertion") is to allow multiple cores to insert commands in parallel after a brief mutex contention.
Obviously, inserting as many commands at a time as possible can reduce the number of times the mutex contention participates, thereby improving the overall performance. At least it reduces the number of calls to function arm_smmu_cmdq_issue_cmdlist().
Therefore, use command queue batching helpers to insert multiple commands at a time.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index ac3497180e0c..abde4b9f3c88 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1964,15 +1964,16 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, unsigned int { int i; struct arm_smmu_cmdq_ent cmd; + struct arm_smmu_cmdq_batch cmds = {};
arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; - arm_smmu_cmdq_issue_cmd(master->smmu, &cmd); + arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); }
- return arm_smmu_cmdq_issue_sync(master->smmu); + return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); }
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
The obvious key to the performance optimization of commit 587e6c10a7ce ("iommu/arm-smmu-v3: Reduce contention during command-queue insertion") is to allow multiple cores to insert commands in parallel after a brief mutex contention.
Obviously, inserting as many commands at a time as possible can reduce the number of times the mutex contention participates, thereby improving the overall performance. At least it reduces the number of calls to function arm_smmu_cmdq_issue_cmdlist().
Therefore, function arm_smmu_cmdq_issue_cmd_with_sync() is added to insert the 'cmd+sync' commands at a time.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 33 +++++++++++++-------- 1 file changed, 21 insertions(+), 12 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index abde4b9f3c88..f1b6597b1b0e 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -856,11 +856,25 @@ static int arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu, return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, false); }
-static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) +static int __maybe_unused arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) { return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true); }
+static int arm_smmu_cmdq_issue_cmd_with_sync(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq_ent *ent) +{ + u64 cmd[CMDQ_ENT_DWORDS]; + + if (arm_smmu_cmdq_build_cmd(cmd, ent)) { + dev_warn(smmu->dev, "ignoring unknown CMDQ opcode 0x%x\n", + ent->opcode); + return -EINVAL; + } + + return arm_smmu_cmdq_issue_cmdlist(smmu, cmd, 1, true); +} + static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu, struct arm_smmu_cmdq_batch *cmds, struct arm_smmu_cmdq_ent *cmd) @@ -949,8 +963,7 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) .tlbi.asid = asid, };
- arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); }
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, @@ -1246,8 +1259,7 @@ static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid) }, };
- arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); }
static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, @@ -2047,8 +2059,7 @@ static void __arm_smmu_tlb_inv_context(struct arm_smmu_domain *smmu_domain, } else { cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } if (smmu_domain->parent) arm_smmu_atc_inv_domain(smmu_domain->parent, smmu_domain->ssid, @@ -4387,18 +4398,16 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
/* Invalidate any cached configuration */ cmd.opcode = CMDQ_OP_CFGI_ALL; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/* Invalidate any stale TLB entries */ if (smmu->features & ARM_SMMU_FEAT_HYP) { cmd.opcode = CMDQ_OP_TLBI_EL2_ALL; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); }
cmd.opcode = CMDQ_OP_TLBI_NSNH_ALL; - arm_smmu_cmdq_issue_cmd(smmu, &cmd); - arm_smmu_cmdq_issue_sync(smmu); + arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
/* Event queue */ writeq_relaxed(smmu->evtq.q.q_base, smmu->base + ARM_SMMU_EVTQ_BASE);
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
One SMMU has only one normal CMDQ. Therefore, this CMDQ is used regardless of the core on which the command is inserted. It can be referenced directly through "smmu->cmdq". However, one SMMU has multiple ECMDQs, and the ECMDQ used by the core on which the command insertion is executed may be different. So the helper function arm_smmu_get_cmdq() is added, which returns the CMDQ/ECMDQ that the current core should use. Currently, the code that supports ECMDQ is not added. just simply returns "&smmu->cmdq".
Many subfunctions of arm_smmu_cmdq_issue_cmdlist() use "&smmu->cmdq" or "&smmu->cmdq.q" directly. To support ECMDQ, they need to call the newly added function arm_smmu_get_cmdq() instead.
Note that normal CMDQ is still required until ECMDQ is available.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 22 ++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index f1b6597b1b0e..cc607b871411 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -333,10 +333,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) return 0; }
+static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) +{ + return &smmu->cmdq; +} + static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, - u32 prod) + struct arm_smmu_queue *q, u32 prod) { - struct arm_smmu_queue *q = &smmu->cmdq.q; struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC, }; @@ -576,7 +580,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu, { unsigned long flags; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); int ret = 0;
/* @@ -592,7 +596,7 @@ static int arm_smmu_cmdq_poll_until_not_full(struct arm_smmu_device *smmu,
queue_poll_init(smmu, &qp); do { - llq->val = READ_ONCE(smmu->cmdq.q.llq.val); + llq->val = READ_ONCE(cmdq->q.llq.val); if (!queue_full(llq)) break;
@@ -611,7 +615,7 @@ static int __arm_smmu_cmdq_poll_until_msi(struct arm_smmu_device *smmu, { int ret = 0; struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 *cmd = (u32 *)(Q_ENT(&cmdq->q, llq->prod));
queue_poll_init(smmu, &qp); @@ -634,12 +638,12 @@ static int __arm_smmu_cmdq_poll_until_consumed(struct arm_smmu_device *smmu, struct arm_smmu_ll_queue *llq) { struct arm_smmu_queue_poll qp; - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); u32 prod = llq->prod; int ret = 0;
queue_poll_init(smmu, &qp); - llq->val = READ_ONCE(smmu->cmdq.q.llq.val); + llq->val = READ_ONCE(cmdq->q.llq.val); do { if (queue_consumed(llq, prod)) break; @@ -729,7 +733,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, u32 prod; unsigned long flags; bool owner; - struct arm_smmu_cmdq *cmdq = &smmu->cmdq; + struct arm_smmu_cmdq *cmdq = arm_smmu_get_cmdq(smmu); struct arm_smmu_ll_queue llq = { .max_n_shift = cmdq->q.llq.max_n_shift, }, head = llq; @@ -769,7 +773,7 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); if (sync) { prod = queue_inc_prod_n(&llq, n); - arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod); + arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod); queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
/*
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
When SMMU_GERROR.CMDQP_ERR is different to SMMU_GERRORN.CMDQP_ERR, it indicates that one or more errors have been encountered on a command queue control page interface. We need to traverse all ECMDQs in that control page to find all errors. For each ECMDQ error handling, it is much the same as the CMDQ error handling. This common processing part is extracted as a new function __arm_smmu_cmdq_skip_err().
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index cc607b871411..56cc65203232 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -357,7 +357,8 @@ static void arm_smmu_cmdq_build_sync_cmd(u64 *cmd, struct arm_smmu_device *smmu, arm_smmu_cmdq_build_cmd(cmd, &ent); }
-static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) +static void __arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu, + struct arm_smmu_queue *q) { static const char *cerror_str[] = { [CMDQ_ERR_CERROR_NONE_IDX] = "No error", @@ -368,7 +369,6 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
int i; u64 cmd[CMDQ_ENT_DWORDS]; - struct arm_smmu_queue *q = &smmu->cmdq.q; u32 cons = readl_relaxed(q->cons_reg); u32 idx = FIELD_GET(CMDQ_CONS_ERR, cons); struct arm_smmu_cmdq_ent cmd_sync = { @@ -414,6 +414,11 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) queue_write(Q_ENT(q, cons), cmd, q->ent_dwords); }
+static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) +{ + __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q); +} + /* * Command queue locking. * This is a form of bastardised rwlock with the following major changes:
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
Ensure that each core exclusively occupies an ECMDQ and all of them are enabled during initialization. During this initialization process, any errors will result in a fallback to using normal CMDQ.
When GERROR is triggered by ECMDQ, all ECMDQs need to be traversed: the ECMDQs with errors will be processed and the ECMDQs without errors will be skipped directly.
Compared with register SMMU_CMDQ_PROD, register SMMU_ECMDQ_PROD has one more 'EN' bit and one more 'ERRACK' bit. Therefore, an extra member 'ecmdq_prod' is added to record the values of these two bits. Each time register SMMU_ECMDQ_PROD is updated, the value of 'ecmdq_prod' is ORed. After the error indicated by SMMU_GERROR.CMDQP_ERR is fixed, the 'ERRACK' bit needs to be toggled to resume the corresponding ECMDQ. Therefore, a rwlock is used to protect the write operation to bit 'ERRACK' during error handling and the read operation to bit 'ERRACK' during command insertion.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 210 +++++++++++++++++++- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 36 ++++ 2 files changed, 245 insertions(+), 1 deletion(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 56cc65203232..5bf044dfa1f7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -335,6 +335,14 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu) { + if (smmu->ecmdq_enabled) { + struct arm_smmu_ecmdq *ecmdq; + + ecmdq = *this_cpu_ptr(smmu->ecmdq); + + return &ecmdq->cmdq; + } + return &smmu->cmdq; }
@@ -419,6 +427,38 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu) __arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q); }
+static void arm_smmu_ecmdq_skip_err(struct arm_smmu_device *smmu) +{ + int i; + u32 prod, cons; + struct arm_smmu_queue *q; + struct arm_smmu_ecmdq *ecmdq; + + for (i = 0; i < smmu->nr_ecmdq; i++) { + unsigned long flags; + + ecmdq = *per_cpu_ptr(smmu->ecmdq, i); + q = &ecmdq->cmdq.q; + + prod = readl_relaxed(q->prod_reg); + cons = readl_relaxed(q->cons_reg); + if (((prod ^ cons) & ECMDQ_CONS_ERR) == 0) + continue; + + __arm_smmu_cmdq_skip_err(smmu, q); + + write_lock_irqsave(&q->ecmdq_lock, flags); + q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK; + q->ecmdq_prod |= cons & ECMDQ_CONS_ERR; + + prod = readl_relaxed(q->prod_reg); + prod &= ~ECMDQ_PROD_ERRACK; + prod |= cons & ECMDQ_CONS_ERR; + writel(prod, q->prod_reg); + write_unlock_irqrestore(&q->ecmdq_lock, flags); + } +} + /* * Command queue locking. * This is a form of bastardised rwlock with the following major changes: @@ -815,7 +855,13 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, * d. Advance the hardware prod pointer * Control dependency ordering from the entries becoming valid. */ - writel_relaxed(prod, cmdq->q.prod_reg); + if (smmu->ecmdq_enabled) { + read_lock(&cmdq->q.ecmdq_lock); + writel_relaxed(prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg); + read_unlock(&cmdq->q.ecmdq_lock); + } else { + writel_relaxed(prod, cmdq->q.prod_reg); + }
/* * e. Tell the next owner we're done @@ -1893,6 +1939,9 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev) if (active & GERROR_CMDQ_ERR) arm_smmu_cmdq_skip_err(smmu);
+ if (active & GERROR_CMDQP_ERR) + arm_smmu_ecmdq_skip_err(smmu); + writel(gerror, smmu->base + ARM_SMMU_GERRORN); return IRQ_HANDLED; } @@ -3976,6 +4025,20 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) return ret; }
+static int arm_smmu_ecmdq_init(struct arm_smmu_cmdq *cmdq) +{ + unsigned int nents = 1 << cmdq->q.llq.max_n_shift; + + atomic_set(&cmdq->owner_prod, 0); + atomic_set(&cmdq->lock, 0); + + cmdq->valid_map = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL); + if (!cmdq->valid_map) + return -ENOMEM; + + return 0; +} + static int arm_smmu_init_queues(struct arm_smmu_device *smmu) { int ret; @@ -4350,6 +4413,7 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) { + int i; int ret; u32 reg, enables; struct arm_smmu_cmdq_ent cmd; @@ -4397,6 +4461,28 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD); writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
+ for (i = 0; i < smmu->nr_ecmdq; i++) { + struct arm_smmu_ecmdq *ecmdq; + struct arm_smmu_queue *q; + + ecmdq = *per_cpu_ptr(smmu->ecmdq, i); + q = &ecmdq->cmdq.q; + + writeq_relaxed(q->q_base, ecmdq->base + ARM_SMMU_ECMDQ_BASE); + writel_relaxed(q->llq.prod, ecmdq->base + ARM_SMMU_ECMDQ_PROD); + writel_relaxed(q->llq.cons, ecmdq->base + ARM_SMMU_ECMDQ_CONS); + + /* enable ecmdq */ + writel(ECMDQ_PROD_EN, q->prod_reg); + ret = readl_relaxed_poll_timeout(q->cons_reg, reg, reg & ECMDQ_CONS_ENACK, + 1, ARM_SMMU_POLL_TIMEOUT_US); + if (ret) { + dev_err(smmu->dev, "ecmdq[%d] enable failed\n", i); + smmu->ecmdq_enabled = 0; + break; + } + } + enables = CR0_CMDQEN; ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0, ARM_SMMU_CR0ACK); @@ -4486,6 +4572,115 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass) return 0; }
+static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu) +{ + int cpu; + struct arm_smmu_ecmdq *ecmdq; + + if (num_possible_cpus() <= smmu->nr_ecmdq) { + ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq); + if (!ecmdq) + return -ENOMEM; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu); + + /* A core requires at most one ECMDQ */ + smmu->nr_ecmdq = num_possible_cpus(); + + return 0; + } + + return -ENOSPC; +} + +static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu) +{ + int ret, cpu; + u32 i, nump, numq, gap; + u32 reg, shift_increment; + u64 addr, smmu_dma_base; + void __iomem *cp_regs, *cp_base; + + /* IDR6 */ + reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6); + nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg); + numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg); + smmu->nr_ecmdq = nump * numq; + gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg); + + smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT); + cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE); + if (!cp_regs) + return -ENOMEM; + + for (i = 0; i < nump; i++) { + u64 val, pre_addr; + + val = readq_relaxed(cp_regs + 32 * i); + if (!(val & ECMDQ_CP_PRESET)) { + iounmap(cp_regs); + dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i); + return -EFAULT; + } + + if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) { + iounmap(cp_regs); + dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n"); + return -EFAULT; + } + + pre_addr = val & ECMDQ_CP_ADDR; + } + + addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR; + iounmap(cp_regs); + + cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump); + if (!cp_base) + return -ENOMEM; + + smmu->ecmdq = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *); + if (!smmu->ecmdq) + return -ENOMEM; + + ret = arm_smmu_ecmdq_layout(smmu); + if (ret) + return ret; + + shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq); + + addr = 0; + for_each_possible_cpu(cpu) { + struct arm_smmu_ecmdq *ecmdq; + struct arm_smmu_queue *q; + + ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu); + ecmdq->base = cp_base + addr; + + q = &ecmdq->cmdq.q; + + q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment; + ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD, + ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq"); + if (ret) + return ret; + + q->ecmdq_prod = ECMDQ_PROD_EN; + rwlock_init(&q->ecmdq_lock); + + ret = arm_smmu_ecmdq_init(&ecmdq->cmdq); + if (ret) { + dev_err(smmu->dev, "ecmdq[%d] init failed\n", i); + return ret; + } + + addr += gap; + } + + return 0; +} + static void arm_smmu_get_httu(struct arm_smmu_device *smmu, u32 reg) { u32 fw_features = smmu->features & (ARM_SMMU_FEAT_HA | ARM_SMMU_FEAT_HD); @@ -4630,6 +4825,9 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) return -ENXIO; }
+ if (reg & IDR1_ECMDQ) + smmu->features |= ARM_SMMU_FEAT_ECMDQ; + /* Queue sizes, capped to ensure natural alignment */ smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT, FIELD_GET(IDR1_CMDQS, reg)); @@ -4744,6 +4942,16 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n", smmu->ias, smmu->oas, smmu->features); + + if (smmu->features & ARM_SMMU_FEAT_ECMDQ) { + int err; + + err = arm_smmu_ecmdq_probe(smmu); + if (err) { + dev_err(smmu->dev, "suppress ecmdq feature, errno=%d\n", err); + smmu->ecmdq_enabled = 0; + } + } return 0; }
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 927c9096c0b7..10acf2888a79 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -45,6 +45,7 @@ #define IDR0_S2P (1 << 0)
#define ARM_SMMU_IDR1 0x4 +#define IDR1_ECMDQ (1 << 31) #define IDR1_TABLES_PRESET (1 << 30) #define IDR1_QUEUES_PRESET (1 << 29) #define IDR1_REL (1 << 28) @@ -115,6 +116,7 @@ #define ARM_SMMU_IRQ_CTRLACK 0x54
#define ARM_SMMU_GERROR 0x60 +#define GERROR_CMDQP_ERR (1 << 9) #define GERROR_SFM_ERR (1 << 8) #define GERROR_MSI_GERROR_ABT_ERR (1 << 7) #define GERROR_MSI_PRIQ_ABT_ERR (1 << 6) @@ -160,6 +162,26 @@ #define ARM_SMMU_PRIQ_IRQ_CFG1 0xd8 #define ARM_SMMU_PRIQ_IRQ_CFG2 0xdc
+#define ARM_SMMU_IDR6 0x190 +#define IDR6_LOG2NUMP GENMASK(27, 24) +#define IDR6_LOG2NUMQ GENMASK(19, 16) +#define IDR6_BA_DOORBELLS GENMASK(9, 0) + +#define ARM_SMMU_ECMDQ_BASE 0x00 +#define ARM_SMMU_ECMDQ_PROD 0x08 +#define ARM_SMMU_ECMDQ_CONS 0x0c +#define ECMDQ_MAX_SZ_SHIFT 8 +#define ECMDQ_PROD_EN (1 << 31) +#define ECMDQ_CONS_ENACK (1 << 31) +#define ECMDQ_CONS_ERR (1 << 23) +#define ECMDQ_PROD_ERRACK (1 << 23) + +#define ARM_SMMU_ECMDQ_CP_BASE 0x4000 +#define ECMDQ_CP_ADDR GENMASK_ULL(51, 12) +#define ECMDQ_CP_CMDQGS GENMASK_ULL(2, 1) +#define ECMDQ_CP_PRESET (1UL << 0) +#define ECMDQ_CP_RRESET_SIZE 0x10000 + #define ARM_SMMU_REG_SZ 0xe00
/* Common MSI config fields */ @@ -540,6 +562,8 @@ struct arm_smmu_ll_queue { struct arm_smmu_queue { struct arm_smmu_ll_queue llq; int irq; /* Wired interrupt */ + u32 ecmdq_prod; + rwlock_t ecmdq_lock;
__le64 *base; dma_addr_t base_dma; @@ -565,6 +589,11 @@ struct arm_smmu_cmdq { atomic_t lock; };
+struct arm_smmu_ecmdq { + struct arm_smmu_cmdq cmdq; + void __iomem *base; +}; + struct arm_smmu_cmdq_batch { u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS]; int num; @@ -667,6 +696,7 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HD (1 << 20) #define ARM_SMMU_FEAT_BBML1 (1 << 21) #define ARM_SMMU_FEAT_BBML2 (1 << 22) +#define ARM_SMMU_FEAT_ECMDQ (1 << 23) u32 features;
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) @@ -674,6 +704,12 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_MSIPOLL (1 << 2) u32 options;
+ union { + u32 nr_ecmdq; + u32 ecmdq_enabled; + }; + struct arm_smmu_ecmdq *__percpu *ecmdq; + struct arm_smmu_cmdq cmdq; struct arm_smmu_evtq evtq; struct arm_smmu_priq priq;
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
The SYNC command only ensures that the command that precedes it in the same ECMDQ must be executed, but cannot synchronize the commands in other ECMDQs. If an unmap involves multiple commands, some commands are executed on one core, and the other commands are executed on another core. In this case, after the SYNC execution is complete, the execution of all preceded commands can not be ensured.
Prevent the process that performs a set of associated commands insertion from being migrated to other cores ensures that all commands are inserted into the same ECMDQ.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 40 +++++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 5bf044dfa1f7..9c14d6828dd1 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -236,6 +236,18 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent) return 0; }
+static void arm_smmu_preempt_disable(struct arm_smmu_device *smmu) +{ + if (smmu->ecmdq_enabled) + preempt_disable(); +} + +static void arm_smmu_preempt_enable(struct arm_smmu_device *smmu) +{ + if (smmu->ecmdq_enabled) + preempt_enable(); +} + /* High-level queue accessors */ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) { @@ -1037,6 +1049,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, }, };
+ arm_smmu_preempt_disable(smmu); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { for (i = 0; i < master->num_streams; i++) { @@ -1047,6 +1060,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_cmdq_batch_submit(smmu, &cmds); + arm_smmu_preempt_enable(smmu); }
static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, @@ -2032,30 +2046,36 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master, unsigned int ssid) { - int i; + int i, ret; struct arm_smmu_cmdq_ent cmd; struct arm_smmu_cmdq_batch cmds = {}; + struct arm_smmu_device *smmu = master->smmu;
arm_smmu_atc_inv_to_cmd(ssid, 0, 0, &cmd);
+ arm_smmu_preempt_disable(smmu); for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; - arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd); + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); }
- return arm_smmu_cmdq_batch_submit(master->smmu, &cmds); + ret = arm_smmu_cmdq_batch_submit(smmu, &cmds); + arm_smmu_preempt_enable(smmu); + + return ret; }
int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid, unsigned long iova, size_t size) { - int i; + int i, ret; unsigned long flags; struct arm_smmu_cmdq_ent cmd; struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds = {}; + struct arm_smmu_device *smmu = smmu_domain->smmu;
- if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS)) + if (!(smmu->features & ARM_SMMU_FEAT_ATS)) return 0;
/* @@ -2077,6 +2097,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);
+ arm_smmu_preempt_disable(smmu); spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_for_each_entry(master, &smmu_domain->devices, domain_head) { if (!master->ats_enabled) @@ -2084,12 +2105,15 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
for (i = 0; i < master->num_streams; i++) { cmd.atc.sid = master->streams[i].id; - arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd); + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); } } spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
- return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds); + ret = arm_smmu_cmdq_batch_submit(smmu, &cmds); + arm_smmu_preempt_enable(smmu); + + return ret; }
/* IO_PGTABLE API */ @@ -2170,6 +2194,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, num_pages = size >> tg; }
+ arm_smmu_preempt_disable(smmu); while (iova < end) { if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) { /* @@ -2201,6 +2226,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd, iova += inv_range; } arm_smmu_cmdq_batch_submit(smmu, &cmds); + arm_smmu_preempt_enable(smmu); }
static void
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
When a core can exclusively own an ECMDQ, competition with other cores does not need to be considered during command insertion. Therefore, we can delete the part of arm_smmu_cmdq_issue_cmdlist() that deals with multi-core contention and generate a more efficient ECMDQ-specific function arm_smmu_ecmdq_issue_cmdlist().
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 85 +++++++++++++++++++++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h | 1 + 2 files changed, 86 insertions(+)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9c14d6828dd1..9613c0e4e2d8 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -767,6 +767,87 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds, } }
+/* + * The function is used when the current core exclusively occupies an ECMDQ. + * This is a reduced version of arm_smmu_cmdq_issue_cmdlist(), which eliminates + * a lot of unnecessary inter-core competition considerations. + */ +static int arm_smmu_ecmdq_issue_cmdlist(struct arm_smmu_device *smmu, + struct arm_smmu_cmdq *cmdq, + u64 *cmds, int n, bool sync) +{ + u32 prod; + unsigned long flags; + struct arm_smmu_ll_queue llq = { + .max_n_shift = cmdq->q.llq.max_n_shift, + }, head; + int ret = 0; + + /* 1. Allocate some space in the queue */ + local_irq_save(flags); + llq.val = READ_ONCE(cmdq->q.llq.val); + do { + u64 old; + + while (!queue_has_space(&llq, n + sync)) { + local_irq_restore(flags); + if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq)) + dev_err_ratelimited(smmu->dev, "ECMDQ timeout\n"); + local_irq_save(flags); + } + + head.cons = llq.cons; + head.prod = queue_inc_prod_n(&llq, n + sync); + + old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val); + if (old == llq.val) + break; + + llq.val = old; + } while (1); + + /* 2. Write our commands into the queue */ + arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n); + if (sync) { + u64 cmd_sync[CMDQ_ENT_DWORDS]; + + prod = queue_inc_prod_n(&llq, n); + arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod); + queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS); + } + + /* 3. Ensuring commands are visible first */ + dma_wmb(); + + /* 4. Advance the hardware prod pointer */ + read_lock(&cmdq->q.ecmdq_lock); + writel_relaxed(head.prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg); + read_unlock(&cmdq->q.ecmdq_lock); + + /* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */ + if (sync) { + llq.prod = queue_inc_prod_n(&llq, n); + ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq); + if (ret) { + dev_err_ratelimited(smmu->dev, + "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n", + llq.prod, + readl_relaxed(cmdq->q.prod_reg), + readl_relaxed(cmdq->q.cons_reg)); + } + + /* + * Update cmdq->q.llq.cons, to improve the success rate of + * queue_has_space() when some new commands are inserted next + * time. + */ + WRITE_ONCE(cmdq->q.llq.cons, llq.cons); + } + + local_irq_restore(flags); + return ret; +} + /* * This is the actual insertion function, and provides the following * ordering guarantees to callers: @@ -796,6 +877,9 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu, }, head = llq; int ret = 0;
+ if (!cmdq->shared) + return arm_smmu_ecmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync); + /* 1. Allocate some space in the queue */ local_irq_save(flags); llq.val = READ_ONCE(cmdq->q.llq.val); @@ -4036,6 +4120,7 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu) unsigned int nents = 1 << cmdq->q.llq.max_n_shift; atomic_long_t *bitmap;
+ cmdq->shared = 1; atomic_set(&cmdq->owner_prod, 0); atomic_set(&cmdq->lock, 0);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 10acf2888a79..785451c62730 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -587,6 +587,7 @@ struct arm_smmu_cmdq { atomic_long_t *valid_map; atomic_t owner_prod; atomic_t lock; + int shared; };
struct arm_smmu_ecmdq {
From: Zhen Lei thunder.leizhen@huawei.com
hulk inclusion category: feature bugzilla: 174251 CVE: NA
-------------------------------------------------------------------------
Due to limited hardware resources, the number of ECMDQs may be less than the number of cores. If the number of ECMDQs is greater than the number of numa nodes, ensure that each node has at least one ECMDQ. This is because ECMDQ queue memory is requested from the NUMA node where it resides, which may result in better command filling and insertion performance.
The current ECMDQ implementation reuses the command insertion function arm_smmu_cmdq_issue_cmdlist() of the normal CMDQ. This function already supports multiple cores concurrent insertion commands.
Signed-off-by: Zhen Lei thunder.leizhen@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 101 ++++++++++++++++++-- 1 file changed, 92 insertions(+), 9 deletions(-)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 9613c0e4e2d8..311ec8fe4410 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -4685,14 +4685,15 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool bypass)
static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu) { - int cpu; - struct arm_smmu_ecmdq *ecmdq; + int cpu, node, nr_remain, nr_nodes = 0; + int *nr_ecmdqs; + struct arm_smmu_ecmdq *ecmdq, **ecmdqs;
- if (num_possible_cpus() <= smmu->nr_ecmdq) { - ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq); - if (!ecmdq) - return -ENOMEM; + ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq); + if (!ecmdq) + return -ENOMEM;
+ if (num_possible_cpus() <= smmu->nr_ecmdq) { for_each_possible_cpu(cpu) *per_cpu_ptr(smmu->ecmdq, cpu) = per_cpu_ptr(ecmdq, cpu);
@@ -4702,7 +4703,79 @@ static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu) return 0; }
- return -ENOSPC; + for_each_node(node) + if (nr_cpus_node(node)) + nr_nodes++; + + if (nr_nodes >= smmu->nr_ecmdq) { + dev_err(smmu->dev, "%d ECMDQs is less than %d nodes\n", smmu->nr_ecmdq, nr_nodes); + return -ENOSPC; + } + + nr_ecmdqs = kcalloc(MAX_NUMNODES, sizeof(int), GFP_KERNEL); + if (!nr_ecmdqs) + return -ENOMEM; + + ecmdqs = kcalloc(smmu->nr_ecmdq, sizeof(*ecmdqs), GFP_KERNEL); + if (!ecmdqs) { + kfree(nr_ecmdqs); + return -ENOMEM; + } + + /* [1] Ensure that each node has at least one ECMDQ */ + nr_remain = smmu->nr_ecmdq - nr_nodes; + for_each_node(node) { + /* + * Calculate the number of ECMDQs to be allocated to this node. + * NR_ECMDQS_PER_CPU = nr_remain / num_possible_cpus(); + * When nr_cpus_node(node) is not zero, less than one ECMDQ + * may be left due to truncation rounding. + */ + nr_ecmdqs[node] = nr_cpus_node(node) * nr_remain / num_possible_cpus(); + nr_remain -= nr_ecmdqs[node]; + } + + /* Divide the remaining ECMDQs */ + while (nr_remain) { + for_each_node(node) { + if (!nr_remain) + break; + + if (nr_ecmdqs[node] >= nr_cpus_node(node)) + continue; + + nr_ecmdqs[node]++; + nr_remain--; + } + } + + for_each_node(node) { + int i, round, shared = 0; + + if (!nr_cpus_node(node)) + continue; + + /* An ECMDQ has been reserved for each node at above [1] */ + nr_ecmdqs[node]++; + + if (nr_ecmdqs[node] < nr_cpus_node(node)) + shared = 1; + + i = 0; + for_each_cpu(cpu, cpumask_of_node(node)) { + round = i % nr_ecmdqs[node]; + if (i++ < nr_ecmdqs[node]) { + ecmdqs[round] = per_cpu_ptr(ecmdq, cpu); + ecmdqs[round]->cmdq.shared = shared; + } + *per_cpu_ptr(smmu->ecmdq, cpu) = ecmdqs[round]; + } + } + + kfree(nr_ecmdqs); + kfree(ecmdqs); + + return 0; }
static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu) @@ -4767,10 +4840,20 @@ static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu) struct arm_smmu_queue *q;
ecmdq = *per_cpu_ptr(smmu->ecmdq, cpu); - ecmdq->base = cp_base + addr; - q = &ecmdq->cmdq.q;
+ /* + * The boot option "maxcpus=" can limit the number of online + * CPUs. The CPUs that are not selected are not showed in + * cpumask_of_node(node), their 'ecmdq' may be NULL. + * + * (q->ecmdq_prod & ECMDQ_PROD_EN) indicates that the ECMDQ is + * shared by multiple cores and has been initialized. + */ + if (!ecmdq || (q->ecmdq_prod & ECMDQ_PROD_EN)) + continue; + ecmdq->base = cp_base + addr; + q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment; ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD, ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
From: Chengwen Feng fengchengwen@huawei.com
mainline inclusion from mainline-master commit 1b713d14dc3c077ec45e65dab4ea01a8bc41b8c1 category: bugfix bugzilla: 173966 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Currently, the mailbox synchronous communication between VF and PF use the following fields to maintain communication: 1. Origin_mbx_msg which was combined by message code and subcode, used to match request and response. 2. Received_resp which means whether received response.
There may possible mismatches of the following situation: 1. VF sends message A with code=1 subcode=1. 2. PF was blocked about 500ms when processing the message A. 3. VF will detect message A timeout because it can't get the response within 500ms. 4. VF sends message B with code=1 subcode=1 which equal message A. 5. PF processes the first message A and send the response message to VF. 6. VF will identify the response matched the message B because the code/subcode is the same. This will lead to mismatch of request and response.
To fix the above bug, we use the following scheme: 1. The message sent from VF was labelled with match_id which was a unique 16-bit non-zero value. 2. The response sent from PF will label with match_id which got from the request. 3. The VF uses the match_id to match request and response message.
As for PF driver, it only needs to copy the match_id from request to response.
Fixes: dde1a86e93ca ("net: hns3: Add mailbox support to PF driver") Signed-off-by: Chengwen Feng fengchengwen@huawei.com Signed-off-by: Guangbin Huang huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski kuba@kernel.org Reviewed-by: Yongxin Li liyongxin1@huawei.com Signed-off-by: Junxin Chen chenjunxin1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h | 6 ++++-- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 1 + 2 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 0a6cda309b24..56b573e47072 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -143,7 +143,8 @@ struct hclge_mbx_vf_to_pf_cmd { u8 mbx_need_resp; u8 rsv1[1]; u8 msg_len; - u8 rsv2[3]; + u8 rsv2; + u16 match_id; struct hclge_vf_to_pf_msg msg; };
@@ -153,7 +154,8 @@ struct hclge_mbx_pf_to_vf_cmd { u8 dest_vfid; u8 rsv[3]; u8 msg_len; - u8 rsv1[3]; + u8 rsv1; + u16 match_id; struct hclge_pf_to_vf_msg msg; };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c index e10a2c36b706..c0a478ae9583 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c @@ -47,6 +47,7 @@ static int hclge_gen_resp_to_vf(struct hclge_vport *vport,
resp_pf_to_vf->dest_vfid = vf_to_pf_req->mbx_src_vfid; resp_pf_to_vf->msg_len = vf_to_pf_req->msg_len; + resp_pf_to_vf->match_id = vf_to_pf_req->match_id;
resp_pf_to_vf->msg.code = HCLGE_MBX_PF_VF_RESP; resp_pf_to_vf->msg.vf_mbx_msg_code = vf_to_pf_req->msg.code;
From: Peng Li lipeng321@huawei.com
mainline inclusion from mainline-master commit 4671042f1ef0d37137884811afcc4ae67685ce07 category: feature bugzilla: 173966 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
When VF need response from PF, VF will wait (1us - 1s) to receive the response, or it will wait timeout and the VF action fails. If VF do not receive response in 1st action because timeout, the 2nd action may receive response for the 1st action, and get incorrect response data.VF must reciveve the right response from PF,or it will cause unexpected error.
This patch adds match_id to check mailbox response from PF to VF, to make sure VF get the right response: 1. The message sent from VF was labelled with match_id which was a unique 16-bit non-zero value. 2. The response sent from PF will label with match_id which got from the request. 3. The VF uses the match_id to match request and response message.
This scheme depends on PF driver supports match_id, if PF driver doesn't support then VF will uses the original scheme.
Signed-off-by: Peng Li lipeng321@huawei.com Signed-off-by: Guangbin Huang huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski kuba@kernel.org Reviewed-by: Yongxin Li liyongxin1@huawei.com Signed-off-by: Junxin Chen chenjunxin1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- .../net/ethernet/hisilicon/hns3/hclge_mbx.h | 1 + .../hisilicon/hns3/hns3vf/hclgevf_mbx.c | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h index 56b573e47072..aa86a81c8f4a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h +++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h @@ -98,6 +98,7 @@ struct hclgevf_mbx_resp_status { u32 origin_mbx_msg; bool received_resp; int resp_status; + u16 match_id; u8 additional_info[HCLGE_MBX_MAX_RESP_DATA_SIZE]; };
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c index 9b17735b9f4c..772b2f8acd2e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c @@ -13,6 +13,7 @@ static int hclgevf_resp_to_errno(u16 resp_code) return resp_code ? -resp_code : 0; }
+#define HCLGEVF_MBX_MATCH_ID_START 1 static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) { /* this function should be called with mbx_resp.mbx_mutex held @@ -21,6 +22,10 @@ static void hclgevf_reset_mbx_resp_status(struct hclgevf_dev *hdev) hdev->mbx_resp.received_resp = false; hdev->mbx_resp.origin_mbx_msg = 0; hdev->mbx_resp.resp_status = 0; + hdev->mbx_resp.match_id++; + /* Update match_id and ensure the value of match_id is not zero */ + if (hdev->mbx_resp.match_id == 0) + hdev->mbx_resp.match_id = HCLGEVF_MBX_MATCH_ID_START; memset(hdev->mbx_resp.additional_info, 0, HCLGE_MBX_MAX_RESP_DATA_SIZE); }
@@ -115,6 +120,7 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev, if (need_resp) { mutex_lock(&hdev->mbx_resp.mbx_mutex); hclgevf_reset_mbx_resp_status(hdev); + req->match_id = hdev->mbx_resp.match_id; status = hclgevf_cmd_send(&hdev->hw, &desc, 1); if (status) { dev_err(&hdev->pdev->dev, @@ -211,6 +217,19 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev) resp->additional_info[i] = *temp; temp++; } + + /* If match_id is not zero, it means PF support + * match_id. If the match_id is right, VF get the + * right response, otherwise ignore the response. + * Driver will clear hdev->mbx_resp when send + * next message which need response. + */ + if (req->match_id) { + if (req->match_id == resp->match_id) + resp->received_resp = true; + } else { + resp->received_resp = true; + } break; case HCLGE_MBX_LINK_STAT_CHANGE: case HCLGE_MBX_ASSERTING_RESET:
From: Jian Shen shenjian15@huawei.com
mainline inclusion from mainline-master commit 184cd221a86321e53df9389c4b35a247b60c1e77 category: feature bugzilla: 173966 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
For hardware limitation, port VLAN filter is port level, and effective for all the functions of the port. So if not support port VLAN bypass, it's necessary to disable the port VLAN filter, in order to support function level VLAN filter control.
Fixes: 2ba306627f59 ("net: hns3: add support for modify VLAN filter state") Signed-off-by: Jian Shen shenjian15@huawei.com Signed-off-by: Guangbin Huang huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski kuba@kernel.org Reviewed-by: Yongxin Li liyongxin1@huawei.com Signed-off-by: Junxin Chen chenjunxin1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index dd3354a57c62..ebeaf12e409b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -9552,13 +9552,17 @@ static int hclge_set_vport_vlan_filter(struct hclge_vport *vport, bool enable) if (ret) return ret;
- if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) + if (test_bit(HNAE3_DEV_SUPPORT_PORT_VLAN_BYPASS_B, ae_dev->caps)) { ret = hclge_set_port_vlan_filter_bypass(hdev, vport->vport_id, !enable); - else if (!vport->vport_id) + } else if (!vport->vport_id) { + if (test_bit(HNAE3_DEV_SUPPORT_VLAN_FLTR_MDF_B, ae_dev->caps)) + enable = false; + ret = hclge_set_vlan_filter_ctrl(hdev, HCLGE_FILTER_TYPE_PORT, HCLGE_FILTER_FE_INGRESS, enable, 0); + }
return ret; }
From: Jian Shen shenjian15@huawei.com
mainline inclusion from mainline-v5.12-rc1-dontuse commit bbfd4506f962e7e6fff8f37f017154a3c3791264 category: bugfix bugzilla: 173966 CVE: NA
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...
----------------------------------------------------------------------
Currently, VF doesn't enable rx VLAN offload when initializating, and PF does it for VFs. If user disable the rx VLAN offload for VF with ethtool -K, and reload the VF driver, it may cause the rx VLAN offload state being inconsistent between hardware and software.
Fixes it by enabling rx VLAN offload when VF initializing.
Fixes: e2cb1dec9779 ("net: hns3: Add HNS3 VF HCL(Hardware Compatibility Layer) Support") Signed-off-by: Jian Shen shenjian15@huawei.com Signed-off-by: Guangbin Huang huangguangbin2@huawei.com Signed-off-by: Jakub Kicinski kuba@kernel.org Reviewed-by: Yongxin Li liyongxin1@huawei.com Signed-off-by: Junxin Chen chenjunxin1@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- .../net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 10 ++++++++++ 1 file changed, 10 insertions(+)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 52eaf82b7cd7..8784d61e833f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -2641,6 +2641,16 @@ static int hclgevf_rss_init_hw(struct hclgevf_dev *hdev)
static int hclgevf_init_vlan_config(struct hclgevf_dev *hdev) { + struct hnae3_handle *nic = &hdev->nic; + int ret; + + ret = hclgevf_en_hw_strip_rxvtag(nic, true); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to enable rx vlan offload, ret = %d\n", ret); + return ret; + } + return hclgevf_set_vlan_filter(&hdev->nic, htons(ETH_P_8021Q), 0, false); }
From: luochunsheng luochunsheng@huawei.com
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I42DAP CVE: NA
----------------------------------
arm64 cannot support virtualization pass-through feature when using the 3408iMR/3416iMR raid card. For solving the problem, we add two fuctions: 1.we prepare init bypass level2 entry for specific devices when smmu uses 2 level streamid entry. 2.we add smmu.bypassdev cmdline to allow SMMU bypass streams for some specific devices.
usage: 3408iMRraid: smmu.bypassdev=0x1000:0x17 3416iMRraid: smmu.bypassdev=0x1000:0x15
Signed-off-by: luochunshengluochunsheng@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- drivers/iommu/Kconfig | 9 ++ drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 120 ++++++++++++++++++++ 2 files changed, 129 insertions(+)
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d009889e154f..a65f1f835e7d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -409,4 +409,13 @@ config VIRTIO_IOMMU
Say Y here if you intend to run this kernel as a guest.
+config SMMU_BYPASS_DEV + bool "SMMU bypass streams for some specific devices" + depends on ARM_SMMU_V3=y + help + according smmu.bypassdev cmdline, SMMU performs attribute + transformation only,with no address translation. + E.g:SMMU allow iMR3408/3416 Raid bypass at DMA default domain + to support other devices Virtualization through. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 311ec8fe4410..b9b2232b6b83 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -45,6 +45,42 @@ module_param(disable_msipolling, bool, 0444); MODULE_PARM_DESC(disable_msipolling, "Disable MSI-based polling for CMD_SYNC completion.");
+#ifdef CONFIG_SMMU_BYPASS_DEV +struct smmu_bypass_device { + unsigned short vendor; + unsigned short device; +}; +#define MAX_CMDLINE_SMMU_BYPASS_DEV 16 + +static struct smmu_bypass_device smmu_bypass_devices[MAX_CMDLINE_SMMU_BYPASS_DEV]; +static int smmu_bypass_devices_num; + +static int __init arm_smmu_bypass_dev_setup(char *str) +{ + unsigned short vendor; + unsigned short device; + int ret; + + if (!str) + return -EINVAL; + + ret = sscanf(str, "%hx:%hx", &vendor, &device); + if (ret != 2) + return -EINVAL; + + if (smmu_bypass_devices_num >= MAX_CMDLINE_SMMU_BYPASS_DEV) + return -ERANGE; + + smmu_bypass_devices[smmu_bypass_devices_num].vendor = vendor; + smmu_bypass_devices[smmu_bypass_devices_num].device = device; + smmu_bypass_devices_num++; + + return 0; +} + +__setup("smmu.bypassdev=", arm_smmu_bypass_dev_setup); +#endif + enum arm_smmu_msi_index { EVTQ_MSI_INDEX, GERROR_MSI_INDEX, @@ -4022,6 +4058,29 @@ static int arm_smmu_aux_get_pasid(struct iommu_domain *domain, struct device *de return smmu_domain->ssid ?: -EINVAL; }
+#ifdef CONFIG_SMMU_BYPASS_DEV +static int arm_smmu_device_domain_type(struct device *dev) +{ + int i; + struct pci_dev *pdev; + + if (!dev_is_pci(dev)) + return 0; + + pdev = to_pci_dev(dev); + for (i = 0; i < smmu_bypass_devices_num; i++) { + if ((smmu_bypass_devices[i].vendor == pdev->vendor) && + (smmu_bypass_devices[i].device == pdev->device)) { + dev_info(dev, "device 0x%hx:0x%hx uses identity mapping.", + pdev->vendor, pdev->device); + return IOMMU_DOMAIN_IDENTITY; + } + } + + return 0; +} +#endif + static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -4060,6 +4119,9 @@ static struct iommu_ops arm_smmu_ops = { .aux_attach_dev = arm_smmu_aux_attach_dev, .aux_detach_dev = arm_smmu_aux_detach_dev, .aux_get_pasid = arm_smmu_aux_get_pasid, +#ifdef CONFIG_SMMU_BYPASS_DEV + .def_domain_type = arm_smmu_device_domain_type, +#endif .pgsize_bitmap = -1UL, /* Restricted during device attach */ };
@@ -4218,12 +4280,58 @@ static int arm_smmu_init_l1_strtab(struct arm_smmu_device *smmu) return 0; }
+#ifdef CONFIG_SMMU_BYPASS_DEV +static void arm_smmu_install_bypass_ste_for_dev(struct arm_smmu_device *smmu, + u32 sid) +{ + u64 val; + __le64 *step = arm_smmu_get_step_for_sid(smmu, sid); + + if (!step) + return; + + val = STRTAB_STE_0_V; + val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_BYPASS); + step[0] = cpu_to_le64(val); + step[1] = cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG, + STRTAB_STE_1_SHCFG_INCOMING)); + step[2] = 0; +} + +static int arm_smmu_prepare_init_l2_strtab(struct device *dev, void *data) +{ + u32 sid; + int ret; + struct pci_dev *pdev; + struct arm_smmu_device *smmu = (struct arm_smmu_device *)data; + + if (!arm_smmu_device_domain_type(dev)) + return 0; + + pdev = to_pci_dev(dev); + sid = PCI_DEVID(pdev->bus->number, pdev->devfn); + if (!arm_smmu_sid_in_range(smmu, sid)) + return -ERANGE; + + ret = arm_smmu_init_l2_strtab(smmu, sid); + if (ret) + return ret; + + arm_smmu_install_bypass_ste_for_dev(smmu, sid); + + return 0; +} +#endif + static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { void *strtab; u64 reg; u32 size, l1size; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; +#ifdef CONFIG_SMMU_BYPASS_DEV + int ret; +#endif
/* Calculate the L1 size, capped to the SIDSIZE. */ size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); @@ -4252,8 +4360,20 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) reg |= FIELD_PREP(STRTAB_BASE_CFG_LOG2SIZE, size); reg |= FIELD_PREP(STRTAB_BASE_CFG_SPLIT, STRTAB_SPLIT); cfg->strtab_base_cfg = reg; +#ifdef CONFIG_SMMU_BYPASS_DEV + ret = arm_smmu_init_l1_strtab(smmu); + if (ret) + return ret; + + if (smmu_bypass_devices_num) { + ret = bus_for_each_dev(&pci_bus_type, NULL, (void *)smmu, + arm_smmu_prepare_init_l2_strtab); + }
+ return ret; +#else return arm_smmu_init_l1_strtab(smmu); +#endif }
static int arm_smmu_init_strtab_linear(struct arm_smmu_device *smmu)
euleros inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I42DAP CVE: NA
----------------------------------
Some manufacturers want to enable SMMU for using virtualization pass-through feature on the server with the 3408iMR/3416iMR RAID card installed.
Enable CONFIG_SMMU_BYPASS_DEV by default in openeuler_defconfig.
Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com Reviewed-by: Hanjun Guo guohanjun@huawei.com Signed-off-by: Zheng Zengkai zhengzengkai@huawei.com --- arch/arm64/configs/openeuler_defconfig | 1 + 1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig index 248ceeefec7f..cb7ea0f4b282 100644 --- a/arch/arm64/configs/openeuler_defconfig +++ b/arch/arm64/configs/openeuler_defconfig @@ -5803,6 +5803,7 @@ CONFIG_ARM_SMMU=y CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT=y CONFIG_ARM_SMMU_V3=y # CONFIG_ARM_SMMU_V3_SVA is not set +CONFIG_SMMU_BYPASS_DEV=y # CONFIG_QCOM_IOMMU is not set # CONFIG_VIRTIO_IOMMU is not set
On 2021/7/27 15:10, Zheng Zengkai wrote:
diff --git a/include/linux/irqchip/arm-gic-phytium-2500.h b/include/linux/irqchip/arm-gic-phytium-2500.h new file mode 100644 index 000000000000..96c5ca83f44d --- /dev/null +++ b/include/linux/irqchip/arm-gic-phytium-2500.h
I can't see any point of adding such a new header given that ...
+int phytium_its_cpu_init(void); +int phytium_its_init(struct fwnode_handle *handle, struct rdists *rdists,
struct irq_domain *domain);
... this is the only difference compared with arm-gic-v3.h _and_ if I read it correctly, phytium_its_{cpu_,}init() has exactly the same implementation with the upstream its_{cpu_,}init().
I hadn't looked into the details (as the huge diffstat already killed me ;-) ) so please feel free to ignore my random comments.