diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 3fd0a3888d22abd3b20593ec4fa7cd1b515b6be9..af725ea613a5c8654fc80341c625fcdd180f1b8f 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -423,6 +423,17 @@ config ARM_SMMU_V3_PM
 	  help
 	  Add support for suspend and resume support for arm smmu v3.
 
+config ARM_SMMU_V3_ECMDQ
+	bool "Add ARM SMMUv3 ECMDQ support"
+	depends on ARM_SMMU_V3
+	default n
+	help
+	  Add support for the ARM SMMUv3 Enhanced Command Queue (ECMDQ).
+	  An SMMU can implement multiple ECMDQs, which reduces contention
+	  between CPUs when they issue commands to the SMMU concurrently.
+
+	  If unsure, say N.
+
 config S390_IOMMU
 	def_bool y if S390 && PCI
 	depends on S390 && PCI
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 25b5439ca6cfa87986ee0809e1826ea677ac73aa..143333f7dc92dd5ea592c9504388ecaddcafd031 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -41,6 +41,12 @@ module_param(disable_msipolling, bool, 0444);
 MODULE_PARM_DESC(disable_msipolling,
 	"Disable MSI-based polling for CMD_SYNC completion.");
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+static bool disable_ecmdq;
+module_param(disable_ecmdq, bool, 0444);
+MODULE_PARM_DESC(disable_ecmdq, "Disable the use of ECMDQs");
+#endif
+
 #ifdef CONFIG_SMMU_BYPASS_DEV
 struct smmu_bypass_device {
 	unsigned short vendor;
@@ -296,6 +302,24 @@ static int queue_remove_raw(struct arm_smmu_queue *q, u64 *ent)
 	return 0;
 }
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+static void arm_smmu_preempt_disable(struct arm_smmu_device *smmu)
+{
+	if (smmu->ecmdq_enabled)
+		preempt_disable();
+}
+
+static void arm_smmu_preempt_enable(struct arm_smmu_device *smmu)
+{
+	if (smmu->ecmdq_enabled)
+		preempt_enable();
+}
+#else
+static void arm_smmu_preempt_disable(struct arm_smmu_device *smmu) {}
+static void arm_smmu_preempt_enable(struct arm_smmu_device *smmu) {}
+#endif
+
 /* High-level queue accessors */
 static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 {
@@ -400,6 +424,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 
 static struct arm_smmu_cmdq *arm_smmu_get_cmdq(struct arm_smmu_device *smmu)
 {
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	if (smmu->ecmdq_enabled) {
+		struct arm_smmu_ecmdq *ecmdq;
+
+		ecmdq = *this_cpu_ptr(smmu->ecmdqs);
+
+		return &ecmdq->cmdq;
+	}
+#endif
+
 	return &smmu->cmdq;
 }
 
@@ -482,6 +516,40 @@ static void arm_smmu_cmdq_skip_err(struct arm_smmu_device *smmu)
 	__arm_smmu_cmdq_skip_err(smmu, &smmu->cmdq.q);
 }
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
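+/*
+ * Per-queue error handling for the ECMDQs: a queue reports an error by
+ * flipping CONS.ERR relative to PROD.ERRACK. Skip the faulting command,
+ * then update ERRACK both in the cached ecmdq_prod and in the hardware
+ * register, under ecmdq_lock so that concurrent PROD updates from command
+ * insertion see a consistent value.
+ */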
+static void arm_smmu_ecmdq_skip_err(struct arm_smmu_device *smmu)
+{
+	int i;
+	u32 prod, cons;
+	struct arm_smmu_queue *q;
+	struct arm_smmu_ecmdq *ecmdq;
+
+	for (i = 0; i < smmu->nr_ecmdq; i++) {
+		unsigned long flags;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdqs, i);
+		q = &ecmdq->cmdq.q;
+
+		prod = readl_relaxed(q->prod_reg);
+		cons = readl_relaxed(q->cons_reg);
+		if (((prod ^ cons) & ECMDQ_CONS_ERR) == 0)
+			continue;
+
+		__arm_smmu_cmdq_skip_err(smmu, q);
+
+		write_lock_irqsave(&q->ecmdq_lock, flags);
+		q->ecmdq_prod &= ~ECMDQ_PROD_ERRACK;
+		q->ecmdq_prod |= cons & ECMDQ_CONS_ERR;
+
+		prod = readl_relaxed(q->prod_reg);
+		prod &= ~ECMDQ_PROD_ERRACK;
+		prod |= cons & ECMDQ_CONS_ERR;
+		writel(prod, q->prod_reg);
+		write_unlock_irqrestore(&q->ecmdq_lock, flags);
+	}
+}
+#endif
+
 /*
  * Command queue locking.
  * This is a form of bastardised rwlock with the following major changes:
@@ -778,6 +846,89 @@ static void arm_smmu_cmdq_write_entries(struct arm_smmu_cmdq *cmdq, u64 *cmds,
 	}
 }
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+/*
+ * This function is used when the current core exclusively owns an ECMDQ.
+ * It is a reduced version of arm_smmu_cmdq_issue_cmdlist() that omits the
+ * inter-core contention handling, which is unnecessary in that case.
+ */
+static int arm_smmu_ecmdq_issue_cmdlist(struct arm_smmu_device *smmu,
+					struct arm_smmu_cmdq *cmdq,
+					u64 *cmds, int n, bool sync)
+{
+	u32 prod;
+	unsigned long flags;
+	struct arm_smmu_ll_queue llq = {
+		.max_n_shift = cmdq->q.llq.max_n_shift,
+	}, head;
+	int ret = 0;
+
+	/* 1. Allocate some space in the queue */
+	local_irq_save(flags);
+	llq.val = READ_ONCE(cmdq->q.llq.val);
+	do {
+		u64 old;
+
+		while (!queue_has_space(&llq, n + sync)) {
+			local_irq_restore(flags);
+			if (arm_smmu_cmdq_poll_until_not_full(smmu, &llq))
+				dev_err_ratelimited(smmu->dev, "ECMDQ timeout\n");
+			local_irq_save(flags);
+		}
+
+		head.cons = llq.cons;
+		head.prod = queue_inc_prod_n(&llq, n + sync);
+
+		old = cmpxchg_relaxed(&cmdq->q.llq.val, llq.val, head.val);
+		if (old == llq.val)
+			break;
+
+		llq.val = old;
+	} while (1);
+
+	/* 2. Write our commands into the queue */
+	arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
+	if (sync) {
+		u64 cmd_sync[CMDQ_ENT_DWORDS];
+
+		prod = queue_inc_prod_n(&llq, n);
+		arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, &cmdq->q, prod);
+		queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
+	}
+
+	/* 3. Ensure the commands are visible before advancing the prod pointer */
+	dma_wmb();
+
+	/* 4. Advance the hardware prod pointer */
+	read_lock(&cmdq->q.ecmdq_lock);
+	writel_relaxed(head.prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+	read_unlock(&cmdq->q.ecmdq_lock);
+
+	/* 5. If we are inserting a CMD_SYNC, we must wait for it to complete */
+	if (sync) {
+		llq.prod = queue_inc_prod_n(&llq, n);
+		ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
+		if (ret) {
+			dev_err_ratelimited(smmu->dev,
+					    "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
+					    llq.prod,
+					    readl_relaxed(cmdq->q.prod_reg),
+					    readl_relaxed(cmdq->q.cons_reg));
+		}
+
+		/*
+		 * Update cmdq->q.llq.cons, to improve the success rate of
+		 * queue_has_space() when some new commands are inserted next
+		 * time.
+		 */
+		WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
+	}
+
+	local_irq_restore(flags);
+	return ret;
+}
+#endif
+
 /*
  * This is the actual insertion function, and provides the following
  * ordering guarantees to callers:
@@ -805,6 +956,11 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 	struct arm_smmu_ll_queue llq, head;
 	int ret = 0;
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	if (!cmdq->shared)
+		return arm_smmu_ecmdq_issue_cmdlist(smmu, cmdq, cmds, n, sync);
+#endif
+
 	llq.max_n_shift = cmdq->q.llq.max_n_shift;
 
 	/* 1. Allocate some space in the queue */
@@ -878,7 +1034,14 @@ static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
 		 * d. Advance the hardware prod pointer
 		 * Control dependency ordering from the entries becoming valid.
 		 */
-		writel_relaxed(prod, cmdq->q.prod_reg);
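+		/*
+		 * When this queue is an ECMDQ, the cached ecmdq_prod carries
+		 * the EN and ERRACK bits that must be OR-ed into every PROD
+		 * update. Take ecmdq_lock for reading so this write cannot
+		 * race with the error handler, which updates ecmdq_prod under
+		 * the write lock.
+		 */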
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+		if (smmu->ecmdq_enabled) {
+			read_lock(&cmdq->q.ecmdq_lock);
+			writel_relaxed(prod | cmdq->q.ecmdq_prod, cmdq->q.prod_reg);
+			read_unlock(&cmdq->q.ecmdq_lock);
+		} else
+#endif
+			writel_relaxed(prod, cmdq->q.prod_reg);
 
 		/*
 		 * e. Tell the next owner we're done
@@ -1042,6 +1205,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 
 	cmds.num = 0;
 
+	arm_smmu_preempt_disable(smmu);
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
 		for (i = 0; i < master->num_streams; i++) {
@@ -1052,6 +1216,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
+	arm_smmu_preempt_enable(smmu);
 }
 
 static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
@@ -1757,6 +1922,11 @@ static irqreturn_t arm_smmu_gerror_handler(int irq, void *dev)
 	if (active & GERROR_CMDQ_ERR)
 		arm_smmu_cmdq_skip_err(smmu);
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	if (active & GERROR_CMDQP_ERR)
+		arm_smmu_ecmdq_skip_err(smmu);
+#endif
+
 	writel(gerror, smmu->base + ARM_SMMU_GERRORN);
 	return IRQ_HANDLED;
 }
@@ -1847,32 +2017,37 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
 
 static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
 {
-	int i;
+	int i, ret;
 	struct arm_smmu_cmdq_ent cmd;
 	struct arm_smmu_cmdq_batch cmds;
 
 	arm_smmu_atc_inv_to_cmd(IOMMU_NO_PASID, 0, 0, &cmd);
 
 	cmds.num = 0;
+	arm_smmu_preempt_disable(master->smmu);
 	for (i = 0; i < master->num_streams; i++) {
 		cmd.atc.sid = master->streams[i].id;
 		arm_smmu_cmdq_batch_add(master->smmu, &cmds, &cmd);
 	}
 
-	return arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
+	ret = arm_smmu_cmdq_batch_submit(master->smmu, &cmds);
+	arm_smmu_preempt_enable(master->smmu);
+	return ret;
 }
 
 int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
 			    unsigned long iova, size_t size)
 {
-	int i;
+	int i, ret;
 	unsigned long flags;
 	struct arm_smmu_cmdq_ent cmd;
 	struct arm_smmu_master *master;
 	struct arm_smmu_cmdq_batch cmds;
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
 	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
 		return 0;
+#endif
 
 	/*
 	 * Ensure that we've completed prior invalidation of the main TLBs
@@ -1895,6 +2070,7 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
 
 	cmds.num = 0;
 
+	arm_smmu_preempt_disable(smmu_domain->smmu);
 	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
 	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
 		if (!master->ats_enabled)
@@ -1907,7 +2083,10 @@ int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, int ssid,
 	}
 	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
 
-	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
+	ret = arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
+	arm_smmu_preempt_enable(smmu_domain->smmu);
+
+	return ret;
 }
 
 /* IO_PGTABLE API */
@@ -1972,6 +2151,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
 
 	cmds.num = 0;
 
+	arm_smmu_preempt_disable(smmu);
 	while (iova < end) {
 		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
 			/*
@@ -2003,6 +2183,7 @@ static void __arm_smmu_tlb_inv_range(struct arm_smmu_cmdq_ent *cmd,
 		iova += inv_range;
 	}
 	arm_smmu_cmdq_batch_submit(smmu, &cmds);
+	arm_smmu_preempt_enable(smmu);
 }
 
 static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
@@ -3016,6 +3197,9 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
 	struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
 	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
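+	/*
+	 * The normal command queue is always shared by all cores, so the
+	 * lock-based arm_smmu_cmdq_issue_cmdlist() path is used for it even
+	 * when ECMDQs are available.
+	 */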
+	cmdq->shared = 1;
+#endif
 	atomic_set(&cmdq->owner_prod, 0);
 	atomic_set(&cmdq->lock, 0);
 
@@ -3027,6 +3211,22 @@ static int arm_smmu_cmdq_init(struct arm_smmu_device *smmu)
 	return 0;
 }
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+static int arm_smmu_ecmdq_init(struct arm_smmu_cmdq *cmdq)
+{
+	unsigned int nents = 1 << cmdq->q.llq.max_n_shift;
+
+	atomic_set(&cmdq->owner_prod, 0);
+	atomic_set(&cmdq->lock, 0);
+
+	cmdq->valid_map = (atomic_long_t *)bitmap_zalloc(nents, GFP_KERNEL);
+	if (!cmdq->valid_map)
+		return -ENOMEM;
+
+	return 0;
+}
+#endif
+
 static int arm_smmu_init_queues(struct arm_smmu_device *smmu)
 {
 	int ret;
@@ -3472,6 +3672,55 @@ static int arm_smmu_device_disable(struct arm_smmu_device *smmu)
 	return ret;
 }
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+static int arm_smmu_ecmdq_reset(struct arm_smmu_device *smmu)
+{
+	int i, cpu, ret = 0;
+	u32 reg;
+
+	if (!smmu->nr_ecmdq)
+		return 0;
+
+	i = 0;
+	for_each_possible_cpu(cpu) {
+		struct arm_smmu_ecmdq *ecmdq;
+		struct arm_smmu_queue *q;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdqs, cpu);
+		if (ecmdq != per_cpu_ptr(smmu->ecmdq, cpu))
+			continue;
+
+		q = &ecmdq->cmdq.q;
+		i++;
+
+		if (WARN_ON(q->llq.prod != q->llq.cons)) {
+			q->llq.prod = 0;
+			q->llq.cons = 0;
+		}
+		writeq_relaxed(q->q_base, ecmdq->base + ARM_SMMU_ECMDQ_BASE);
+		writel_relaxed(q->llq.prod, ecmdq->base + ARM_SMMU_ECMDQ_PROD);
+		writel_relaxed(q->llq.cons, ecmdq->base + ARM_SMMU_ECMDQ_CONS);
+
+		/* enable ecmdq */
+		writel(ECMDQ_PROD_EN | q->llq.prod, q->prod_reg);
+		ret = readl_relaxed_poll_timeout(q->cons_reg, reg, reg & ECMDQ_CONS_ENACK,
+						 1, ARM_SMMU_POLL_TIMEOUT_US);
+		if (ret) {
+			dev_err(smmu->dev, "ecmdq[%d] enable failed\n", i);
+			smmu->ecmdq_enabled = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+#else
+static int arm_smmu_ecmdq_reset(struct arm_smmu_device *smmu)
+{
+	return 0;
+}
+#endif
+
 static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool resume)
 {
 	int ret;
@@ -3518,6 +3767,8 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool resume)
 	writel_relaxed(smmu->cmdq.q.llq.prod, smmu->base + ARM_SMMU_CMDQ_PROD);
 	writel_relaxed(smmu->cmdq.q.llq.cons, smmu->base + ARM_SMMU_CMDQ_CONS);
 
+	arm_smmu_ecmdq_reset(smmu);
+
 	enables = CR0_CMDQEN;
 	ret = arm_smmu_write_reg_sync(smmu, enables, ARM_SMMU_CR0,
 				      ARM_SMMU_CR0ACK);
@@ -3607,6 +3858,136 @@ static int arm_smmu_device_reset(struct arm_smmu_device *smmu, bool resume)
 	return 0;
 }
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+static int arm_smmu_ecmdq_layout(struct arm_smmu_device *smmu)
+{
+	int cpu, host_cpu;
+	struct arm_smmu_ecmdq *ecmdq;
+
+	ecmdq = devm_alloc_percpu(smmu->dev, *ecmdq);
+	if (!ecmdq)
+		return -ENOMEM;
+	smmu->ecmdq = ecmdq;
+
+	/* A core requires at most one ECMDQ */
+	if (num_possible_cpus() < smmu->nr_ecmdq)
+		smmu->nr_ecmdq = num_possible_cpus();
+
+	for_each_possible_cpu(cpu) {
+		if (cpu < smmu->nr_ecmdq) {
+			*per_cpu_ptr(smmu->ecmdqs, cpu) = per_cpu_ptr(smmu->ecmdq, cpu);
+		} else {
+			host_cpu = cpu % smmu->nr_ecmdq;
+			ecmdq = per_cpu_ptr(smmu->ecmdq, host_cpu);
+			ecmdq->cmdq.shared = 1;
+			*per_cpu_ptr(smmu->ecmdqs, cpu) = ecmdq;
+		}
+	}
+
+	return 0;
+}
+
+static int arm_smmu_ecmdq_probe(struct arm_smmu_device *smmu)
+{
+	int ret, cpu;
+	u32 i, nump, numq, gap;
+	u32 reg, shift_increment;
+	u64 addr, smmu_dma_base;
+	void __iomem *cp_regs, *cp_base;
+
+	/* IDR6 */
+	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR6);
+	nump = 1 << FIELD_GET(IDR6_LOG2NUMP, reg);
+	numq = 1 << FIELD_GET(IDR6_LOG2NUMQ, reg);
+	smmu->nr_ecmdq = nump * numq;
+	gap = ECMDQ_CP_RRESET_SIZE >> FIELD_GET(IDR6_LOG2NUMQ, reg);
+	if (!smmu->nr_ecmdq)
+		return -EOPNOTSUPP;
+
+	smmu_dma_base = (vmalloc_to_pfn(smmu->base) << PAGE_SHIFT);
+	cp_regs = ioremap(smmu_dma_base + ARM_SMMU_ECMDQ_CP_BASE, PAGE_SIZE);
+	if (!cp_regs)
+		return -ENOMEM;
+
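+	/*
+	 * Check that every ECMDQ control page is register-backed (PRESET) and
+	 * that the control pages are physically contiguous, so that all of
+	 * them can be covered by the single devm_ioremap() below.
+	 */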
+	for (i = 0; i < nump; i++) {
+		u64 val, pre_addr;
+
+		val = readq_relaxed(cp_regs + 32 * i);
+		if (!(val & ECMDQ_CP_PRESET)) {
+			iounmap(cp_regs);
+			dev_err(smmu->dev, "ecmdq control page %u is memory mode\n", i);
+			return -EFAULT;
+		}
+
+		if (i && ((val & ECMDQ_CP_ADDR) != (pre_addr + ECMDQ_CP_RRESET_SIZE))) {
+			iounmap(cp_regs);
+			dev_err(smmu->dev, "ecmdq_cp memory region is not contiguous\n");
+			return -EFAULT;
+		}
+
+		pre_addr = val & ECMDQ_CP_ADDR;
+	}
+
+	addr = readl_relaxed(cp_regs) & ECMDQ_CP_ADDR;
+	iounmap(cp_regs);
+
+	cp_base = devm_ioremap(smmu->dev, smmu_dma_base + addr, ECMDQ_CP_RRESET_SIZE * nump);
+	if (!cp_base)
+		return -ENOMEM;
+
+	smmu->ecmdqs = devm_alloc_percpu(smmu->dev, struct arm_smmu_ecmdq *);
+	if (!smmu->ecmdqs)
+		return -ENOMEM;
+
+	ret = arm_smmu_ecmdq_layout(smmu);
+	if (ret)
+		return ret;
+
+	shift_increment = order_base_2(num_possible_cpus() / smmu->nr_ecmdq);
+
+	addr = 0;
+	for_each_possible_cpu(cpu) {
+		struct arm_smmu_ecmdq *ecmdq;
+		struct arm_smmu_queue *q;
+
+		ecmdq = *per_cpu_ptr(smmu->ecmdqs, cpu);
+		q = &ecmdq->cmdq.q;
+
+		/*
+		 * The boot option "maxcpus=" can limit the number of online
+		 * CPUs. The CPUs that are not selected are not shown in
+		 * cpumask_of_node(node), so their 'ecmdq' may be NULL.
+		 *
+		 * (ecmdq != per_cpu_ptr(smmu->ecmdq, cpu)) indicates that the
+		 * ECMDQ is shared by multiple cores and should be initialized
+		 * only by the first owner.
+		 */
+		if (!ecmdq || (ecmdq != per_cpu_ptr(smmu->ecmdq, cpu)))
+			continue;
+		ecmdq->base = cp_base + addr;
+
+		q->llq.max_n_shift = ECMDQ_MAX_SZ_SHIFT + shift_increment;
+		ret = arm_smmu_init_one_queue(smmu, q, ecmdq->base, ARM_SMMU_ECMDQ_PROD,
+					      ARM_SMMU_ECMDQ_CONS, CMDQ_ENT_DWORDS, "ecmdq");
+		if (ret)
+			return ret;
+
+		q->ecmdq_prod = ECMDQ_PROD_EN;
+		rwlock_init(&q->ecmdq_lock);
+
+		ret = arm_smmu_ecmdq_init(&ecmdq->cmdq);
+		if (ret) {
+			dev_err(smmu->dev, "ecmdq[%d] init failed\n", cpu);
+			return ret;
+		}
+
+		addr += gap;
+	}
+
+	return 0;
+}
+#endif
+
 #define IIDR_IMPLEMENTER_ARM		0x43b
 #define IIDR_PRODUCTID_ARM_MMU_600	0x483
 #define IIDR_PRODUCTID_ARM_MMU_700	0x487
@@ -3755,6 +4136,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 		return -ENXIO;
 	}
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	if (reg & IDR1_ECMDQ)
+		smmu->features |= ARM_SMMU_FEAT_ECMDQ;
+#endif
+
 	/* Queue sizes, capped to ensure natural alignment */
 	smmu->cmdq.q.llq.max_n_shift = min_t(u32, CMDQ_MAX_SZ_SHIFT,
 					     FIELD_GET(IDR1_CMDQS, reg));
@@ -3862,6 +4248,18 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	dev_info(smmu->dev, "ias %lu-bit, oas %lu-bit (features 0x%08x)\n",
 		 smmu->ias, smmu->oas, smmu->features);
+
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	if (smmu->features & ARM_SMMU_FEAT_ECMDQ && !disable_ecmdq) {
+		int err;
+
+		err = arm_smmu_ecmdq_probe(smmu);
+		if (err) {
+			dev_err(smmu->dev, "suppress ecmdq feature, errno=%d\n", err);
+			smmu->ecmdq_enabled = 0;
+		}
+	}
+#endif
 	return 0;
 }
 
@@ -3946,8 +4344,70 @@ static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
 }
 
 #ifdef CONFIG_ARM_SMMU_V3_PM
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
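+/*
+ * Wait for each ECMDQ to drain and then clear its EN bit before suspend.
+ * If an ECMDQ cannot be disabled, the ECMDQ feature is suppressed so that
+ * only the normal command queue is used after resume.
+ */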
+static int arm_smmu_ecmdq_disable(struct device *dev)
+{
+	int i, j;
+	int ret, nr_fail = 0, n = 100;
+	u32 reg, prod, cons;
+	struct arm_smmu_ecmdq *ecmdq;
+	struct arm_smmu_queue *q;
+	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
+
+	for (i = 0; i < smmu->nr_ecmdq; i++) {
+		ecmdq = *per_cpu_ptr(smmu->ecmdqs, i);
+		q = &ecmdq->cmdq.q;
+
+		prod = readl_relaxed(q->prod_reg);
+		cons = readl_relaxed(q->cons_reg);
+		if ((prod & ECMDQ_PROD_EN) == 0)
+			continue;
+
+		for (j = 0; j < n; j++) {
+			if (Q_IDX(&q->llq, prod) == Q_IDX(&q->llq, cons) &&
+			    Q_WRP(&q->llq, prod) == Q_WRP(&q->llq, cons))
+				break;
+
+			/* Wait a moment, so ECMDQ has a chance to finish */
+			udelay(1);
+			cons = readl_relaxed(q->cons_reg);
+		}
+		WARN_ON(prod != readl_relaxed(q->prod_reg));
+		if (j >= n)
+			dev_warn(smmu->dev,
+				 "Forcibly disabling ecmdq[%d]: prod=%08x, cons=%08x\n",
+				 i, prod, cons);
+
+		/* disable ecmdq */
+		prod &= ~ECMDQ_PROD_EN;
+		writel(prod, q->prod_reg);
+		ret = readl_relaxed_poll_timeout(q->cons_reg, reg, !(reg & ECMDQ_CONS_ENACK),
+						 1, ARM_SMMU_POLL_TIMEOUT_US);
+		if (ret) {
+			nr_fail++;
+			dev_err(smmu->dev, "ecmdq[%d] disable failed\n", i);
+		}
+	}
+
+	if (nr_fail) {
+		smmu->ecmdq_enabled = 0;
+		pr_warn("Suppress ecmdq feature, switch to normal cmdq\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+#else
+static int arm_smmu_ecmdq_disable(struct device *dev)
+{
+	return 0;
+}
+#endif
+
 static int arm_smmu_suspend(struct device *dev)
 {
+	arm_smmu_ecmdq_disable(dev);
+
 	/*
 	 * The smmu is powered off and related registers are automatically
 	 * cleared when suspend. No need to do anything.
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 269b6fa705d48f2846f8a62b17239cddd55dad7e..cdcc0fc9c2f7cb4bafa59bc9d3447c7b4ad7415a 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -41,6 +41,7 @@
 #define IDR0_S2P			(1 << 0)
 
 #define ARM_SMMU_IDR1			0x4
+#define IDR1_ECMDQ			(1 << 31)
 #define IDR1_TABLES_PRESET		(1 << 30)
 #define IDR1_QUEUES_PRESET		(1 << 29)
 #define IDR1_REL			(1 << 28)
@@ -113,6 +114,7 @@
 #define ARM_SMMU_IRQ_CTRLACK		0x54
 
 #define ARM_SMMU_GERROR			0x60
+#define GERROR_CMDQP_ERR		(1 << 9)
 #define GERROR_SFM_ERR			(1 << 8)
 #define GERROR_MSI_GERROR_ABT_ERR	(1 << 7)
 #define GERROR_MSI_PRIQ_ABT_ERR		(1 << 6)
@@ -129,6 +131,26 @@
 #define ARM_SMMU_GERROR_IRQ_CFG1	0x70
 #define ARM_SMMU_GERROR_IRQ_CFG2	0x74
 
+#define ARM_SMMU_IDR6			0x190
+#define IDR6_LOG2NUMP			GENMASK(27, 24)
+#define IDR6_LOG2NUMQ			GENMASK(19, 16)
+#define IDR6_BA_DOORBELLS		GENMASK(9, 0)
+
+#define ARM_SMMU_ECMDQ_BASE		0x00
+#define ARM_SMMU_ECMDQ_PROD		0x08
+#define ARM_SMMU_ECMDQ_CONS		0x0c
+#define ECMDQ_MAX_SZ_SHIFT		8
+#define ECMDQ_PROD_EN			(1 << 31)
+#define ECMDQ_CONS_ENACK		(1 << 31)
+#define ECMDQ_CONS_ERR			(1 << 23)
+#define ECMDQ_PROD_ERRACK		(1 << 23)
+
+#define ARM_SMMU_ECMDQ_CP_BASE		0x4000
+#define ECMDQ_CP_ADDR			GENMASK_ULL(51, 12)
+#define ECMDQ_CP_CMDQGS			GENMASK_ULL(2, 1)
+#define ECMDQ_CP_PRESET			(1UL << 0)
+#define ECMDQ_CP_RRESET_SIZE		0x10000
+
 #define ARM_SMMU_STRTAB_BASE		0x80
 #define STRTAB_BASE_RA			(1UL << 62)
 #define STRTAB_BASE_ADDR_MASK		GENMASK_ULL(51, 6)
@@ -527,6 +549,10 @@ struct arm_smmu_ll_queue {
 struct arm_smmu_queue {
 	struct arm_smmu_ll_queue	llq;
 	int				irq; /* Wired interrupt */
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	u32				ecmdq_prod;
+	rwlock_t			ecmdq_lock;
+#endif
 
 	__le64				*base;
 	dma_addr_t			base_dma;
@@ -550,8 +576,18 @@ struct arm_smmu_cmdq {
 	atomic_long_t			*valid_map;
 	atomic_t			owner_prod;
 	atomic_t			lock;
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+	int				shared;
+#endif
 };
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
+struct arm_smmu_ecmdq {
+	struct arm_smmu_cmdq		cmdq;
+	void __iomem			*base;
+};
+#endif
+
 struct arm_smmu_cmdq_batch {
 	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
 	int				num;
@@ -646,6 +682,7 @@ struct arm_smmu_device {
 #define ARM_SMMU_FEAT_SVA		(1 << 17)
 #define ARM_SMMU_FEAT_E2H		(1 << 18)
 #define ARM_SMMU_FEAT_NESTING		(1 << 19)
+#define ARM_SMMU_FEAT_ECMDQ		(1 << 20)
 	u32				features;
 
 #define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -654,6 +691,16 @@ struct arm_smmu_device {
 #define ARM_SMMU_OPT_CMDQ_FORCE_SYNC	(1 << 3)
 	u32				options;
 
+#ifdef CONFIG_ARM_SMMU_V3_ECMDQ
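+	/*
+	 * nr_ecmdq and ecmdq_enabled share storage: a non-zero ECMDQ count
+	 * doubles as the enable flag, and clearing ecmdq_enabled also clears
+	 * the count when the feature has to be suppressed.
+	 */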
+	union {
+		u32			nr_ecmdq;
+		u32			ecmdq_enabled;
+	};
+
+	struct arm_smmu_ecmdq *__percpu	*ecmdqs;
+	struct arm_smmu_ecmdq __percpu	*ecmdq;
+#endif
+
 	struct arm_smmu_cmdq		cmdq;
 	struct arm_smmu_evtq		evtq;
 	struct arm_smmu_priq		priq;