diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index d1dac96ee30b6c85dba641c764a4cabd5330e023..e398ab26715f7d2c0a2a0029cc52d10d1bd94c71 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -9,4 +9,12 @@ static inline const struct dma_map_ops *get_arch_dma_ops(void) return dma_ops; } +#ifdef CONFIG_PCI + +extern bool is_zhaoxin_kh40000; +extern const struct dma_map_ops kh40000_dma_direct_ops; +extern void kh40000_set_iommu_dma_ops(struct device *dev); + +#endif + #endif diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 32f2430be099c03a246fc35cb360f32b0c7fe313..448309c92f6be1abcecb6a713ee2ba293084330e 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -160,4 +160,5 @@ ifeq ($(CONFIG_X86_64),y) obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o obj-y += vsmp_64.o + obj-$(CONFIG_PCI) += zhaoxin_kh40000.o endif diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 11bd85bcbaf85b2e818d37acfc71f358cb709950..82cf25b58e68acb90ab73db7e130f195a72f6ebe 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -28,6 +28,7 @@ #include #include #include +#include static void __init early_pci_clear_msi(int bus, int slot, int func) { @@ -716,6 +717,18 @@ static void __init apple_airport_reset(int bus, int slot, int func) early_iounmap(mmio, BCM4331_MMIO_SIZE); } +static void quirk_zhaoxin_dma_patch(int num, int slot, int func) +{ + u8 revision; + + revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); + if (revision == 0x10) { + is_zhaoxin_kh40000 = true; + dma_ops = &kh40000_dma_direct_ops; + pr_info("zhaoxin direct dma patch enabled\n"); + } +} + #define QFLAG_APPLY_ONCE 0x1 #define QFLAG_APPLIED 0x2 #define QFLAG_DONE (QFLAG_APPLY_ONCE|QFLAG_APPLIED) @@ -759,6 +772,10 @@ static struct chipset early_qrk[] __initdata = { PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, 0, force_disable_hpet}, { PCI_VENDOR_ID_BROADCOM, 0x4331, PCI_CLASS_NETWORK_OTHER, PCI_ANY_ID, 0, apple_airport_reset}, + { PCI_VENDOR_ID_ZHAOXIN, 0x1001, PCI_CLASS_BRIDGE_HOST, + PCI_BASE_CLASS_BRIDGE, QFLAG_APPLY_ONCE, quirk_zhaoxin_dma_patch }, + { PCI_VENDOR_ID_ZHAOXIN, 0x345B, PCI_CLASS_BRIDGE_HOST, + PCI_BASE_CLASS_BRIDGE, QFLAG_APPLY_ONCE, quirk_zhaoxin_dma_patch }, { PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0, early_pci_clear_msi}, {} }; diff --git a/arch/x86/kernel/zhaoxin_kh40000.c b/arch/x86/kernel/zhaoxin_kh40000.c new file mode 100644 index 0000000000000000000000000000000000000000..588ebfc09f588ffe4b8ea64c569fe4ed3837988f --- /dev/null +++ b/arch/x86/kernel/zhaoxin_kh40000.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "../../../kernel/dma/direct.h" + +bool is_zhaoxin_kh40000; + +/*** + * usage: + * set "zhaoxin_patch_bitmask=" in cmdline + * value description: + * bit 0: enable(1) node check or not(0). default 1 + */ +enum { + ZHAOXIN_P2CW_NODE_CHECK = BIT(0), + ZHAOXIN_PATCH_CODE_MAX = ZHAOXIN_P2CW_NODE_CHECK, +}; + +#define ZHAOXIN_PATCH_CODE_DEFAULT ZHAOXIN_P2CW_NODE_CHECK + +unsigned long zhaoxin_patch_code = ZHAOXIN_PATCH_CODE_DEFAULT; + +static int __init zhaoxin_patch_code_setup(char *str) +{ + int err = kstrtoul(str, 0, &zhaoxin_patch_code); + + if (err || (zhaoxin_patch_code > ZHAOXIN_PATCH_CODE_MAX)) { + pr_err("cmdline 'zhaoxin_patch_bitmask=%s' inappropriate\n", str); + zhaoxin_patch_code = ZHAOXIN_PATCH_CODE_DEFAULT; + return err; + } + + if (ZHAOXIN_P2CW_NODE_CHECK | zhaoxin_patch_code) + pr_info("zhaoxin dma patch node check is enabled\n"); + + return 0; +} +__setup("zhaoxin_patch_bitmask=", zhaoxin_patch_code_setup); + +static struct pci_dev *kh40000_get_pci_dev(struct device *dev) +{ + if (dev_is_pci(dev)) + return to_pci_dev(dev); + + if (dev->parent) + return kh40000_get_pci_dev(dev->parent); + + return NULL; +} + +static void kh40000_sync_single_dma_for_cpu(struct device *dev, dma_addr_t paddr, + enum dma_data_direction dir, bool is_iommu) +{ + u8 vid; + struct pci_dev *pci; + u64 dma_mask = *dev->dma_mask; + + /* check direction */ + if ((dir != DMA_FROM_DEVICE) && (dir != DMA_BIDIRECTIONAL)) + return; + + /* check dma capability */ + if (dma_mask <= DMA_BIT_MASK(32)) + return; + + /* check device type */ + pci = kh40000_get_pci_dev(dev); + if (pci == NULL) + return; + + /* get real physical address */ + if (is_iommu) { + struct iommu_domain *domain = iommu_get_dma_domain(dev); + + paddr = iommu_iova_to_phys(domain, paddr); + if (!paddr) + return; + } + + /* check node or not */ + if ((zhaoxin_patch_code & ZHAOXIN_P2CW_NODE_CHECK) + && pfn_to_nid(PFN_DOWN(paddr)) == dev_to_node(dev)) + return; + + /* flush data by one pci read cycle */ + pci_read_config_byte(pci, PCI_VENDOR_ID, &vid); +} + +/* zhaoxin kh-40000 direct dma ops */ +static void *kh40000_dma_direct_alloc(struct device *dev, size_t size, + dma_addr_t *addr, gfp_t gfp, unsigned long attrs) +{ + if (dev->coherent_dma_mask > DMA_BIT_MASK(32)) + gfp |= __GFP_THISNODE; + + return dma_direct_alloc(dev, size, addr, gfp, attrs); +} + +static void kh40000_dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); + dma_direct_unmap_page(dev, addr, size, dir, attrs); +} + +static void kh40000_dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 0); + + dma_direct_sync_sg_for_cpu(dev, sgl, nents, dir); +} + +static void kh40000_dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); + dma_direct_sync_single_for_cpu(dev, addr, size, dir); +} + +static void kh40000_dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nents, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 0); + + dma_direct_unmap_sg(dev, sgl, nents, dir, attrs); +} + +static void kh40000_dma_direct_unmap_resource(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 0); +} + +const struct dma_map_ops kh40000_dma_direct_ops = { + .flags = DMA_F_PCI_P2PDMA_SUPPORTED, + .alloc = kh40000_dma_direct_alloc, + .sync_sg_for_cpu = kh40000_dma_direct_sync_sg_for_cpu, + .unmap_page = kh40000_dma_direct_unmap_page, + .sync_single_for_cpu = kh40000_dma_direct_sync_single_for_cpu, + .unmap_sg = kh40000_dma_direct_unmap_sg, + .unmap_resource = kh40000_dma_direct_unmap_resource, + .dma_supported = dma_direct_supported, + .free = dma_direct_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, + .sync_single_for_device = dma_direct_sync_single_for_device, + .sync_sg_for_device = dma_direct_sync_sg_for_device, + .get_required_mask = dma_direct_get_required_mask, + .max_mapping_size = dma_direct_max_mapping_size, + .mmap = dma_direct_mmap, + .get_sgtable = dma_direct_get_sgtable, + .map_page = dma_direct_map_page, + .map_sg = dma_direct_map_sg, + .map_resource = dma_direct_map_resource, +}; + +/* zhaoxin kh-40000 iommu dma ops */ +static const struct dma_map_ops *iommu_dma_ops; + +static void *kh40000_iommu_dma_alloc(struct device *dev, size_t size, + dma_addr_t *addr, gfp_t gfp, unsigned long attrs) +{ + gfp |= __GFP_THISNODE; + + return iommu_dma_ops->alloc(dev, size, addr, gfp, attrs); +} + +static void kh40000_iommu_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs) +{ + iommu_dma_ops->free(dev, size, cpu_addr, handle, attrs); +} + +static struct page *kh40000_dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + return iommu_dma_ops->alloc_pages(dev, size, dma_handle, dir, gfp); +} + +static void kh40000_dma_common_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + iommu_dma_ops->free_pages(dev, size, page, dma_handle, dir); +} + +static struct sg_table *kh40000_iommu_dma_alloc_noncontiguous(struct device *dev, + size_t size, enum dma_data_direction dir, gfp_t gfp, + unsigned long attrs) +{ + return iommu_dma_ops->alloc_noncontiguous(dev, size, dir, gfp, attrs); +} + +static void kh40000_iommu_dma_free_noncontiguous(struct device *dev, size_t size, + struct sg_table *sgt, enum dma_data_direction dir) +{ + return iommu_dma_ops->free_noncontiguous(dev, size, sgt, dir); +} + +static int kh40000_iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return iommu_dma_ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); +} + +static void kh40000_iommu_dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); + iommu_dma_ops->unmap_page(dev, addr, size, dir, attrs); +} + +static int kh40000_iommu_dma_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + return iommu_dma_ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, attrs); +} + +static dma_addr_t kh40000_iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return iommu_dma_ops->map_page(dev, page, offset, size, dir, attrs); +} + +static int kh40000_iommu_dma_map_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs) +{ + return iommu_dma_ops->map_sg(dev, sgl, nents, dir, attrs); +} + +static void kh40000_iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nelems, enum dma_data_direction dir, unsigned long attrs) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 1); + iommu_dma_ops->unmap_sg(dev, sgl, nelems, dir, attrs); +} + +static void kh40000_iommu_dma_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); + iommu_dma_ops->sync_single_for_cpu(dev, addr, size, dir); +} + +static void kh40000_iommu_dma_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + iommu_dma_ops->sync_single_for_device(dev, addr, size, dir); +} + +static void kh40000_iommu_dma_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + struct scatterlist *sg; + int i; + + for_each_sg(sgl, sg, nelems, i) + kh40000_sync_single_dma_for_cpu(dev, sg_dma_address(sg), dir, 1); + iommu_dma_ops->sync_sg_for_cpu(dev, sgl, nelems, dir); +} + +static void kh40000_iommu_dma_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nelems, + enum dma_data_direction dir) +{ + iommu_dma_ops->sync_sg_for_device(dev, sgl, nelems, dir); +} + +static dma_addr_t kh40000_iommu_dma_map_resource(struct device *dev, phys_addr_t phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return iommu_dma_ops->map_resource(dev, phys, size, dir, attrs); +} + +static void kh40000_iommu_dma_unmap_resource(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + kh40000_sync_single_dma_for_cpu(dev, addr, dir, 1); + iommu_dma_ops->unmap_resource(dev, addr, size, dir, attrs); +} + +static unsigned long kh40000_iommu_dma_get_merge_boundary(struct device *dev) +{ + return iommu_dma_ops->get_merge_boundary(dev); +} + +static size_t kh40000_iommu_dma_opt_mapping_size(void) +{ + return iommu_dma_ops->opt_mapping_size(); +} + +const struct dma_map_ops kh40000_dma_iommu_ops = { + .flags = DMA_F_PCI_P2PDMA_SUPPORTED, + .alloc = kh40000_iommu_dma_alloc, + .free = kh40000_iommu_dma_free, + .unmap_page = kh40000_iommu_dma_unmap_page, + .alloc_pages = kh40000_dma_common_alloc_pages, + .free_pages = kh40000_dma_common_free_pages, + .alloc_noncontiguous = kh40000_iommu_dma_alloc_noncontiguous, + .free_noncontiguous = kh40000_iommu_dma_free_noncontiguous, + .mmap = kh40000_iommu_dma_mmap, + .get_sgtable = kh40000_iommu_dma_get_sgtable, + .map_page = kh40000_iommu_dma_map_page, + .map_sg = kh40000_iommu_dma_map_sg, + .unmap_sg = kh40000_iommu_dma_unmap_sg, + .sync_single_for_cpu = kh40000_iommu_dma_sync_single_for_cpu, + .sync_single_for_device = kh40000_iommu_dma_sync_single_for_device, + .sync_sg_for_cpu = kh40000_iommu_dma_sync_sg_for_cpu, + .sync_sg_for_device = kh40000_iommu_dma_sync_sg_for_device, + .map_resource = kh40000_iommu_dma_map_resource, + .unmap_resource = kh40000_iommu_dma_unmap_resource, + .get_merge_boundary = kh40000_iommu_dma_get_merge_boundary, + .opt_mapping_size = kh40000_iommu_dma_opt_mapping_size, +}; + +void kh40000_set_iommu_dma_ops(struct device *dev) +{ + if (dev->dma_ops) { + iommu_dma_ops = dev->dma_ops; + set_dma_ops(dev, &kh40000_dma_iommu_ops); + pr_info_once("zhaoxin iommu dma patch enabled\n"); + } +} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 61bb35046ea46fe99f6f1b06df8f89ddb47b9b3d..84620395e0fff6774517b41b355594a2509a32b8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4359,6 +4359,10 @@ static void intel_iommu_probe_finalize(struct device *dev) { set_dma_ops(dev, NULL); iommu_setup_dma_ops(dev, 0, U64_MAX); +#ifdef CONFIG_X86_64 + if (is_zhaoxin_kh40000) + kh40000_set_iommu_dma_ops(dev); +#endif } static void intel_iommu_get_resv_regions(struct device *device, diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index f005c66f378c32e9bae2d84b0ca80d1c126f0c15..43dd5c7b5aaf895877a5f81f88f70e0c48144c88 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -224,6 +224,10 @@ void __init dma_contiguous_reserve(phys_addr_t limit) dma_numa_cma_reserve(); +#if defined(CONFIG_X86_64) && defined(CONFIG_PCI) + if (is_zhaoxin_kh40000) + return; +#endif pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit); if (size_cmdline != -1) {