diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 376931ab728a3182e46a45def90e9d80523bba29..6ccc012f62c2b6a680a7e3b3c61934f5693c7280 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2081,7 +2081,7 @@ config ASCEND_CHARGE_MIGRATE_HUGEPAGES
 config ASCEND_SHARE_POOL
 	bool "Enable support for the Share Pool Memory"
 	default n
-	depends on HAVE_ARCH_HUGE_VMALLOC
+	depends on HAVE_ARCH_HUGE_VMALLOC && EXTEND_HUGEPAGE_MAPPING
 	select ARCH_USES_HIGH_VMA_FLAGS
 	help
 	  This feature allows multiple processes to share virtual memory both
diff --git a/arch/arm64/configs/openeuler_defconfig b/arch/arm64/configs/openeuler_defconfig
index 453b687e39fc3b8518e2bfaa823c41710b2b0d31..b0c2001b6f184adcbfb090a55b0ddfdd85861411 100644
--- a/arch/arm64/configs/openeuler_defconfig
+++ b/arch/arm64/configs/openeuler_defconfig
@@ -721,6 +721,8 @@ CONFIG_ACPI_HMAT=y
 CONFIG_HAVE_ACPI_APEI=y
 CONFIG_ACPI_APEI=y
 CONFIG_ACPI_APEI_GHES=y
+CONFIG_ACPI_APEI_GHES_TS_CORE=y
+CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR=y
 CONFIG_ACPI_APEI_PCIEAER=y
 CONFIG_ACPI_APEI_SEA=y
 CONFIG_ACPI_APEI_MEMORY_FAILURE=y
@@ -1129,6 +1131,7 @@ CONFIG_PIN_MEMORY=y
 CONFIG_PID_RESERVE=y
 CONFIG_MEMORY_RELIABLE=y
 # CONFIG_CLEAR_FREELIST_PAGE is not set
+CONFIG_EXTEND_HUGEPAGE_MAPPING=y
 
 #
 # Data Access Monitoring
@@ -5927,6 +5930,7 @@ CONFIG_ARM_SMMU=y
 CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT=y
 CONFIG_ARM_SMMU_V3=y
 CONFIG_ARM_SMMU_V3_SVA=y
+CONFIG_ARM_SMMU_V3_PM=y
 # CONFIG_AGENT_SMMU_ATOS is not set
 # CONFIG_QCOM_IOMMU is not set
 # CONFIG_VIRTIO_IOMMU is not set
@@ -6110,6 +6114,8 @@ CONFIG_THUNDERX2_PMU=m
 CONFIG_XGENE_PMU=y
 CONFIG_ARM_SPE_PMU=y
 CONFIG_HISI_PMU=m
+CONFIG_HISI_L3T_PMU=m
+CONFIG_HISI_LPDDRC_PMU=m
 CONFIG_HISI_PCIE_PMU=m
 CONFIG_HNS3_PMU=m
 # end of Performance monitor support
diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig
index 6b18f8bc7be353403873df2c11ca0b72ef256eb6..bf65cce12f618ee029bbee8f6dcc57d17a36009a 100644
--- a/drivers/acpi/apei/Kconfig
+++ b/drivers/acpi/apei/Kconfig
@@ -33,6 +33,20 @@ config ACPI_APEI_GHES
 	  by firmware to produce more valuable hardware error information
 	  for Linux.
 
+config ACPI_APEI_GHES_TS_CORE
+	bool "Support TS core RAS processing for Ascend"
+	depends on ARM64 && ACPI_APEI_GHES
+	default n
+	help
+	  Enable support for TS core RAS processing on Ascend platforms.
+
+config ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR
+	bool "Notify all RAS errors to the driver"
+	depends on ARM64 && ACPI_APEI_GHES
+	default n
+	help
+	  Deliver all types of RAS errors to the driver.
+
 config ACPI_APEI_PCIEAER
 	bool "APEI PCIe AER logging/recovering support"
 	depends on ACPI_APEI && PCIEAER
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c
index 9c38c2cdd2fd218356112ccf26fb5b1cfec31c9f..ec2c887bccb7ea3e0d71187bf8c2aaf86cc80d9c 100644
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -118,8 +118,10 @@ module_param_named(disable, ghes_disable, bool, 0);
 static LIST_HEAD(ghes_hed);
 static DEFINE_MUTEX(ghes_list_mutex);
 
+#ifdef CONFIG_ACPI_APEI_GHES_TS_CORE
 BLOCKING_NOTIFIER_HEAD(ghes_ts_err_chain);
 EXPORT_SYMBOL(ghes_ts_err_chain);
+#endif
 
 /*
  * Because the memory area used to transfer hardware error information
@@ -657,20 +659,26 @@ static bool ghes_do_proc(struct ghes *ghes,
 		} else if (guid_equal(sec_type, &CPER_SEC_PROC_ARM)) {
 			queued = ghes_handle_arm_hw_error(gdata, sev);
+#ifdef CONFIG_ACPI_APEI_GHES_TS_CORE
 		} else if (guid_equal(sec_type, &CPER_SEC_TS_CORE)) {
 			blocking_notifier_call_chain(&ghes_ts_err_chain,
 					0, acpi_hest_get_payload(gdata));
+#endif
 		} else {
 			void *err = acpi_hest_get_payload(gdata);
-
+#ifndef CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR
+			ghes_defer_non_standard_event(gdata, sev);
+#endif
 			log_non_standard_event(sec_type, fru_id, fru_text,
 					       sec_sev, err,
 					       gdata->error_data_length);
 		}
 
+#ifdef CONFIG_ACPI_APEI_GHES_NOTIFY_ALL_RAS_ERR
 		/* Customization deliver all types error to driver. */
 		ghes_defer_non_standard_event(gdata, sev);
+#endif
 	}
 
 	return queued;
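The new ghes_ts_err_chain is a plain blocking notifier head, so a driver that wants the TS-core RAS records registers a notifier_block against it and receives the raw payload that ghes_do_proc() passes via acpi_hest_get_payload(). A minimal consumer sketch, assuming CONFIG_ACPI_APEI_GHES_TS_CORE=y — the module and callback names below are hypothetical, only ghes_ts_err_chain and its header come from this patch:

#include <linux/module.h>
#include <linux/notifier.h>
#include <acpi/ghes.h>

/* Hypothetical TS-core RAS consumer; runs from a blocking notifier chain. */
static int ts_core_ras_cb(struct notifier_block *nb,
			  unsigned long action, void *data)
{
	/* "data" points at the firmware-reported TS-core error record. */
	pr_info("ts core RAS error reported, payload at %p\n", data);
	return NOTIFY_OK;
}

static struct notifier_block ts_core_ras_nb = {
	.notifier_call = ts_core_ras_cb,
};

static int __init ts_core_ras_init(void)
{
	return blocking_notifier_chain_register(&ghes_ts_err_chain,
						&ts_core_ras_nb);
}

static void __exit ts_core_ras_exit(void)
{
	blocking_notifier_chain_unregister(&ghes_ts_err_chain,
					   &ts_core_ras_nb);
}

module_init(ts_core_ras_init);
module_exit(ts_core_ras_exit);
MODULE_LICENSE("GPL");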
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index d632d456ed3c9a6fbbecca0be645eaa36fdc24fc..f04a2bde00184eeefedf0c53dce9c0f8bcb5b1ee 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -336,6 +336,13 @@ config AGENT_SMMU_ATOS
 	  Say Y here if your system will be used in Ascend Advanced Accelerator
 	  with HCCS bus. Or want use the ATOS of SMMU.
 
+config ARM_SMMU_V3_PM
+	bool "ARM SMMUv3 suspend and resume support"
+	depends on ARM_SMMU_V3 && PM_SLEEP
+	default n
+	help
+	  Add suspend and resume support for the ARM SMMUv3 driver.
+
 config S390_IOMMU
 	def_bool y if S390 && PCI
 	depends on S390 && PCI
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 3f6d25bf0587e7b57bc3f58a01264300c90b064b..be8cc53659f8b0c505b2f42613cc6390a954d6eb 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -4349,7 +4349,7 @@ static void arm_smmu_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
 	doorbell = (((u64)msg->address_hi) << 32) | msg->address_lo;
 	doorbell &= MSI_CFG0_ADDR_MASK;
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_ARM_SMMU_V3_PM
 	/* Saves the msg (base addr of msi irq) and restores it during resume */
 	desc->msg.address_lo = msg->address_lo;
 	desc->msg.address_hi = msg->address_hi;
@@ -4411,7 +4411,7 @@ static void arm_smmu_setup_msis(struct arm_smmu_device *smmu)
 	devm_add_action(dev, arm_smmu_free_msis, dev);
 }
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_ARM_SMMU_V3_PM
 static void arm_smmu_resume_msis(struct arm_smmu_device *smmu)
 {
 	struct msi_desc *desc;
@@ -5313,8 +5313,7 @@ static void __iomem *arm_smmu_ioremap(struct device *dev, resource_size_t start,
 	return devm_ioremap_resource(dev, &res);
 }
 
-#ifdef CONFIG_PM_SLEEP
-
+#ifdef CONFIG_ARM_SMMU_V3_PM
 static int arm_smmu_ecmdq_disable(struct device *dev)
 {
 	int i, j;
@@ -5521,7 +5520,7 @@ static const struct of_device_id arm_smmu_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, arm_smmu_of_match);
 
-#ifdef CONFIG_PM_SLEEP
+#ifdef CONFIG_ARM_SMMU_V3_PM
 static const struct dev_pm_ops arm_smmu_pm_ops = {
 	.suspend = arm_smmu_suspend,
 	.resume = arm_smmu_resume,
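Guarding these blocks with CONFIG_ARM_SMMU_V3_PM instead of CONFIG_PM_SLEEP lets a kernel with PM_SLEEP enabled still opt out of SMMU suspend/resume. The same guard has to appear wherever the ops are referenced, typically where they are wired into the platform driver; a sketch of that pattern under the new option — the .pm hookup shown here is illustrative, not part of the hunks above:

#ifdef CONFIG_ARM_SMMU_V3_PM
static const struct dev_pm_ops arm_smmu_pm_ops = {
	.suspend = arm_smmu_suspend,
	.resume = arm_smmu_resume,
};
#endif

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name		= "arm-smmu-v3",
		.of_match_table	= arm_smmu_of_match,
#ifdef CONFIG_ARM_SMMU_V3_PM
		/* Only expose PM callbacks when the option is set,
		 * so the reference compiles away with it. */
		.pm		= &arm_smmu_pm_ops,
#endif
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
};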
diff --git a/drivers/perf/hisilicon/Kconfig b/drivers/perf/hisilicon/Kconfig
index 171bfc1b6bc27dc195c92e2ccc31665effce74c6..e19feba8b5d36928d4b1a6b967fecdb1c5856026 100644
--- a/drivers/perf/hisilicon/Kconfig
+++ b/drivers/perf/hisilicon/Kconfig
@@ -24,3 +24,22 @@ config HNS3_PMU
 	  devices.
 	  Adds the HNS3 PMU into perf events system for monitoring latency,
 	  bandwidth etc.
+
+config HISI_L3T_PMU
+	tristate "HiSilicon SoC L3T PMU driver"
+	depends on HISI_PMU
+	default n
+	help
+	  Support for the HiSilicon SoC L3T performance monitor. L3T is a
+	  specialized PMU driver, built separately from the generic
+	  HiSilicon uncore PMU support.
+
+config HISI_LPDDRC_PMU
+	tristate "HiSilicon SoC LPDDRC PMU driver"
+	depends on HISI_PMU
+	default n
+	help
+	  Support for the HiSilicon SoC LPDDRC performance monitor. LPDDRC
+	  is a specialized PMU driver, built separately from the generic
+	  HiSilicon uncore PMU support.
+
diff --git a/drivers/perf/hisilicon/Makefile b/drivers/perf/hisilicon/Makefile
index a35705795dfcf2dce31206546fddd8551da0d08a..fc7ed8c6cc0f4d92ad1bfcc741bdf363d209eb7b 100644
--- a/drivers/perf/hisilicon/Makefile
+++ b/drivers/perf/hisilicon/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_HISI_PMU) += hisi_uncore_pmu.o hisi_uncore_l3c_pmu.o \
 	  hisi_uncore_hha_pmu.o hisi_uncore_ddrc_pmu.o hisi_uncore_sllc_pmu.o \
-	  hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o \
-	  hisi_uncore_l3t_pmu.o \
-	  hisi_uncore_lpddrc_pmu.o
+	  hisi_uncore_pa_pmu.o hisi_uncore_cpa_pmu.o
 
 obj-$(CONFIG_HISI_PCIE_PMU) += hisi_pcie_pmu.o
 obj-$(CONFIG_HNS3_PMU) += hns3_pmu.o
+obj-$(CONFIG_HISI_L3T_PMU) += hisi_uncore_l3t_pmu.o
+obj-$(CONFIG_HISI_LPDDRC_PMU) += hisi_uncore_lpddrc_pmu.o
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index cfdd8cffe6d7ebafdd262f44fcd8e063cd7d84d6..b098177420c4a03112e443b2d6a4d8aa66cc8458 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -602,13 +602,20 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart,
 		 * if the page is from buddy system, do not add to freed.
 		 * because freed is used for hugetlbfs reservation accounting.
 		 */
-		if (!HPageTemporary(page)) {
-			freed++;
-			if (!truncate_op) {
-				if (unlikely(hugetlb_unreserve_pages(inode,
+
+#ifdef CONFIG_ASCEND_SHARE_POOL
+		if (HPageTemporary(page) != 0) {
+			unlock_page(page);
+			if (!truncate_op)
+				mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+			continue;
+		}
+#endif
+		freed++;
+		if (!truncate_op) {
+			if (unlikely(hugetlb_unreserve_pages(inode,
 						index, index + 1, 1)))
-				hugetlb_fix_reserve_counts(inode);
-			}
+				hugetlb_fix_reserve_counts(inode);
 		}
 
 		unlock_page(page);
@@ -1061,8 +1068,12 @@ static int hugetlbfs_error_remove_page(struct address_space *mapping,
 	pgoff_t index = page->index;
 
 	remove_huge_page(page);
+#ifdef CONFIG_ASCEND_SHARE_POOL
 	if (!HPageTemporary(page) &&
 	    unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
+#else
+	if (unlikely(hugetlb_unreserve_pages(inode, index, index + 1, 1)))
+#endif
 		hugetlb_fix_reserve_counts(inode);
 
 	return 0;
diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h
index 89330e4872c0a54bd00c186a626df7a9163a19f6..ca054bf08ee9aa33a3521448dc4dac75184f876e 100644
--- a/include/acpi/ghes.h
+++ b/include/acpi/ghes.h
@@ -145,6 +145,8 @@ int ghes_notify_sea(void);
 static inline int ghes_notify_sea(void) { return -ENOENT; }
 #endif
 
+#ifdef CONFIG_ACPI_APEI_GHES_TS_CORE
 extern struct blocking_notifier_head ghes_ts_err_chain;
+#endif
 
 #endif /* GHES_H */
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 78cf8a0b05a58f8650448669e0b77de577d37652..4dbf03678cc6ac5201e3442d273cbb78973a617b 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -197,10 +197,13 @@ enum {
 #define CPER_SEC_DMAR_IOMMU						\
 	GUID_INIT(0x036F84E1, 0x7F37, 0x428c, 0xA7, 0x9E, 0x57, 0x5F,	\
 		  0xDF, 0xAA, 0x84, 0xEC)
+
+#ifdef CONFIG_ACPI_APEI_GHES_TS_CORE
 /* HISI ts core */
 #define CPER_SEC_TS_CORE						\
 	GUID_INIT(0xeb4c71f8, 0xbc76, 0x4c46, 0xbd, 0x9, 0xd0, 0xd3,	\
 		  0x45, 0x0, 0x5a, 0x92)
+#endif
 
 #define CPER_PROC_VALID_TYPE			0x0001
 #define CPER_PROC_VALID_ISA			0x0002
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 430eb04ca390d1b69716738eb68db727715c3901..0b5ce84212d787c334684d3dfec042ac5ce7c301 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -235,11 +235,13 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
 /* test whether an address (unsigned long or pointer) is aligned to PAGE_SIZE */
 #define PAGE_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), PAGE_SIZE)
 
+#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING
 /* to align the pointer to the (next) PMD hugepage boundary */
 #define PMD_ALIGN(addr)		ALIGN(addr, PMD_SIZE)
 
 /* test whether an address (unsigned long or pointer) is aligned to PMD_SIZE */
 #define PMD_ALIGNED(addr)	IS_ALIGNED((unsigned long)(addr), PMD_SIZE)
+#endif
 
 #define lru_to_page(head) (list_entry((head)->prev, struct page, lru))
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 49c94afce25b9a6e28c3e4419fbd6afd179d75fc..1ebe364ed29a556aa256817fed897bb9d695aff9 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -27,7 +27,9 @@ struct notifier_block;		/* in notifier.h */
 #define VM_FLUSH_RESET_PERMS	0x00000100	/* reset direct map and flush TLB on unmap, can't be freed in atomic context */
 #define VM_MAP_PUT_PAGES	0x00000200	/* put pages and free array in vfree */
 #define VM_NO_HUGE_VMAP		0x00000400	/* force PAGE_SIZE pte mapping */
+#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING
 #define VM_HUGE_PAGES		0x00001000	/* used for vmalloc hugepages */
+#endif
 #ifdef CONFIG_ASCEND_SHARE_POOL
 #define VM_SHAREPOOL		0x00002000	/* remapped to sharepool */
 #else
@@ -142,8 +144,11 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
 void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
 		int node, const void *caller);
 void *vmalloc_no_huge(unsigned long size);
+
+#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING
 extern void *vmalloc_hugepage(unsigned long size);
 extern void *vmalloc_hugepage_user(unsigned long size);
+#endif
 
 extern void vfree(const void *addr);
 extern void vfree_atomic(const void *addr);
@@ -160,6 +165,7 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma,
 extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 							unsigned long pgoff);
 
+#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING
 extern void *vmap_hugepage(struct page **pages, unsigned int count,
 			   unsigned long flags, pgprot_t prot);
 extern int remap_vmalloc_hugepage_range_partial(struct vm_area_struct *vma,
@@ -167,6 +173,7 @@ extern int remap_vmalloc_hugepage_range_partial(struct vm_area_struct *vma,
 						unsigned long pgoff, unsigned long size);
 extern int remap_vmalloc_hugepage_range(struct vm_area_struct *vma,
 					void *addr, unsigned long pgoff);
+#endif
 
 /*
  * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values
diff --git a/mm/Kconfig b/mm/Kconfig
index be7fd4ed2c4f7993b3fa01814d0293e24acd56f2..f66457168de968e7ca81d649efc461c2ce7357eb 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -992,6 +992,12 @@ config CLEAR_FREELIST_PAGE
 	  To enable this feature, kernel parameter "clear_freelist" also
 	  needs to be added.
 
+config EXTEND_HUGEPAGE_MAPPING
+	bool "Extend hugepage mapping interfaces"
+	depends on ARM64
+	default n
+	help
+	  Introduce vmalloc/vmap/remap interfaces that handle only hugepages.
 
 source "mm/damon/Kconfig"
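With CONFIG_EXTEND_HUGEPAGE_MAPPING=y, the interfaces gated above give in-kernel callers a hugepage-only allocation path. A minimal usage sketch — the wrapper functions are hypothetical; vmalloc_hugepage(), vfree() and the VM_HUGE_PAGES flag are the symbols declared in this patch:

#include <linux/vmalloc.h>

/* Hypothetical helper: allocate a buffer backed only by hugepages. */
static void *alloc_huge_buf(unsigned long size)
{
	/*
	 * vmalloc_hugepage() maps the area with hugepages and marks it
	 * with VM_HUGE_PAGES; size is presumably rounded up to PMD_SIZE
	 * granularity (see PMD_ALIGN above).
	 */
	return vmalloc_hugepage(size);
}

static void free_huge_buf(void *buf)
{
	vfree(buf);	/* vfree() also handles hugepage-backed areas */
}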
source "mm/damon/Kconfig" diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 03eca3aec0f6edf52b2b47c1d9538f32e7b2ed97..63880f3e682de18e32839f3f181834187380f2c3 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -46,6 +46,7 @@ #include #include "internal.h" #include "hugetlb_vmemmap.h" +#include "share_pool_internal.h" int hugetlb_max_hstate __read_mostly; unsigned int default_hstate_idx; @@ -1625,6 +1626,7 @@ void free_huge_page(struct page *page) h->resv_huge_pages++; if (HPageTemporary(page)) { + sp_memcg_uncharge_hpage(page); remove_hugetlb_page(h, page, false); spin_unlock_irqrestore(&hugetlb_lock, flags); update_and_free_page(h, page, true); diff --git a/mm/share_pool.c b/mm/share_pool.c index e9d2ae0a5352d6d03399161fa43f2972231c440b..ce4837da8a9a7e4c3be7a41bb1554aa9c417949b 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -96,57 +96,24 @@ static LIST_HEAD(spm_dvpp_list); /* mutex to protect insert/delete ops from master_list */ static DEFINE_MUTEX(spm_list_lock); -/* for kthread buff_module_guard_work */ -static struct sp_proc_stat kthread_stat; - -#ifndef __GENKSYMS__ -struct sp_spg_stat { - int spg_id; - /* record the number of hugepage allocation failures */ - atomic_t hugepage_failures; - /* number of sp_area */ - atomic_t spa_num; - /* total size of all sp_area from sp_alloc and k2u */ - atomic64_t size; - /* total size of all sp_area from sp_alloc 0-order page */ - atomic64_t alloc_nsize; - /* total size of all sp_area from sp_alloc hugepage */ - atomic64_t alloc_hsize; - /* total size of all sp_area from ap_alloc */ - atomic64_t alloc_size; - /* total size of all sp_area from sp_k2u */ - atomic64_t k2u_size; -}; - -/* per process memory usage statistics indexed by tgid */ -struct sp_proc_stat { - int tgid; - struct mm_struct *mm; - char comm[TASK_COMM_LEN]; - /* - * alloc amount minus free amount, may be negative when freed by - * another task in the same sp group. - */ - atomic64_t alloc_size; - atomic64_t alloc_nsize; - atomic64_t alloc_hsize; - atomic64_t k2u_size; -}; - -/* per process/sp-group memory usage statistics */ -struct spg_proc_stat { - int tgid; - int spg_id; /* 0 for non-group data, such as k2u_task */ - /* - * alloc amount minus free amount, may be negative when freed by - * another task in the same sp group. - */ - atomic64_t alloc_size; - atomic64_t alloc_nsize; - atomic64_t alloc_hsize; - atomic64_t k2u_size; +#define SEQ_printf(m, x...) \ +do { \ + if (m) \ + seq_printf(m, x); \ + else \ + pr_info(x); \ +} while (0) + +struct sp_meminfo { + /* not huge page size from sp_alloc */ + atomic64_t alloc_nsize; + /* huge page size from sp_alloc */ + atomic64_t alloc_hsize; + /* total size from sp_k2u */ + atomic64_t k2u_size; }; +#ifndef __GENKSYMS__ enum sp_mapping_type { SP_MAPPING_START, SP_MAPPING_DVPP = SP_MAPPING_START, @@ -208,10 +175,11 @@ struct sp_group { /* list head of sp_area. 
@@ -208,10 +175,11 @@ struct sp_group {
 	/* list head of sp_area. it is protected by spin_lock sp_area_lock */
 	struct list_head spa_list;
 	/* group statistics */
-	struct sp_spg_stat instat;
+	struct sp_meminfo meminfo;
 	/* is_alive == false means it's being destroyed */
 	bool is_alive;
 	atomic_t use_count;
+	atomic_t spa_num;
 	/* protect the group internal elements, except spa_list */
 	struct rw_semaphore rw_lock;
 	/* list node for dvpp mapping */
@@ -221,6 +189,7 @@ struct sp_group {
 
 /* a per-process(per mm) struct which manages a sp_group_node list */
 struct sp_group_master {
+	pid_t tgid;
 	/*
 	 * number of sp groups the process belongs to,
 	 * a.k.a the number of sp_node in node_list
@@ -234,8 +203,9 @@ struct sp_group_master {
 	 * For example, sp_alloc non-share memory or k2task.
 	 */
 	struct sp_group *local;
-	struct sp_proc_stat instat;
+	struct sp_meminfo meminfo;
 	struct list_head list_node;
+	char comm[TASK_COMM_LEN];
 };
 
 /*
@@ -252,20 +222,81 @@ struct sp_group_node {
 	struct sp_group_master *master;
 	struct sp_group *spg;
 	unsigned long prot;
-	struct spg_proc_stat instat;
+
+	/*
+	 * alloc amount minus free amount, may be negative when freed by
+	 * another task in the same sp group.
+	 */
+	struct sp_meminfo meminfo;
 };
 #endif
 
-/* The caller should hold mmap_sem to protect master (TBD) */
-static void sp_init_group_master_stat(int tgid, struct mm_struct *mm,
-				      struct sp_proc_stat *stat)
+static inline void sp_add_group_master(struct sp_group_master *master)
+{
+	mutex_lock(&master_list_lock);
+	list_add_tail(&master->list_node, &master_list);
+	mutex_unlock(&master_list_lock);
+}
+
+static inline void sp_del_group_master(struct sp_group_master *master)
+{
+	mutex_lock(&master_list_lock);
+	list_del(&master->list_node);
+	mutex_unlock(&master_list_lock);
+}
+
+static void meminfo_init(struct sp_meminfo *meminfo)
+{
+	memset(meminfo, 0, sizeof(struct sp_meminfo));
+}
+
+static void meminfo_inc_usage(unsigned long size, bool huge, struct sp_meminfo *meminfo)
+{
+	if (huge)
+		atomic64_add(size, &meminfo->alloc_hsize);
+	else
+		atomic64_add(size, &meminfo->alloc_nsize);
+}
+
+static void meminfo_dec_usage(unsigned long size, bool huge, struct sp_meminfo *meminfo)
+{
+	if (huge)
+		atomic64_sub(size, &meminfo->alloc_hsize);
+	else
+		atomic64_sub(size, &meminfo->alloc_nsize);
+}
+
+static void meminfo_inc_k2u(unsigned long size, struct sp_meminfo *meminfo)
+{
+	atomic64_add(size, &meminfo->k2u_size);
+}
+
+static void meminfo_dec_k2u(unsigned long size, struct sp_meminfo *meminfo)
+{
+	atomic64_sub(size, &meminfo->k2u_size);
+}
+
+static inline long meminfo_alloc_sum(struct sp_meminfo *meminfo)
 {
-	atomic64_set(&stat->alloc_nsize, 0);
-	atomic64_set(&stat->alloc_hsize, 0);
-	atomic64_set(&stat->k2u_size, 0);
-	stat->mm = mm;
-	stat->tgid = tgid;
-	get_task_comm(stat->comm, current);
+	return atomic64_read(&meminfo->alloc_nsize) +
+	       atomic64_read(&meminfo->alloc_hsize);
+}
+
+static inline long meminfo_alloc_sum_byKB(struct sp_meminfo *meminfo)
+{
+	return byte2kb(meminfo_alloc_sum(meminfo));
+}
+
+static inline long meminfo_k2u_size(struct sp_meminfo *meminfo)
+{
+	return byte2kb(atomic64_read(&meminfo->k2u_size));
+}
+
+static inline long long meminfo_total_size(struct sp_meminfo *meminfo)
+{
+	return atomic64_read(&meminfo->alloc_nsize) +
+	       atomic64_read(&meminfo->alloc_hsize) +
+	       atomic64_read(&meminfo->k2u_size);
 }
 
 static unsigned long sp_mapping_type(struct sp_mapping *spm)
@@ -433,11 +464,13 @@ static int sp_mapping_group_setup(struct mm_struct *mm, struct sp_group *spg)
 		if (is_mapping_empty(local_dvpp_mapping)) {
 			sp_mapping_merge(spg_dvpp_mapping, local_dvpp_mapping);
 			if (is_conflict)
-				pr_warn_ratelimited("task address space conflict, spg_id=%d\n", spg->id);
+				pr_warn_ratelimited("task address space conflict, spg_id=%d\n",
+						    spg->id);
 		} else if (is_mapping_empty(spg_dvpp_mapping)) {
 			sp_mapping_merge(local_dvpp_mapping, spg_dvpp_mapping);
 			if (is_conflict)
-				pr_warn_ratelimited("group address space conflict, spg_id=%d\n", spg->id);
+				pr_warn_ratelimited("group address space conflict, spg_id=%d\n",
+						    spg->id);
 		} else {
 			pr_info_ratelimited("Duplicate address space, id=%d\n", spg->id);
 			return -EINVAL;
@@ -470,12 +503,14 @@ static struct sp_mapping *sp_mapping_find(struct sp_group *spg,
 static struct sp_group *create_spg(int spg_id, unsigned long flag);
 static void free_new_spg_id(bool new, int spg_id);
 static void free_sp_group_locked(struct sp_group *spg);
-static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg);
+static struct sp_group_node *group_add_task(struct mm_struct *mm, struct sp_group *spg,
+					    unsigned long prot);
 
 static int init_local_group(struct mm_struct *mm)
 {
 	int spg_id, ret;
 	struct sp_group *spg;
 	struct sp_mapping *spm;
+	struct sp_group_node *spg_node;
 	struct sp_group_master *master = mm->sp_group_master;
 
 	spg_id = ida_alloc_range(&sp_group_id_ida, SPG_ID_LOCAL_MIN,
@@ -487,8 +522,8 @@ static int init_local_group(struct mm_struct *mm)
 
 	spg = create_spg(spg_id, 0);
 	if (IS_ERR(spg)) {
-		ret = PTR_ERR(spg);
-		goto free_spg_id;
+		free_new_spg_id(true, spg_id);
+		return PTR_ERR(spg);
 	}
 
 	master->local = spg;
@@ -501,19 +536,20 @@ static int init_local_group(struct mm_struct *mm)
 	sp_mapping_attach(master->local, sp_mapping_normal);
 	sp_mapping_attach(master->local, sp_mapping_ro);
 
-	ret = local_group_add_task(mm, spg);
-	if (ret < 0)
+	spg_node = group_add_task(mm, spg, PROT_READ | PROT_WRITE);
+	if (IS_ERR(spg_node)) {
 		/* The spm would be released while destroying the spg */
+		ret = PTR_ERR(spg_node);
 		goto free_spg;
+	}
 
+	mmget(mm);
 	return 0;
 
 free_spg:
+	/* spg_id is freed in free_sp_group_locked */
 	free_sp_group_locked(spg);
 	master->local = NULL;
-free_spg_id:
-	free_new_spg_id(true, spg_id);
-
 	return ret;
 }
@@ -533,12 +569,11 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct
 	INIT_LIST_HEAD(&master->node_list);
 	master->count = 0;
 	master->mm = mm;
-	sp_init_group_master_stat(tsk->tgid, mm, &master->instat);
+	master->tgid = tsk->tgid;
+	get_task_comm(master->comm, current);
+	meminfo_init(&master->meminfo);
 	mm->sp_group_master = master;
-
-	mutex_lock(&master_list_lock);
-	list_add_tail(&master->list_node, &master_list);
-	mutex_unlock(&master_list_lock);
+	sp_add_group_master(master);
 
 	ret = init_local_group(mm);
 	if (ret)
@@ -547,9 +582,7 @@ static int sp_init_group_master_locked(struct task_struct *tsk, struct mm_struct
 	return 0;
 
 free_master:
-	mutex_lock(&master_list_lock);
-	list_del(&master->list_node);
-	mutex_unlock(&master_list_lock);
+	sp_del_group_master(master);
 	mm->sp_group_master = NULL;
 	kfree(master);
 
@@ -588,102 +621,30 @@ static struct sp_group *sp_get_local_group(struct task_struct *tsk, struct mm_st
 	return master->local;
 }
 
-static void update_spg_stat_alloc(unsigned long size, bool inc,
-				  bool huge, struct sp_spg_stat *stat)
-{
-	if (inc) {
-		atomic_inc(&stat->spa_num);
-		atomic64_add(size, &stat->size);
-		atomic64_add(size, &stat->alloc_size);
-		if (huge)
-			atomic64_add(size, &stat->alloc_hsize);
-		else
-			atomic64_add(size, &stat->alloc_nsize);
-	} else {
-		atomic_dec(&stat->spa_num);
-		atomic64_sub(size, &stat->size);
-		atomic64_sub(size, &stat->alloc_size);
-		if (huge)
-			atomic64_sub(size, &stat->alloc_hsize);
-		else
-			atomic64_sub(size, &stat->alloc_nsize);
-	}
-}
-
-static void update_spg_stat_k2u(unsigned long size, bool inc,
-				struct sp_spg_stat *stat)
-{
-	if (inc) {
-		atomic_inc(&stat->spa_num);
-		atomic64_add(size, &stat->size);
-		atomic64_add(size, &stat->k2u_size);
-	} else {
-		atomic_dec(&stat->spa_num);
-		atomic64_sub(size, &stat->size);
-		atomic64_sub(size, &stat->k2u_size);
-	}
-}
-
 static void update_mem_usage_alloc(unsigned long size, bool inc,
 				   bool is_hugepage, struct sp_group_node *spg_node)
 {
-	struct sp_proc_stat *proc_stat = &spg_node->master->instat;
-
 	if (inc) {
-		if (is_hugepage) {
-			atomic64_add(size, &spg_node->instat.alloc_hsize);
-			atomic64_add(size, &proc_stat->alloc_hsize);
-			return;
-		}
-		atomic64_add(size, &spg_node->instat.alloc_nsize);
-		atomic64_add(size, &proc_stat->alloc_nsize);
-		return;
-	}
-
-	if (is_hugepage) {
-		atomic64_sub(size, &spg_node->instat.alloc_hsize);
-		atomic64_sub(size, &proc_stat->alloc_hsize);
-		return;
+		meminfo_inc_usage(size, is_hugepage, &spg_node->meminfo);
+		meminfo_inc_usage(size, is_hugepage, &spg_node->master->meminfo);
+	} else {
+		meminfo_dec_usage(size, is_hugepage, &spg_node->meminfo);
+		meminfo_dec_usage(size, is_hugepage, &spg_node->master->meminfo);
 	}
-	atomic64_sub(size, &spg_node->instat.alloc_nsize);
-	atomic64_sub(size, &proc_stat->alloc_nsize);
-	return;
 }
 
 static void update_mem_usage_k2u(unsigned long size, bool inc,
 				 struct sp_group_node *spg_node)
 {
-	struct sp_proc_stat *proc_stat = &spg_node->master->instat;
-
 	if (inc) {
-		atomic64_add(size, &spg_node->instat.k2u_size);
-		atomic64_add(size, &proc_stat->k2u_size);
+		meminfo_inc_k2u(size, &spg_node->meminfo);
+		meminfo_inc_k2u(size, &spg_node->master->meminfo);
 	} else {
-		atomic64_sub(size, &spg_node->instat.k2u_size);
-		atomic64_sub(size, &proc_stat->k2u_size);
+		meminfo_dec_k2u(size, &spg_node->meminfo);
+		meminfo_dec_k2u(size, &spg_node->master->meminfo);
 	}
 }
 
-static void sp_init_spg_proc_stat(struct spg_proc_stat *stat, int spg_id)
-{
-	stat->tgid = current->tgid;
-	stat->spg_id = spg_id;
-	atomic64_set(&stat->alloc_nsize, 0);
-	atomic64_set(&stat->alloc_hsize, 0);
-	atomic64_set(&stat->k2u_size, 0);
-}
-
-static void sp_init_group_stat(struct sp_spg_stat *stat)
-{
-	atomic_set(&stat->hugepage_failures, 0);
-	atomic_set(&stat->spa_num, 0);
-	atomic64_set(&stat->size, 0);
-	atomic64_set(&stat->alloc_nsize, 0);
-	atomic64_set(&stat->alloc_hsize, 0);
-	atomic64_set(&stat->alloc_size, 0);
-	atomic64_set(&stat->k2u_size, 0);
-}
-
 /* statistics of all sp area, protected by sp_area_lock */
 struct sp_spa_stat {
 	unsigned int total_num;
@@ -769,17 +730,17 @@ static void spa_inc_usage(struct sp_area *spa)
 	case SPA_TYPE_ALLOC:
 		spa_stat.alloc_num += 1;
 		spa_stat.alloc_size += size;
-		update_spg_stat_alloc(size, true, is_huge, &spa->spg->instat);
+		meminfo_inc_usage(size, is_huge, &spa->spg->meminfo);
 		break;
 	case SPA_TYPE_K2TASK:
 		spa_stat.k2u_task_num += 1;
 		spa_stat.k2u_task_size += size;
-		update_spg_stat_k2u(size, true, &spa->spg->instat);
+		meminfo_inc_k2u(size, &spa->spg->meminfo);
 		break;
 	case SPA_TYPE_K2SPG:
 		spa_stat.k2u_spg_num += 1;
 		spa_stat.k2u_spg_size += size;
-		update_spg_stat_k2u(size, true, &spa->spg->instat);
+		meminfo_inc_k2u(size, &spa->spg->meminfo);
 		break;
 	default:
 		WARN(1, "invalid spa type");
@@ -790,6 +751,7 @@ static void spa_inc_usage(struct sp_area *spa)
 		spa_stat.dvpp_va_size += ALIGN(size, PMD_SIZE);
 	}
 
+	atomic_inc(&spa->spg->spa_num);
 	/*
 	 * all the calculations won't overflow due to system limitation and
 	 * parameter checking in sp_alloc_area()
 	 */
@@ -815,17 +777,17 @@ static void spa_dec_usage(struct sp_area *spa)
 	case SPA_TYPE_ALLOC:
 		spa_stat.alloc_num -= 1;
 		spa_stat.alloc_size -= size;
-		update_spg_stat_alloc(size, false, is_huge, &spa->spg->instat);
+		meminfo_dec_usage(size, is_huge, &spa->spg->meminfo);
 		break;
 	case SPA_TYPE_K2TASK:
 		spa_stat.k2u_task_num -= 1;
 		spa_stat.k2u_task_size -= size;
-		update_spg_stat_k2u(size, false, &spa->spg->instat);
+		meminfo_dec_k2u(size, &spa->spg->meminfo);
 		break;
 	case SPA_TYPE_K2SPG:
 		spa_stat.k2u_spg_num -= 1;
 		spa_stat.k2u_spg_size -= size;
-		update_spg_stat_k2u(size, false, &spa->spg->instat);
+		meminfo_dec_k2u(size, &spa->spg->meminfo);
 		break;
 	default:
 		WARN(1, "invalid spa type");
@@ -836,6 +798,7 @@ static void spa_dec_usage(struct sp_area *spa)
 		spa_stat.dvpp_va_size -= ALIGN(size, PMD_SIZE);
 	}
 
+	atomic_dec(&spa->spg->spa_num);
 	spa_stat.total_num -= 1;
 	spa_stat.total_size -= size;
 
@@ -861,7 +824,7 @@ static void update_mem_usage(unsigned long size, bool inc, bool is_hugepage,
 	}
 }
 
-struct sp_group_node *find_spg_node_by_spg(struct mm_struct *mm,
+static struct sp_group_node *find_spg_node_by_spg(struct mm_struct *mm,
 		struct sp_group *spg)
 {
 	struct sp_group_node *spg_node;
@@ -913,12 +876,11 @@ struct sp_k2u_context {
 	unsigned long size_aligned;
 	unsigned long sp_flags;
 	int state;
-	int spg_id;
-	bool to_task;
+	enum spa_type type;
 };
 
-static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
-		struct mm_struct *mm, unsigned long prot, struct sp_k2u_context *kc);
+static unsigned long sp_remap_kva_to_vma(struct sp_area *spa, struct mm_struct *mm,
+		unsigned long prot, struct sp_k2u_context *kc);
 
 static void free_sp_group_id(int spg_id)
 {
@@ -960,7 +922,7 @@ static void free_sp_group(struct sp_group *spg)
 	up_write(&sp_group_sem);
 }
 
-static void sp_group_drop_locked(struct sp_group *spg)
+static void sp_group_put_locked(struct sp_group *spg)
 {
 	lockdep_assert_held_write(&sp_group_sem);
 
@@ -968,7 +930,7 @@ static void sp_group_drop_locked(struct sp_group *spg)
 		free_sp_group_locked(spg);
 }
 
-static void sp_group_drop(struct sp_group *spg)
+static void sp_group_put(struct sp_group *spg)
 {
 	if (atomic_dec_and_test(&spg->use_count))
 		free_sp_group(spg);
@@ -1011,8 +973,8 @@ static bool is_process_in_group(struct sp_group *spg,
 	return false;
 }
 
-/* user must call sp_group_drop() after use */
-static struct sp_group *__sp_find_spg_locked(int tgid, int spg_id)
+/* user must call sp_group_put() after use */
+static struct sp_group *sp_group_get_locked(int tgid, int spg_id)
 {
 	struct sp_group *spg = NULL;
 	struct task_struct *tsk = NULL;
@@ -1041,12 +1003,12 @@ static struct sp_group *__sp_find_spg_locked(int tgid, int spg_id)
 	return spg;
 }
 
-static struct sp_group *__sp_find_spg(int tgid, int spg_id)
+static struct sp_group *sp_group_get(int tgid, int spg_id)
 {
 	struct sp_group *spg;
 
 	down_read(&sp_group_sem);
-	spg = __sp_find_spg_locked(tgid, spg_id);
+	spg = sp_group_get_locked(tgid, spg_id);
 	up_read(&sp_group_sem);
 	return spg;
 }
@@ -1128,17 +1090,32 @@ static bool is_online_node_id(int node_id)
 	return node_id >= 0 && node_id < MAX_NUMNODES && node_online(node_id);
 }
 
+static void sp_group_init(struct sp_group *spg, int spg_id, unsigned long flag)
+{
+	spg->id = spg_id;
+	spg->flag = flag;
+	spg->is_alive = true;
+	spg->proc_num = 0;
+	atomic_set(&spg->use_count, 1);
+	atomic_set(&spg->spa_num, 0);
+	INIT_LIST_HEAD(&spg->procs);
+	INIT_LIST_HEAD(&spg->spa_list);
+	INIT_LIST_HEAD(&spg->mnode);
+	init_rwsem(&spg->rw_lock);
+	meminfo_init(&spg->meminfo);
+}
+
 static struct sp_group *create_spg(int spg_id, unsigned long flag)
 {
 	int ret;
 	struct sp_group *spg;
-	char name[20];
+	char name[DNAME_INLINE_LEN];
 	struct user_struct *user = NULL;
 	int hsize_log = MAP_HUGE_2MB >> MAP_HUGE_SHIFT;
 
 	if (unlikely(system_group_count + 1 == MAX_GROUP_FOR_SYSTEM &&
 		     !is_local_group(spg_id))) {
-		pr_err_ratelimited("reach system max group num\n");
+		pr_err("reach system max group num\n");
 		return ERR_PTR(-ENOSPC);
 	}
 
@@ -1146,50 +1123,40 @@ static struct sp_group *create_spg(int spg_id, unsigned long flag)
 	if (spg == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id + 1, GFP_KERNEL);
-	if (ret < 0) {
-		pr_err_ratelimited("group %d idr alloc failed %d\n",
-				   spg_id, ret);
-		goto out_kfree;
-	}
-
-	spg->id = spg_id;
-	spg->flag = flag;
-	spg->is_alive = true;
-	spg->proc_num = 0;
-	atomic_set(&spg->use_count, 1);
-	INIT_LIST_HEAD(&spg->procs);
-	INIT_LIST_HEAD(&spg->spa_list);
-	INIT_LIST_HEAD(&spg->mnode);
-	init_rwsem(&spg->rw_lock);
-	sp_init_group_stat(&spg->instat);
 	sprintf(name, "sp_group_%d", spg_id);
-	spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE,
-					    VM_NORESERVE);
+	spg->file = shmem_kernel_file_setup(name, MAX_LFS_FILESIZE, VM_NORESERVE);
 	if (IS_ERR(spg->file)) {
 		pr_err("spg file setup failed %ld\n", PTR_ERR(spg->file));
 		ret = PTR_ERR(spg->file);
-		goto out_idr;
+		goto out_kfree;
 	}
 
+	sprintf(name, "sp_group_%d_huge", spg_id);
 	spg->file_hugetlb = hugetlb_file_setup(name, MAX_LFS_FILESIZE,
-			VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, hsize_log);
+			VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, hsize_log);
 	if (IS_ERR(spg->file_hugetlb)) {
-		pr_err("spg file_hugetlb setup failed %ld\n",
-		       PTR_ERR(spg->file_hugetlb));
+		pr_err("spg file_hugetlb setup failed %ld\n", PTR_ERR(spg->file_hugetlb));
 		ret = PTR_ERR(spg->file_hugetlb);
 		goto out_fput;
 	}
 
+	sp_group_init(spg, spg_id, flag);
+
+	ret = idr_alloc(&sp_group_idr, spg, spg_id, spg_id + 1, GFP_KERNEL);
+	if (ret < 0) {
+		pr_err("group %d idr alloc failed %d\n", spg_id, ret);
+		goto out_fput_huge;
+	}
+
 	if (!is_local_group(spg_id))
 		system_group_count++;
+
 	return spg;
 
+out_fput_huge:
+	fput(spg->file_hugetlb);
 out_fput:
 	fput(spg->file);
-out_idr:
-	idr_remove(&sp_group_idr, spg_id);
 out_kfree:
 	kfree(spg);
 	return ERR_PTR(ret);
@@ -1200,7 +1167,7 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag)
 {
 	struct sp_group *spg;
 
-	spg = __sp_find_spg_locked(current->tgid, spg_id);
+	spg = sp_group_get_locked(current->tgid, spg_id);
 
 	if (!spg) {
 		spg = create_spg(spg_id, flag);
@@ -1208,11 +1175,11 @@ static struct sp_group *find_or_alloc_sp_group(int spg_id, unsigned long flag)
 		down_read(&spg->rw_lock);
 		if (!spg_valid(spg)) {
 			up_read(&spg->rw_lock);
-			sp_group_drop_locked(spg);
+			sp_group_put_locked(spg);
 			return ERR_PTR(-ENODEV);
 		}
 		up_read(&spg->rw_lock);
-		/* spg->use_count has increased due to __sp_find_spg() */
+		/* spg->use_count has increased due to sp_group_get() */
 	}
 
 	return spg;
@@ -1295,7 +1262,7 @@ static struct sp_group_node *create_spg_node(struct mm_struct *mm,
 	spg_node->spg = spg;
 	spg_node->master = master;
 	spg_node->prot = prot;
-	sp_init_spg_proc_stat(&spg_node->instat, spg->id);
+	meminfo_init(&spg_node->meminfo);
 
 	list_add_tail(&spg_node->group_node, &master->node_list);
 	master->count++;
@@ -1336,18 +1303,24 @@ static void free_spg_node(struct mm_struct *mm, struct sp_group *spg,
 	kfree(spg_node);
 }
 
-static int local_group_add_task(struct mm_struct *mm, struct sp_group *spg)
+/* the caller must hold sp_group_sem and down_write(&spg->rw_lock) in order */
+static struct sp_group_node *group_add_task(struct mm_struct *mm, struct sp_group *spg,
+					    unsigned long prot)
 {
 	struct sp_group_node *node;
+	int ret;
 
-	node = create_spg_node(mm, PROT_READ | PROT_WRITE, spg);
+	node = create_spg_node(mm, prot, spg);
 	if (IS_ERR(node))
-		return PTR_ERR(node);
+		return node;
 
-	insert_spg_node(spg, node);
-	mmget(mm);
+	ret = insert_spg_node(spg, node);
+	if (unlikely(ret)) {
+		free_spg_node(mm, spg, node);
+		return ERR_PTR(ret);
+	}
 
-	return 0;
+	return node;
 }
 
 /**
@@ -1395,7 +1368,7 @@ int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
 	}
 
 	if (spg_id >= SPG_ID_AUTO_MIN && spg_id <= SPG_ID_AUTO_MAX) {
-		spg = __sp_find_spg(tgid, spg_id);
+		spg = sp_group_get(tgid, spg_id);
 
 		if (!spg) {
 			pr_err_ratelimited("spg %d hasn't been created\n", spg_id);
@@ -1406,12 +1379,12 @@ int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
 		if (!spg_valid(spg)) {
 			up_read(&spg->rw_lock);
 			pr_err_ratelimited("add group failed, group id %d is dead\n", spg_id);
-			sp_group_drop(spg);
+			sp_group_put(spg);
 			return -EINVAL;
 		}
 		up_read(&spg->rw_lock);
 
-		sp_group_drop(spg);
+		sp_group_put(spg);
 	}
 
 	if (spg_id == SPG_ID_AUTO) {
@@ -1460,6 +1433,15 @@ int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
 		goto out_put_task;
 	}
 
+	if (mm->sp_group_master && mm->sp_group_master->tgid != tgid) {
+		up_write(&sp_group_sem);
+		pr_err("add: task(%d) is a vfork child of the original task(%d)\n",
+		       tgid, mm->sp_group_master->tgid);
+		ret = -EINVAL;
+		free_new_spg_id(id_newly_generated, spg_id);
+		goto out_put_mm;
+	}
+
 	spg = find_or_alloc_sp_group(spg_id, flag);
 	if (IS_ERR(spg)) {
 		up_write(&sp_group_sem);
@@ -1481,19 +1463,13 @@ int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
 		goto out_drop_group;
 	}
 
-	node = create_spg_node(mm, prot, spg);
+	node = group_add_task(mm, spg, prot);
 	if (unlikely(IS_ERR(node))) {
 		up_write(&spg->rw_lock);
 		ret = PTR_ERR(node);
 		goto out_drop_group;
 	}
 
-	ret = insert_spg_node(spg, node);
-	if (unlikely(ret)) {
-		up_write(&spg->rw_lock);
-		goto out_drop_spg_node;
-	}
-
 	/*
 	 * create mappings of existing shared memory segments into this
 	 * new process' page table.
@@ -1520,7 +1496,7 @@ int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
 		spin_unlock(&sp_area_lock);
 
 		if (spa->type == SPA_TYPE_K2SPG && spa->kva) {
-			addr = sp_remap_kva_to_vma(spa->kva, spa, mm, prot_spa, NULL);
+			addr = sp_remap_kva_to_vma(spa, mm, prot_spa, NULL);
 			if (IS_ERR_VALUE(addr))
 				pr_warn("add group remap k2u failed %ld\n", addr);
 
@@ -1574,7 +1550,6 @@ int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
 		delete_spg_node(spg, node);
 	up_write(&spg->rw_lock);
 
-out_drop_spg_node:
 	if (unlikely(ret))
 		free_spg_node(mm, spg, node);
 	/*
@@ -1585,7 +1560,7 @@ int mg_sp_group_add_task(int tgid, unsigned long prot, int spg_id)
 out_drop_group:
 	if (unlikely(ret)) {
 		up_write(&sp_group_sem);
-		sp_group_drop(spg);
+		sp_group_put(spg);
 	} else
 		up_write(&sp_group_sem);
 out_put_mm:
@@ -1624,27 +1599,21 @@ int mg_sp_group_del_task(int tgid, int spg_id)
 		return -EOPNOTSUPP;
 
 	if (spg_id < SPG_ID_MIN || spg_id > SPG_ID_AUTO) {
-		pr_err_ratelimited("del from group failed, invalid group id %d\n", spg_id);
+		pr_err("del from group failed, invalid group id %d\n", spg_id);
 		return -EINVAL;
 	}
 
-	spg = __sp_find_spg(tgid, spg_id);
+	spg = sp_group_get(tgid, spg_id);
 	if (!spg) {
-		pr_err_ratelimited("spg not found or get task failed.");
+		pr_err("spg not found or get task failed, tgid:%d, spg_id:%d\n",
+		       tgid, spg_id);
 		return -EINVAL;
 	}
 
 	down_write(&sp_group_sem);
 	if (!spg_valid(spg)) {
 		up_write(&sp_group_sem);
-		pr_err_ratelimited("spg dead.");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	if (!list_empty(&spg->spa_list)) {
-		up_write(&sp_group_sem);
-		pr_err_ratelimited("spa is not empty");
+		pr_err("spg dead, spg_id:%d\n", spg_id);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -1652,31 +1621,54 @@ int mg_sp_group_del_task(int tgid, int spg_id)
 	ret = get_task(tgid, &tsk);
 	if (ret) {
 		up_write(&sp_group_sem);
-		pr_err_ratelimited("task is not found");
+		pr_err("task is not found, tgid:%d\n", tgid);
 		goto out;
 	}
 
 	mm = get_task_mm(tsk->group_leader);
 	if (!mm) {
 		up_write(&sp_group_sem);
-		pr_err_ratelimited("mm is not found");
+		pr_err("mm is not found, tgid:%d\n", tgid);
 		ret = -ESRCH;
 		goto out_put_task;
 	}
 
+	if (!mm->sp_group_master) {
+		up_write(&sp_group_sem);
+		pr_err("task(%d) is not in any group(%d)\n", tgid, spg_id);
+		ret = -EINVAL;
+		goto out_put_mm;
+	}
+
+	if (mm->sp_group_master->tgid != tgid) {
+		up_write(&sp_group_sem);
+		pr_err("del: task(%d) is a vfork child of the original task(%d)\n",
+		       tgid, mm->sp_group_master->tgid);
+		ret = -EINVAL;
+		goto out_put_mm;
+	}
+
 	spg_node = find_spg_node_by_spg(mm, spg);
 	if (!spg_node) {
 		up_write(&sp_group_sem);
-		pr_err_ratelimited("process not in group");
+		pr_err("task(%d) not in group(%d)\n", tgid, spg_id);
 		ret = -ESRCH;
 		goto out_put_mm;
 	}
 
 	down_write(&spg->rw_lock);
+
+	if (!list_empty(&spg->spa_list)) {
+		up_write(&spg->rw_lock);
+		up_write(&sp_group_sem);
+		pr_err("spa is not empty, task:%d, spg_id:%d\n", tgid, spg_id);
+		ret = -EINVAL;
+		goto out_put_mm;
+	}
+
 	if (list_is_singular(&spg->procs))
 		is_alive = spg->is_alive = false;
-	spg->proc_num--;
-	list_del(&spg_node->proc_node);
-	sp_group_drop(spg);
+
+	delete_spg_node(spg, spg_node);
+	sp_group_put(spg);
 	up_write(&spg->rw_lock);
 	if (!is_alive)
 		blocking_notifier_call_chain(&sp_notifier_chain, 0, spg);
@@ -1693,7 +1685,7 @@ int mg_sp_group_del_task(int tgid, int spg_id)
 out_put_task:
 	put_task_struct(tsk);
 out:
-	sp_group_drop(spg);	/* if spg dead, freed here */
+	sp_group_put(spg);	/* if spg dead, freed here */
 	return ret;
 }
 EXPORT_SYMBOL_GPL(mg_sp_group_del_task);
@@ -2138,7 +2130,7 @@ static int sp_free_get_spa(struct sp_free_context *fc)
 	struct sp_area *spa;
 	struct sp_group *spg;
 
-	spg = __sp_find_spg(current->tgid, fc->spg_id);
+	spg = sp_group_get(current->tgid, fc->spg_id);
 	if (!spg) {
 		pr_debug("sp free get group failed %d\n", fc->spg_id);
 		return -EINVAL;
@@ -2147,7 +2139,7 @@ static int sp_free_get_spa(struct sp_free_context *fc)
 	fc->state = FREE_CONT;
 
 	spa = get_sp_area(spg, addr);
-	sp_group_drop(spg);
+	sp_group_put(spg);
 	if (!spa) {
 		pr_debug("sp free invalid input addr %lx\n", addr);
 		return -EINVAL;
@@ -2230,9 +2222,7 @@ int mg_sp_free(unsigned long addr, int id)
 
 	sp_free_unmap_fallocate(fc.spa);
 
-	if (current->mm == NULL)
-		atomic64_sub(fc.spa->real_size, &kthread_stat.alloc_size);
-	else
+	if (current->mm != NULL)
 		sp_update_process_stat(current, false, fc.spa);
 
 	__sp_area_drop(fc.spa);	/* match get_sp_area in sp_free_get_spa */
@@ -2319,7 +2309,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
 		sp_flags |= SP_HUGEPAGE;
 
 	if (spg_id != SPG_ID_DEFAULT) {
-		spg = __sp_find_spg(current->tgid, spg_id);
+		spg = sp_group_get(current->tgid, spg_id);
 		if (!spg) {
 			pr_err_ratelimited("allocation failed, can't find group\n");
 			return -ENODEV;
@@ -2329,14 +2319,14 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags,
 		down_read(&spg->rw_lock);
 		if (!spg_valid(spg)) {
 			up_read(&spg->rw_lock);
-			sp_group_drop(spg);
+			sp_group_put(spg);
 			pr_err_ratelimited("allocation failed, spg is dead\n");
 			return -ENODEV;
 		}
 
 		if (!is_process_in_group(spg, current->mm)) {
 			up_read(&spg->rw_lock);
-			sp_group_drop(spg);
+			sp_group_put(spg);
 			pr_err_ratelimited("allocation failed, task not in group\n");
 			return -ENODEV;
 		}
@@ -2434,7 +2424,6 @@ static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac)
 		return;
 	}
 
-	atomic_inc(&ac->spg->instat.hugepage_failures);
 	if (!(ac->sp_flags & SP_HUGEPAGE_ONLY)) {
 		ac->file = ac->spg->file;
 		ac->size_aligned = ALIGN(ac->size, PAGE_SIZE);
@@ -2455,7 +2444,6 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa,
 	 * page fault later on, and more importantly sp_make_share_u2k()
 	 * depends on this feature (and MAP_LOCKED) to work correctly.
 	 */
-
 	return do_mm_populate(mm, spa->va_start, ac->populate, 0);
 }
 
@@ -2476,7 +2464,6 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
 	int ret;
 
 	ret = sp_alloc_mmap(mm, spa, spg_node, ac);
-
 	if (ret < 0)
 		return ret;
 
@@ -2498,6 +2485,7 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa,
 		pr_warn_ratelimited("allocation failed due to mm populate failed(potential no enough memory when -12): %d\n",
 				    ret);
 	}
+
 	return ret;
 }
 
@@ -2550,8 +2538,7 @@ static int sp_alloc_mmap_populate(struct sp_area *spa,
 	sp_fallocate(spa);
 
 	/* if hugepage allocation fails, this will transfer to normal page
-	 * and try again. (only if SP_HUGEPAGE_ONLY is not flagged
-	 */
+	 * and try again. (only if SP_HUGEPAGE_ONLY is not flagged) */
 	sp_alloc_fallback(spa, ac);
 
 	return mmap_ret;
@@ -2573,7 +2560,7 @@ static void sp_alloc_finish(int result, struct sp_area *spa,
 	if (spa && !IS_ERR(spa))
 		__sp_area_drop(spa);
 
-	sp_group_drop(spg);
+	sp_group_put(spg);
 }
 
 /**
@@ -2673,14 +2660,15 @@ static unsigned long __sp_remap_get_pfn(unsigned long kva)
 }
 
 /* when called by k2u to group, always make sure rw_lock of spg is down */
-static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
-		struct mm_struct *mm, unsigned long prot, struct sp_k2u_context *kc)
+static unsigned long sp_remap_kva_to_vma(struct sp_area *spa, struct mm_struct *mm,
+		unsigned long prot, struct sp_k2u_context *kc)
 {
 	struct vm_area_struct *vma;
 	unsigned long ret_addr;
 	unsigned long populate = 0;
 	int ret = 0;
 	unsigned long addr, buf, offset;
+	unsigned long kva = spa->kva;
 
 	down_write(&mm->mmap_lock);
 	if (unlikely(mm->core_state)) {
@@ -2741,111 +2729,50 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa,
 	return ret_addr;
 }
 
-/**
- * sp_make_share_kva_to_task() - Share kernel memory to current task.
- * @kva: the VA of shared kernel memory
- * @size: the size of area to share, should be aligned properly
- * @sp_flags: the flags for the opreation
- *
- * Return:
- * * if succeed, return the shared user address to start at.
- * * if fail, return the pointer of -errno.
- */
-static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, unsigned long sp_flags)
-{
-	int ret;
-	void *uva;
-	struct sp_area *spa;
-	struct sp_group_node *spg_node;
-	unsigned long prot = PROT_READ | PROT_WRITE;
-	struct sp_k2u_context kc;
-	struct sp_group *spg;
-
-	down_write(&sp_group_sem);
-	ret = sp_init_group_master_locked(current, current->mm);
-	if (ret) {
-		up_write(&sp_group_sem);
-		pr_err_ratelimited("k2u_task init local mapping failed %d\n", ret);
-		return ERR_PTR(ret);
-	}
-
-	spg = current->mm->sp_group_master->local;
-	up_write(&sp_group_sem);
-
-	spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2TASK, current->tgid);
-	if (IS_ERR(spa)) {
-		pr_err_ratelimited("alloc spa failed in k2u_task (potential no enough virtual memory when -75): %ld\n",
-				   PTR_ERR(spa));
-		return spa;
-	}
-
-	spa->kva = kva;
-	kc.sp_flags = sp_flags;
-	uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc);
-	if (IS_ERR(uva))
-		pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva));
-	else {
-		spg_node = find_spg_node_by_spg(current->mm, spa->spg);
-		update_mem_usage(size, true, spa->is_hugepage, spg_node, SPA_TYPE_K2TASK);
-		spa->mm = current->mm;
-	}
-	__sp_area_drop(spa);
-
-	return uva;
-}
-
 /**
  * Share kernel memory to a spg, the current process must be in that group
- * @kva: the VA of shared kernel memory
- * @size: the size of area to share, should be aligned properly
- * @sp_flags: the flags for the opreation
+ * @kc: the context for k2u, including kva, size, flags...
  * @spg: the sp group to be shared with
  *
  * Return: the shared user address to start at
  */
-static void *sp_make_share_kva_to_spg(unsigned long kva, unsigned long size,
-				      unsigned long sp_flags, struct sp_group *spg)
+static void *sp_make_share_kva_to_spg(struct sp_k2u_context *kc, struct sp_group *spg)
 {
 	struct sp_area *spa;
 	struct mm_struct *mm;
 	struct sp_group_node *spg_node;
-	void *uva = ERR_PTR(-ENODEV);
-	struct sp_k2u_context kc;
 	unsigned long ret_addr = -ENODEV;
 
 	down_read(&spg->rw_lock);
-	spa = sp_alloc_area(size, sp_flags, spg, SPA_TYPE_K2SPG, current->tgid);
+	spa = sp_alloc_area(kc->size_aligned, kc->sp_flags, spg, kc->type, current->tgid);
 	if (IS_ERR(spa)) {
 		up_read(&spg->rw_lock);
-		pr_err_ratelimited("alloc spa failed in k2u_spg (potential no enough virtual memory when -75): %ld\n",
+		pr_err("alloc spa failed in k2u_spg (potential no enough virtual memory when -75): %ld\n",
 				   PTR_ERR(spa));
 		return spa;
 	}
 
-	spa->kva = kva;
-	kc.sp_flags = sp_flags;
+	spa->kva = kc->kva_aligned;
 
 	list_for_each_entry(spg_node, &spg->procs, proc_node) {
 		mm = spg_node->master->mm;
-		kc.state = K2U_NORMAL;
-		ret_addr = sp_remap_kva_to_vma(kva, spa, mm, spg_node->prot, &kc);
+		kc->state = K2U_NORMAL;
+		ret_addr = sp_remap_kva_to_vma(spa, mm, spg_node->prot, kc);
 		if (IS_ERR_VALUE(ret_addr)) {
-			if (kc.state == K2U_COREDUMP)
+			if (kc->state == K2U_COREDUMP)
 				continue;
-			uva = (void *)ret_addr;
-			pr_err("remap k2u to spg failed %ld\n", PTR_ERR(uva));
+			pr_err("remap k2u to spg failed %ld\n", ret_addr);
 			__sp_free(spg, spa->va_start, spa_size(spa), mm);
 			goto out;
 		}
-
-		uva = (void *)ret_addr;
 	}
 
 out:
 	up_read(&spg->rw_lock);
-	if (!IS_ERR(uva))
+	if (!IS_ERR_VALUE(ret_addr))
 		sp_update_process_stat(current, true, spa);
 	__sp_area_drop(spa);
 
-	return uva;
+	return (void *)ret_addr;
 }
 
 static bool vmalloc_area_set_flag(unsigned long kva, unsigned long flags)
@@ -2904,16 +2831,13 @@ static int sp_k2u_prepare(unsigned long kva, unsigned long size,
 		return -EINVAL;
 	}
 
-	kc->kva = kva;
-	kc->kva_aligned = kva_aligned;
-	kc->size = size;
+	kc->kva = kva;
+	kc->kva_aligned = kva_aligned;
+	kc->size = size;
 	kc->size_aligned = size_aligned;
-	kc->sp_flags = sp_flags;
-	kc->spg_id = spg_id;
-	if (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE)
-		kc->to_task = true;
-	else
-		kc->to_task = false;
+	kc->sp_flags = sp_flags;
+	kc->type = (spg_id == SPG_ID_DEFAULT || spg_id == SPG_ID_NONE)
+			? SPA_TYPE_K2TASK : SPA_TYPE_K2SPG;
 
 	return 0;
 }
@@ -2951,6 +2875,7 @@ void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
 	void *uva;
 	int ret;
 	struct sp_k2u_context kc;
+	struct sp_group *spg;
 
 	if (!sp_is_enabled())
 		return ERR_PTR(-EOPNOTSUPP);
@@ -2961,24 +2886,31 @@ void *mg_sp_make_share_k2u(unsigned long kva, unsigned long size,
 	if (ret)
 		return ERR_PTR(ret);
 
-	if (kc.to_task) {
-		uva = sp_make_share_kva_to_task(kc.kva_aligned, kc.size_aligned, kc.sp_flags);
-	} else {
-		struct sp_group *spg;
-
-		spg = __sp_find_spg(current->tgid, kc.spg_id);
-		if (spg) {
-			ret = sp_check_caller_permission(spg, current->mm);
-			if (ret < 0) {
-				sp_group_drop(spg);
-				uva = ERR_PTR(ret);
-				goto out;
-			}
-			uva = sp_make_share_kva_to_spg(kc.kva_aligned, kc.size_aligned, kc.sp_flags, spg);
-			sp_group_drop(spg);
-		} else {
-			uva = ERR_PTR(-ENODEV);
+	if (kc.type == SPA_TYPE_K2TASK) {
+		down_write(&sp_group_sem);
+		ret = sp_init_group_master_locked(current, current->mm);
+		up_write(&sp_group_sem);
+		if (ret) {
+			pr_err("k2u_task init local mapping failed %d\n", ret);
+			uva = ERR_PTR(ret);
+			goto out;
 		}
+
+		/* the caller could use SPG_ID_NONE */
+		spg_id = SPG_ID_DEFAULT;
+	}
+
+	spg = sp_group_get(current->tgid, spg_id);
+	if (spg) {
+		ret = sp_check_caller_permission(spg, current->mm);
+		if (ret < 0) {
+			sp_group_put(spg);
+			uva = ERR_PTR(ret);
+			goto out;
+		}
+		uva = sp_make_share_kva_to_spg(&kc, spg);
+		sp_group_put(spg);
+	} else {
+		uva = ERR_PTR(-ENODEV);
 	}
 
 out:
@@ -3294,54 +3226,42 @@ void *mg_sp_make_share_u2k(unsigned long uva, unsigned long size, int tgid)
 EXPORT_SYMBOL_GPL(mg_sp_make_share_u2k);
 
 /*
- * Input parameters uva, tgid and spg_id are now useless. spg_id will be useful
- * when supporting a process in multiple sp groups.
+ * sp_unshare_uva - unshare a uva from sp_make_share_k2u
+ * @uva: the uva to be unshared
+ * @size: not actually used, only checked against the spa size
+ * @group_id: specify the spg of the uva; for the local group it can be
+ *            SPG_ID_DEFAULT, unless the current process is exiting.
  *
  * Procedure of unshare uva must be compatible with:
  *
  * 1. DVPP channel destroy procedure:
  * do_exit() -> exit_mm() (mm no longer in spg and current->mm == NULL) ->
  * exit_task_work() -> task_work_run() -> __fput() -> ... -> vdec_close() ->
- * sp_unshare(uva, SPG_ID_DEFAULT)
- *
- * 2. Process A once was the target of k2u(to group), then it exits.
- * Guard worker kthread tries to free this uva and it must succeed, otherwise
- * spa of this uva leaks.
- *
- * This also means we must trust DVPP channel destroy and guard worker code.
+ * sp_unshare(uva, local_spg_id)
  */
 static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id)
 {
 	int ret = 0;
-	struct mm_struct *mm;
 	struct sp_area *spa;
-	unsigned long uva_aligned;
-	unsigned long size_aligned;
 	unsigned int page_size;
 	struct sp_group *spg;
 
-	spg = __sp_find_spg(current->tgid, group_id);
+	spg = sp_group_get(current->tgid, group_id);
 	if (!spg) {
-		pr_debug("sp unshare find group failed %d\n", group_id);
+		pr_err("sp unshare find group failed %d\n", group_id);
 		return -EINVAL;
 	}
 
-	/*
-	 * at first we guess it's a hugepage addr
-	 * we can tolerate at most PMD_SIZE or PAGE_SIZE which is matched in k2u
-	 */
+	/* All the spa are aligned to 2M. */
 	spa = get_sp_area(spg, ALIGN_DOWN(uva, PMD_SIZE));
 	if (!spa) {
-		spa = get_sp_area(spg, ALIGN_DOWN(uva, PAGE_SIZE));
-		if (!spa) {
-			ret = -EINVAL;
-			pr_debug("invalid input uva %lx in unshare uva\n", (unsigned long)uva);
-			goto out;
-		}
+		ret = -EINVAL;
+		pr_err("invalid input uva %lx in unshare uva\n", (unsigned long)uva);
+		goto out;
 	}
 
 	if (spa->type != SPA_TYPE_K2TASK && spa->type != SPA_TYPE_K2SPG) {
-		pr_err_ratelimited("unshare wrong type spa\n");
+		pr_err("unshare wrong type spa\n");
 		ret = -EINVAL;
 		goto out_drop_area;
 	}
@@ -3352,103 +3272,56 @@ static int sp_unshare_uva(unsigned long uva, unsigned long size, int group_id)
 	 * Thus input parameter size is not necessarily needed.
 	 */
 	page_size = (spa->is_hugepage ? PMD_SIZE : PAGE_SIZE);
-	uva_aligned = spa->va_start;
-	size_aligned = spa->real_size;
 
-	if (size_aligned < ALIGN(size, page_size)) {
+	if (spa->real_size < ALIGN(size, page_size)) {
 		ret = -EINVAL;
-		pr_err_ratelimited("unshare uva failed, invalid parameter size %lu\n", size);
+		pr_err("unshare uva failed, invalid parameter size %lu\n", size);
 		goto out_drop_area;
 	}
 
-	if (spa->type == SPA_TYPE_K2TASK) {
-		if (spa->applier != current->tgid) {
-			pr_err_ratelimited("unshare uva(to task) no permission\n");
-			ret = -EPERM;
-			goto out_drop_area;
-		}
-
-		/*
-		 * current thread may be exiting in a multithread process
-		 *
-		 * 1. never need a kthread to make unshare when process has exited
-		 * 2. in dvpp channel destroy procedure, exit_mm() has been called
-		 *    and don't need to make unshare
-		 */
-		mm = get_task_mm(current->group_leader);
-		if (!mm) {
-			pr_info_ratelimited("no need to unshare uva(to task), target process mm is exiting\n");
-			goto out_clr_flag;
-		}
-
-		down_write(&mm->mmap_lock);
-		if (unlikely(mm->core_state)) {
-			ret = 0;
-			up_write(&mm->mmap_lock);
-			mmput(mm);
-			goto out_drop_area;
-		}
-
-		ret = do_munmap(mm, uva_aligned, size_aligned, NULL);
-		up_write(&mm->mmap_lock);
-		mmput(mm);
-		/* we are not supposed to fail */
-		if (ret)
-			pr_err("failed to unmap VA %pK when munmap in unshare uva\n",
-			       (void *)uva_aligned);
-		sp_update_process_stat(current, false, spa);
-
-	} else if (spa->type == SPA_TYPE_K2SPG) {
-		down_read(&spa->spg->rw_lock);
-		/* always allow kthread and dvpp channel destroy procedure */
-		if (current->mm) {
-			if (!is_process_in_group(spa->spg, current->mm)) {
-				up_read(&spa->spg->rw_lock);
-				pr_err_ratelimited("unshare uva(to group) failed, caller process doesn't belong to target group\n");
-				ret = -EPERM;
-				goto out_drop_area;
-			}
-		}
+	down_read(&spa->spg->rw_lock);
+	/* always allow dvpp channel destroy procedure */
+	if (current->mm && !is_process_in_group(spa->spg, current->mm)) {
 		up_read(&spa->spg->rw_lock);
+		pr_err("unshare uva failed, caller process doesn't belong to target group\n");
+		ret = -EPERM;
+		goto out_drop_area;
+	}
+	up_read(&spa->spg->rw_lock);
 
-		down_write(&spa->spg->rw_lock);
-		if (!spg_valid(spa->spg)) {
-			up_write(&spa->spg->rw_lock);
-			pr_info_ratelimited("share pool: no need to unshare uva(to group), sp group of spa is dead\n");
-			goto out_clr_flag;
-		}
-		/* the life cycle of spa has a direct relation with sp group */
-		if (unlikely(spa->is_dead)) {
-			up_write(&spa->spg->rw_lock);
-			pr_err_ratelimited("unexpected double sp unshare\n");
-			dump_stack();
-			ret = -EINVAL;
-			goto out_drop_area;
-		}
-		spa->is_dead = true;
+	down_write(&spa->spg->rw_lock);
+	if (!spg_valid(spa->spg)) {
 		up_write(&spa->spg->rw_lock);
+		pr_info("no need to unshare uva, sp group of spa is dead\n");
+		goto out_clr_flag;
+	}
+	/* the life cycle of spa has a direct relation with sp group */
+	if (unlikely(spa->is_dead)) {
 		up_write(&spa->spg->rw_lock);
+		pr_err("unexpected double sp unshare\n");
+		dump_stack();
+		ret = -EINVAL;
+		goto out_drop_area;
+	}
+	spa->is_dead = true;
+	up_write(&spa->spg->rw_lock);
 
-		down_read(&spa->spg->rw_lock);
-		__sp_free(spa->spg, uva_aligned, size_aligned, NULL);
-		up_read(&spa->spg->rw_lock);
+	down_read(&spa->spg->rw_lock);
+	__sp_free(spa->spg, spa->va_start, spa->real_size, NULL);
+	up_read(&spa->spg->rw_lock);
 
-		if (current->mm == NULL)
-			atomic64_sub(spa->real_size, &kthread_stat.k2u_size);
-		else
-			sp_update_process_stat(current, false, spa);
-	} else {
-		WARN(1, "unshare uva invalid spa type");
-	}
+	if (current->mm != NULL)
+		sp_update_process_stat(current, false, spa);
 
 out_clr_flag:
 	if (!vmalloc_area_clr_flag(spa->kva, VM_SHAREPOOL))
-		pr_debug("clear spa->kva %ld is not valid\n", spa->kva);
+		pr_info("clear spa->kva %ld is not valid\n", spa->kva);
 	spa->kva = 0;
 
 out_drop_area:
 	__sp_area_drop(spa);
 out:
-	sp_group_drop(spg);
+	sp_group_put(spg);
 	return ret;
 }
@@ -3673,7 +3546,7 @@ bool mg_sp_config_dvpp_range(size_t start, size_t size, int device_id, int tgid)
 		err = true;
 
 put_spg:
-	sp_group_drop(spg);
+	sp_group_put(spg);
 put_mm:
 	mmput(mm);
 put_task:
@@ -3743,17 +3616,6 @@ static void get_mm_rss_info(struct mm_struct *mm, unsigned long *anon,
 	*total_rss = *anon + *file + *shmem;
 }
 
-static long get_proc_k2u(struct sp_proc_stat *stat)
-{
-	return byte2kb(atomic64_read(&stat->k2u_size));
-}
-
-static long get_proc_alloc(struct sp_proc_stat *stat)
-{
-	return byte2kb(atomic64_read(&stat->alloc_nsize) +
-		       atomic64_read(&stat->alloc_hsize));
-}
-
 static void get_process_sp_res(struct sp_group_master *master,
 			       long *sp_res_out, long *sp_res_nsize_out)
 {
@@ -3765,18 +3627,11 @@ static void get_process_sp_res(struct sp_group_master *master,
 
 	list_for_each_entry(spg_node, &master->node_list, group_node) {
 		spg = spg_node->spg;
-		*sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize));
-		*sp_res_out += byte2kb(atomic64_read(&spg->instat.alloc_hsize));
-		*sp_res_nsize_out += byte2kb(atomic64_read(&spg->instat.alloc_nsize));
+		*sp_res_out += meminfo_alloc_sum_byKB(&spg->meminfo);
+		*sp_res_nsize_out += byte2kb(atomic64_read(&spg->meminfo.alloc_nsize));
 	}
 }
 
-static long get_sp_res_by_spg_proc(struct sp_group_node *spg_node)
-{
-	return byte2kb(atomic64_read(&spg_node->spg->instat.alloc_nsize) +
-		       atomic64_read(&spg_node->spg->instat.alloc_hsize));
-}
-
 /*
  * Statistics of RSS has a maximum 64 pages deviation (256KB).
  * Please check_sync_rss_stat().
@@ -3795,17 +3650,6 @@ static void get_process_non_sp_res(unsigned long total_rss, unsigned long shmem, *non_sp_shm_out = non_sp_shm; } -static long get_spg_proc_alloc(struct sp_group_node *spg_node) -{ - return byte2kb(atomic64_read(&spg_node->instat.alloc_nsize) + - atomic64_read(&spg_node->instat.alloc_hsize)); -} - -static long get_spg_proc_k2u(struct sp_group_node *spg_node) -{ - return byte2kb(atomic64_read(&spg_node->instat.k2u_size)); -} - static void print_process_prot(struct seq_file *seq, unsigned long prot) { if (prot == PROT_READ) @@ -3821,7 +3665,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, { struct mm_struct *mm; struct sp_group_master *master; - struct sp_proc_stat *proc_stat; + struct sp_meminfo *meminfo; struct sp_group_node *spg_node; unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; @@ -3840,7 +3684,7 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, goto out; get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); - proc_stat = &master->instat; + meminfo = &master->meminfo; get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); @@ -3850,9 +3694,9 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", "Non-SP_Shm", "VIRT"); seq_printf(m, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", - proc_stat->tgid, proc_stat->comm, - get_proc_alloc(proc_stat), - get_proc_k2u(proc_stat), + master->tgid, master->comm, + meminfo_alloc_sum_byKB(meminfo), + meminfo_k2u_size(meminfo), sp_res, non_sp_res, non_sp_shm, page2kb(mm->total_vm)); @@ -3863,9 +3707,9 @@ int proc_sp_group_state(struct seq_file *m, struct pid_namespace *ns, list_for_each_entry(spg_node, &master->node_list, group_node) { seq_printf(m, "%-8d %-9ld %-9ld %-9ld ", spg_node->spg->id, - get_spg_proc_alloc(spg_node), - get_spg_proc_k2u(spg_node), - get_sp_res_by_spg_proc(spg_node)); + meminfo_alloc_sum_byKB(&spg_node->meminfo), + meminfo_k2u_size(&spg_node->meminfo), + meminfo_alloc_sum_byKB(&spg_node->spg->meminfo)); print_process_prot(m, spg_node->prot); seq_putc(m, '\n'); } @@ -3951,7 +3795,7 @@ static void spa_dvpp_stat_show(struct seq_file *seq) } -void spa_overview_show(struct seq_file *seq) +static void spa_overview_show(struct seq_file *seq) { unsigned int total_num, alloc_num, k2u_task_num, k2u_spg_num; unsigned long total_size, alloc_size, k2u_task_size, k2u_spg_size; @@ -3973,29 +3817,16 @@ void spa_overview_show(struct seq_file *seq) dvpp_va_size = spa_stat.dvpp_va_size; spin_unlock(&sp_area_lock); - if (seq != NULL) { - seq_printf(seq, "Spa total num %u.\n", total_num); - seq_printf(seq, "Spa alloc num %u, k2u(task) num %u, k2u(spg) num %u.\n", - alloc_num, k2u_task_num, k2u_spg_num); - seq_printf(seq, "Spa total size: %13lu KB\n", byte2kb(total_size)); - seq_printf(seq, "Spa alloc size: %13lu KB\n", byte2kb(alloc_size)); - seq_printf(seq, "Spa k2u(task) size: %13lu KB\n", byte2kb(k2u_task_size)); - seq_printf(seq, "Spa k2u(spg) size: %13lu KB\n", byte2kb(k2u_spg_size)); - seq_printf(seq, "Spa dvpp size: %13lu KB\n", byte2kb(dvpp_size)); - seq_printf(seq, "Spa dvpp va size: %13lu MB\n", byte2mb(dvpp_va_size)); - seq_puts(seq, "\n"); - } else { - pr_info("Spa total num %u.\n", total_num); - pr_info("Spa alloc num %u, k2u(task) num %u, k2u(spg) num %u.\n", - alloc_num, k2u_task_num, k2u_spg_num); - pr_info("Spa total size: %13lu KB\n", 
byte2kb(total_size)); - pr_info("Spa alloc size: %13lu KB\n", byte2kb(alloc_size)); - pr_info("Spa k2u(task) size: %13lu KB\n", byte2kb(k2u_task_size)); - pr_info("Spa k2u(spg) size: %13lu KB\n", byte2kb(k2u_spg_size)); - pr_info("Spa dvpp size: %13lu KB\n", byte2kb(dvpp_size)); - pr_info("Spa dvpp va size: %13lu MB\n", byte2mb(dvpp_va_size)); - pr_info("\n"); - } + SEQ_printf(seq, "Spa total num %u.\n", total_num); + SEQ_printf(seq, "Spa alloc num %u, k2u(task) num %u, k2u(spg) num %u.\n", + alloc_num, k2u_task_num, k2u_spg_num); + SEQ_printf(seq, "Spa total size: %13lu KB\n", byte2kb(total_size)); + SEQ_printf(seq, "Spa alloc size: %13lu KB\n", byte2kb(alloc_size)); + SEQ_printf(seq, "Spa k2u(task) size: %13lu KB\n", byte2kb(k2u_task_size)); + SEQ_printf(seq, "Spa k2u(spg) size: %13lu KB\n", byte2kb(k2u_spg_size)); + SEQ_printf(seq, "Spa dvpp size: %13lu KB\n", byte2kb(dvpp_size)); + SEQ_printf(seq, "Spa dvpp va size: %13lu MB\n", byte2mb(dvpp_va_size)); + SEQ_printf(seq, "\n"); } static int spg_info_show(int id, void *p, void *data) @@ -4006,60 +3837,52 @@ static int spg_info_show(int id, void *p, void *data) if (id >= SPG_ID_LOCAL_MIN && id <= SPG_ID_LOCAL_MAX) return 0; - if (seq != NULL) { - seq_printf(seq, "Group %6d ", id); - - down_read(&spg->rw_lock); - seq_printf(seq, "size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", - byte2kb(atomic64_read(&spg->instat.size)), - atomic_read(&spg->instat.spa_num), - byte2kb(atomic64_read(&spg->instat.alloc_size)), - byte2kb(atomic64_read(&spg->instat.alloc_nsize)), - byte2kb(atomic64_read(&spg->instat.alloc_hsize))); - up_read(&spg->rw_lock); - } else { - pr_info("Group %6d ", id); + SEQ_printf(seq, "Group %6d ", id); - down_read(&spg->rw_lock); - pr_info("size: %lld KB, spa num: %d, total alloc: %lld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", - byte2kb(atomic64_read(&spg->instat.size)), - atomic_read(&spg->instat.spa_num), - byte2kb(atomic64_read(&spg->instat.alloc_size)), - byte2kb(atomic64_read(&spg->instat.alloc_nsize)), - byte2kb(atomic64_read(&spg->instat.alloc_hsize))); - up_read(&spg->rw_lock); - } + down_read(&spg->rw_lock); + SEQ_printf(seq, "size: %lld KB, spa num: %d, total alloc: %ld KB, normal alloc: %lld KB, huge alloc: %lld KB\n", + byte2kb(meminfo_total_size(&spg->meminfo)), + atomic_read(&spg->spa_num), + meminfo_alloc_sum_byKB(&spg->meminfo), + byte2kb(atomic64_read(&spg->meminfo.alloc_nsize)), + byte2kb(atomic64_read(&spg->meminfo.alloc_hsize))); + up_read(&spg->rw_lock); return 0; } -void spg_overview_show(struct seq_file *seq) +static void spg_overview_show(struct seq_file *seq) { if (!sp_is_enabled()) return; - if (seq != NULL) { - seq_printf(seq, "Share pool total size: %lld KB, spa total num: %d.\n", - byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), - atomic_read(&sp_overall_stat.spa_total_num)); - } else { - pr_info("Share pool total size: %lld KB, spa total num: %d.\n", - byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), - atomic_read(&sp_overall_stat.spa_total_num)); - } + SEQ_printf(seq, "Share pool total size: %lld KB, spa total num: %d.\n", + byte2kb(atomic64_read(&sp_overall_stat.spa_total_size)), + atomic_read(&sp_overall_stat.spa_total_num)); down_read(&sp_group_sem); idr_for_each(&sp_group_idr, spg_info_show, seq); up_read(&sp_group_sem); - if (seq != NULL) - seq_puts(seq, "\n"); - else - pr_info("\n"); + SEQ_printf(seq, "\n"); +} + +static bool should_show_statistics(void) +{ + if (!capable(CAP_SYS_ADMIN)) + return false; + + if 
(task_active_pid_ns(current) != &init_pid_ns) + return false; + + return true; } static int spa_stat_show(struct seq_file *seq, void *offset) { + if (!should_show_statistics()) + return -EPERM; + spg_overview_show(seq); spa_overview_show(seq); /* print the file header */ @@ -4085,16 +3908,16 @@ static int proc_usage_by_group(int id, void *p, void *data) list_for_each_entry(spg_node, &spg->procs, proc_node) { master = spg_node->master; mm = master->mm; - tgid = master->instat.tgid; + tgid = master->tgid; get_mm_rss_info(mm, &anon, &file, &shmem, &total_rss); seq_printf(seq, "%-8d ", tgid); seq_printf(seq, "%-8d ", id); seq_printf(seq, "%-9ld %-9ld %-9ld %-8ld %-7ld %-7ld ", - get_spg_proc_alloc(spg_node), - get_spg_proc_k2u(spg_node), - get_sp_res_by_spg_proc(spg_node), + meminfo_alloc_sum_byKB(&spg_node->meminfo), + meminfo_k2u_size(&spg_node->meminfo), + meminfo_alloc_sum_byKB(&spg_node->spg->meminfo), page2kb(mm->total_vm), page2kb(total_rss), page2kb(shmem)); print_process_prot(seq, spg_node->prot); @@ -4108,6 +3931,9 @@ static int proc_usage_by_group(int id, void *p, void *data) static int proc_group_usage_show(struct seq_file *seq, void *offset) { + if (!should_show_statistics()) + return -EPERM; + spg_overview_show(seq); spa_overview_show(seq); @@ -4115,11 +3941,6 @@ static int proc_group_usage_show(struct seq_file *seq, void *offset) seq_printf(seq, "%-8s %-8s %-9s %-9s %-9s %-8s %-7s %-7s %-4s\n", "PID", "Group_ID", "SP_ALLOC", "SP_K2U", "SP_RES", "VIRT", "RES", "Shm", "PROT"); - /* print kthread buff_module_guard_work */ - seq_printf(seq, "%-8s %-8s %-9lld %-9lld\n", - "guard", "-", - byte2kb(atomic64_read(&kthread_stat.alloc_size)), - byte2kb(atomic64_read(&kthread_stat.k2u_size))); down_read(&sp_group_sem); idr_for_each(&sp_group_idr, proc_usage_by_group, seq); @@ -4133,7 +3954,10 @@ static int proc_usage_show(struct seq_file *seq, void *offset) struct sp_group_master *master = NULL; unsigned long anon, file, shmem, total_rss; long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; - struct sp_proc_stat *proc_stat; + struct sp_meminfo *meminfo; + + if (!should_show_statistics()) + return -EPERM; seq_printf(seq, "%-8s %-16s %-9s %-9s %-9s %-10s %-10s %-8s\n", "PID", "COMM", "SP_ALLOC", "SP_K2U", "SP_RES", "Non-SP_RES", @@ -4142,15 +3966,15 @@ static int proc_usage_show(struct seq_file *seq, void *offset) down_read(&sp_group_sem); mutex_lock(&master_list_lock); list_for_each_entry(master, &master_list, list_node) { - proc_stat = &master->instat; + meminfo = &master->meminfo; get_mm_rss_info(master->mm, &anon, &file, &shmem, &total_rss); get_process_sp_res(master, &sp_res, &sp_res_nsize); get_process_non_sp_res(total_rss, shmem, sp_res_nsize, &non_sp_res, &non_sp_shm); seq_printf(seq, "%-8d %-16s %-9ld %-9ld %-9ld %-10ld %-10ld %-8ld\n", - proc_stat->tgid, proc_stat->comm, - get_proc_alloc(proc_stat), - get_proc_k2u(proc_stat), + master->tgid, master->comm, + meminfo_alloc_sum_byKB(meminfo), + meminfo_k2u_size(meminfo), sp_res, non_sp_res, non_sp_shm, page2kb(master->mm->total_vm)); } @@ -4206,6 +4030,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, int err; int node_id; struct sp_area *spa; + bool charge_hpage; spa = vma->vm_private_data; if (!spa) { @@ -4221,12 +4046,15 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, if (idx >= size) goto out; + charge_hpage = false; page = alloc_huge_page(vma, haddr, 0); if (IS_ERR(page)) { page = hugetlb_alloc_hugepage(node_id, HUGETLB_ALLOC_BUDDY | HUGETLB_ALLOC_NORECLAIM); if (!page) page = ERR_PTR(-ENOMEM); + else if 
(!PageMemcgKmem(page)) + charge_hpage = true; } if (IS_ERR(page)) { ptl = huge_pte_lock(h, mm, ptep); @@ -4239,6 +4067,13 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, ret = vmf_error(PTR_ERR(page)); goto out; } + + if (charge_hpage && mem_cgroup_charge(page, vma->vm_mm, GFP_KERNEL)) { + put_page(page); + ret = vmf_error(-ENOMEM); + goto out; + } + __SetPageUptodate(page); new_page = true; @@ -4271,9 +4106,8 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, spin_unlock(ptl); - if (new_page) { + if (new_page) SetPagePrivate(&page[1]); - } unlock_page(page); out: @@ -4286,7 +4120,7 @@ vm_fault_t sharepool_no_page(struct mm_struct *mm, goto out; } -/* +/** * The caller must ensure that this function is called * when the last thread in the thread group exits. */ @@ -4313,6 +4147,11 @@ int sp_group_exit(void) return 0; } + if (master->tgid != current->tgid) { + up_write(&sp_group_sem); + return 0; + } + list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { spg = spg_node->spg; @@ -4320,8 +4159,7 @@ int sp_group_exit(void) /* a dead group should NOT be reactive again */ if (spg_valid(spg) && list_is_singular(&spg->procs)) is_alive = spg->is_alive = false; - spg->proc_num--; - list_del(&spg_node->proc_node); + delete_spg_node(spg, spg_node); up_write(&spg->rw_lock); if (!is_alive) @@ -4342,7 +4180,7 @@ int sp_group_exit(void) void sp_group_post_exit(struct mm_struct *mm) { - struct sp_proc_stat *stat; + struct sp_meminfo *meminfo; long alloc_size, k2u_size; /* lockless visit */ struct sp_group_master *master = mm->sp_group_master; @@ -4367,16 +4205,13 @@ void sp_group_post_exit(struct mm_struct *mm) * A process not in an sp group doesn't need to print because there * won't be any memory which is not freed. */ - stat = &master->instat; - if (stat) { - alloc_size = atomic64_read(&stat->alloc_nsize) + atomic64_read(&stat->alloc_hsize); - k2u_size = atomic64_read(&stat->k2u_size); - - if (alloc_size != 0 || k2u_size != 0) - pr_info("process %s(%d) exits. It applied %ld aligned KB, k2u shared %ld aligned KB\n", - stat->comm, stat->tgid, - byte2kb(alloc_size), byte2kb(k2u_size)); - } + meminfo = &master->meminfo; + alloc_size = meminfo_alloc_sum(meminfo); + k2u_size = atomic64_read(&meminfo->k2u_size); + if (alloc_size != 0 || k2u_size != 0) + pr_info("process %s(%d) exits. 
It applied %ld aligned KB, k2u shared %ld aligned KB\n", + master->comm, master->tgid, + byte2kb(alloc_size), byte2kb(k2u_size)); down_write(&sp_group_sem); list_for_each_entry_safe(spg_node, tmp, &master->node_list, group_node) { @@ -4389,9 +4224,7 @@ void sp_group_post_exit(struct mm_struct *mm) } up_write(&sp_group_sem); - mutex_lock(&master_list_lock); - list_del(&master->list_node); - mutex_unlock(&master_list_lock); + sp_del_group_master(master); kfree(master); } diff --git a/mm/share_pool_internal.h b/mm/share_pool_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..222993618dca2286d3c20863f5cb74bd166b2d99 --- /dev/null +++ b/mm/share_pool_internal.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef SHARE_POOL_INTERNAL_H +#define SHARE_POOL_INTERNAL_H + +#include <linux/memcontrol.h> +#include <linux/share_pool.h> + +#ifdef CONFIG_ASCEND_SHARE_POOL +static inline void sp_memcg_uncharge_hpage(struct page *page) +{ + if (!sp_is_enabled()) + return; + + mem_cgroup_uncharge(page); +} +#else +static inline void sp_memcg_uncharge_hpage(struct page *page) +{ +} +#endif + +#endif diff --git a/mm/vmalloc.c b/mm/vmalloc.c index d7a68eb0db429476724e9de15f7b52fd0d1f2735..e27cd716ca95c9c46b9734e4170da63d3cf7a89e 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -578,6 +578,7 @@ static int vmap_pages_range(unsigned long addr, unsigned long end, return err; } +#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING static int vmap_hugepages_range_noflush(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { @@ -609,6 +610,7 @@ static int vmap_hugepages_range(unsigned long addr, unsigned long end, return err; } +#endif /** * map_kernel_range_noflush - map kernel VM area with the specified pages @@ -2792,6 +2794,7 @@ void *vmap(struct page **pages, unsigned int count, } EXPORT_SYMBOL(vmap); +#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING /** * vmap_hugepage - map an array of huge pages into virtually contiguous space * @pages: array of huge page pointers (only the header) @@ -2830,6 +2833,7 @@ void *vmap_hugepage(struct page **pages, unsigned int count, return area->addr; } EXPORT_SYMBOL(vmap_hugepage); +#endif #ifdef CONFIG_VMAP_PFN struct vmap_pfn_data { @@ -3015,7 +3019,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, size_per_node = size; if (node == NUMA_NO_NODE) size_per_node /= num_online_nodes(); +#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING if (size_per_node >= PMD_SIZE || vm_flags & VM_HUGE_PAGES) { +#else + if (size_per_node >= PMD_SIZE) { +#endif shift = PMD_SHIFT; align = max(real_align, 1UL << shift); size = ALIGN(real_size, 1UL << shift); @@ -3050,8 +3058,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align, return addr; fail: +#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING /* User could specify VM_HUGE_PAGES to alloc only hugepages. */ if (shift > PAGE_SHIFT && !(vm_flags & VM_HUGE_PAGES)) { +#else + if (shift > PAGE_SHIFT) { +#endif shift = PAGE_SHIFT; align = real_align; size = real_size; @@ -3261,6 +3273,7 @@ void *vmalloc_32_user(unsigned long size) } EXPORT_SYMBOL(vmalloc_32_user); +#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING /** * vmalloc_hugepage - allocate virtually contiguous hugetlb memory * @size: allocation size @@ -3298,6 +3311,7 @@ void *vmalloc_hugepage_user(unsigned long size) __builtin_return_address(0)); } EXPORT_SYMBOL(vmalloc_hugepage_user); +#endif /* * small helper routine , copy contents to buf from addr. 
@@ -3620,6 +3634,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, } EXPORT_SYMBOL(remap_vmalloc_range); +#ifdef CONFIG_EXTEND_HUGEPAGE_MAPPING /** * remap_vmalloc_hugepage_range_partial - map vmalloc hugepages * to userspace @@ -3706,6 +3721,7 @@ int remap_vmalloc_hugepage_range(struct vm_area_struct *vma, void *addr, vma->vm_end - vma->vm_start); } EXPORT_SYMBOL(remap_vmalloc_hugepage_range); +#endif void free_vm_area(struct vm_struct *area) {
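SEQ_printf(), which collapses the duplicated seq_file/printk branches in spa_overview_show(), spg_info_show() and spg_overview_show() above, is likewise defined outside these hunks. It presumably follows the long-standing pattern of the macro of the same name in kernel/sched/debug.c: write to the seq_file when one is supplied, otherwise fall back to the kernel log. A sketch under that assumption:

#define SEQ_printf(m, x...)			\
do {						\
	if (m)					\
		/* a /proc reader supplied a seq_file */	\
		seq_printf(m, x);		\
	else					\
		/* no seq_file, e.g. a debug dump path */	\
		pr_info(x);			\
} while (0)

This keeps a single call site per statistics line while preserving the old behaviour of dumping the same information to dmesg when no seq_file is available.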