diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 8c0fbdd8ce6fba671b8efc2b74678ef86cf676e1..cb18f7c1bea30ea356c5466a1f86a016e8c3b221 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -110,6 +110,12 @@ background_gc=%s	 Turn on/off cleaning operations, namely garbage
			 on synchronous garbage collection running in background.
			 Default value for this option is on. So garbage
			 collection is on by default.
+gc_merge		 When background_gc is on, this option can be enabled to
+			 let the background GC thread handle foreground GC requests;
+			 this avoids the sluggishness caused by slow foreground
+			 GC when GC is triggered from a process with limited
+			 I/O and CPU resources.
+nogc_merge		 Disable the GC merge feature.
 disable_roll_forward	 Disable the roll-forward recovery routine
 norecovery		 Disable the roll-forward recovery routine, mounted read-
			 only (i.e., -o ro,disable_roll_forward)
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index d13c5c6a978769b69eef060d50569c578616ba14..3dfc4f60de0c7f98920cb9c38011e15fe2235a53 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -137,3 +137,10 @@ config F2FS_FS_LZORLE
	default y
	help
	  Support LZO-RLE compress algorithm, if unsure, say Y.
+
+config F2FS_GRADING_SSR
+	bool "F2FS grading SSR"
+	depends on F2FS_FS
+	default y
+	help
+	  Use graded SSR allocation to sustain write performance as free space runs low.
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 1b11a42847c48e92b8a3af471c5793c724ffcc66..f54de04e6b8782cba602252e5d3c45946a6fd796 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1390,7 +1390,7 @@ struct page *f2fs_get_new_data_page(struct inode *inode,
	return page;
 }

-static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
+static int __allocate_data_block(struct dnode_of_data *dn, int seg_type, int contig_level)
 {
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
@@ -1417,7 +1417,7 @@ static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
-					&sum, seg_type, NULL);
+					&sum, seg_type, NULL, contig_level);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
				old_blkaddr, old_blkaddr);
@@ -1511,6 +1511,10 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	struct extent_info ei = {0,0,0};
	block_t blkaddr;
	unsigned int start_pgofs;
+	int contig_level = SEQ_NONE;
+#ifdef CONFIG_F2FS_GRADING_SSR
+	contig_level = check_io_seq(maxblocks);
+#endif

	if (!maxblocks)
		return 0;
@@ -1594,7 +1598,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
	/* use out-place-update for direct IO under LFS mode */
	if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create) {
-		err = __allocate_data_block(&dn, map->m_seg_type);
+		err = __allocate_data_block(&dn, map->m_seg_type, contig_level);
		if (err)
			goto sync_out;
		blkaddr = dn.data_blkaddr;
@@ -1615,7 +1619,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
			WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
					flag != F2FS_GET_BLOCK_DIO);
			err = __allocate_data_block(&dn,
-						map->m_seg_type);
+						map->m_seg_type, contig_level);
			if (!err)
				set_inode_flag(inode, FI_APPEND_WRITE);
		}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 2d7799bd30b107fe39760e79880a9daeaceaa4aa..41222b59e59648fc1aff431bef1589cf88edcb96 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -99,6 +99,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_DISABLE_CHECKPOINT	0x02000000
 #define F2FS_MOUNT_NORECOVERY		0x04000000
 #define F2FS_MOUNT_ATGC			0x08000000
+#define F2FS_MOUNT_GC_MERGE		0x20000000

 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -275,11 +276,17 @@ struct discard_entry {
 /* default discard granularity of inner discard thread, unit: block count */
 #define DEFAULT_DISCARD_GRANULARITY		16
+#define DISCARD_GRAN_BL		16
+#define DISCARD_GRAN_BG		512
+#define DISCARD_GRAN_FORCE	1

 /* max discard pend list number */
 #define MAX_PLIST_NUM		512
 #define plist_idx(blk_num)	((blk_num) >= MAX_PLIST_NUM ?		\
					(MAX_PLIST_NUM - 1) : ((blk_num) - 1))
+#define FS_FREE_SPACE_PERCENT		20
+#define DEVICE_FREE_SPACE_PERCENT	10
+#define HUNDRED_PERCENT			100

 enum {
	D_PREP,			/* initial */
@@ -318,24 +325,37 @@ struct discard_cmd {

 enum {
	DPOLICY_BG,
+	DPOLICY_BALANCE,
	DPOLICY_FORCE,
	DPOLICY_FSTRIM,
	DPOLICY_UMOUNT,
	MAX_DPOLICY,
 };

+enum {
+	SUB_POLICY_BIG,
+	SUB_POLICY_MID,
+	SUB_POLICY_SMALL,
+	NR_SUB_POLICY,
+};
+
+struct discard_sub_policy {
+	unsigned int max_requests;
+	int interval;
+};
+
 struct discard_policy {
	int type;			/* type of discard */
	unsigned int min_interval;	/* used for candidates exist */
	unsigned int mid_interval;	/* used for device busy */
	unsigned int max_interval;	/* used for candidates not exist */
-	unsigned int max_requests;	/* # of discards issued per round */
	unsigned int io_aware_gran;	/* minimum granularity discard not be aware of I/O */
	bool io_aware;			/* issue discard in idle time */
	bool sync;			/* submit discard with REQ_SYNC flag */
	bool ordered;			/* issue discard by lba order */
	bool timeout;			/* discard timeout for put_super */
	unsigned int granularity;	/* discard granularity */
+	struct discard_sub_policy sub_policy[NR_SUB_POLICY];
 };

 struct discard_cmd_control {
@@ -357,6 +377,7 @@ struct discard_cmd_control {
	atomic_t discard_cmd_cnt;		/* # of cached cmd count */
	struct rb_root_cached root;		/* root of discard rb-tree */
	bool rbtree_check;			/* config for consistency check */
+	int discard_type;			/* discard type */
 };

 /* for the list of fsync inodes, used only during recovery */
@@ -1342,6 +1363,20 @@ struct decompress_io_ctx {
 #define MAX_COMPRESS_LOG_SIZE		8
 #define MAX_COMPRESS_WINDOW_SIZE(log_size)	((PAGE_SIZE) << (log_size))

+#ifdef CONFIG_F2FS_GRADING_SSR
+struct f2fs_hot_cold_params {
+	unsigned int enable;
+	unsigned int hot_data_lower_limit;
+	unsigned int hot_data_waterline;
+	unsigned int warm_data_lower_limit;
+	unsigned int warm_data_waterline;
+	unsigned int hot_node_lower_limit;
+	unsigned int hot_node_waterline;
+	unsigned int warm_node_lower_limit;
+	unsigned int warm_node_waterline;
+};
+#endif
+
 struct f2fs_sb_info {
	struct super_block *sb;			/* pointer to VFS super block */
	struct proc_dir_entry *s_proc;		/* proc entry */
@@ -1548,6 +1583,10 @@ struct f2fs_sb_info {
	struct kmem_cache *page_array_slab;	/* page array entry */
	unsigned int page_array_slab_size;	/* default page array slab size */
 #endif
+
+#ifdef CONFIG_F2FS_GRADING_SSR
+	struct f2fs_hot_cold_params hot_cold_params;
+#endif
 };

 struct f2fs_private_dio {
@@ -3049,6 +3088,18 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
	f2fs_record_iostat(sbi);
 }

+static inline block_t fs_free_space_threshold(struct f2fs_sb_info *sbi)
+{
+	return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg *
+			FS_FREE_SPACE_PERCENT) / HUNDRED_PERCENT;
+}
+
+static inline block_t device_free_space_threshold(struct f2fs_sb_info *sbi)
+{
+	return (block_t)(SM_I(sbi)->main_segments * sbi->blocks_per_seg *
+			DEVICE_FREE_SPACE_PERCENT) / HUNDRED_PERCENT;
+}
+
 #define __is_large_section(sbi)	((sbi)->segs_per_sec > 1)

 #define __is_meta_io(fio)	(PAGE_TYPE_OF_BIO((fio)->type) == META)
@@ -3258,6 +3309,10 @@ void f2fs_destroy_node_manager_caches(void);
 /*
  * segment.c
  */
+unsigned long find_rev_next_bit(const unsigned long *addr,
+			unsigned long size, unsigned long offset);
+unsigned long find_rev_next_zero_bit(const unsigned long *addr,
+			unsigned long size, unsigned long offset);
 bool f2fs_need_SSR(struct f2fs_sb_info *sbi);
 void f2fs_register_inmem_page(struct inode *inode, struct page *page);
 void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure);
@@ -3315,7 +3370,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
			block_t old_blkaddr, block_t *new_blkaddr,
			struct f2fs_summary *sum, int type,
-			struct f2fs_io_info *fio);
+			struct f2fs_io_info *fio, int contig_level);
 void f2fs_wait_on_page_writeback(struct page *page,
			enum page_type type, bool ordered, bool locked);
 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 72f227f6ebad094c1712efae3eeedb0e2d47d7ba..a981e466cc7db79319ba009b43556409ca8fb619 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -31,19 +31,24 @@ static int gc_thread_func(void *data)
	struct f2fs_sb_info *sbi = data;
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+	wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
	unsigned int wait_ms;

	wait_ms = gc_th->min_sleep_time;

	set_freezable();
	do {
-		bool sync_mode;
+		bool sync_mode, foreground = false;

		wait_event_interruptible_timeout(*wq,
				kthread_should_stop() || freezing(current) ||
+				waitqueue_active(fggc_wq) ||
				gc_th->gc_wake,
				msecs_to_jiffies(wait_ms));

+		if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq))
+			foreground = true;
+
		/* give it a try one time */
		if (gc_th->gc_wake)
			gc_th->gc_wake = 0;
@@ -90,7 +95,10 @@ static int gc_thread_func(void *data)
			goto do_gc;
		}

-		if (!down_write_trylock(&sbi->gc_lock)) {
+		if (foreground) {
+			down_write(&sbi->gc_lock);
+			goto do_gc;
+		} else if (!down_write_trylock(&sbi->gc_lock)) {
			stat_other_skip_bggc_count(sbi);
			goto next;
		}
@@ -107,14 +115,22 @@ static int gc_thread_func(void *data)
		else
			increase_sleep_time(gc_th, &wait_ms);
 do_gc:
-		stat_inc_bggc_count(sbi->stat_info);
+		if (!foreground)
+			stat_inc_bggc_count(sbi->stat_info);

		sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;

+		/* foreground GC was triggered via f2fs_balance_fs() */
+		if (foreground)
+			sync_mode = false;
+
		/* if return value is not zero, no victim was selected */
-		if (f2fs_gc(sbi, sync_mode, true, false, NULL_SEGNO))
+		if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
			wait_ms = gc_th->no_gc_sleep_time;

+		if (foreground)
+			wake_up_all(&gc_th->fggc_wq);
+
		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));
@@ -148,6 +164,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
	sbi->gc_thread = gc_th;
	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
+	init_waitqueue_head(&sbi->gc_thread->fggc_wq);
	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(gc_th->f2fs_gc_task)) {
@@ -165,6 +182,7 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
	if (!gc_th)
		return;
	kthread_stop(gc_th->f2fs_gc_task);
+	wake_up_all(&gc_th->fggc_wq);
	kfree(gc_th);
	sbi->gc_thread = NULL;
 }
@@ -1220,7 +1238,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
	}

	f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
-					&sum, type, NULL);
+					&sum, type, NULL, SEQ_NONE);

	fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
				newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index 0c8dae12dc512899f8850e6a8fec8a4abb207b93..3fe145e8e594f30b2e73b95785ca72e80342e58c 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -42,6 +42,12 @@ struct f2fs_gc_kthread {

	/* for changing gc mode */
	unsigned int gc_wake;
+
+	/* for GC_MERGE mount option */
+	wait_queue_head_t fggc_wq;		/*
+						 * caller of f2fs_balance_fs()
+						 * will wait on this wait queue.
+						 */
 };

 struct gc_inode_list {
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index d04b449978aa8e4c8146527508bb2d6fcd60096b..24d22c2954b50727ce683749e83d76ebbd0c5b48 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -30,6 +30,24 @@ static struct kmem_cache *discard_cmd_slab;
 static struct kmem_cache *sit_entry_set_slab;
 static struct kmem_cache *inmem_entry_slab;

+static struct discard_policy dpolicys[MAX_DPOLICY] = {
+	{DPOLICY_BG, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
+		{{1, 0}, {0, 0}, {0, 0}}},
+	{DPOLICY_BALANCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_BL,
+		{{1, 0}, {2, 50}, {0, 0}}},
+	{DPOLICY_FORCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_FORCE,
+		{{1, 0}, {2, 50}, {4, 2000}}},
+	{DPOLICY_FSTRIM, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_FORCE,
+		{{8, 0}, {8, 0}, {8, 0}}},
+	{DPOLICY_UMOUNT, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
+		MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
+		{{UINT_MAX, 0}, {0, 0}, {0, 0}}}
+};
+
 static unsigned long __reverse_ulong(unsigned char *str)
 {
	unsigned long tmp = 0;
@@ -93,7 +111,7 @@ static inline unsigned long __reverse_ffs(unsigned long word)
  * f2fs_set_bit(0, bitmap) => 1000 0000
  * f2fs_set_bit(7, bitmap) => 0000 0001
  */
-static unsigned long __find_rev_next_bit(const unsigned long *addr,
+unsigned long find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
 {
	const unsigned long *p = addr + BIT_WORD(offset);
@@ -129,7 +147,7 @@ static unsigned long __find_rev_next_bit(const unsigned long *addr,
	return result - size + __reverse_ffs(tmp);
 }

-static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
+unsigned long find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
 {
	const unsigned long *p = addr + BIT_WORD(offset);
@@ -183,6 +201,75 @@ bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
 }

+#ifdef CONFIG_F2FS_GRADING_SSR
+static bool need_ssr_by_type(struct f2fs_sb_info *sbi, int type, int contig_level)
+{
+	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
+	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
+	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
+	u64 valid_blocks = sbi->total_valid_block_count;
+	u64 total_blocks = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
+	u64 left_space = (total_blocks - valid_blocks) << 2;
+	unsigned int free_segs = free_segments(sbi);
+	unsigned int ovp_segments = overprovision_segments(sbi);
+	unsigned int lower_limit = 0;
+	unsigned int waterline = 0;
+	int dirty_sum = node_secs + 2 * dent_secs + imeta_secs;
+
+	if (sbi->hot_cold_params.enable == GRADING_SSR_OFF)
+		return f2fs_need_SSR(sbi);
+	if (f2fs_lfs_mode(sbi))
+		return false;
+	if (sbi->gc_mode == GC_URGENT_HIGH)
+		return true;
+	if (contig_level == SEQ_256BLKS && type == CURSEG_WARM_DATA &&
+			free_sections(sbi) > dirty_sum + 3 * reserved_sections(sbi) / 2)
+		return false;
+	if (free_sections(sbi) <= (unsigned int)(dirty_sum +
+			2 * reserved_sections(sbi)))
+		return true;
+	if (contig_level >= SEQ_32BLKS || total_blocks <= SSR_MIN_BLKS_LIMIT)
+		return false;
+
+	left_space -= ovp_segments * KBS_PER_SEGMENT;
+	if (unlikely(left_space == 0))
+		return false;
+
+	switch (type) {
+	case CURSEG_HOT_DATA:
+		lower_limit = sbi->hot_cold_params.hot_data_lower_limit;
+		waterline = sbi->hot_cold_params.hot_data_waterline;
+		break;
+	case CURSEG_WARM_DATA:
+		lower_limit = sbi->hot_cold_params.warm_data_lower_limit;
+		waterline = sbi->hot_cold_params.warm_data_waterline;
+		break;
+	case CURSEG_HOT_NODE:
+		lower_limit = sbi->hot_cold_params.hot_node_lower_limit;
+		waterline = sbi->hot_cold_params.hot_node_waterline;
+		break;
+	case CURSEG_WARM_NODE:
+		lower_limit = sbi->hot_cold_params.warm_node_lower_limit;
+		waterline = sbi->hot_cold_params.warm_node_waterline;
+		break;
+	default:
+		return false;
+	}
+
+	if (left_space > lower_limit)
+		return false;
+
+	if (div_u64((free_segs - ovp_segments) * 100,
+			(left_space / KBS_PER_SEGMENT)) <= waterline) {
+		trace_f2fs_grading_ssr_allocate(
+			(le64_to_cpu(sbi->raw_super->block_count) -
+					sbi->total_valid_block_count),
+			free_segments(sbi), contig_level);
+		return true;
+	} else {
+		return false;
+	}
+}
+#endif
+
 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
 {
	struct inmem_pages *new;
@@ -510,8 +597,19 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
	 * dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
-		down_write(&sbi->gc_lock);
-		f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+		if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+					sbi->gc_thread->f2fs_gc_task) {
+			DEFINE_WAIT(wait);
+
+			prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
+						TASK_UNINTERRUPTIBLE);
+			wake_up(&sbi->gc_thread->gc_wait_queue_head);
+			io_schedule();
+			finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+		} else {
+			down_write(&sbi->gc_lock);
+			f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+		}
	}
 }
@@ -1098,7 +1196,7 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
		else
			size = max_blocks;
		map = (unsigned long *)(sentry->cur_valid_map);
-		offset = __find_rev_next_bit(map, size, offset);
+		offset = find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
		blk = START_BLOCK(sbi, segno + 1);
	}
@@ -1106,43 +1204,41 @@ static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
 }

 static void __init_discard_policy(struct f2fs_sb_info *sbi,
-				struct discard_policy *dpolicy,
+				struct discard_policy *policy,
				int discard_type, unsigned int granularity)
 {
-	/* common policy */
-	dpolicy->type = discard_type;
-	dpolicy->sync = true;
-	dpolicy->ordered = false;
-	dpolicy->granularity = granularity;
-
-	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
-	dpolicy->io_aware_gran = MAX_PLIST_NUM;
-	dpolicy->timeout = false;
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (discard_type == DPOLICY_BG) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-		dpolicy->io_aware = true;
-		dpolicy->sync = false;
-		dpolicy->ordered = true;
-		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
-			dpolicy->granularity = 1;
-			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		}
+		*policy = dpolicys[DPOLICY_BG];
+	} else if (discard_type == DPOLICY_BALANCE) {
+		*policy = dpolicys[DPOLICY_BALANCE];
	} else if (discard_type == DPOLICY_FORCE) {
-		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
-		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
-		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
-		dpolicy->io_aware = false;
+		*policy = dpolicys[DPOLICY_FORCE];
	} else if (discard_type == DPOLICY_FSTRIM) {
-		dpolicy->io_aware = false;
+		*policy = dpolicys[DPOLICY_FSTRIM];
+		if (policy->granularity != granularity)
+			policy->granularity = granularity;
	} else if (discard_type == DPOLICY_UMOUNT) {
-		dpolicy->io_aware = false;
-		/* we need to issue all to keep CP_TRIMMED_FLAG */
-		dpolicy->granularity = 1;
-		dpolicy->timeout = true;
+		*policy = dpolicys[DPOLICY_UMOUNT];
+	}
+	dcc->discard_type = discard_type;
+}
+
+static void select_sub_discard_policy(struct discard_sub_policy **spolicy,
+				int index, struct discard_policy *dpolicy)
+{
+	if (dpolicy->type == DPOLICY_FSTRIM) {
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
+		return;
	}
+
+	if ((index + 1) >= DISCARD_GRAN_BG)
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
+	else if ((index + 1) >= DISCARD_GRAN_BL)
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_MID];
+	else
+		*spolicy = &dpolicy->sub_policy[SUB_POLICY_SMALL];
 }

 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
@@ -1151,6 +1247,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
+						int spolicy_index,
						struct discard_cmd *dc,
						unsigned int *issued)
 {
@@ -1162,9 +1259,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
+	struct discard_sub_policy *spolicy = NULL;
	block_t lstart, start, len, total_len;
	int err = 0;

+	select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);
+
	if (dc->state != D_PREP)
		return 0;
@@ -1180,7 +1280,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,

	dc->len = 0;

-	while (total_len && *issued < dpolicy->max_requests && !err) {
+	while (total_len && *issued < spolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;
@@ -1191,7 +1291,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
		}

		(*issued)++;
-		if (*issued == dpolicy->max_requests)
+		if (*issued == spolicy->max_requests)
			last = true;

		dc->len += len;
@@ -1438,7 +1538,8 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
 }

 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
-					struct discard_policy *dpolicy)
+					struct discard_policy *dpolicy,
+					int spolicy_index)
 {
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
@@ -1448,8 +1549,11 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;
+	struct discard_sub_policy *spolicy = NULL;

+	select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);
	mutex_lock(&dcc->cmd_lock);
+
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
@@ -1473,9 +1577,9 @@ static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
		}

		dcc->next_pos = dc->lstart + dc->len;
-		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+		err = __submit_discard_cmd(sbi, dpolicy, spolicy_index, dc, &issued);

-		if (issued >= dpolicy->max_requests)
+		if (issued >= spolicy->max_requests)
			break;
 next:
		node = rb_next(&dc->rb_node);
@@ -1508,11 +1612,19 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
	struct blk_plug plug;
	int i, issued;
	bool io_interrupted = false;
+	struct discard_sub_policy *spolicy = NULL;

	if (dpolicy->timeout)
		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);

+	/* only do this check in CHECK_FS, as it may be time-consuming */
+	if (unlikely(dcc->rbtree_check)) {
+		mutex_lock(&dcc->cmd_lock);
+		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
+		mutex_unlock(&dcc->cmd_lock);
+	}
 retry:
+	blk_start_plug(&plug);
	issued = 0;
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (dpolicy->timeout &&
@@ -1522,8 +1634,13 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
		if (i + 1 < dpolicy->granularity)
			break;

-		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
-			return __issue_discard_cmd_orderly(sbi, dpolicy);
+		select_sub_discard_policy(&spolicy, i, dpolicy);
+
+		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) {
+			issued = __issue_discard_cmd_orderly(sbi, dpolicy, i);
+			blk_finish_plug(&plug);
+			return issued;
+		}

		pend_list = &dcc->pend_list[i];
@@ -1533,7 +1650,6 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root, false));
-		blk_start_plug(&plug);

		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1544,22 +1660,24 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
						!is_idle(sbi, DISCARD_TIME)) {
				io_interrupted = true;
-				break;
+				goto skip;
			}
-
-			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
-
-			if (issued >= dpolicy->max_requests)
+			__submit_discard_cmd(sbi, dpolicy, i, dc, &issued);
+skip:
+			if (issued >= spolicy->max_requests)
				break;
		}
-		blk_finish_plug(&plug);
 next:
		mutex_unlock(&dcc->cmd_lock);

-		if (issued >= dpolicy->max_requests || io_interrupted)
+		if (issued >= spolicy->max_requests || io_interrupted)
			break;
	}

+	blk_finish_plug(&plug);
+	if (spolicy)
+		dpolicy->min_interval = spolicy->interval;
+
	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
		__wait_all_discard_cmd(sbi, dpolicy);
		goto retry;
@@ -1720,8 +1838,7 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
	struct discard_policy dpolicy;
	bool dropped;

-	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
-					dcc->discard_granularity);
+	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 0);
	__issue_discard_cmd(sbi, &dpolicy);
	dropped = __drop_discard_cmd(sbi);
@@ -1732,6 +1849,29 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
	return dropped;
 }

+static int select_discard_type(struct f2fs_sb_info *sbi)
+{
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	block_t user_block_count = sbi->user_block_count;
+	block_t ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
+	block_t fs_available_blocks = user_block_count -
+				valid_user_blocks(sbi) + ovp_count;
+	int discard_type;
+
+	if (fs_available_blocks >= fs_free_space_threshold(sbi) &&
+			fs_available_blocks - dcc->undiscard_blks >=
+			device_free_space_threshold(sbi)) {
+		discard_type = DPOLICY_BG;
+	} else if (fs_available_blocks < fs_free_space_threshold(sbi) &&
+			fs_available_blocks - dcc->undiscard_blks <
+			device_free_space_threshold(sbi)) {
+		discard_type = DPOLICY_FORCE;
+	} else {
+		discard_type = DPOLICY_BALANCE;
+	}
+	return discard_type;
+}
+
 static int issue_discard_thread(void *data)
 {
	struct f2fs_sb_info *sbi = data;
@@ -1739,13 +1879,13 @@ static int issue_discard_thread(void *data)
	wait_queue_head_t *q = &dcc->discard_wait_queue;
	struct discard_policy dpolicy;
	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
-	int issued;
+	int issued, discard_type;

	set_freezable();

	do {
-		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
-					dcc->discard_granularity);
+		discard_type = select_discard_type(sbi);
+		__init_discard_policy(sbi, &dpolicy, discard_type, 0);

		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
@@ -1771,7 +1911,7 @@ static int issue_discard_thread(void *data)
		}

		if (sbi->gc_mode == GC_URGENT_HIGH)
-			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 0);

		sb_start_intwrite(sbi->sb);
@@ -1916,11 +2056,11 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,

	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
-		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
+		start = find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

-		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
+		end = find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		if (force && start && end != max_blocks &&
					(end - start) < cpc->trim_minlen)
			continue;
@@ -2088,7 +2228,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
	if (!dcc)
		return -ENOMEM;

-	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
+	dcc->discard_granularity = DISCARD_GRAN_BG;
	INIT_LIST_HEAD(&dcc->entry_list);
	for (i = 0; i < MAX_PLIST_NUM; i++)
		INIT_LIST_HEAD(&dcc->pend_list[i]);
@@ -2631,7 +2771,7 @@ static void __next_free_blkoff(struct f2fs_sb_info *sbi,
	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

-	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
+	pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
 }
@@ -2662,7 +2802,7 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

-	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0);
+	pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0);

	return pos < sbi->blocks_per_seg;
 }
@@ -2869,7 +3009,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
  * This function should be returned with success, otherwise BUG
  */
 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
-						int type, bool force)
+						int type, bool force, int contig_level)
 {
	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2882,8 +3022,12 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
			is_next_segment_free(sbi, curseg, type) &&
			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		new_curseg(sbi, type, false);
+#ifdef CONFIG_F2FS_GRADING_SSR
+	else if (need_ssr_by_type(sbi, type, contig_level) && get_ssr_segment(sbi, type, SSR, 0))
+#else
	else if (f2fs_need_SSR(sbi) &&
			get_ssr_segment(sbi, type, SSR, 0))
+#endif
		change_curseg(sbi, type, true);
	else
		new_curseg(sbi, type, false);
@@ -2941,7 +3085,7 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
		return;
 alloc:
	old_segno = curseg->segno;
-	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
+	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true, SEQ_NONE);
	locate_dirty_segment(sbi, old_segno);
 }
@@ -3003,8 +3147,17 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
+	struct discard_sub_policy *spolicy = NULL;
	int issued;
	unsigned int trimmed = 0;

+	/* fstrim issues 8 discards at a time, without interruption */
+	select_sub_discard_policy(&spolicy, 0, dpolicy);
+
+	if (dcc->rbtree_check) {
+		mutex_lock(&dcc->cmd_lock);
+		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
+		mutex_unlock(&dcc->cmd_lock);
+	}
 next:
	issued = 0;
@@ -3036,9 +3189,9 @@ static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
			goto skip;
		}

-		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
+		err = __submit_discard_cmd(sbi, dpolicy, 0, dc, &issued);

-		if (issued >= dpolicy->max_requests) {
+		if (issued >= spolicy->max_requests) {
			start = dc->lstart + dc->len;

			if (err)
@@ -3332,13 +3485,17 @@ static int __get_segment_type(struct f2fs_io_info *fio)
 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type,
-		struct f2fs_io_info *fio)
+		struct f2fs_io_info *fio, int contig_level)
 {
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned long long old_mtime;
	bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
	struct seg_entry *se = NULL;
+#ifdef CONFIG_F2FS_GRADING_SSR
+	struct inode *inode = NULL;
+#endif
+	int contig = SEQ_NONE;

	down_read(&SM_I(sbi)->curseg_lock);
@@ -3385,11 +3542,25 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		update_sit_entry(sbi, old_blkaddr, -1);

	if (!__has_curseg_space(sbi, curseg)) {
-		if (from_gc)
+		if (from_gc) {
			get_atssr_segment(sbi, type, se->type,
						AT_SSR, se->mtime);
-		else
-			sit_i->s_ops->allocate_segment(sbi, type, false);
+		} else {
+#ifdef CONFIG_F2FS_GRADING_SSR
+			if (contig_level != SEQ_NONE) {
+				contig = contig_level;
+				goto allocate_label;
+			}
+
+			if (page && page->mapping && page->mapping != NODE_MAPPING(sbi) &&
+				page->mapping != META_MAPPING(sbi)) {
+				inode = page->mapping->host;
+				contig = check_io_seq(get_dirty_pages(inode));
+			}
+allocate_label:
+#endif
+			sit_i->s_ops->allocate_segment(sbi, type, false, contig);
+		}
	}
	/*
	 * segment dirty status should be updated after segment allocation,
@@ -3456,7 +3627,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
	down_read(&fio->sbi->io_order_lock);
 reallocate:
	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
-			&fio->new_blkaddr, sum, type, fio);
+			&fio->new_blkaddr, sum, type, fio, SEQ_NONE);
	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(fio->sbi),
				fio->old_blkaddr, fio->old_blkaddr);
@@ -4825,7 +4996,7 @@ static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
	f2fs_notice(sbi, "Assign new section to curseg[%d]: "
			"curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
-	allocate_segment_by_default(sbi, type, true);
+	allocate_segment_by_default(sbi, type, true, SEQ_NONE);

	/* check consistency of the zone curseg pointed to */
	if (check_zone_write_pointer(sbi, zbd, &zone))
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 1bf33fc27b8f83b69630c1266c67e36a45318627..fa18a6b6fc4c6158de938857c7d327feae40314c 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -130,7 +130,18 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
	(((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK)
 #define SECTOR_TO_BLOCK(sectors)					\
	((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK)
+#ifdef CONFIG_F2FS_GRADING_SSR
+#define KBS_PER_SEGMENT		2048
+#define SSR_MIN_BLKS_LIMIT	(16 << 18)	/* 16G */
+#define SSR_CONTIG_DIRTY_NUMS	32	/* dirty-page threshold for LFS allocation in grading SSR */
+#define SSR_CONTIG_LARGE	256	/* large files */
+#endif
+enum {
+	SEQ_NONE,
+	SEQ_32BLKS,
+	SEQ_256BLKS
+};
 /*
  * indicate a block allocation direction: RIGHT and LEFT.
  * RIGHT means allocating new sections towards the end of volume.
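
Aside on the arithmetic above: need_ssr_by_type() converts free blocks to KiB (<< 2 for 4 KiB blocks), subtracts the overprovisioned segments, and falls back to SSR only when the free-segment count drops to or below a percentage (the waterline) of the segments' worth of remaining space. The standalone C sketch below restates that check outside the kernel so the units are easier to follow; KBS_PER_SEGMENT mirrors the define above, and every concrete number in main() is an illustrative assumption, not a value read from a real device.

	/* Minimal userspace sketch of the grading-SSR waterline check. */
	#include <stdbool.h>
	#include <stdio.h>

	#define KBS_PER_SEGMENT	2048	/* 512 blocks * 4 KiB, as in segment.h */

	/*
	 * left_space_kb: remaining space in KiB after subtracting the
	 * overprovisioned segments; SSR is chosen once free segments fall
	 * to or below the waterline percentage of that remaining space.
	 */
	static bool grading_ssr_needed(unsigned long long left_space_kb,
				       unsigned int free_segs,
				       unsigned int ovp_segs,
				       unsigned long long lower_limit_kb,
				       unsigned int waterline)
	{
		unsigned long long left_segs = left_space_kb / KBS_PER_SEGMENT;

		if (left_space_kb > lower_limit_kb)	/* plenty of space: stay LFS */
			return false;
		if (!left_segs)
			return false;
		return (unsigned long long)(free_segs - ovp_segs) * 100 / left_segs
				<= waterline;
	}

	int main(void)
	{
		/* 4 GiB left, 1500 free segments, 200 overprovisioned,
		 * 5 GiB lower limit, 80% waterline */
		printf("need ssr: %d\n",
		       grading_ssr_needed(4ULL << 20, 1500, 200, 5ULL << 20, 80));
		return 0;
	}

For these sample numbers the program prints "need ssr: 1": 1300 usable free segments are about 63% of the 2048 segments of space left, which is below the assumed 80% waterline.
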
@@ -180,6 +191,13 @@ enum {
	FORCE_FG_GC,
 };

+#ifdef CONFIG_F2FS_GRADING_SSR
+enum {
+	GRADING_SSR_OFF = 0,
+	GRADING_SSR_ON
+};
+#endif
+
 /* for a function parameter to select a victim segment */
 struct victim_sel_policy {
	int alloc_mode;			/* LFS or SSR */
@@ -221,7 +239,7 @@ struct sec_entry {
 };

 struct segment_allocation {
-	void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
+	void (*allocate_segment)(struct f2fs_sb_info *, int, bool, int);
 };

 #define MAX_SKIP_GC_COUNT			16
@@ -913,3 +931,14 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
	dcc->discard_wake = 1;
	wake_up_interruptible_all(&dcc->discard_wait_queue);
 }
+
+#ifdef CONFIG_F2FS_GRADING_SSR
+static inline int check_io_seq(int blks)
+{
+	if (blks >= SSR_CONTIG_LARGE)
+		return SEQ_256BLKS;
+	if (blks >= SSR_CONTIG_DIRTY_NUMS)
+		return SEQ_32BLKS;
+	return SEQ_NONE;
+}
+#endif
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index de543168b3708e331cd468b1810f95c691693aad..b305e024d46f4d1a68d0b76def9637940f9f1b4e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -36,6 +36,19 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/f2fs.h>

+#ifdef CONFIG_F2FS_GRADING_SSR
+#define SSR_DEFAULT_SPACE_LIMIT	(5<<20)	/* 5G default space limit */
+#define SSR_DEFAULT_WATERLINE	80	/* 80% default waterline */
+#define SSR_HN_SPACE_LIMIT_128G	(8<<20)	/* 8G default hot node space limit for 128G devices */
+#define SSR_HN_WATERLINE_128G	80	/* 80% default hot node waterline for 128G devices */
+#define SSR_WN_SPACE_LIMIT_128G	(5<<20)	/* 5G default warm node space limit for 128G devices */
+#define SSR_WN_WATERLINE_128G	70	/* 70% default warm node waterline for 128G devices */
+#define SSR_HD_SPACE_LIMIT_128G	(8<<20)	/* 8G default hot data space limit for 128G devices */
+#define SSR_HD_WATERLINE_128G	65	/* 65% default hot data waterline for 128G devices */
+#define SSR_WD_SPACE_LIMIT_128G	(5<<20)	/* 5G default warm data space limit for 128G devices */
+#define SSR_WD_WATERLINE_128G	60	/* 60% default warm data waterline for 128G devices */
+#endif
+
 static struct kmem_cache *f2fs_inode_cachep;

 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -147,6 +160,8 @@ enum {
	Opt_compress_log_size,
	Opt_compress_extension,
	Opt_atgc,
+	Opt_gc_merge,
+	Opt_nogc_merge,
	Opt_err,
 };
@@ -215,6 +230,8 @@ static match_table_t f2fs_tokens = {
	{Opt_compress_log_size, "compress_log_size=%u"},
	{Opt_compress_extension, "compress_extension=%s"},
	{Opt_atgc, "atgc"},
+	{Opt_gc_merge, "gc_merge"},
+	{Opt_nogc_merge, "nogc_merge"},
	{Opt_err, NULL},
 };
@@ -944,6 +961,12 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
		case Opt_atgc:
			set_opt(sbi, ATGC);
			break;
+		case Opt_gc_merge:
+			set_opt(sbi, GC_MERGE);
+			break;
+		case Opt_nogc_merge:
+			clear_opt(sbi, GC_MERGE);
+			break;
		default:
			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
				p);
@@ -1536,6 +1559,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
	else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF)
		seq_printf(seq, ",background_gc=%s", "off");

+	if (test_opt(sbi, GC_MERGE))
+		seq_puts(seq, ",gc_merge");
+
	if (test_opt(sbi, DISABLE_ROLL_FORWARD))
		seq_puts(seq, ",disable_roll_forward");
	if (test_opt(sbi, NORECOVERY))
@@ -1902,7 +1928,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
	 * option. Also sync the filesystem.
	 */
	if ((*flags & SB_RDONLY) ||
-			F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) {
+			(F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
+			!test_opt(sbi, GC_MERGE))) {
		if (sbi->gc_thread) {
			f2fs_stop_gc_thread(sbi);
			need_restart_gc = true;
@@ -3489,6 +3516,35 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
		sbi->readdir_ra = 1;
 }

+#ifdef CONFIG_F2FS_GRADING_SSR
+static void f2fs_init_grading_ssr(struct f2fs_sb_info *sbi)
+{
+	u32 total_blocks = le64_to_cpu(sbi->raw_super->block_count) >> 18;
+
+	if (total_blocks > 64) {	/* 64G */
+		sbi->hot_cold_params.hot_data_lower_limit = SSR_HD_SPACE_LIMIT_128G;
+		sbi->hot_cold_params.hot_data_waterline = SSR_HD_WATERLINE_128G;
+		sbi->hot_cold_params.warm_data_lower_limit = SSR_WD_SPACE_LIMIT_128G;
+		sbi->hot_cold_params.warm_data_waterline = SSR_WD_WATERLINE_128G;
+		sbi->hot_cold_params.hot_node_lower_limit = SSR_HN_SPACE_LIMIT_128G;
+		sbi->hot_cold_params.hot_node_waterline = SSR_HN_WATERLINE_128G;
+		sbi->hot_cold_params.warm_node_lower_limit = SSR_WN_SPACE_LIMIT_128G;
+		sbi->hot_cold_params.warm_node_waterline = SSR_WN_WATERLINE_128G;
+		sbi->hot_cold_params.enable = GRADING_SSR_OFF;
+	} else {
+		sbi->hot_cold_params.hot_data_lower_limit = SSR_DEFAULT_SPACE_LIMIT;
+		sbi->hot_cold_params.hot_data_waterline = SSR_DEFAULT_WATERLINE;
+		sbi->hot_cold_params.warm_data_lower_limit = SSR_DEFAULT_SPACE_LIMIT;
+		sbi->hot_cold_params.warm_data_waterline = SSR_DEFAULT_WATERLINE;
+		sbi->hot_cold_params.hot_node_lower_limit = SSR_DEFAULT_SPACE_LIMIT;
+		sbi->hot_cold_params.hot_node_waterline = SSR_DEFAULT_WATERLINE;
+		sbi->hot_cold_params.warm_node_lower_limit = SSR_DEFAULT_SPACE_LIMIT;
+		sbi->hot_cold_params.warm_node_waterline = SSR_DEFAULT_WATERLINE;
+		sbi->hot_cold_params.enable = GRADING_SSR_OFF;
+	}
+}
+#endif
+
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 {
	struct f2fs_sb_info *sbi;
@@ -3781,7 +3837,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
		err = -ENOMEM;
		goto free_node_inode;
	}
-
+#ifdef CONFIG_F2FS_GRADING_SSR
+	f2fs_init_grading_ssr(sbi);
+#endif
	err = f2fs_register_sysfs(sbi);
	if (err)
		goto free_root_inode;
@@ -3872,7 +3930,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
	 * If filesystem is not mounted as read-only then
	 * do start the gc_thread.
	 */
-	if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) {
+	if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF ||
+		test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) {
		/* After POR, we can run background GC thread.*/
		err = f2fs_start_gc_thread(sbi);
		if (err)
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index b8850c81068a0e861413041c0c78482f35dcd839..c90280c3168fba4626a3aca013f8e9492c6ed979 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -34,6 +34,9 @@ enum {
	FAULT_INFO_TYPE,	/* struct f2fs_fault_info */
 #endif
	RESERVED_BLOCKS,	/* struct f2fs_sb_info */
+#ifdef CONFIG_F2FS_GRADING_SSR
+	F2FS_HOT_COLD_PARAMS,	/* struct f2fs_hot_cold_params */
+#endif
 };

 struct f2fs_attr {
@@ -61,6 +64,10 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
		return (unsigned char *)NM_I(sbi);
	else if (struct_type == F2FS_SBI || struct_type == RESERVED_BLOCKS)
		return (unsigned char *)sbi;
+#ifdef CONFIG_F2FS_GRADING_SSR
+	else if (struct_type == F2FS_HOT_COLD_PARAMS)
+		return (unsigned char *)&sbi->hot_cold_params;
+#endif
 #ifdef CONFIG_F2FS_FAULT_INJECTION
	else if (struct_type == FAULT_INFO_RATE ||
					struct_type == FAULT_INFO_TYPE)
@@ -542,6 +549,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent, gc_mode);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_type, discard_type);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
@@ -568,6 +576,26 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
+#ifdef CONFIG_F2FS_GRADING_SSR
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_data_lower_limit, hot_data_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_data_waterline, hot_data_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_data_lower_limit, warm_data_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_data_waterline, warm_data_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_node_lower_limit, hot_node_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_hot_node_waterline, hot_node_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_node_lower_limit, warm_node_lower_limit);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_warm_node_waterline, warm_node_waterline);
+F2FS_RW_ATTR(F2FS_HOT_COLD_PARAMS, f2fs_hot_cold_params,
+		hc_enable, enable);
+#endif
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 F2FS_RW_ATTR(FAULT_INFO_RATE, f2fs_fault_info, inject_rate, inject_rate);
 F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
@@ -631,6 +659,7 @@ static struct attribute *f2fs_attrs[] = {
	ATTR_LIST(main_blkaddr),
	ATTR_LIST(max_small_discards),
	ATTR_LIST(discard_granularity),
+	ATTR_LIST(discard_type),
	ATTR_LIST(batched_trim_sections),
	ATTR_LIST(ipu_policy),
	ATTR_LIST(min_ipu_util),
@@ -677,6 +706,17 @@ static struct attribute *f2fs_attrs[] = {
	ATTR_LIST(moved_blocks_foreground),
	ATTR_LIST(moved_blocks_background),
	ATTR_LIST(avg_vblocks),
+#endif
+#ifdef CONFIG_F2FS_GRADING_SSR
+	ATTR_LIST(hc_hot_data_lower_limit),
+	ATTR_LIST(hc_hot_data_waterline),
+	ATTR_LIST(hc_warm_data_lower_limit),
+	ATTR_LIST(hc_warm_data_waterline),
+	ATTR_LIST(hc_hot_node_lower_limit),
+	ATTR_LIST(hc_hot_node_waterline),
+	ATTR_LIST(hc_warm_node_lower_limit),
+	ATTR_LIST(hc_warm_node_waterline),
+	ATTR_LIST(hc_enable),
 #endif
	NULL,
 };
@@ -908,6 +948,66 @@ static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
	return 0;
 }

+static int undiscard_info_seq_show(struct seq_file *seq, void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int total_segs = le32_to_cpu(sbi->raw_super->segment_count_main);
+	unsigned int total = 0;
+	unsigned int i, j;
+	unsigned int max_blocks = sbi->blocks_per_seg;
+	unsigned long *dmap = SIT_I(sbi)->tmp_map;
+
+	if (!f2fs_realtime_discard_enable(sbi))
+		goto out;
+
+	for (i = 0; i < total_segs; i++) {
+		struct seg_entry *se = get_seg_entry(sbi, i);
+		unsigned int entries = SIT_VBLOCK_MAP_SIZE /
+				sizeof(unsigned long);
+		unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
+		unsigned long *discard_map = (unsigned long *)se->discard_map;
+		int start = 0, end = -1;
+
+		down_write(&sit_i->sentry_lock);
+		if (se->valid_blocks == max_blocks) {
+			up_write(&sit_i->sentry_lock);
+			continue;
+		}
+
+		if (se->valid_blocks == 0) {
+			mutex_lock(&dirty_i->seglist_lock);
+			if (test_bit((int)i, dirty_i->dirty_segmap[PRE]))
+				total += 512;
+			mutex_unlock(&dirty_i->seglist_lock);
+		} else {
+			for (j = 0; j < entries; j++)
+				dmap[j] = ~ckpt_map[j] & ~discard_map[j];
+			while (1) {
+				start = (int)find_rev_next_bit(dmap,
+					(unsigned long)max_blocks,
+					(unsigned long)(end + 1));
+
+				if ((unsigned int)start >= max_blocks)
+					break;
+
+				end = (int)find_rev_next_zero_bit(dmap,
+					(unsigned long)max_blocks,
+					(unsigned long)(start + 1));
+				total += (unsigned int)(end - start);
+			}
+		}
+
+		up_write(&sit_i->sentry_lock);
+	}
+
+out:
+	seq_printf(seq, "total undiscard:%u K\n", total * 4);
+	return 0;
+}
+
 int __init f2fs_init_sysfs(void)
 {
	int ret;
@@ -964,6 +1064,9 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
				iostat_info_seq_show, sb);
		proc_create_single_data("victim_bits", S_IRUGO, sbi->s_proc,
				victim_bits_seq_show, sb);
+		proc_create_single_data("undiscard_info", S_IRUGO, sbi->s_proc,
+				undiscard_info_seq_show, sb);
+
	}
	return 0;
 }
@@ -975,6 +1078,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
		remove_proc_entry("segment_info", sbi->s_proc);
		remove_proc_entry("segment_bits", sbi->s_proc);
		remove_proc_entry("victim_bits", sbi->s_proc);
+		remove_proc_entry("undiscard_info", sbi->s_proc);
		remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
	}
	kobject_del(&sbi->s_kobj);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 56b113e3cd6aa48985c00c3385f6a7cb9afe7b2d..b4fe1db78eaed0fb640cedea35d69519f37850a1 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1957,6 +1957,39 @@ TRACE_EVENT(f2fs_fiemap,
		__entry->ret)
 );

+#ifdef CONFIG_F2FS_GRADING_SSR
+DECLARE_EVENT_CLASS(f2fs_grading_ssr,
+
+	TP_PROTO(unsigned int left, unsigned int free,
+		unsigned int seq),
+
+	TP_ARGS(left, free, seq),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, left)
+		__field(unsigned int, free)
+		__field(unsigned int, seq)
+	),
+
+	TP_fast_assign(
+		__entry->left = left;
+		__entry->free = free;
+		__entry->seq = seq;
+	),
+
+	TP_printk("ssr: left_space %u free_segments: %u is_seq: %u",
+		__entry->left, __entry->free, __entry->seq)
+);
+
+DEFINE_EVENT(f2fs_grading_ssr, f2fs_grading_ssr_allocate,
+
+	TP_PROTO(unsigned int left, unsigned int free,
+		unsigned int seq),
+
+	TP_ARGS(left, free, seq)
+);
+#endif
+
 #endif /* _TRACE_F2FS_H */

 /* This part must be outside protection */
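
Usage note: with the pieces above in place, gc_merge is enabled at mount time and can be turned off again with nogc_merge on remount. The sketch below shows the mount call from a minimal C program; the device and mountpoint paths are made-up examples, not paths from the patch. After mounting, the grading-SSR waterlines remain tunable through the hc_* sysfs attributes added above, and the new per-volume undiscard_info proc entry reports the pending undiscarded space.

	/* Hypothetical usage sketch: mounting f2fs with gc_merge via mount(2). */
	#include <stdio.h>
	#include <sys/mount.h>

	int main(void)
	{
		/* background_gc=on keeps the GC thread running; gc_merge lets it
		 * also serve foreground GC requests from f2fs_balance_fs(). */
		if (mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0,
			  "background_gc=on,gc_merge") != 0) {
			perror("mount");
			return 1;
		}
		return 0;
	}
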