diff --git a/gcc/auto-profile.c b/gcc/auto-profile.c index 7d09887c9e189a9ade2fa0479b0fe71bf469da50..aced8fca51a78a8ae67ef0b46984606ed6a549cc 100644 --- a/gcc/auto-profile.c +++ b/gcc/auto-profile.c @@ -49,6 +49,9 @@ along with GCC; see the file COPYING3. If not see #include "auto-profile.h" #include "tree-pretty-print.h" #include "gimple-pretty-print.h" +#include +#include +#include /* The following routines implements AutoFDO optimization. @@ -95,6 +98,7 @@ along with GCC; see the file COPYING3. If not see */ #define DEFAULT_AUTO_PROFILE_FILE "fbdata.afdo" +#define DEFAULT_CACHE_MISSES_PROFILE_FILE "cmsdata.gcov" #define AUTO_PROFILE_VERSION 1 namespace autofdo @@ -117,6 +121,14 @@ private: bool annotated_; }; +/* Order (function decl_uid, count) pairs by descending count. */ +static bool +event_count_cmp (std::pair &a, + std::pair &b) +{ + return a.second > b.second; +} + /* Represent a source location: (function_decl, lineno). */ typedef std::pair decl_lineno; @@ -338,6 +350,206 @@ static autofdo_source_profile *afdo_source_profile; /* gcov_summary structure to store the profile_info. */ static gcov_summary *afdo_profile_info; +/* Check opts->x_flags and put file name into EVENT_FILES. */ + +static bool +get_all_profile_names (const char **event_files) +{ + if (!(flag_auto_profile || flag_cache_misses_profile)) + { + return false; + } + + event_files[INST_EXEC] = auto_profile_file; + + if (cache_misses_profile_file == NULL) + { + cache_misses_profile_file = DEFAULT_CACHE_MISSES_PROFILE_FILE; + } + event_files[CACHE_MISSES] = cache_misses_profile_file; + + return true; +} + +static void read_profile (void); + +/* Maintain multiple profile data of different events with event_loc_count_map + and event_func_count_map. 
*/ + +class extend_auto_profile +{ +public: + bool auto_profile_exist (enum event_type type); + gcov_type get_loc_count (location_t, event_type); + gcov_type get_func_count (unsigned, event_type); + struct rank_info get_func_rank (unsigned, enum event_type); + /* There should be only one instance of class EXTEND_AUTO_PROFILE. */ + static extend_auto_profile *create () + { + extend_auto_profile *map = new extend_auto_profile (); + if (map->read ()) + { + return map; + } + delete map; + return NULL; + } +private: + /* Basic maps of extend_auto_profile. */ + typedef std::map loc_count_map; + typedef std::map func_count_map; + + /* Map of function_uid to its descending order rank of counts. */ + typedef std::map rank_map; + + /* Mapping hardware events to corresponding basic maps. */ + typedef std::map event_loc_count_map; + typedef std::map event_func_count_map; + typedef std::map event_rank_map; + + extend_auto_profile () {} + bool read (); + void set_loc_count (); + void process_extend_source_profile (); + void read_extend_afdo_file (const char*, event_type); + void rank_all_func (); + void dump_event (); + event_loc_count_map event_loc_map; + event_func_count_map event_func_map; + event_rank_map func_rank; + event_type profile_type; +}; + +/* Member functions for extend_auto_profile. 
*/ + +bool +extend_auto_profile::auto_profile_exist (enum event_type type) +{ + switch (type) + { + case INST_EXEC: + return event_func_map.count (INST_EXEC) != 0 + || event_loc_map.count (INST_EXEC) != 0; + case CACHE_MISSES: + return event_func_map.count (CACHE_MISSES) != 0 + || event_loc_map.count (CACHE_MISSES) != 0; + default: + return false; + } +} + +void +extend_auto_profile::dump_event () +{ + if (dump_file) + { + switch (profile_type) + { + case INST_EXEC: + fprintf (dump_file, "Processing event instruction execution.\n"); + break; + case CACHE_MISSES: + fprintf (dump_file, "Processing event cache misses.\n"); + break; + default: + break; + } + } +} + +/* Return true if any profile data was read. */ + +bool +extend_auto_profile::read () +{ + const char *event_files[EVENT_NUMBER] = {NULL}; + if (!get_all_profile_names (event_files)) + { + return false; + } + + /* Backup AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE since we will create + new ones for each event_type. */ + autofdo::string_table *string_table_afdo = afdo_string_table; + autofdo::autofdo_source_profile *source_profile_afdo = afdo_source_profile; + + for (unsigned i = 0; i < EVENT_NUMBER; i++) + { + if (event_files[i] == NULL) + { + continue; + } + profile_type = (enum event_type) i; + dump_event (); + gcov_close (); + auto_profile_file = event_files[i]; + read_profile (); + gcov_close (); + + process_extend_source_profile (); + + delete afdo_source_profile; + delete afdo_string_table; + } + + /* Restore AFDO_STRING_TABLE and AFDO_SOURCE_PROFILE. Function + END_AUTO_PROFILE will free them at the end of compilation. */ + afdo_string_table = string_table_afdo; + afdo_source_profile = source_profile_afdo; + return true; +} + +/* Helper functions. 
*/ + +gcov_type +extend_auto_profile::get_loc_count (location_t loc, event_type type) +{ + event_loc_count_map::iterator event_iter = event_loc_map.find (type); + if (event_iter != event_loc_map.end ()) + { + loc_count_map::iterator loc_iter = event_iter->second.find (loc); + if (loc_iter != event_iter->second.end ()) + { + return loc_iter->second; + } + } + return 0; +} + +struct rank_info +extend_auto_profile::get_func_rank (unsigned decl_uid, enum event_type type) +{ + struct rank_info info = {0, 0}; + event_rank_map::iterator event_iter = func_rank.find (type); + if (event_iter != func_rank.end ()) + { + rank_map::iterator func_iter = event_iter->second.find (decl_uid); + if (func_iter != event_iter->second.end ()) + { + info.rank = func_iter->second; + info.total = event_iter->second.size (); + } + } + return info; +} + +gcov_type +extend_auto_profile::get_func_count (unsigned decl_uid, event_type type) +{ + event_func_count_map::iterator event_iter = event_func_map.find (type); + if (event_iter != event_func_map.end ()) + { + func_count_map::iterator func_iter = event_iter->second.find (decl_uid); + if (func_iter != event_iter->second.end ()) + { + return func_iter->second; + } + } + return 0; +} + +static extend_auto_profile *extend_profile; + /* Helper functions. 
*/ /* Return the original name of NAME: strip the suffix that starts @@ -1654,6 +1866,131 @@ auto_profile (void) return TODO_rebuild_cgraph_edges; } + +void +extend_auto_profile::rank_all_func () +{ + std::vector > func_sorted; + event_func_count_map::iterator event_iter + = event_func_map.find (profile_type); + if (event_iter != event_func_map.end ()) + { + func_count_map::iterator func_iter; + for (func_iter = event_iter->second.begin (); + func_iter != event_iter->second.end (); func_iter++) + { + func_sorted.push_back (std::make_pair (func_iter->first, + func_iter->second)); + } + + std::sort (func_sorted.begin (), func_sorted.end (), event_count_cmp); + + for (unsigned i = 0; i < func_sorted.size (); ++i) + { + func_rank[profile_type][func_sorted[i].first] = i + 1; + } + } +} + +/* Iterate stmts in cfun and maintain its count to EVENT_LOC_MAP. */ + +void +extend_auto_profile::set_loc_count () +{ + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + count_info info; + gimple *stmt = gsi_stmt (gsi); + if (gimple_clobber_p (stmt) || is_gimple_debug (stmt)) + { + continue; + } + if (afdo_source_profile->get_count_info (stmt, &info)) + { + location_t loc = gimple_location (stmt); + event_loc_map[profile_type][loc] += info.count; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "stmt "); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + fprintf (dump_file, "counts %ld\n", + event_loc_map[profile_type][loc]); + } + } + } + } +} + +/* Process data in extend_auto_source_profile, save them into two maps. + 1. gimple_location to count. + 2. function_index to count. 
*/ +void +extend_auto_profile::process_extend_source_profile () +{ + struct cgraph_node *node; + if (symtab->state == FINISHED) + { + return; + } + FOR_EACH_FUNCTION (node) + { + if (!gimple_has_body_p (node->decl) || node->inlined_to) + { + continue; + } + + /* Don't profile functions produced for builtin stuff. */ + if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION) + { + continue; + } + + function *fn = DECL_STRUCT_FUNCTION (node->decl); + push_cfun (fn); + + const function_instance *s + = afdo_source_profile->get_function_instance_by_decl ( + current_function_decl); + + if (s == NULL) + { + pop_cfun (); + continue; + } + unsigned int decl_uid = DECL_UID (current_function_decl); + gcov_type count = s->total_count (); + if (dump_file) + { + fprintf (dump_file, "Extend auto-profile for function %s.\n", + node->dump_name ()); + } + event_func_map[profile_type][decl_uid] += count; + set_loc_count (); + pop_cfun (); + } + rank_all_func (); +} + +/* Main entry of extend_auto_profile. */ + +static void +extend_source_profile () +{ + extend_profile = autofdo::extend_auto_profile::create (); + if (dump_file) + { + if (extend_profile == NULL) + { + fprintf (dump_file, "No profile file is found.\n"); + return; + } + fprintf (dump_file, "Extend profile info generated.\n"); + } +} } /* namespace autofdo. */ /* Read the profile from the profile data file. */ @@ -1682,6 +2019,42 @@ end_auto_profile (void) profile_info = NULL; } +/* Extern function to get profile info in other passes. 
*/ + +bool +profile_exist (enum event_type type) +{ + return autofdo::extend_profile != NULL + && autofdo::extend_profile->auto_profile_exist (type); +} + +gcov_type +event_get_loc_count (location_t loc, event_type type) +{ + return autofdo::extend_profile->get_loc_count (loc, type); +} + +gcov_type +event_get_func_count (unsigned decl_uid, event_type type) +{ + return autofdo::extend_profile->get_func_count (decl_uid, type); +} + +struct rank_info +event_get_func_rank (unsigned decl_uid, enum event_type type) +{ + return autofdo::extend_profile->get_func_rank (decl_uid, type); +} + +void +free_extend_profile_info () +{ + if (autofdo::extend_profile != NULL) + { + delete autofdo::extend_profile; + } +} + /* Returns TRUE if EDGE is hot enough to be inlined early. */ bool @@ -1743,8 +2116,50 @@ public: } // anon namespace +namespace +{ +const pass_data pass_data_ipa_extend_auto_profile = +{ + SIMPLE_IPA_PASS, /* type */ + "ex-afdo", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_IPA_EXTEND_AUTO_PROFILE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_ipa_extend_auto_profile : public simple_ipa_opt_pass +{ +public: + pass_ipa_extend_auto_profile (gcc::context *ctxt) + : simple_ipa_opt_pass (pass_data_ipa_extend_auto_profile, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) {return (flag_ipa_extend_auto_profile > 0);} + virtual unsigned int execute (function *); + +}; + +unsigned int +pass_ipa_extend_auto_profile::execute (function *fun) +{ + autofdo::extend_source_profile (); + return 0; +} +} // anon namespace + simple_ipa_opt_pass * make_pass_ipa_auto_profile (gcc::context *ctxt) { return new pass_ipa_auto_profile (ctxt); } + +simple_ipa_opt_pass * +make_pass_ipa_extend_auto_profile (gcc::context *ctxt) +{ + return new pass_ipa_extend_auto_profile (ctxt); +} \ No newline at end of file diff --git 
a/gcc/auto-profile.h b/gcc/auto-profile.h index f5cff091d85bc8ffe43d487696823e9ac01d7e7a..230d7e68a976395b935e876c2536fb9594317f54 100644 --- a/gcc/auto-profile.h +++ b/gcc/auto-profile.h @@ -21,6 +21,13 @@ along with GCC; see the file COPYING3. If not see #ifndef AUTO_PROFILE_H #define AUTO_PROFILE_H +enum event_type +{ + INST_EXEC = 0, + CACHE_MISSES, + EVENT_NUMBER +}; + /* Read, process, finalize AutoFDO data structures. */ extern void read_autofdo_file (void); extern void end_auto_profile (void); @@ -28,4 +35,25 @@ extern void end_auto_profile (void); /* Returns TRUE if EDGE is hot enough to be inlined early. */ extern bool afdo_callsite_hot_enough_for_early_inline (struct cgraph_edge *); +/* Check if profile exists before using this profile. */ +extern bool profile_exist (enum event_type); + +/* Given func decl_uid or gimple location and event_type, return count. + Count is 0 if function or gimple is not sampled. */ +extern gcov_type event_get_func_count (unsigned, enum event_type); +extern gcov_type event_get_loc_count (location_t, enum event_type); + +struct rank_info +{ + unsigned total; + unsigned rank; +}; + +/* Given function decl_uid and event type, return rank_info. Rank_info + is {0, 0} if function was not sampled. */ +extern struct rank_info event_get_func_rank (unsigned, enum event_type); + +/* Free memory allocated by autofdo::extend_profile. */ +extern void free_extend_profile_info (); + #endif /* AUTO_PROFILE_H */ diff --git a/gcc/common.opt b/gcc/common.opt index 73c24f28d22233eae3745f4746727c32f8e7958e..37cbbd8c07cc597915c052d8c6b9c91fc7ad6501 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1074,6 +1074,16 @@ Common Joined RejectNegative Var(auto_profile_file) Use sample profile information for call graph node weights. The profile file is specified in the argument. +fcache-misses-profile +Common Report Var(flag_cache_misses_profile) +Use sample profile information for source code cache miss count. 
The default +profile file is cmsdata.gcov in `pwd`. + +fcache-misses-profile= +Common Joined RejectNegative Var(cache_misses_profile_file) +Use sample profile information for source code cache miss count. The profile +file is specified in the argument. + ; -fcheck-bounds causes gcc to generate array bounds checks. ; For C, C++ and ObjC: defaults off. ; For Java: defaults to on. @@ -1873,6 +1883,10 @@ fipa-struct-reorg Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization Perform structure layout optimizations. +fipa-extend-auto-profile +Common Report Var(flag_ipa_extend_auto_profile) +Use sample profile information for source code. + fipa-vrp Common Report Var(flag_ipa_vrp) Optimization Perform IPA Value Range Propagation. diff --git a/gcc/opts.c b/gcc/opts.c index 6924a973a5b66b579403f9cea1a1f5ea07e03961..642327296322cc897be01cec4604de5d5a2dab13 100644 --- a/gcc/opts.c +++ b/gcc/opts.c @@ -1742,6 +1742,13 @@ enable_fdo_optimizations (struct gcc_options *opts, SET_OPTION_IF_UNSET (opts, opts_set, flag_tree_loop_distribution, value); } +static void +set_cache_misses_profile_params (struct gcc_options *opts, + struct gcc_options *opts_set) +{ + SET_OPTION_IF_UNSET (opts, opts_set, flag_prefetch_loop_arrays, 1); +} + /* -f{,no-}sanitize{,-recover}= suboptions. */ const struct sanitizer_opts_s sanitizer_opts[] = { @@ -2604,6 +2611,25 @@ common_handle_option (struct gcc_options *opts, param_early_inliner_max_iterations, 10); break; + case OPT_fipa_extend_auto_profile: + opts->x_flag_ipa_extend_auto_profile = opts->x_flag_cache_misses_profile + ? true : value; + break; + + case OPT_fcache_misses_profile_: + opts->x_cache_misses_profile_file = xstrdup (arg); + opts->x_flag_cache_misses_profile = true; + value = true; + /* No break here - do -fcache-misses-profile processing. 
*/ + /* FALLTHRU */ + case OPT_fcache_misses_profile: + opts->x_flag_ipa_extend_auto_profile = value; + if (value) + { + set_cache_misses_profile_params (opts, opts_set); + } + break; + case OPT_fprofile_generate_: opts->x_profile_data_prefix = xstrdup (arg); value = true; diff --git a/gcc/passes.def b/gcc/passes.def index 63303ab65bb704c64bc958a29291386409e67263..e9c91d26e9b285445ec6235c7006f26fb41e4e80 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -133,6 +133,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_target_clone); NEXT_PASS (pass_ipa_auto_profile); + NEXT_PASS (pass_ipa_extend_auto_profile); NEXT_PASS (pass_ipa_tree_profile); PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile) NEXT_PASS (pass_feedback_split_functions); diff --git a/gcc/timevar.def b/gcc/timevar.def index ee25eccbb675d83a9222a543213b511c061d7b3e..e873747a8f5c0fd2305c92ace74af4cfb56148a4 100644 --- a/gcc/timevar.def +++ b/gcc/timevar.def @@ -82,6 +82,7 @@ DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") DEFTIMEVAR (TV_IPA_REORDER_FIELDS , "ipa struct reorder fields optimization") DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") +DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile") DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream decompression") DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream compression") diff --git a/gcc/toplev.c b/gcc/toplev.c index eaed6f6c78005510af8811f2a5f17ffa159824a0..51e6bd400ea2e8ad09498e55e70518cc1aa545c0 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -577,6 +577,12 @@ compile_file (void) targetm.asm_out.output_ident (ident_str); } + /* Extend auto profile finalization. */ + if (flag_ipa_extend_auto_profile) + { + free_extend_profile_info (); + } + /* Auto profile finalization. 
*/ if (flag_auto_profile) end_auto_profile (); diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index eb32c5d441b7cfbd79e4f5fbe9aaf825461dcb95..be6387768f416a5da5070a4baa18011a6e117443 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -511,6 +511,8 @@ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); +extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context + *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt);