diff --git a/python/akg/ops/nn/ascend/load_im2col.py b/python/akg/ops/nn/ascend/load_im2col.py index eaea71ed8aafdd31bf32bb262c53cdeb26d0efc9..30289017a93298fdba656ebcf3ea08ee73b4e8c9 100644 --- a/python/akg/ops/nn/ascend/load_im2col.py +++ b/python/akg/ops/nn/ascend/load_im2col.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -354,6 +354,7 @@ def get_attrs(): attr_map["enable_double_buffer"] = False attr_map["pragma_sink_last_axis"] = False attr_map["enable_hoist_insn"] = False + attr_map["pragma_enable_reschedule"] = False return attr_map @@ -405,10 +406,10 @@ def LoadIm2col(tensor_fmap, kernel, stride, pad, target=utils.CCE): load_im2col_dsl_output = load_im2col_dsl(load_im2col_dsl_input, fmap_shape_NC1HWCO, kernel, pad, stride) # calculate the tiling factor. - Ho = (fmap_h + pad[0] + pad[1] - filter_h) // (stride[0]) + 1 - Wo = (fmap_w + pad[2] + pad[3] - filter_w) // (stride[1]) + 1 + ho = (fmap_h + pad[0] + pad[1] - filter_h) // (stride[0]) + 1 + wo = (fmap_w + pad[2] + pad[3] - filter_w) // (stride[1]) + 1 - if not large_tensor(tensor_fmap) and ((Ho.value * Wo.value) % block_size > 0 or has_pad(pad)): + if not large_tensor(tensor_fmap) and ((ho.value * wo.value) % block_size > 0 or has_pad(pad)): load_im2col_dsl_output = load_im2col_dsl_no_padding(load_im2col_dsl_input, fmap_shape_NC1HWCO, kernel, pad, stride) attrs = get_attrs() diff --git a/src/poly/dump_log.cc b/src/poly/dump_log.cc index efb0e492de3d4929f7b04ec1cc1d36c8e3055678..46aa77f9317b35306b1d1cef95a4615bc0aa7776 100644 --- a/src/poly/dump_log.cc +++ b/src/poly/dump_log.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -461,7 +461,6 @@ void UserConfig::DumpScopDataScheduleAttrs(std::ofstream &of) { of << "pragma_sink_last_axis : " << GetSinkLastAxis() << std::endl; of << "pragma_keep_outer_band_order : " << GetKeepOuterBandOrder() << std::endl; of << "pragma_disable_group : " << GetDisableGroup() << std::endl; - of << "pragma_tile_inner_band : " << GetTileInnerBand() << std::endl; of << "isolated_idx : " << GetIsolatedIdx() << std::endl; of << "pragma_outerband_need_split : " << GetOuterBandNeedSplit() << std::endl; diff --git a/src/poly/npu_isl_emitter.cc b/src/poly/npu_isl_emitter.cc index 4aa03cd8f9f1cb7525860d1f6dd8a64a2067a405..88eacc7aae973a9d5a29eeccf28e1866b0373cbb 100644 --- a/src/poly/npu_isl_emitter.cc +++ b/src/poly/npu_isl_emitter.cc @@ -1887,6 +1887,11 @@ Stmt NPUIslEmitter::EmitMarkFuseInst(const isl::ast_node_mark &node) { return stmt; } +Stmt NPUIslEmitter::EmitMarkReschedule(const isl::ast_node_mark &node) { + auto stmt = AttrStmt::make(make_zero(Int(32)), "pragma_reschedule", Expr(1), EmitAst(node.get_node())); + return stmt; +} + Stmt NPUIslEmitter::EmitMarkAllocRealizeOut(const isl::ast_node_mark &node) { Stmt body = EmitAst(node.get_node()); for (const auto &i : realize_out_) { @@ -1990,6 +1995,7 @@ void NPUIslEmitter::RealizeOut() { Stmt NPUIslEmitter::EmitMarkMulticore(const isl::ast_node_mark &node) { auto mark_name = node.get_id().get_name(); if (mark_name == FUSE_VECTOR) return EmitMarkFuseInst(node); + if (mark_name == RESCHEDULE) return EmitMarkReschedule(node); if (mark_name == ALLOC_REALIZE_OUT) return EmitMarkAllocRealizeOut(node); if (mark_name == ALLOC_C) return EmitMarkAllocC(node); #if SPEC_GEMM diff --git a/src/poly/npu_isl_emitter.h b/src/poly/npu_isl_emitter.h index 2c1f5f420ed97480a818f97dc20c6864fcc227a1..0578935afe5652cf5d4e8e485c05db1437a6b726 100644 --- a/src/poly/npu_isl_emitter.h +++ b/src/poly/npu_isl_emitter.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2019-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,6 +52,7 @@ class NPUIslEmitter : public IslEmitter { // emit mark node Stmt EmitMarkMulticore(const isl::ast_node_mark &node); Stmt EmitMarkFuseInst(const isl::ast_node_mark &node); + Stmt EmitMarkReschedule(const isl::ast_node_mark &node); Stmt EmitMarkAllocRealizeOut(const isl::ast_node_mark &node); Stmt EmitMarkAllocC(const isl::ast_node_mark &node); Stmt EmitMarkSpecGemm(const isl::ast_node_mark &node); diff --git a/src/poly/poly.cc b/src/poly/poly.cc index e756b532ade43edab5996c42374790a8945acb79..07e2ac082e35521d17c1c222b9558aecaf427bae 100644 --- a/src/poly/poly.cc +++ b/src/poly/poly.cc @@ -1,5 +1,5 @@ /** - * Copyright 2019-2021 Huawei Technologies Co., Ltd + * Copyright 2019-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,47 @@ namespace akg { namespace ir { +class LoopMinFixer : public IRMutator { + public: + LoopMinFixer() {} + ~LoopMinFixer() override = default; + + Stmt Mutate_(const AttrStmt *op, const Stmt &s) final { + if (op->attr_key == "pragma_reschedule") { + in_reschedule_ = true; + auto stmt = IRMutator::Mutate(op->body); + in_reschedule_ = false; + return stmt; + } + return IRMutator::Mutate_(op, s); + } + + Stmt Mutate_(const For *op, const Stmt &s) final { + Stmt stmt; + if (!in_reschedule_) { + return IRMutator::Mutate_(op, s); + } + if (op->min.as() && op->min.as()->value == 0) { + stmt = IRMutator::Mutate_(op, s); + } else { + Expr extent = Substitute(op->extent, {{op->loop_var, Simplify(op->loop_var + op->min)}}); + Stmt body = Substitute(op->body, {{op->loop_var, Simplify(op->loop_var + op->min)}}); + body = CanonicalSimplify(body); + body = Mutate(body); + stmt = For::make(op->loop_var, 0, extent, op->for_type, op->device_api, body); + } + return stmt; + } + + private: + bool in_reschedule_{false}; +}; + +Stmt FixLoopMin(Stmt stmt) { + stmt = LoopMinFixer().Mutate(stmt); + return stmt; +} + /*! * \brief Poly entry */ @@ -57,6 +98,9 @@ class Poly { TIMER_START; stmt_ = scop_->GenHalide(sched); TIMER_SHOW("GenHalide", std::string(is_spec_gemm ? "_specgemm" : "")); + if (scop_->info_.user_config_.GetTarget() == TARGET_CCE) { + stmt_ = FixLoopMin(stmt_); + } if (is_dynamic) stmt_ = RestoreCombinedParams(stmt_, scop_->info_); diff --git a/src/poly/poly_util.h b/src/poly/poly_util.h index 356060a9a5ec21f0cb23b4de8f871029d11b7cea..6c02452238184a270dd071e4fd8fd27c129dd166 100644 --- a/src/poly/poly_util.h +++ b/src/poly/poly_util.h @@ -428,6 +428,7 @@ constexpr auto REALIZE = "realize"; constexpr auto CONV_GEMM = "conv_gemm"; constexpr auto CONV_KHKW_OUTER = "conv_khkw_outer"; constexpr auto FUSE_VECTOR = "fuse_vector"; +constexpr auto RESCHEDULE = "reschedule"; constexpr auto MULTICORE_COINCIDENT = "multicore_coincident_"; constexpr auto ALLOC_C = "alloc_C"; diff --git a/src/poly/reduce_manager.cc b/src/poly/reduce_manager.cc index 6eff4e8ab058c7b7989b4fc01fa20607712abc45..047d796a554549c867f16198902094dbe3c619c1 100644 --- a/src/poly/reduce_manager.cc +++ b/src/poly/reduce_manager.cc @@ -146,6 +146,19 @@ isl::schedule_node ReduceManager::SetAllCoincident(const isl::schedule_node &ori return band_node; } +// Loop distribution by serializing sccs +isl::schedule ReduceManager::RescheduleSerializeSccs(const isl::union_set &active_domain, const bool need_dist) const { + auto ctx = pass_info_.constraints_.ctx(); + auto wasSerializingSccs = isl_options_get_schedule_serialize_sccs(ctx.get()); + isl_stat status = isl_options_set_schedule_serialize_sccs(ctx.get(), static_cast(need_dist)); + CHECK(status == isl_stat_ok); + auto constraints = pass_info_.constraints_.intersect_domain(active_domain); + auto new_schedule = constraints.compute_schedule(); + status = isl_options_set_schedule_serialize_sccs(ctx.get(), wasSerializingSccs); + CHECK(status == isl_stat_ok); + return new_schedule; +} + isl::schedule_node ReduceManager::RescheduleForReduce(const isl::schedule_node &orig_node) { auto node = orig_node; size_t start_depth = node.get_tree_depth(); @@ -162,7 +175,6 @@ isl::schedule_node ReduceManager::RescheduleForReduce(const isl::schedule_node & } int child_number = static_cast(node.n_children()); - Reschedule reschedule(scop_info_, pass_info_); for (int i = 0; i < child_number; ++i) { auto child_node = node.child(i); if (!child_node.isa() || !child_node.has_children()) { @@ -180,7 +192,7 @@ isl::schedule_node ReduceManager::RescheduleForReduce(const isl::schedule_node & } auto active_domain = child_node.as().get_filter(); - auto after_reschedule_node = reschedule.RescheduleSerializeSccs(active_domain, false).get_root(); + auto after_reschedule_node = RescheduleSerializeSccs(active_domain, false).get_root(); after_reschedule_node = after_reschedule_node.has_children() ? after_reschedule_node.child(0) : after_reschedule_node; diff --git a/src/poly/reduce_manager.h b/src/poly/reduce_manager.h index 25f26ece5b93983e37abd5b942e59a0764c42f87..21c269a1ed96238b840b72b91d1ca7045d1ef6fc 100644 --- a/src/poly/reduce_manager.h +++ b/src/poly/reduce_manager.h @@ -50,6 +50,7 @@ class ReduceManager { isl::schedule_node ReorderStatements(const isl::schedule_node &node, isl::union_set before, isl::union_set after); bool AreSequentialStatements(isl::union_set first_statements, isl::union_set second_statements, isl::union_map dependences); + isl::schedule RescheduleSerializeSccs(const isl::union_set &active_domain, const bool need_dist) const; // After splitting the reduce fusion operator, reschedule all the filters, mainly because the reduce statement // affects other statements after the fusion. isl::schedule_node RescheduleForReduce(const isl::schedule_node &orig_node); diff --git a/src/poly/schedule_pass/label_realize_out_position.cc b/src/poly/schedule_pass/label_realize_out_position.cc index fc5349c2e17871d68dd56b1c48e5f33f55080fa3..0819bfe2b9a87fb1e2a1f18f994280f988ba7be5 100644 --- a/src/poly/schedule_pass/label_realize_out_position.cc +++ b/src/poly/schedule_pass/label_realize_out_position.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,9 +27,8 @@ isl::schedule LabelRealizeOutPosition::Run(isl::schedule sch_label) { if (REALIZE_BUF == node.as().get_id().get_name() && node.child(0).isa()) { auto band = node.child(0).as(); - unsigned pos = UINT_MAX; - auto updatePos_ = [&pos](isl::schedule_node node) -> isl::schedule_node { + auto UpdatePos = [&pos](isl::schedule_node node) -> isl::schedule_node { if (node.isa()) { node = node.get_child(0); if (node.isa()) { @@ -47,7 +46,9 @@ isl::schedule LabelRealizeOutPosition::Run(isl::schedule sch_label) { return node; }; - static_cast(band.map_descendant_bottom_up(updatePos_)); + if (!node.parent().isa()) { + static_cast(band.map_descendant_bottom_up(UpdatePos)); + } for (unsigned i = 0; i < band.n_member(); ++i) { if (!band.member_get_coincident(i)) { diff --git a/src/poly/schedule_pass/mark_outer_most.cc b/src/poly/schedule_pass/mark_outer_most.cc index dd6dd39f6c171818aee9566bb843eaa7f055114b..b0462c2978d814d55ed2bf6f51a9cea8b86eb447 100644 --- a/src/poly/schedule_pass/mark_outer_most.cc +++ b/src/poly/schedule_pass/mark_outer_most.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -84,6 +84,9 @@ bool MarkOuterMost::SingleMulticoreBand(isl::schedule_node &outer_band) { auto filter = node.as(); if (filter.has_children()) { auto node0 = filter.get_child(0); + while (node0.isa()) { + node0 = node0.get_child(0); + } if (node0.isa() && node0.as().n_member() >= 1) { multi_core_band++; } @@ -106,12 +109,17 @@ bool MarkOuterMost::InjectMulticoreToSchedule(isl::schedule_node &outer_band) { isl::schedule_node node = outer_band.get_child(i); if (node.isa()) { auto filter = node.as(); - if (filter.has_children() && filter.get_child(0).isa() && - filter.get_child(0).as().n_member() >= 1) { - isl::schedule_node tmp = filter.get_child(0); - bool injected = InjectMulticoreToBand(tmp); - outer_band = ObtainSequenceOrSetNodeAncestor(tmp); - return injected; + if (filter.has_children()) { + auto node0 = filter.get_child(0); + while (node0.isa()) { + node0 = node0.get_child(0); + } + if (node0.isa() && + node0.as().n_member() >= 1) { + bool injected = InjectMulticoreToBand(node0); + outer_band = ObtainSequenceOrSetNodeAncestor(node0); + return injected; + } } } } diff --git a/src/poly/schedule_pass/reorder_inner_band.cc b/src/poly/schedule_pass/reorder_inner_band.cc index a83c048f4d8a8ecdffbc33fb69deb9ae4dc78917..b4674bb3ca7c0d7789ac3160f8ee5b8c887660ff 100644 --- a/src/poly/schedule_pass/reorder_inner_band.cc +++ b/src/poly/schedule_pass/reorder_inner_band.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -160,8 +160,8 @@ isl::schedule ReorderInnerBand::Run(isl::schedule curr_schedule) { isl::schedule_node root = curr_schedule.get_root(); auto cond_vars = cond_vars_; root = root.map_descendant_bottom_up([&cond_vars](const isl::schedule_node &node) -> isl::schedule_node { - bool is_leaf_band = - node.as() && node.n_children() == 1 && node.first_child().as(); + bool is_leaf_band = (node.as() && (node.n_children() == 1) + && !node.is_subtree_anchored() && node.first_child().as()); if (!is_leaf_band) return node; auto band = node.as(); diff --git a/src/poly/schedule_pass/reschedule.cc b/src/poly/schedule_pass/reschedule.cc index eeda0ac5fe7ec2c2a891c3972f2a49256bc73406..31b28b5773459cd789aa2518629a8b32b093015f 100644 --- a/src/poly/schedule_pass/reschedule.cc +++ b/src/poly/schedule_pass/reschedule.cc @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,810 +23,85 @@ namespace akg { namespace ir { namespace poly { -bool Reschedule::IsL1OrUbMark(const isl::schedule_node &node) { - if (node.isa()) { - auto tag = node.as().get_id().get_name(); - if (tag == REALIZE_C1 || tag == REALIZE_BUF || tag == REALIZE_C1BUFC1) return true; - } - return false; -} - -bool Reschedule::IsL0OrUbL0Mark(const isl::schedule_node &node) { - if (node.isa()) { - auto tag = node.as().get_id().get_name(); - if (tag == REALIZE_C0 || tag == REALIZE_BUFC0 || tag == REALIZE_BUFC1) return true; - } - return false; -} - -/* Collect tile band data. - * - * The input node may either be an L1/UB tile band or an L0 tile band. - * - * First check whether "node" is a band node, return if not. Then set "l0_tiled" - * if "node" is marked by "realize_L0" or "realize_UBL0". Save the ast build - * options to "l0_build_options_" since we need to retrieve it after building - * the whole schedule. - */ -void Reschedule::CollectTileBandData(const isl::schedule_node &node, struct TileBandData *tile_band_data) { - CHECK(node.isa()) << "has to be a band node" << std::endl; - - tile_band_data->l0_tiled = false; - tile_band_data->mark = node.parent(); - tile_band_data->ast_build_options = node.as().get_ast_build_options(); - - if (tile_band_data->mark.isa()) { - auto marktag = tile_band_data->mark.as().get_id().get_name(); - if (marktag == REALIZE_C0 || marktag == REALIZE_BUFC0 || marktag == REALIZE_BUFC1) { - tile_band_data->l0_tiled = true; - l0_build_options_.push_back(tile_band_data->ast_build_options); - } else if (marktag == REALIZE_C1 || marktag == REALIZE_BUF || marktag == REALIZE_C1BUFC1) { - l1_build_options_.push_back(tile_band_data->ast_build_options); - } - tile_band_data->gemm_mark = node.parent().parent(); - } - - tile_band_data->n_member = node.as().n_member(); - tile_band_data->mupa = node.as().get_partial_schedule(); - tile_band_data->permutable = node.as().get_permutable(); - for (int i = 0; i < static_cast(tile_band_data->n_member); ++i) - tile_band_data->coincident.push_back(node.as().member_get_coincident(i)); -} - -/* Retrieve tile band data for "node". In particular, the ast build - * options could be retrieved directly when "node" is an L1/UB tile - * band, since the schedule tree is not anchored. - */ -isl::schedule_node Reschedule::RetrieveTileBandData(isl::schedule_node node, struct TileBandData *tile_band_data) { - node = node.insert_partial_schedule(tile_band_data->mupa); - CHECK(node.isa()) << "node has to be a band node" << std::endl; - node = node.as().set_permutable(static_cast(tile_band_data->permutable)); - for (int i = 0; i < static_cast(tile_band_data->n_member); ++i) - node = node.as().member_set_coincident(i, static_cast(tile_band_data->coincident[i])); - - if (tile_band_data->mark.isa()) { - auto marktag = tile_band_data->mark.as().get_id().get_name(); - node = node.insert_mark(tile_band_data->mark.as().get_id()); - if (marktag == REALIZE_C0) { - if (tile_band_data->gemm_mark.isa()) { - auto gemmtag = tile_band_data->gemm_mark.as().get_id().get_name(); - if (gemmtag == CONV_GEMM) { - node = node.insert_mark(tile_band_data->gemm_mark.as().get_id()); - } - } - } - } - - return node; -} - -isl::schedule_node Reschedule::RetrieveNodeList(isl::schedule_node node, - const std::vector &node_list) { - auto n = static_cast(node_list.size()); - if (!n) return node; - - for (unsigned int i = n; i >= 1; i--) { - auto candidate = node_list[i - 1]; - if (candidate.isa()) { - if (isl_schedule_node_is_subtree_anchored(node.get())) { - LOG(INFO) << "subtree of the schedule node depends on outer band, cannot insert partial schedule"; - continue; - } - auto mupa = candidate.as().get_partial_schedule(); - auto permutable = candidate.as().get_permutable(); - auto num = candidate.as().n_member(); - std::vector coincident; - for (int j = 0; j < static_cast(num); ++j) { - coincident.push_back(candidate.as().member_get_coincident(j)); - } - - node = node.insert_partial_schedule(mupa); - node = node.as().set_permutable(static_cast(permutable)); - for (int j = 0; j < static_cast(num); ++j) { - node = node.as().member_set_coincident(j, static_cast(coincident[j])); - } - coincident.clear(); - } else if (candidate.isa()) { - auto id = candidate.as().get_id(); - node = node.insert_mark(id); - } else if (candidate.isa()) { - auto context = candidate.as().get_context(); - node = node.insert_context(context); - } else if (candidate.isa()) { - auto guard = candidate.as().get_guard(); - node = node.insert_guard(guard); - } else { - LOG(WARNING) << "invalid node in node_list!!! " << candidate; - } - } - - return node; -} - -isl::schedule_node Reschedule::RetrieveAstBuildOptions(isl::schedule_node node, const isl::union_set &options) { - node = GetOuterBand(node); - if (node.isa()) { - node = node.as().set_ast_build_options(options); - return node; - } - return node; -} - -// Get the order of statement IDs in the leaf filter nodes of a sequence node. -std::vector GetStmtOrderInSequenceNode(const isl::schedule_node &node) { - std::vector filter_order; - if (!node.isa()) return filter_order; - - auto sequence = node.as(); - // check whether all children of the sequence node are point bands (i.e. leaf filter nodes) - for (int pos = 0; pos < static_cast(sequence.n_children()); ++pos) { - if (!node.child(pos).isa()) return filter_order; - auto filter = node.child(pos).as(); - // check if filter node is a point band - if (filter.n_children() >= 2) return filter_order; - if (filter.n_children() == 1 && filter.first_child().has_children()) return filter_order; - - filter.get_filter().foreach_set([&](const isl::set &set) -> void { - auto stmt_id = set.get_tuple_id(); - filter_order.push_back(stmt_id); - }); - } - return filter_order; -} - -// Get the order of statement IDs in the leaf filter nodes of a schedule tree. -std::vector GetStmtTotalOrdering(const isl::schedule_node &node) { - std::vector stmt_order; - node.foreach_descendant_top_down([&](const isl::schedule_node &node) -> bool { - auto filter_order = GetStmtOrderInSequenceNode(node); - for (const auto &it : filter_order) stmt_order.push_back(it); - return true; - }); - return stmt_order; -} - -/* Get the order of statement IDs in point bands of each leaf sequence node of a schedule tree. - * The result represents a vector of sequence nodes, and each sequence node has a vector of statement IDs. - */ -std::vector> GetStmtPartialOrdering(const isl::schedule_node &node) { - std::vector> sequence_nodes; - node.foreach_descendant_top_down([&](const isl::schedule_node &node) -> bool { - auto filter_order = GetStmtOrderInSequenceNode(node); - if (!filter_order.empty()) sequence_nodes.push_back(filter_order); - return true; - }); - return sequence_nodes; -} - -/* Reassign values of the map as a permutation of the keys of the map. - * The permutation ordering is determined by the ordering of the values (allow duplicates). - * Example: - * Input: 1 -> 0, 5 -> 1, 7 -> 1, 3 -> 2, 9 -> 3, 8 -> 4 - * Output: 1 -> 1, 5 -> 3, 7 -> 5, 3 -> 7, 9 -> 8, 8 -> 9 - */ -void ConstructNewOrder(std::unordered_map &map) { - std::set key_order; - std::multimap reverse_map; - for (const auto &it : map) { - key_order.insert(it.first); - reverse_map.insert(std::make_pair(it.second, it.first)); - } - std::unordered_map new_order; - auto key_order_it = key_order.begin(); - for (const auto &it : reverse_map) { - size_t new_key = it.second; - CHECK(key_order_it != key_order.end()); - size_t old_key = *key_order_it++; - new_order[new_key] = old_key; - } - map = new_order; -} - -// Restore the order of filter nodes. -isl::schedule_node RestoreOrderOfFilters(const isl::schedule_node &node, const std::vector &order) { - std::unordered_map id_to_order_map; - for (auto i = 0u; i < order.size(); ++i) { - id_to_order_map[order[i]] = i; - } - // map from original child position to new child position - std::unordered_map node_order_map; - for (int i = 0; i < static_cast(node.n_children()); ++i) { - if (!node.get_child(i).isa()) return node; - auto filter_node = node.get_child(i).as(); - filter_node.get_filter().foreach_set([&](const isl::set &set) -> void { - auto it = id_to_order_map.find(set.get_tuple_id()); - if (it == id_to_order_map.end()) return; - size_t order = it->second; - if (node_order_map.count(i) == 0) { - node_order_map[i] = order; - } else { - node_order_map[i] = std::min(node_order_map[i], order); - } - }); - } - - ConstructNewOrder(node_order_map); - return ReorderFilters(node, node_order_map); -} - -/* Restore the order of filter nodes after reschedule. - * "orders" represents a vector of ordering groups, and each group has a vector of statement IDs. - * Ordering of filter nodes within each group should be restored. - */ -isl::schedule_node RestoreOrderOfSequenceNodes(isl::schedule_node node, - const std::vector> &orders) { - for (const auto &order : orders) { - node = RestoreOrderOfFilters(node, order); - } - return node; -} - -bool Reschedule::ValidateReorderedSchedule(const isl::schedule &new_schedule) { - isl::union_map new_dependence = ComputeAllDependences(new_schedule, scop_info_.analysis_result_.GetReads(), - scop_info_.analysis_result_.GetWrites()); - bool is_valid = new_dependence.is_subset(pass_info_.dependences_); +bool Reschedule::ValidateSchedule(const isl::schedule &sch) { + auto sched = sch.get_map(); + auto psched = isl_union_map_lex_lt_union_map(sched.copy(), sched.copy()); + sched = isl::manage(psched); + auto identity = pass_info_.dependences_.domain().identity(); + auto dependences = pass_info_.dependences_.subtract(identity); + bool is_valid = dependences.is_subset(sched); return is_valid; } -isl::schedule_node Reschedule::TryRestoreStmtOrder(const isl::schedule_node &node, - const std::vector &filter_total_order, - const std::vector> &filter_partial_order) { - if (filter_total_order.empty()) return node; - if (filter_partial_order.empty()) return node; - - auto reordered_node = RestoreOrderOfFilters(node, filter_total_order); - if (ValidateReorderedSchedule(reordered_node.get_schedule())) { - LOG(INFO) << "reschedule: restored total order of point bands in sequence nodes."; - return reordered_node; - } else { - reordered_node = RestoreOrderOfSequenceNodes(node, filter_partial_order); - if (ValidateReorderedSchedule(reordered_node.get_schedule())) { - LOG(INFO) << "reschedule: restored partial order of point bands in sequence nodes."; - return reordered_node; - } - } - LOG(INFO) << "reschedule: dependences changed, do not restore order of point bands."; - return node; -} - -// Loop distribution by serializing sccs -isl::schedule Reschedule::RescheduleSerializeSccs(const isl::union_set &active_domain, const bool need_dist) const { - auto ctx = pass_info_.constraints_.ctx(); - auto wasSerializingSccs = isl_options_get_schedule_serialize_sccs(ctx.get()); - isl_stat status = isl_options_set_schedule_serialize_sccs(ctx.get(), static_cast(need_dist)); +isl::schedule_node Reschedule::RecomputeScheduleTree(const isl::schedule_node &node) { + auto ctx = node.ctx(); + auto origin_scc = isl_options_get_schedule_serialize_sccs(ctx.get()); + isl_stat status = isl_options_set_schedule_serialize_sccs(ctx.get(), 1); CHECK(status == isl_stat_ok); - auto constraints = pass_info_.constraints_.intersect_domain(active_domain); - auto new_schedule = constraints.compute_schedule(); - status = isl_options_set_schedule_serialize_sccs(ctx.get(), wasSerializingSccs); + auto pnode = isl_schedule_node_schedule(node.copy(), pass_info_.constraints_.copy()); + auto new_node = isl::manage(pnode); + status = isl_options_set_schedule_serialize_sccs(ctx.get(), origin_scc); CHECK(status == isl_stat_ok); - return new_schedule; -} - -// Save ordering of filter children, and restore the ordering after reschedule -isl::schedule_node Reschedule::ReschedulePreserveFilterOrder(const isl::schedule_node &node, - const isl::union_set &active_domain, - const bool need_dist) { - auto filter_total_order = GetStmtTotalOrdering(node); - auto filter_partial_order = GetStmtPartialOrdering(node); - - auto new_schedule = RescheduleSerializeSccs(active_domain, need_dist); - auto new_node = GetOuterBand(new_schedule.get_root()); - // Retrieve point band if a sequence/set node is introduced - if (IsSequenceOrSet(new_node)) { - return TryRestoreStmtOrder(new_node, filter_total_order, filter_partial_order); - } else { - return new_node; - } -} - -// Save partial schedule, permutable and coincident attrs of a band. -PointBandInfo Reschedule::SavePointBand(const isl::schedule_node &node) { - PointBandInfo point_band_info; - CHECK(node.isa()); - auto band = node.as(); - point_band_info.mupa = band.get_partial_schedule(); - point_band_info.permutable = band.get_permutable(); - point_band_info.n_member = band.n_member(); - for (int k = 0; k < static_cast(point_band_info.n_member); ++k) { - point_band_info.coincident.push_back(band.member_get_coincident(k)); - } - isl::union_pw_aff_list upa_list = point_band_info.mupa.get_union_pw_aff_list(); - for (unsigned int i = 0; i < upa_list.size(); ++i) { - isl::union_pw_aff upa = upa_list.get_at(i); - isl::pw_aff_list pa_list = upa.get_pw_aff_list(); - std::unordered_map pa_map; - for (unsigned int j = 0; j < pa_list.size(); ++j) { - isl::pw_aff pa = pa_list.get_at(j); - std::string pa_name = pa.domain().get_tuple_name(); - pa_map.insert(std::make_pair(pa_name, pa)); - } - point_band_info.pa_list_map[i] = pa_map; - } - return point_band_info; -} - -/* Restore saved partial schedule, permutable and coincident attrs of a band. - * Input must be a band node. - */ -isl::schedule_node Reschedule::SetPointBandInfo(isl::schedule_node node, const PointBandInfo &point_band_info) { - isl::multi_union_pw_aff mupa_origin = node.as().get_partial_schedule(); - isl::union_pw_aff_list upa_list_origin = mupa_origin.get_union_pw_aff_list(); - isl::pw_aff_list pa_list_origin = upa_list_origin.get_at(static_cast(0)).get_pw_aff_list(); - std::vector pa_name_vector; - for (size_t i = 0; i < pa_list_origin.size(); ++i) { - std::string pa_name = pa_list_origin.get_at(i).domain().get_tuple_name(); - pa_name_vector.emplace_back(pa_name); - } - isl::union_pw_aff_list upa_list_old = point_band_info.mupa.get_union_pw_aff_list(); - isl::union_pw_aff_list upa_list_new = isl::union_pw_aff_list(); - for (unsigned int i = 0; i < upa_list_old.size(); ++i) { - auto pa_map = point_band_info.pa_list_map.at(i); - isl::union_pw_aff upa = isl::union_pw_aff(); - for (size_t j = 0; j < pa_name_vector.size(); ++j) { - auto it = pa_map.find(pa_name_vector.at(j)); - if (it != pa_map.end()) { - isl::union_pw_aff upa_temp = isl::union_pw_aff(it->second); - if (upa.is_null()) { - upa = isl::union_pw_aff(upa_temp); - } else { - upa = upa.union_add(upa_temp); - } - } - } - if (upa_list_new.is_null()) { - upa_list_new = isl::union_pw_aff_list(upa); - } else { - upa_list_new = upa_list_new.add(upa); - } - } - isl::space space_old = point_band_info.mupa.get_space(); - isl::multi_union_pw_aff mupa_new = isl::multi_union_pw_aff(space_old, upa_list_new); - node = node.del(); - node = node.insert_partial_schedule(mupa_new); - auto n = node.as().n_member(); - node = node.as().set_permutable(static_cast(point_band_info.permutable)); - for (unsigned int j = 0; j < point_band_info.n_member && j < n; ++j) { - node = node.as().member_set_coincident(static_cast(j), - static_cast(point_band_info.coincident[j])); - } - return node; -} - -/* Restore saved partial schedule, permutable and coincident attrs of each band in the node. - * Input may be a sequence, set or band node. - */ -isl::schedule_node Reschedule::RestorePointBandInfo(isl::schedule_node node, const PointBandInfo &point_band_info) { - // Retrieve point band if a sequence/set node is introduced - if (IsSequenceOrSet(node)) { - // Update point band for each scc filter - for (auto i = 0u; i < node.n_children(); ++i) { - node = node.get_child(i); - node = GetOuterBand(node); - if (node.isa()) { - while (!node.isa()) node = node.parent(); - node = node.parent(); - continue; - } - node = SetPointBandInfo(node, point_band_info); - node = node.parent().parent(); - } - } else { - node = SetPointBandInfo(node, point_band_info); - } - return node; -} - -/* Reschedule point band with minimal fusion strategy for "root". - * "root" should be either a domain or filter node. - * - * "need_dist" is used to indicate whether reschedule is needed. - * In particular, only operators bypassing L0, i.e., those vector - * operators, should be rescheduled. Operators like convolution, - * multiplication, etc. should not be rescheduled. - * - * Rescheduling starts by checking the input node type, followed - * by a computation of the active domain for the given "root". - * In particular, the active domain of a filter node should be - * a subset of the whole schedule. - * - * First, try to obtain the outermost band node. It may either - * be a sequence/set node or a tile band node. If "node" refers - * to a sequence/set node, reschedule each filter node individually - * and construct a new schedule via a sequence/set node. If "node" - * moves to a tile band, record L1/UB tile band and its mark node. - * They should be retrieved to the generated schedule after - * rescheduling, together with its permutable, coincident, options, - * etc. - * - * When traversing from "root" to outermost band node, there may - * be some additional nodes that should be reserved in "node_list". - * Such nodes may be a band (in the case of node split during - * tiling), context, guard, mark (not L1/UB/L0/UBL0 mark) node. All - * these nodes should be retrieved after rescheduling. - * - * Then move down to the child node of L1/UB tile band. As - * convolution operator group and vector operator group branch - * into different buffers, the child node may be a sequence/set - * node. - * - * Reschedule each filter node of a given sequence/set node and - * construct a schedule with a sequence/set node by combining - * all schedules of the filter nodes. For the ops in the conv - * group, we reschedule it by maximizing fusion; for those ops - * in vector group, each operator should be distributed. Such - * groups are differentiated by checking the target local buffer, - * i.e., L0 (for conv group) or UBL0 (for vector group). - * - * L0 tile band may be reached by moving down from either a L1/UB - * tile band or a filter node. Record L1/UB tile band and its mark - * node. They should also be retrieved to the generated schedule - * after rescheduling, together with its permutable, coincident, - * options, etc. - * - * L0 tile may not happen when the input ops are not convolution-like - * ops. In such cases, one may reach point band directly from L1 - * tile band. - * - * Point band may be reached by moving down from L0 tile band. - * Record point band information for later reclaiming. Again, - * permutable, coincident, options, etc. should all be recovered. - * - * Try to reschedule the point band by serializing all sccs in - * the active domain when "need_dist" is true, with schedule - * constraints updated by intersecting with the active domain. The - * scheduling options should be first recorded and then recovered for - * the consistency along tile bands. - * - * Retrieve the original point band by intersecting each filter - * of the generated schedule. The original L0 tile band may also - * be retrieved after updating the introduced sequence/set node. - * Also, the L0 tile mark node should also be recovered if any. - * - * The L1/UB tile band and its mark node should be added to the - * generated schedule. "L1_tile_mupa" and "L1_mark" would be used - * to record L0 tile information when given a filter node. - * - * The saved "node_list" may be retrieved to the new schedule tree - * if any. - * - * Finally, the L1/UB AST build options may be introduced to the - * generated schedule, since one or more nodes in "node_list" may - * govern the L1/UB tile bands and/or L0/UBL0 tile bands. One may - * come across with anchored subtrees if the options were introduced - * before retrieving nodes in "node_list". - * - * Return the root of the schedule after rescheduling. + return new_node; +} + +/* Reschedule schedule tree with serialize sccs for "root". + * Currently, two patterns of rescheduling are implemented. + * pattern 1: + * Mark(realize_UB or realize_UBL0) + * | + * Tile band + * | + * Point band <--- reschedule position + * pattern 2: + * Mark(realize_L1) + * | + * Band + * | + * Mark(realize_UB or realize_UBL0) + * | + * Band <--- reschedule position + * Return the node of the schedule after rescheduling. */ isl::schedule_node Reschedule::RescheduleSchTree(const isl::schedule_node &root) { - bool need_dist = true; - // Return "root" if given an inappropriate node - if (!root.isa() && !root.isa()) return root; - - // Compute the active domain - auto active_domain = root.isa() ? root.as().get_domain() - : root.as().get_filter(); - // Save L1/UB band and mark node - auto node = GetOuterBand(root); - // Save all nodes along the path from root to L1/UB - if (!IsL1OrUbMark(node.parent()) && !IsL0OrUbL0Mark(node.parent())) { - node = root.get_child(0); - while (!IsL1OrUbMark(node) && !IsL0OrUbL0Mark(node) && !IsSequenceOrSet(node) && - !node.isa()) { - node_list_0_.push_back(node); - node = node.get_child(0); - } - if (IsL1OrUbMark(node) || IsL0OrUbL0Mark(node)) node = node.get_child(0); - } - - // Construct the schedule recursively - // when encountered a sequence/set node - if (IsSequenceOrSet(node)) { - // "schedule" is used to combine the schedules of all filters - isl::schedule schedule; - for (auto i = 0u; i < node.n_children(); ++i) { - auto child = node.get_child(i); - child = RescheduleSchTree(child); - if (!child.isa()) return root; - if (i == 0) { - schedule = child.get_schedule(); - } else { - if (node.isa()) { - schedule = schedule.sequence(child.get_schedule()); - } else { - schedule = schedule.set(child.get_schedule()); + auto fn = [&](isl::schedule_node node) -> isl::schedule_node { + if (node.isa()) { + auto tag = node.as().get_id().get_name(); + if ((tag == "realize_UBL0") || (tag == "realize_UB")) { + auto node_m = node.insert_mark(RESCHEDULE); + node_m = node_m.get_child(0).get_child(0); + if (node_m.isa()) { + if ((node.parent().isa()) + && (node.parent().parent().isa()) + && (node.parent().parent().as().get_id().get_name() == "realize_L1")) { + node_m = RecomputeScheduleTree(node_m); + } else { + node_m = node_m.get_child(0); + node_m = RecomputeScheduleTree(node_m); + node_m = node_m.parent(); + } } + node = node_m.parent().parent(); } } - node = GetOuterBand(schedule.get_root()); - // insert the original L1/UB band and its mark - node = RetrieveNodeList(node, node_list_0_); - - // retrieve ast build options for each filter - // The ast build options of L0/UBL0 have to be retrieved - // after building the whole schedule tree, since it may - // introduce an anchored subtree they were retrieved - // before constructing schedule by sequence/set. - node = GetOuterBand(node); - if (IsSequenceOrSet(node)) { - for (unsigned int i = 0; i < static_cast(node.n_children()) && i < l1_build_options_.size(); ++i) { - node = GetOuterBand(node.get_child(static_cast(i))); - if (node.as()) { - node = node.as().set_ast_build_options(l1_build_options_[i]); - } - node = node.parent(); - while (!node.isa()) node = node.parent(); - node = node.parent(); - } - } - - return node.get_schedule().get_root(); - } - - auto scalar_filter = [](const isl::schedule_node &node) { - if (!node.isa()) { - return false; - } - - auto filter = node.as(); - isl::union_set sets = filter.get_filter(); - bool scalar = true; - sets.foreach_set([&scalar](const isl::set &s) -> void { - if (s.n_dim() > 0) { - scalar = false; - } - }); - return scalar; + return node; }; - - if (node.isa() && scalar_filter(node.parent())) { - std::vector node_list_temp; - auto temp_node = node.parent(); - while (!temp_node.is_equal(root)) { - node_list_temp.push_back(temp_node); - temp_node = temp_node.parent(); - } - std::reverse(node_list_temp.begin(), node_list_temp.end()); - node = ReschedulePreserveFilterOrder(node, active_domain, need_dist); - node = RetrieveNodeList(node, node_list_temp); - return node.get_schedule().get_root(); - } - - if (!node.isa()) return root; - - struct TileBandData L1_Tile_Data; - CollectTileBandData(node, &L1_Tile_Data); - - if (root.isa()) { - if (IsL0OrUbL0Mark(L1_Tile_Data.mark)) { - auto L1tag = L1_Tile_Data.mark.as().get_id().get_name(); - if (L1tag == REALIZE_C0) { - need_dist = false; - } - } - } - - // Move down to the child of L1/UB band and save all nodes along - node = node.get_child(0); - while (!node.isa() && !node.isa() && !IsL0OrUbL0Mark(node) && - !IsSequenceOrSet(node)) { - node_list_1_.push_back(node); - node = node.get_child(0); - } - if (IsL0OrUbL0Mark(node)) node = node.get_child(0); - // Construct the schedule recursively - // when encountered a sequence/set node - if (IsSequenceOrSet(node)) { - // "schedule" is used to combine the schedules of all filters - isl::schedule schedule; - for (auto i = 0u; i < node.n_children(); ++i) { - auto child = node.get_child(i); - child = RescheduleSchTree(child); - if (!child.isa()) return root; - if (i == 0) { - schedule = child.get_schedule(); - } else { - if (node.isa()) { - schedule = schedule.sequence(child.get_schedule()); - } else { - schedule = schedule.set(child.get_schedule()); - } - } - } - node = GetOuterBand(schedule.get_root()); - - // retrieve all nodes from L1/UB to L0/UBL0 - node = RetrieveNodeList(node, node_list_1_); - - // insert the original L1/UB band and its mark - node = RetrieveTileBandData(node, &L1_Tile_Data); - - // set ast build options - node = RetrieveAstBuildOptions(node, l1_build_options_[0]); - - // retrieve ast build options for each filter - // The ast build options of L0/UBL0 have to be retrieved - // after building the whole schedule tree, since it may - // introduce an anchored subtree they were retrieved - // before constructing schedule by sequence/set. - node = GetOuterBand(node).get_child(0); - if (IsSequenceOrSet(node)) { - for (unsigned int i = 0; i < node.n_children() && i < static_cast(l0_build_options_.size()); ++i) { - node = GetOuterBand(node.get_child(i)); - node = - node.as().set_ast_build_options(l0_build_options_[static_cast(i)]); - node = node.parent(); - while (!node.isa()) node = node.parent(); - node = node.parent(); - } - } - - return node.get_schedule().get_root(); - } - - if (node.isa()) { - std::vector node_list_temp; - auto temp_node = node.parent(); - while (!temp_node.is_equal(root)) { - node_list_temp.push_back(temp_node); - temp_node = temp_node.parent(); - } - std::reverse(node_list_temp.begin(), node_list_temp.end()); - node = ReschedulePreserveFilterOrder(node, active_domain, need_dist); - node = RetrieveNodeList(node, node_list_temp); - return node.get_schedule().get_root(); - } - - if (!node.isa()) return root; - - // Save L0 band and mark node, if any - // "l0_tiled" is used to check L0 tiled or not - struct TileBandData L0_Tile_Data; - CollectTileBandData(node, &L0_Tile_Data); - - // Move down to point band if L0 tiled - if (L0_Tile_Data.l0_tiled) { - auto L0tag = L0_Tile_Data.mark.as().get_id().get_name(); - if (L0tag == REALIZE_C0) { - return root; - } - // Move down to the child of L0/UBL0 band - // and save all nodes along - node = node.get_child(0); - while (!node.isa() && !IsSequenceOrSet(node) && !node.isa()) { - node_list_2_.push_back(node); - node = node.get_child(0); - } - if (!node.isa()) { - if (IsSequenceOrSet(node)) { - LOG(WARNING) << "reschedule of sequence/set node under L0/UBL0 is still ongoing!"; - } - return root; - } - } - - // Save point band - auto point_band_info = SavePointBand(node); - - // core operation of reschedule - node = ReschedulePreserveFilterOrder(node, active_domain, need_dist); - - node = RestorePointBandInfo(node, point_band_info); - - // Retrieve L0 tile band and mark node if L0 tiled - if (L0_Tile_Data.l0_tiled) { - node = RetrieveNodeList(node, node_list_2_); - node = RetrieveTileBandData(node, &L0_Tile_Data); - } - - // retrieve all nodes from L1/UB to L0/UBL0 - node = RetrieveNodeList(node, node_list_1_); - - // Retrieve L1/UB tile band and its mark - node = RetrieveTileBandData(node, &L1_Tile_Data); - - // Retrieve all saved nodes along the path to L1/UB band, if any - node = RetrieveNodeList(node, node_list_0_); - - // Reset ast build options - while (!IsL1OrUbMark(node) && !IsL0OrUbL0Mark(node) && !IsSequenceOrSet(node) && - !node.isa()) { - node = node.get_child(0); - } - if (IsL1OrUbMark(node)) node = RetrieveAstBuildOptions(node, l1_build_options_[0]); - if (IsSequenceOrSet(node)) { - for (unsigned int i = 0; i < static_cast(node.n_children()) && i < l1_build_options_.size(); ++i) { - node = node.get_child(static_cast(i)); - node = RetrieveAstBuildOptions(node, l1_build_options_[i]); - while (!node.isa()) node = node.parent(); - node = node.parent(); - } + auto node = root.map_descendant_bottom_up(fn); + if (ValidateSchedule(node.get_schedule())) { + LOG(INFO) << "Schedule tree is valid, ^_^"; + } else { + LOG(WARNING) << "Schedule tree is invalid, pls check the correctness!"; } - return node.get_schedule().get_root(); -} - -static isl::schedule_node IslScheduleNodeReplaceChild(const isl::schedule_node &old_node, int pos, - const isl::schedule_node &child_node) { - auto tree = isl_schedule_node_get_tree(old_node.get()); - CHECK(tree != nullptr); - auto new_subtree = isl_schedule_node_get_tree(child_node.get()); - CHECK(new_subtree != nullptr); - auto new_tree = isl_schedule_tree_replace_child(tree, pos, new_subtree); - CHECK(new_tree != nullptr); - auto new_node = isl_schedule_node_graft_tree(old_node.copy(), new_tree); - CHECK(new_node != nullptr); - return isl::manage(new_node); + return node; } -/* Reschedule the subtree of each mark node for loop distribution. - * - * Reschedule::Reschedule assumes the mark nodes are the outer bands. - * This function do not have the assumption, so it supports tiled inner bands. - * - * Assume mark nodes are not nested, so this is only suitable for vector ops. - */ -isl::schedule_node Reschedule::RescheduleInnerBand(const isl::schedule_node &root) { - return root.map_descendant_bottom_up([this](const isl::schedule_node &node) -> isl::schedule_node { - if (!IsL1OrUbMark(node) && !IsL0OrUbL0Mark(node)) return node; - - CHECK_EQ(node.n_children(), 1) << "mark node must have one child"; - auto outer_band = node.first_child(); - CHECK(outer_band.isa()) << "the child of mark node must be a band node"; - auto inner_band = outer_band.first_child(); - CHECK(inner_band.isa()) << "the mark node must be tiled to outer and inner bands"; - - auto active_domain = inner_band.as().get_domain(); - auto need_dist = true; - auto point_band_info = SavePointBand(inner_band); - - auto new_schedule = ReschedulePreserveFilterOrder(inner_band, active_domain, need_dist); - - auto new_inner_band = RestorePointBandInfo(GetOuterBand(new_schedule), point_band_info); - auto new_outer_band = IslScheduleNodeReplaceChild(outer_band, 0, new_inner_band); - return new_outer_band.parent(); - }); +isl::schedule Reschedule::Run(isl::schedule sch) { + auto root = sch.get_root(); + auto node = RescheduleSchTree(root); + return node.get_schedule(); } -void Reschedule::Dump() { - std::ofstream of; - of.open("transform.log", std::ios::out); - if (!of.is_open()) { - return; - } - PrintHeader(of, "L1/UB tile band build options"); - for (const auto &option : l1_build_options_) { - of << option << std::endl; - } - - PrintHeader(of, "L0 tile band build options"); - for (const auto &option : l0_build_options_) { - of << option << std::endl; - } - - PrintHeader(of, "nodes from root to L1/UB band"); - for (const auto &node : node_list_0_) { - of << node << std::endl; - } - - PrintHeader(of, "nodes from L1/UB band to L0/UBL0 band"); - for (const auto &node : node_list_1_) { - of << node << std::endl; - } - - PrintHeader(of, "nodes from L0/UBL0 band to point band"); - for (const auto &node : node_list_2_) { - of << node << std::endl; - } -} -isl::schedule Reschedule::Run(isl::schedule curr_schedule) { - isl::schedule sched = curr_schedule; - isl::schedule_node root = sched.get_root(); - if (scop_info_.user_config_.GetTileInnerBand()) - sched = RescheduleInnerBand(root).get_schedule(); - else - sched = RescheduleSchTree(root).get_schedule(); - return sched; -} } // namespace poly } // namespace ir } // namespace akg diff --git a/src/poly/schedule_pass/reschedule.h b/src/poly/schedule_pass/reschedule.h index 75529421296f7a6e7cda6b9d82490b6d3648cbf6..832399791525b1fd96fe499cd1f8e26ee7edc636 100644 --- a/src/poly/schedule_pass/reschedule.h +++ b/src/poly/schedule_pass/reschedule.h @@ -1,5 +1,5 @@ /** - * Copyright 2019 Huawei Technologies Co., Ltd + * Copyright 2020-2022 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,34 +22,6 @@ namespace akg { namespace ir { namespace poly { -struct PointBandInfo { - isl::multi_union_pw_aff mupa; - size_t n_member{0}; - bool permutable{false}; - std::vector coincident; - std::unordered_map> pa_list_map; -}; - -// data structure for recording tile band data -struct TileBandData { - // flag indicating whether L0 tiled - bool l0_tiled; - // mark node of the tile band, if any - isl::schedule_node mark; - // mark node of conv_gemm, if any - isl::schedule_node gemm_mark; - // members of tile band - unsigned int n_member; - // schedule mupa - isl::multi_union_pw_aff mupa; - // permutable - bool permutable; - // coincident - std::vector coincident; - // ast build options - isl::union_set ast_build_options; -}; - class Reschedule : public SchedulePass { public: Reschedule(ScopInfo &scop_info, PassInfo &pass_info) : scop_info_(scop_info), pass_info_(pass_info) { @@ -58,44 +30,16 @@ class Reschedule : public SchedulePass { ~Reschedule() {} virtual isl::schedule Run(isl::schedule sch); - isl::schedule RescheduleSerializeSccs(const isl::union_set &active_domain, const bool need_dist) const; private: - static bool IsL1OrUbMark(const isl::schedule_node &node); - static bool IsL0OrUbL0Mark(const isl::schedule_node &node); - void CollectTileBandData(const isl::schedule_node &node, TileBandData *tile_band_data); - static isl::schedule_node RetrieveTileBandData(isl::schedule_node node, TileBandData *tile_band_data); - static isl::schedule_node RetrieveNodeList(isl::schedule_node node, const std::vector &node_list); - static isl::schedule_node RetrieveAstBuildOptions(isl::schedule_node node, const isl::union_set &options); - bool ValidateReorderedSchedule(const isl::schedule &new_schedule); - isl::schedule_node TryRestoreStmtOrder(const isl::schedule_node &node, const std::vector &filter_total_order, - const std::vector> &filter_partial_order); - isl::schedule_node ReschedulePreserveFilterOrder(const isl::schedule_node &node, const isl::union_set &active_domain, - const bool need_dist); - static PointBandInfo SavePointBand(const isl::schedule_node &node); - static isl::schedule_node SetPointBandInfo(isl::schedule_node node, const PointBandInfo &point_band_info); - static isl::schedule_node RestorePointBandInfo(isl::schedule_node node, const PointBandInfo &point_band_info); + bool ValidateSchedule(const isl::schedule &sch); + isl::schedule_node RecomputeScheduleTree(const isl::schedule_node &node); isl::schedule_node RescheduleSchTree(const isl::schedule_node &root); - isl::schedule_node RescheduleInnerBand(const isl::schedule_node &root); - void Dump(); private: ScopInfo &scop_info_; PassInfo &pass_info_; - // for recording L1/UB tile band build options - std::vector l1_build_options_; - - // for recording L0 tile band build options - std::vector l0_build_options_; - - // for recording nodes along the path from root to L1/UB band - std::vector node_list_0_; - - // for recording nodes along the path from L1/UB band to L0/UBL0 band - std::vector node_list_1_; - // for recording nodes along the path from L0/UBL0 band to point band - std::vector node_list_2_; }; } // namespace poly diff --git a/src/poly/scop_info.h b/src/poly/scop_info.h index 66e39190c6971de72bf7e07dc3831dac5c486f93..dce2dcfddafd27decfacf23ddae090debc07262a 100644 --- a/src/poly/scop_info.h +++ b/src/poly/scop_info.h @@ -270,7 +270,6 @@ class UserConfig { ParseBoolAttr(attrs, "pragma_keep_outer_band_order", &keep_outer_band_order_); ParseBoolAttr(attrs, "pragma_modshift", &mod_schedule_shift_); ParseBoolAttr(attrs, "pragma_disable_group", &disable_group_); - ParseBoolAttr(attrs, "pragma_tile_inner_band", &tile_inner_band_); ParseBoolAttr(attrs, "pragma_set_all_coincident", &pragma_set_all_coincident_); ParseBoolAttr(attrs, "pragma_enable_reschedule", &enable_reschedule_); @@ -417,7 +416,6 @@ class UserConfig { bool GetKeepOuterBandOrder() const { return keep_outer_band_order_; } bool GetModScheduleShift() const { return mod_schedule_shift_; } bool GetDisableGroup() const { return disable_group_; } - bool GetTileInnerBand() const { return tile_inner_band_; } bool GetPragmaSetAllCoincident() const { return pragma_set_all_coincident_; } bool GetConsiderCoincidence() const { return consider_conincidence_; } void SetConsiderCoincidence(bool consider_conincidence) { consider_conincidence_ = consider_conincidence; } @@ -772,7 +770,6 @@ class UserConfig { bool keep_outer_band_order_{false}; bool mod_schedule_shift_{false}; bool disable_group_{false}; - bool tile_inner_band_{false}; bool pragma_set_all_coincident_{false}; bool consider_conincidence_{true}; bool enable_reschedule_{true}; diff --git a/tests/common/test_run/ascend/fused_batch_norm_run.py b/tests/common/test_run/ascend/fused_batch_norm_run.py index 80a4e7899f13122c238e7067dc5d6717620d59e7..c7c21cfdecca7287c16b1d4e2c0c1d50a10b7d42 100644 --- a/tests/common/test_run/ascend/fused_batch_norm_run.py +++ b/tests/common/test_run/ascend/fused_batch_norm_run.py @@ -1,4 +1,4 @@ -# Copyright 2019-2021 Huawei Technologies Co., Ltd +# Copyright 2019-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -100,6 +100,7 @@ def fused_batch_norm_run(shape, dtype, momentum, eps, is_training, data_format, expects, outputs = gen_data(dtype, eps, is_training, mean, mean_new, np_beta, np_data, np_gamma, support_list, var, var_new) + attrs["enable_double_buffer"] = False mod = utils.op_build_test(FusedBatchNorm, build_shape, [dtype, dtype, dtype, dtype, dtype], op_attrs=[momentum, eps, is_training, diff --git a/tests/common/test_run/unsorted_segment_sum_run.py b/tests/common/test_run/unsorted_segment_sum_run.py index 299ef05ad954e8a287328ccd3652020757bd6ff9..0005c465b1da2b8b087f61d2d8130d6b377803ee 100644 --- a/tests/common/test_run/unsorted_segment_sum_run.py +++ b/tests/common/test_run/unsorted_segment_sum_run.py @@ -1,4 +1,4 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd +# Copyright 2020-2022 Huawei Technologies Co., Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -40,6 +40,7 @@ def unsorted_segment_sum_run(shape, ids_shape, num_segments, dtype, attrs): else: return mod else: + attrs["multicore_loop_switch_hoist"] = False mod = unsortedsegmentsum_compile(shape, ids_shape, num_segments, dtype, attrs) expect, input, output, segment_ids = gen_data_ascend(dtype, ids_shape, num_segments, shape) output = utils.mod_launch(mod, (input, segment_ids, output), expect=expect) diff --git a/third_party/patch/isl/isl-influence.patch b/third_party/patch/isl/isl-influence.patch index 192dab086747bf89dd7b3b1fcd8c46bbbe35803f..6367293b9381cee34470974cb3b8c932d3ca718f 100644 --- a/third_party/patch/isl/isl-influence.patch +++ b/third_party/patch/isl/isl-influence.patch @@ -1,6 +1,6 @@ -unchanged: ---- isl-0.22/include/isl/options.h 2021-10-08 14:34:24.500105000 +0800 -+++ isl/include/isl/options.h 2021-10-08 23:20:41.245515766 +0800 +diff -Npur isl-0.22/include/isl/options.h isl/include/isl/options.h +--- isl-0.22/include/isl/options.h 2022-01-27 20:56:35.679289311 +0800 ++++ isl/include/isl/options.h 2022-01-27 20:59:20.537831715 +0800 @@ -49,6 +49,13 @@ int isl_options_get_coalesce_bounded_wra isl_stat isl_options_set_coalesce_preserve_locals(isl_ctx *ctx, int val); int isl_options_get_coalesce_preserve_locals(isl_ctx *ctx); @@ -15,9 +15,9 @@ unchanged: #if defined(__cplusplus) } #endif -diff -u isl/include/isl/schedule.h isl/include/isl/schedule.h ---- isl/include/isl/schedule.h 2021-10-08 23:20:41.249515618 +0800 -+++ isl/include/isl/schedule.h 2021-12-28 09:51:36.316000000 +0800 +diff -Npur isl-0.22/include/isl/schedule.h isl/include/isl/schedule.h +--- isl-0.22/include/isl/schedule.h 2022-01-27 20:56:35.683289369 +0800 ++++ isl/include/isl/schedule.h 2022-01-27 21:02:25.000000000 +0800 @@ -9,7 +9,7 @@ #include #include @@ -27,7 +27,7 @@ diff -u isl/include/isl/schedule.h isl/include/isl/schedule.h #if defined(__cplusplus) extern "C" { #endif -@@ -209,6 +209,122 @@ +@@ -219,6 +219,123 @@ __isl_give isl_printer *isl_printer_prin void isl_schedule_dump(__isl_keep isl_schedule *schedule); __isl_give char *isl_schedule_to_str(__isl_keep isl_schedule *schedule); @@ -106,7 +106,8 @@ diff -u isl/include/isl/schedule.h isl/include/isl/schedule.h +/* isl_sched_node functions */ +int isl_sched_node_par_coef_offset(struct isl_sched_node *node); +int isl_sched_node_cst_coef_offset(struct isl_sched_node *node); -+__isl_give isl_map *isl_sched_node_extract_schedule(struct isl_sched_node *node); ++__isl_give isl_map *isl_sched_node_extract_schedule( ++ struct isl_sched_graph *graph, struct isl_sched_node *node); + +/* isl vec functions */ +int isl_inf_vec_get_size( isl_vec* vec); @@ -150,9 +151,9 @@ diff -u isl/include/isl/schedule.h isl/include/isl/schedule.h #if defined(__cplusplus) } #endif -unchanged: ---- isl-0.22/isl_options.c 2021-10-08 14:48:47.000000000 +0800 -+++ isl/isl_options.c 2021-10-08 23:20:41.249515618 +0800 +diff -Npur isl-0.22/isl_options.c isl/isl_options.c +--- isl-0.22/isl_options.c 2022-01-27 20:56:35.663289083 +0800 ++++ isl/isl_options.c 2022-01-27 20:59:20.509831256 +0800 @@ -228,6 +228,12 @@ ISL_ARG_BOOL(struct isl_options, print_s "print statistics for every isl_ctx") ISL_ARG_ULONG(struct isl_options, max_operations, 0, @@ -181,9 +182,9 @@ unchanged: +ISL_CTX_GET_BOOL_DEF(isl_options, struct isl_options, isl_options_args, + akg_influence_scheduler) +/* ======================= AKG influence patch -- end ======================= */ -unchanged: ---- isl-0.22/isl_options_private.h 2021-10-08 14:40:05.232984000 +0800 -+++ isl/isl_options_private.h 2021-10-08 23:20:41.249515618 +0800 +diff -Npur isl-0.22/isl_options_private.h isl/isl_options_private.h +--- isl-0.22/isl_options_private.h 2022-01-27 20:56:35.659289025 +0800 ++++ isl/isl_options_private.h 2022-01-27 20:59:20.509831256 +0800 @@ -71,6 +71,10 @@ struct isl_options { int print_stats; @@ -195,9 +196,9 @@ unchanged: }; #endif -diff -u isl/isl_scheduler.c isl/isl_scheduler.c ---- isl/isl_scheduler.c 2021-10-09 10:14:06.401347938 +0800 -+++ isl/isl_scheduler.c 2021-12-28 09:51:17.288000000 +0800 +diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c +--- isl-0.22/isl_scheduler.c 2022-01-27 20:56:35.659289025 +0800 ++++ isl/isl_scheduler.c 2022-01-27 21:04:35.000000000 +0800 @@ -50,6 +50,15 @@ */ @@ -211,10 +212,10 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c +int isl_influence_enabled = { 0 }; +/* ======================= AKG influence patch -- end ======================= */ + - /* Internal information about a node that is used during the construction - * of a schedule. - * space represents the original space in which the domain lives; -@@ -303,6 +312,12 @@ + /* Extract the linear part, i.e., the coefficients of the input variables + * and the local variables (if any), from the affine expression "ma". + */ +@@ -336,6 +345,12 @@ static int is_multi_edge_type(struct isl return is_condition(edge) || is_conditional_validity(edge); } @@ -227,7 +228,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c /* Internal information about the dependence graph used during * the construction of the schedule. * -@@ -395,6 +410,11 @@ +@@ -434,6 +449,11 @@ struct isl_sched_graph { int weak; int max_weight; @@ -239,24 +240,16 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c }; /* Initialize node_table based on the list of nodes. -@@ -757,6 +777,8 @@ - isl_hash_table_free(ctx, graph->edge_table[i]); +@@ -797,6 +817,8 @@ static void graph_free(isl_ctx *ctx, str isl_hash_table_free(ctx, graph->node_table); isl_basic_set_free(graph->lp); + isl_multi_union_pw_aff_free(graph->prefix); + if(isl_influence_enabled) + graph=isl_influence_sol_list_free(graph); } /* For each "set" on which this function is called, increment -@@ -3233,6 +3255,7 @@ - * In particular, the non-triviality region enforces that at least - * one of the linear combinations in the rows of node->indep is non-zero. - */ -+ - static __isl_give isl_vec *solve_lp(isl_ctx *ctx, struct isl_sched_graph *graph) - { - int i; -@@ -3251,8 +3274,49 @@ +@@ -3539,8 +3561,49 @@ static __isl_give isl_vec *solve_lp(isl_ graph->region[i].trivial = trivial; } lp = isl_basic_set_copy(graph->lp); @@ -306,7 +299,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c for (i = 0; i < graph->n; ++i) isl_mat_free(graph->region[i].trivial); return sol; -@@ -3315,10 +3379,15 @@ +@@ -3603,10 +3666,15 @@ static int update_schedule(struct isl_sc if (sol->size == 0) isl_die(sol->ctx, isl_error_internal, "no solution found", goto error); @@ -324,7 +317,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c for (i = 0; i < graph->n; ++i) { struct isl_sched_node *node = &graph->node[i]; int pos; -@@ -3351,6 +3420,13 @@ +@@ -3639,6 +3707,13 @@ static int update_schedule(struct isl_sc graph->n_row++; graph->n_total_row++; @@ -338,7 +331,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c return 0; error: -@@ -3979,6 +4055,10 @@ +@@ -4272,6 +4347,10 @@ static isl_stat extract_sub_graph(isl_ct sub->max_row = graph->max_row; sub->n_total_row = graph->n_total_row; sub->band_start = graph->band_start; @@ -349,7 +342,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c return isl_stat_ok; } -@@ -4008,6 +4088,11 @@ +@@ -4301,6 +4380,11 @@ static __isl_give isl_schedule_node *com { struct isl_sched_graph split = { 0 }; @@ -361,7 +354,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c if (extract_sub_graph(ctx, graph, node_pred, edge_pred, data, &split) < 0) goto error; -@@ -5407,6 +5492,25 @@ +@@ -5700,6 +5784,25 @@ static __isl_give isl_vec *compute_carry return NULL; lp = isl_basic_set_copy(graph->lp); @@ -387,11 +380,10 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c return non_neg_lexmin(graph, lp, n_edge, want_integral); } -@@ -6002,6 +6106,11 @@ +@@ -6295,6 +6398,10 @@ static isl_stat compute_schedule_wcc_ban int use_coincidence; int force_coincidence = 0; int check_conditional; -+ int coincidence_relaxed=0; +/* ====================== AKG influence patch -- start ====================== */ + const int akg_influence = isl_options_get_akg_influence_scheduler(ctx); + const int akg_debug = isl_options_get_akg_print_debug(ctx); @@ -399,7 +391,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c if (sort_sccs(graph) < 0) return isl_stat_error; -@@ -6013,6 +6122,13 @@ +@@ -6306,6 +6413,13 @@ static isl_stat compute_schedule_wcc_ban if (ctx->opt->schedule_outer_coincidence) force_coincidence = 1; @@ -413,7 +405,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c use_coincidence = has_coincidence; while (graph->n_row < graph->maxvar) { isl_vec *sol; -@@ -6025,19 +6141,28 @@ +@@ -6318,9 +6432,16 @@ static isl_stat compute_schedule_wcc_ban if (setup_lp(ctx, graph, use_coincidence) < 0) return isl_stat_error; sol = solve_lp(ctx, graph); @@ -430,19 +422,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c int empty = graph->n_total_row == graph->band_start; isl_vec_free(sol); - if (use_coincidence && (!force_coincidence || !empty)) { - use_coincidence = 0; -+ coincidence_relaxed = 1; - continue; - } - return isl_stat_ok; - } - coincident = !has_coincidence || use_coincidence; -+ - if (update_schedule(graph, sol, coincident) < 0) - return isl_stat_error; - -@@ -7699,6 +7824,10 @@ +@@ -7992,6 +8113,10 @@ static __isl_give isl_schedule_node *com if (graph->scc <= 1 || isl_options_get_schedule_whole_component(ctx)) return compute_schedule_wcc_whole(node, graph); @@ -453,7 +433,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c else return compute_schedule_wcc_clustering(node, graph); } -@@ -7786,7 +7915,10 @@ +@@ -8079,7 +8204,10 @@ static __isl_give isl_schedule_node *com return isl_schedule_node_free(node); } @@ -465,7 +445,10 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c return compute_component_schedule(node, graph, 1); return compute_schedule_wcc(node, graph); -@@ -7863,0 +7996,693 @@ +@@ -8154,3 +8282,697 @@ __isl_give isl_schedule *isl_union_set_c + + return isl_schedule_constraints_compute_schedule(sc); + } + +/* ====================== AKG influence patch -- start ====================== */ + @@ -477,8 +460,9 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c + return node_cst_coef_offset(node); +} + -+__isl_give isl_map *isl_sched_node_extract_schedule(struct isl_sched_node *node) { -+ return node_extract_schedule(node); ++__isl_give isl_map *isl_sched_node_extract_schedule( ++ struct isl_sched_graph *graph, struct isl_sched_node *node) { ++ return node_extract_schedule(graph, node); +} + +int isl_sched_node_get_nparam(const struct isl_sched_node *node) { @@ -749,7 +733,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c + isl_basic_set *bset = NULL; + for (int i = 0; i < graph->n && bset == NULL; ++i) { + struct isl_sched_node *node = isl_sched_graph_get_node(graph, i); -+ isl_map *ma = isl_sched_node_extract_schedule(node); ++ isl_map *ma = isl_sched_node_extract_schedule(graph, node); + const char *strstat = isl_map_get_tuple_name(ma, isl_dim_in); + if (strcmp(strstat, name) == 0) { + bset = graph->lp; @@ -1032,7 +1016,7 @@ diff -u isl/isl_scheduler.c isl/isl_scheduler.c + + for (int i = 0; i < graph->n; ++i) { + struct isl_sched_node *node = isl_sched_graph_get_node(graph, i); -+ isl_map *ma = isl_sched_node_extract_schedule(node); ++ isl_map *ma = isl_sched_node_extract_schedule(graph, node); + isl_influence_log("statement:\n"); + if (ma != NULL) { + isl_printer *p; diff --git a/third_party/patch/isl/isl.patch b/third_party/patch/isl/isl.patch index 28838f7a1e86b5d53bc8b7633d6cc5e503b34375..c7ff8b29be955429a8b720ad7cbd6ccc2b7a9118 100644 --- a/third_party/patch/isl/isl.patch +++ b/third_party/patch/isl/isl.patch @@ -1,6 +1,6 @@ diff -Npur isl-0.22/include/isl/aff.h isl/include/isl/aff.h --- isl-0.22/include/isl/aff.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/aff.h 2021-10-08 14:40:05.220984506 +0800 ++++ isl/include/isl/aff.h 2022-01-27 20:56:35.683289369 +0800 @@ -17,14 +17,19 @@ extern "C" { #endif @@ -320,7 +320,7 @@ diff -Npur isl-0.22/include/isl/aff.h isl/include/isl/aff.h __isl_overload diff -Npur isl-0.22/include/isl/aff_type.h isl/include/isl/aff_type.h --- isl-0.22/include/isl/aff_type.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/aff_type.h 2021-10-08 14:40:05.220984506 +0800 ++++ isl/include/isl/aff_type.h 2022-01-27 20:56:35.679289311 +0800 @@ -10,13 +10,13 @@ extern "C" { struct __isl_subclass(isl_multi_aff) __isl_subclass(isl_pw_aff) isl_aff; typedef struct isl_aff isl_aff; @@ -339,7 +339,7 @@ diff -Npur isl-0.22/include/isl/aff_type.h isl/include/isl/aff_type.h __isl_subclass(isl_union_pw_multi_aff) isl_union_pw_aff; diff -Npur isl-0.22/include/isl/ast_build.h isl/include/isl/ast_build.h --- isl-0.22/include/isl/ast_build.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/ast_build.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/ast_build.h 2022-01-27 20:56:35.679289311 +0800 @@ -67,10 +67,14 @@ __isl_null isl_ast_build *isl_ast_build_ __isl_give isl_ast_build *isl_ast_build_set_options( __isl_take isl_ast_build *build, @@ -357,7 +357,7 @@ diff -Npur isl-0.22/include/isl/ast_build.h isl/include/isl/ast_build.h __isl_give isl_ast_node *(*fn)(__isl_take isl_ast_node *node, diff -Npur isl-0.22/include/isl/ast.h isl/include/isl/ast.h --- isl-0.22/include/isl/ast.h 2019-10-01 05:34:48.000000000 +0800 -+++ isl/include/isl/ast.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/ast.h 2022-01-27 20:56:35.683289369 +0800 @@ -84,9 +84,11 @@ __isl_give isl_ast_expr *isl_ast_expr_op int pos); __isl_give isl_ast_expr *isl_ast_expr_get_op_arg(__isl_keep isl_ast_expr *expr, @@ -391,7 +391,7 @@ diff -Npur isl-0.22/include/isl/ast.h isl/include/isl/ast.h __isl_export diff -Npur isl-0.22/include/isl/constraint.h isl/include/isl/constraint.h --- isl-0.22/include/isl/constraint.h 2019-09-25 00:10:51.000000000 +0800 -+++ isl/include/isl/constraint.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/constraint.h 2022-01-27 20:56:35.679289311 +0800 @@ -37,6 +37,7 @@ __isl_give isl_constraint *isl_equality_ __isl_give isl_constraint *isl_inequality_alloc(__isl_take isl_local_space *ls); @@ -446,7 +446,7 @@ diff -Npur isl-0.22/include/isl/constraint.h isl/include/isl/constraint.h #if defined(__cplusplus) diff -Npur isl-0.22/include/isl/id.h isl/include/isl/id.h --- isl-0.22/include/isl/id.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/id.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/id.h 2022-01-27 20:56:35.679289311 +0800 @@ -17,6 +17,7 @@ ISL_DECLARE_EXPORTED_LIST_FN(id) ISL_DECLARE_MULTI(id) @@ -457,7 +457,7 @@ diff -Npur isl-0.22/include/isl/id.h isl/include/isl/id.h __isl_give isl_id *isl_id_alloc(isl_ctx *ctx, diff -Npur isl-0.22/include/isl/ilp.h isl/include/isl/ilp.h --- isl-0.22/include/isl/ilp.h 2019-09-25 00:10:51.000000000 +0800 -+++ isl/include/isl/ilp.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/ilp.h 2022-01-27 20:56:35.679289311 +0800 @@ -31,11 +31,15 @@ __isl_give isl_val *isl_set_max_val(__is __isl_give isl_multi_val *isl_union_set_min_multi_union_pw_aff( __isl_keep isl_union_set *uset, __isl_keep isl_multi_union_pw_aff *obj); @@ -476,7 +476,7 @@ diff -Npur isl-0.22/include/isl/ilp.h isl/include/isl/ilp.h diff -Npur isl-0.22/include/isl/list.h isl/include/isl/list.h --- isl-0.22/include/isl/list.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/list.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/list.h 2022-01-27 20:56:35.679289311 +0800 @@ -43,6 +43,7 @@ __isl_give isl_##EL##_list *isl_##EL##_l __isl_give isl_##EL##_list *isl_##EL##_list_insert( \ __isl_take isl_##EL##_list *list, unsigned pos, \ @@ -495,7 +495,7 @@ diff -Npur isl-0.22/include/isl/list.h isl/include/isl/list.h EXPORT \ diff -Npur isl-0.22/include/isl/local_space.h isl/include/isl/local_space.h --- isl-0.22/include/isl/local_space.h 2019-09-25 00:10:51.000000000 +0800 -+++ isl/include/isl/local_space.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/local_space.h 2022-01-27 20:56:35.679289311 +0800 @@ -10,11 +10,12 @@ extern "C" { #endif @@ -520,7 +520,7 @@ diff -Npur isl-0.22/include/isl/local_space.h isl/include/isl/local_space.h diff -Npur isl-0.22/include/isl/map.h isl/include/isl/map.h --- isl-0.22/include/isl/map.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/map.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/map.h 2022-01-27 20:56:35.679289311 +0800 @@ -34,6 +34,7 @@ isl_size isl_basic_map_total_dim(__isl_k isl_size isl_basic_map_dim(__isl_keep isl_basic_map *bmap, enum isl_dim_type type); @@ -675,7 +675,7 @@ diff -Npur isl-0.22/include/isl/map.h isl/include/isl/map.h } diff -Npur isl-0.22/include/isl/map_type.h isl/include/isl/map_type.h --- isl-0.22/include/isl/map_type.h 2019-10-01 05:34:48.000000000 +0800 -+++ isl/include/isl/map_type.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/map_type.h 2022-01-27 20:56:35.679289311 +0800 @@ -13,7 +13,7 @@ typedef struct isl_basic_map isl_basic_m ISL_DECLARE_LIST_TYPE(basic_map) struct __isl_subclass(isl_union_map) isl_map; @@ -701,7 +701,7 @@ diff -Npur isl-0.22/include/isl/map_type.h isl/include/isl/map_type.h } diff -Npur isl-0.22/include/isl/multi.h isl/include/isl/multi.h --- isl-0.22/include/isl/multi.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/multi.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/multi.h 2022-01-27 20:56:35.679289311 +0800 @@ -36,12 +36,17 @@ isl_size isl_multi_##BASE##_size(__isl_k __isl_export \ __isl_give isl_##BASE *isl_multi_##BASE##_get_at( \ @@ -744,7 +744,7 @@ diff -Npur isl-0.22/include/isl/multi.h isl/include/isl/multi.h enum isl_dim_type type, __isl_take isl_id *id); \ diff -Npur isl-0.22/include/isl/schedule.h isl/include/isl/schedule.h --- isl-0.22/include/isl/schedule.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/schedule.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/schedule.h 2022-01-27 20:56:35.683289369 +0800 @@ -17,9 +17,16 @@ extern "C" { struct __isl_export isl_schedule_constraints; typedef struct isl_schedule_constraints isl_schedule_constraints; @@ -762,9 +762,24 @@ diff -Npur isl-0.22/include/isl/schedule.h isl/include/isl/schedule.h isl_stat isl_options_set_schedule_max_constant_term(isl_ctx *ctx, int val); int isl_options_get_schedule_max_constant_term(isl_ctx *ctx); -@@ -104,6 +111,10 @@ __isl_give isl_union_map * +@@ -79,6 +86,10 @@ isl_schedule_constraints_set_conditional + __isl_null isl_schedule_constraints *isl_schedule_constraints_free( + __isl_take isl_schedule_constraints *sc); + ++__isl_export ++__isl_give isl_schedule_constraints *isl_schedule_constraints_set_prefix( ++ __isl_take isl_schedule_constraints *sc, ++ __isl_take isl_multi_union_pw_aff *prefix); + isl_ctx *isl_schedule_constraints_get_ctx( + __isl_keep isl_schedule_constraints *sc); + __isl_export +@@ -103,7 +114,14 @@ __isl_export + __isl_give isl_union_map * isl_schedule_constraints_get_conditional_validity_condition( __isl_keep isl_schedule_constraints *sc); ++__isl_export ++__isl_give isl_multi_union_pw_aff *isl_schedule_constraints_get_prefix( ++ __isl_keep isl_schedule_constraints *sc); +__isl_export +__isl_give isl_schedule_constraints *isl_schedule_constraints_intersect_domain( @@ -773,7 +788,17 @@ diff -Npur isl-0.22/include/isl/schedule.h isl/include/isl/schedule.h __isl_give isl_schedule_constraints *isl_schedule_constraints_apply( __isl_take isl_schedule_constraints *sc, __isl_take isl_union_map *umap); -@@ -138,12 +149,14 @@ __isl_export +@@ -119,6 +137,9 @@ void isl_schedule_constraints_dump(__isl + __isl_give char *isl_schedule_constraints_to_str( + __isl_keep isl_schedule_constraints *sc); + ++__isl_give isl_schedule_node *isl_schedule_node_schedule( ++ __isl_take isl_schedule_node *node, ++ __isl_take isl_schedule_constraints *sc); + __isl_export + __isl_give isl_schedule *isl_schedule_constraints_compute_schedule( + __isl_take isl_schedule_constraints *sc); +@@ -138,12 +159,14 @@ __isl_export __isl_give isl_union_map *isl_schedule_get_map(__isl_keep isl_schedule *sched); isl_ctx *isl_schedule_get_ctx(__isl_keep isl_schedule *sched); @@ -788,7 +813,7 @@ diff -Npur isl-0.22/include/isl/schedule.h isl/include/isl/schedule.h __isl_give isl_union_set *isl_schedule_get_domain( __isl_keep isl_schedule *schedule); -@@ -158,13 +171,16 @@ __isl_give isl_schedule *isl_schedule_ma +@@ -158,13 +181,16 @@ __isl_give isl_schedule *isl_schedule_ma __isl_give isl_schedule *isl_schedule_insert_context( __isl_take isl_schedule *schedule, __isl_take isl_set *context); @@ -807,7 +832,7 @@ diff -Npur isl-0.22/include/isl/schedule.h isl/include/isl/schedule.h __isl_give isl_schedule *isl_schedule_intersect_domain( diff -Npur isl-0.22/include/isl/schedule_node.h isl/include/isl/schedule_node.h --- isl-0.22/include/isl/schedule_node.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/schedule_node.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/schedule_node.h 2022-01-27 20:56:35.683289369 +0800 @@ -78,6 +78,7 @@ __isl_export isl_size isl_schedule_node_get_ancestor_child_position( __isl_keep isl_schedule_node *node, @@ -935,7 +960,7 @@ diff -Npur isl-0.22/include/isl/schedule_node.h isl/include/isl/schedule_node.h #endif diff -Npur isl-0.22/include/isl/set.h isl/include/isl/set.h --- isl-0.22/include/isl/set.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/set.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/set.h 2022-01-27 20:56:35.683289369 +0800 @@ -31,6 +31,7 @@ isl_size isl_basic_set_total_dim(__isl_k isl_size isl_basic_set_dim(__isl_keep isl_basic_set *bset, enum isl_dim_type type); @@ -1055,7 +1080,7 @@ diff -Npur isl-0.22/include/isl/set.h isl/include/isl/set.h __isl_give char *isl_basic_set_to_str(__isl_keep isl_basic_set *bset); diff -Npur isl-0.22/include/isl/space.h isl/include/isl/space.h --- isl-0.22/include/isl/space.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/space.h 2021-10-08 14:40:05.224984474 +0800 ++++ isl/include/isl/space.h 2022-01-27 20:56:35.679289311 +0800 @@ -26,6 +26,7 @@ __isl_give isl_space *isl_space_alloc(is unsigned nparam, unsigned n_in, unsigned n_out); __isl_give isl_space *isl_space_set_alloc(isl_ctx *ctx, @@ -1166,9 +1191,21 @@ diff -Npur isl-0.22/include/isl/space.h isl/include/isl/space.h isl_size isl_space_dim(__isl_keep isl_space *space, enum isl_dim_type type); __isl_export +diff -Npur isl-0.22/include/isl/stream.h isl/include/isl/stream.h +--- isl-0.22/include/isl/stream.h 2019-09-25 00:10:51.000000000 +0800 ++++ isl/include/isl/stream.h 2022-01-27 20:56:35.679289311 +0800 +@@ -76,6 +76,8 @@ enum isl_token_type isl_stream_register_ + struct isl_obj isl_stream_read_obj(__isl_keep isl_stream *s); + __isl_give isl_val *isl_stream_read_val(__isl_keep isl_stream *s); + __isl_give isl_multi_aff *isl_stream_read_multi_aff(__isl_keep isl_stream *s); ++__isl_give isl_multi_union_pw_aff *isl_stream_read_multi_union_pw_aff( ++ __isl_keep isl_stream *s); + __isl_give isl_map *isl_stream_read_map(__isl_keep isl_stream *s); + __isl_give isl_set *isl_stream_read_set(__isl_keep isl_stream *s); + __isl_give isl_pw_qpolynomial *isl_stream_read_pw_qpolynomial( diff -Npur isl-0.22/include/isl/stride_info.h isl/include/isl/stride_info.h --- isl-0.22/include/isl/stride_info.h 2019-09-25 00:10:51.000000000 +0800 -+++ isl/include/isl/stride_info.h 2021-10-08 14:40:05.228984442 +0800 ++++ isl/include/isl/stride_info.h 2022-01-27 20:56:35.679289311 +0800 @@ -12,11 +12,13 @@ extern "C" { #endif @@ -1186,7 +1223,7 @@ diff -Npur isl-0.22/include/isl/stride_info.h isl/include/isl/stride_info.h __isl_take isl_stride_info *si); diff -Npur isl-0.22/include/isl/union_map.h isl/include/isl/union_map.h --- isl-0.22/include/isl/union_map.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/union_map.h 2021-10-08 14:40:05.228984442 +0800 ++++ isl/include/isl/union_map.h 2022-01-27 20:56:35.679289311 +0800 @@ -29,6 +29,7 @@ __isl_overload __isl_give isl_union_map *isl_union_map_empty_ctx(isl_ctx *ctx); __isl_give isl_union_map *isl_union_map_empty_space( @@ -1246,7 +1283,7 @@ diff -Npur isl-0.22/include/isl/union_map.h isl/include/isl/union_map.h __isl_take isl_multi_union_pw_aff *mupa); diff -Npur isl-0.22/include/isl/union_set.h isl/include/isl/union_set.h --- isl-0.22/include/isl/union_set.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/include/isl/union_set.h 2021-10-08 14:40:05.228984442 +0800 ++++ isl/include/isl/union_set.h 2022-01-27 20:56:35.679289311 +0800 @@ -20,6 +20,7 @@ __isl_overload __isl_give isl_union_set *isl_union_set_empty_ctx(isl_ctx *ctx); __isl_give isl_union_set *isl_union_set_empty_space( @@ -1290,7 +1327,7 @@ diff -Npur isl-0.22/include/isl/union_set.h isl/include/isl/union_set.h isl_stat isl_union_set_foreach_point(__isl_keep isl_union_set *uset, diff -Npur isl-0.22/interface/cpp.cc isl/interface/cpp.cc --- isl-0.22/interface/cpp.cc 2019-11-02 02:05:27.000000000 +0800 -+++ isl/interface/cpp.cc 2021-10-08 14:40:05.228984442 +0800 ++++ isl/interface/cpp.cc 2022-01-27 20:56:35.683289369 +0800 @@ -253,6 +253,7 @@ void cpp_generator::print_class(ostream print_ptr_decl(os, clazz); print_downcast_decl(os, clazz); @@ -1401,7 +1438,7 @@ diff -Npur isl-0.22/interface/cpp.cc isl/interface/cpp.cc /* Rename method "name" in case the method name in the C++ bindings should not diff -Npur isl-0.22/interface/cpp.h isl/interface/cpp.h --- isl-0.22/interface/cpp.h 2019-11-02 02:05:27.000000000 +0800 -+++ isl/interface/cpp.h 2021-10-08 14:40:05.228984442 +0800 ++++ isl/interface/cpp.h 2022-01-27 20:56:35.683289369 +0800 @@ -50,6 +50,7 @@ private: const isl_class &super); void print_downcast_decl(ostream &os, const isl_class &clazz); @@ -1420,7 +1457,7 @@ diff -Npur isl-0.22/interface/cpp.h isl/interface/cpp.h void print_methods_impl(ostream &os, const isl_class &clazz); diff -Npur isl-0.22/isl_aff.c isl/isl_aff.c --- isl-0.22/isl_aff.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_aff.c 2021-10-08 14:40:05.228984442 +0800 ++++ isl/isl_aff.c 2022-01-27 20:56:35.663289083 +0800 @@ -57,6 +57,21 @@ #include @@ -1480,7 +1517,7 @@ diff -Npur isl-0.22/isl_aff.c isl/isl_aff.c diff -Npur isl-0.22/isl_ast_build.c isl/isl_ast_build.c --- isl-0.22/isl_ast_build.c 2019-09-25 00:10:51.000000000 +0800 -+++ isl/isl_ast_build.c 2021-10-14 13:21:00.908090659 +0800 ++++ isl/isl_ast_build.c 2022-01-27 20:56:35.663289083 +0800 @@ -142,6 +142,7 @@ __isl_give isl_ast_build *isl_ast_build_ build->options = isl_union_map_empty(isl_space_params_alloc(ctx, 0)); build->depth = n; @@ -1519,7 +1556,7 @@ diff -Npur isl-0.22/isl_ast_build.c isl/isl_ast_build.c * and that is (probably) not meaningful to any nested code generation. diff -Npur isl-0.22/isl_ast_build_private.h isl/isl_ast_build_private.h --- isl-0.22/isl_ast_build_private.h 2019-09-25 00:10:51.000000000 +0800 -+++ isl/isl_ast_build_private.h 2021-10-14 13:21:00.908090659 +0800 ++++ isl/isl_ast_build_private.h 2022-01-27 20:56:35.655288968 +0800 @@ -197,6 +197,7 @@ struct isl_ast_build { int n; enum isl_ast_loop_type *loop_type; @@ -1530,7 +1567,7 @@ diff -Npur isl-0.22/isl_ast_build_private.h isl/isl_ast_build_private.h __isl_give isl_ast_build *isl_ast_build_clear_local_info( diff -Npur isl-0.22/isl_ast_codegen.c isl/isl_ast_codegen.c --- isl-0.22/isl_ast_codegen.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_ast_codegen.c 2021-12-16 15:30:29.000000000 +0800 ++++ isl/isl_ast_codegen.c 2022-01-27 20:56:35.663289083 +0800 @@ -1517,8 +1517,14 @@ static __isl_give isl_ast_graft *create_ bounds = isl_ast_build_specialize_basic_set(sub_build, bounds); sub_build = isl_ast_build_set_loop_bounds(sub_build, @@ -1571,7 +1608,7 @@ diff -Npur isl-0.22/isl_ast_codegen.c isl/isl_ast_codegen.c if (empty < 0) diff -Npur isl-0.22/isl_box.c isl/isl_box.c --- isl-0.22/isl_box.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_box.c 2021-10-08 14:40:05.228984442 +0800 ++++ isl/isl_box.c 2022-01-27 20:56:35.659289025 +0800 @@ -302,6 +302,13 @@ static isl_stat compute_size_in_directio * Initialize the size with infinity and if no better size is found * then invalidate the box. Otherwise, set the offset and size @@ -1618,7 +1655,7 @@ diff -Npur isl-0.22/isl_box.c isl/isl_box.c box = isl_fixed_box_free(box); diff -Npur isl-0.22/isl_factorization.c isl/isl_factorization.c --- isl-0.22/isl_factorization.c 2019-09-25 00:10:51.000000000 +0800 -+++ isl/isl_factorization.c 2021-10-08 14:40:05.228984442 +0800 ++++ isl/isl_factorization.c 2022-01-27 20:56:35.655288968 +0800 @@ -18,8 +18,18 @@ #include #include @@ -1738,7 +1775,7 @@ diff -Npur isl-0.22/isl_factorization.c isl/isl_factorization.c +} diff -Npur isl-0.22/isl_factorization.h isl/isl_factorization.h --- isl-0.22/isl_factorization.h 2019-09-25 00:10:51.000000000 +0800 -+++ isl/isl_factorization.h 2021-10-08 14:40:05.228984442 +0800 ++++ isl/isl_factorization.h 2022-01-27 20:56:35.663289083 +0800 @@ -8,13 +8,14 @@ extern "C" { #endif @@ -1775,7 +1812,7 @@ diff -Npur isl-0.22/isl_factorization.h isl/isl_factorization.h #endif diff -Npur isl-0.22/isl_farkas.c isl/isl_farkas.c --- isl-0.22/isl_farkas.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_farkas.c 2021-10-08 14:40:05.228984442 +0800 ++++ isl/isl_farkas.c 2022-01-27 20:56:35.659289025 +0800 @@ -12,6 +12,9 @@ #include #include @@ -2380,7 +2417,7 @@ diff -Npur isl-0.22/isl_farkas.c isl/isl_farkas.c return NULL; diff -Npur isl-0.22/isl_map.c isl/isl_map.c --- isl-0.22/isl_map.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_map.c 2021-10-08 14:40:05.232984408 +0800 ++++ isl/isl_map.c 2022-01-27 20:56:35.659289025 +0800 @@ -3162,6 +3162,133 @@ __isl_give isl_set *isl_set_drop_constra return isl_map_drop_constraints_not_involving_dims(set, type, first, n); } @@ -2582,9 +2619,40 @@ diff -Npur isl-0.22/isl_map.c isl/isl_map.c /* If "bmap" obviously lies on a hyperplane where the given dimension * has a fixed value, then return that value. * Otherwise return NaN. +diff -Npur isl-0.22/isl_mat.c isl/isl_mat.c +--- isl-0.22/isl_mat.c 2019-09-25 00:10:51.000000000 +0800 ++++ isl/isl_mat.c 2022-01-27 20:56:35.683289369 +0800 +@@ -884,6 +884,18 @@ error: + return NULL; + } + ++/* Return a basis for the orthogonal complement of the space spanned ++ * by the rows of "M". ++ * That is, each of the rows of the result is orthogonal to each ++ * of the rows of "M". ++ * ++ * The complement is derived as the transpose of the right kernel. ++ */ ++__isl_give isl_mat *isl_mat_row_complement(__isl_take isl_mat *mat) ++{ ++ return isl_mat_transpose(isl_mat_right_kernel(mat)); ++} ++ + __isl_give isl_mat *isl_mat_lin_to_aff(__isl_take isl_mat *mat) + { + int i; +diff -Npur isl-0.22/isl_mat_private.h isl/isl_mat_private.h +--- isl-0.22/isl_mat_private.h 2019-11-02 02:05:27.000000000 +0800 ++++ isl/isl_mat_private.h 2022-01-27 20:56:35.663289083 +0800 +@@ -66,4 +66,5 @@ int isl_mat_get_element(__isl_keep isl_m + __isl_give isl_mat *isl_mat_set_element(__isl_take isl_mat *mat, + int row, int col, isl_int v); + ++__isl_give isl_mat *isl_mat_row_complement(__isl_take isl_mat *mat); + #endif diff -Npur isl-0.22/isl_multi_templ.c isl/isl_multi_templ.c --- isl-0.22/isl_multi_templ.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_multi_templ.c 2021-10-08 14:40:05.232984408 +0800 ++++ isl/isl_multi_templ.c 2022-01-27 20:56:35.659289025 +0800 @@ -258,6 +258,25 @@ __isl_give MULTI(BASE) *FN(FN(MULTI(BASE return FN(MULTI(BASE),set_at)(multi, pos, el); } @@ -2613,7 +2681,7 @@ diff -Npur isl-0.22/isl_multi_templ.c isl/isl_multi_templ.c * directly or through its domain. It therefore passes along both, diff -Npur isl-0.22/isl_options.c isl/isl_options.c --- isl-0.22/isl_options.c 2019-09-25 00:10:51.000000000 +0800 -+++ isl/isl_options.c 2021-10-08 14:48:47.000000000 +0800 ++++ isl/isl_options.c 2022-01-27 20:56:35.663289083 +0800 @@ -137,10 +137,19 @@ ISL_ARG_BOOL(struct isl_options, coalesc ISL_ARG_BOOL(struct isl_options, coalesce_preserve_locals, 0, "coalesce-preserve-locals", 0, @@ -2666,7 +2734,7 @@ diff -Npur isl-0.22/isl_options.c isl/isl_options.c ISL_CTX_GET_INT_DEF(isl_options, struct isl_options, isl_options_args, diff -Npur isl-0.22/isl_options_private.h isl/isl_options_private.h --- isl-0.22/isl_options_private.h 2019-09-25 00:10:51.000000000 +0800 -+++ isl/isl_options_private.h 2021-10-08 14:40:05.232984408 +0800 ++++ isl/isl_options_private.h 2022-01-27 20:56:35.659289025 +0800 @@ -34,9 +34,11 @@ struct isl_options { int convex; @@ -2681,7 +2749,7 @@ diff -Npur isl-0.22/isl_options_private.h isl/isl_options_private.h int schedule_outer_coincidence; diff -Npur isl-0.22/isl_polynomial.c isl/isl_polynomial.c --- isl-0.22/isl_polynomial.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_polynomial.c 2021-10-08 14:40:05.232984408 +0800 ++++ isl/isl_polynomial.c 2022-01-27 20:56:35.659289025 +0800 @@ -4756,6 +4756,32 @@ static __isl_give isl_pw_qpolynomial *co return isl_pw_qpolynomial_alloc(isl_set_from_basic_set(bset), qp); } @@ -2783,8 +2851,66 @@ diff -Npur isl-0.22/isl_polynomial.c isl/isl_polynomial.c return NULL; diff -Npur isl-0.22/isl_schedule_constraints.c isl/isl_schedule_constraints.c --- isl-0.22/isl_schedule_constraints.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_schedule_constraints.c 2021-10-08 14:40:05.232984408 +0800 -@@ -157,6 +157,19 @@ error: ++++ isl/isl_schedule_constraints.c 2022-01-27 20:56:35.663289083 +0800 +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -21,6 +22,10 @@ + * + * "context" specifies extra constraints on the parameters. + * ++ * "prefix" specifies an outer schedule within which the schedule ++ * should be computed. A zero-dimensional "prefix" means that ++ * there is no such outer schedule. ++ * + * "validity" constraints map domain elements i to domain elements + * that should be scheduled after i. (Hard constraint) + * "proximity" constraints map domain elements i to domains elements +@@ -42,6 +47,7 @@ struct isl_schedule_constraints { + isl_union_set *domain; + isl_set *context; + ++ isl_multi_union_pw_aff *prefix; + isl_union_map *constraint[isl_edge_last + 1]; + }; + +@@ -59,7 +65,8 @@ __isl_give isl_schedule_constraints *isl + + sc_copy->domain = isl_union_set_copy(sc->domain); + sc_copy->context = isl_set_copy(sc->context); +- if (!sc_copy->domain || !sc_copy->context) ++ sc_copy->prefix = isl_multi_union_pw_aff_copy(sc->prefix); ++ if (!sc_copy->domain || !sc_copy->context || !sc_copy->prefix) + return isl_schedule_constraints_free(sc_copy); + + for (i = isl_edge_first; i <= isl_edge_last; ++i) { +@@ -98,6 +105,11 @@ static __isl_give isl_schedule_constrain + space = isl_union_set_get_space(sc->domain); + if (!sc->context) + sc->context = isl_set_universe(isl_space_copy(space)); ++ if (!sc->prefix) { ++ isl_space *space_prefix; ++ space_prefix = isl_space_set_from_params(isl_space_copy(space)); ++ sc->prefix = isl_multi_union_pw_aff_zero(space_prefix); ++ } + empty = isl_union_map_empty(space); + for (i = isl_edge_first; i <= isl_edge_last; ++i) { + if (sc->constraint[i]) +@@ -108,7 +120,7 @@ static __isl_give isl_schedule_constrain + } + isl_union_map_free(empty); + +- if (!sc->domain || !sc->context) ++ if (!sc->domain || !sc->context || !sc->prefix) + return isl_schedule_constraints_free(sc); + + return sc; +@@ -157,6 +169,19 @@ error: return NULL; } @@ -2804,10 +2930,363 @@ diff -Npur isl-0.22/isl_schedule_constraints.c isl/isl_schedule_constraints.c /* Replace the context of "sc" by "context". */ __isl_give isl_schedule_constraints *isl_schedule_constraints_set_context( +@@ -237,6 +262,25 @@ isl_schedule_constraints_set_conditional + return sc; + } + ++/* Replace the schedule prefix of "sc" by "prefix". ++ */ ++__isl_give isl_schedule_constraints *isl_schedule_constraints_set_prefix( ++ __isl_take isl_schedule_constraints *sc, ++ __isl_take isl_multi_union_pw_aff *prefix) ++{ ++ if (!sc || !prefix) ++ goto error; ++ ++ isl_multi_union_pw_aff_free(sc->prefix); ++ sc->prefix = prefix; ++ ++ return sc; ++error: ++ isl_schedule_constraints_free(sc); ++ isl_multi_union_pw_aff_free(prefix); ++ return NULL; ++} ++ + __isl_null isl_schedule_constraints *isl_schedule_constraints_free( + __isl_take isl_schedule_constraints *sc) + { +@@ -247,6 +291,7 @@ __isl_null isl_schedule_constraints *isl + + isl_union_set_free(sc->domain); + isl_set_free(sc->context); ++ isl_multi_union_pw_aff_free(sc->prefix); + for (i = isl_edge_first; i <= isl_edge_last; ++i) + isl_union_map_free(sc->constraint[i]); + +@@ -335,6 +380,17 @@ isl_schedule_constraints_get_conditional + return isl_schedule_constraints_get(sc, isl_edge_condition); + } + ++/* Return the schedule prefix of "sc". ++ */ ++__isl_give isl_multi_union_pw_aff *isl_schedule_constraints_get_prefix( ++ __isl_keep isl_schedule_constraints *sc) ++{ ++ if (!sc) ++ return NULL; ++ ++ return isl_multi_union_pw_aff_copy(sc->prefix); ++} ++ + /* Add "c" to the constraints of type "type" in "sc". + */ + __isl_give isl_schedule_constraints *isl_schedule_constraints_add( +@@ -426,6 +482,7 @@ __isl_give isl_schedule_constraints *isl + __isl_take isl_schedule_constraints *sc, + __isl_take isl_union_map *umap) + { ++ int n; + enum isl_edge_type i; + + if (!sc || !umap) +@@ -439,7 +496,10 @@ __isl_give isl_schedule_constraints *isl + goto error; + } + sc->domain = isl_union_set_apply(sc->domain, umap); +- if (!sc->domain) ++ n = isl_multi_union_pw_aff_dim(sc->prefix, isl_dim_set); ++ sc->prefix = isl_multi_union_pw_aff_drop_dims(sc->prefix, ++ isl_dim_set, 0, n); ++ if (!sc->domain || !sc->prefix) + return isl_schedule_constraints_free(sc); + + return sc; +@@ -463,6 +523,7 @@ enum isl_sc_key { + isl_sc_key_proximity = isl_edge_proximity, + isl_sc_key_domain, + isl_sc_key_context, ++ isl_sc_key_prefix, + isl_sc_key_end + }; + +@@ -477,6 +538,7 @@ static char *key_str[] = { + [isl_sc_key_proximity] = "proximity", + [isl_sc_key_domain] = "domain", + [isl_sc_key_context] = "context", ++ [isl_sc_key_prefix] = "prefix", + }; + + #undef BASE +@@ -512,6 +574,25 @@ static __isl_give isl_printer *print_con + return p; + } + ++/* Print a key, value pair for the schedule prefix. ++ * ++ * If the schedule prefix is zero-dimensional, then ++ * it is not printed since a zero-dimensional prefix is the default. ++ */ ++static __isl_give isl_printer *print_prefix(__isl_take isl_printer *p, ++ __isl_keep isl_schedule_constraints *sc) ++{ ++ if (isl_multi_union_pw_aff_dim(sc->prefix, isl_dim_set) == 0) ++ return p; ++ ++ p = isl_printer_print_str(p, key_str[isl_sc_key_prefix]); ++ p = isl_printer_yaml_next(p); ++ p = isl_printer_print_multi_union_pw_aff(p, sc->prefix); ++ p = isl_printer_yaml_next(p); ++ ++ return p; ++} ++ + /* Print "sc" to "p" + * + * In particular, print the isl_schedule_constraints object as a YAML document. +@@ -540,6 +621,7 @@ __isl_give isl_printer *isl_printer_prin + p = print_constraint(p, sc, isl_edge_coincidence); + p = print_constraint(p, sc, isl_edge_condition); + p = print_constraint(p, sc, isl_edge_conditional_validity); ++ p = print_prefix(p, sc); + p = isl_printer_yaml_end_mapping(p); + + return p; +@@ -569,6 +651,10 @@ __isl_give isl_printer *isl_printer_prin + #define BASE union_map + #include "read_in_string_templ.c" + ++#undef BASE ++#define BASE multi_union_pw_aff ++#include "read_in_string_templ.c" ++ + /* Read an isl_schedule_constraints object from "s". + * + * Start off with an empty (invalid) isl_schedule_constraints object and +@@ -595,6 +681,7 @@ __isl_give isl_schedule_constraints *isl + isl_set *context; + isl_union_set *domain; + isl_union_map *constraints; ++ isl_multi_union_pw_aff *prefix; + + key = get_key(s); + if (isl_stream_yaml_next(s) < 0) +@@ -616,6 +703,12 @@ __isl_give isl_schedule_constraints *isl + if (!sc) + return NULL; + break; ++ case isl_sc_key_prefix: ++ prefix = read_multi_union_pw_aff(s); ++ sc = isl_schedule_constraints_set_prefix(sc, prefix); ++ if (!sc) ++ return NULL; ++ break; + case isl_sc_key_validity: + case isl_sc_key_coincidence: + case isl_sc_key_condition: +@@ -695,15 +788,20 @@ isl_schedule_constraints_align_params(__ + space = isl_space_align_params(space, + isl_union_map_get_space(sc->constraint[i])); + ++ space = isl_space_align_params(space, ++ isl_multi_union_pw_aff_get_space(sc->prefix)); ++ + for (i = isl_edge_first; i <= isl_edge_last; ++i) { + sc->constraint[i] = isl_union_map_align_params( + sc->constraint[i], isl_space_copy(space)); + if (!sc->constraint[i]) + space = isl_space_free(space); + } ++ sc->prefix = isl_multi_union_pw_aff_align_params(sc->prefix, ++ isl_space_copy(space)); + sc->context = isl_set_align_params(sc->context, isl_space_copy(space)); + sc->domain = isl_union_set_align_params(sc->domain, space); +- if (!sc->context || !sc->domain) ++ if (!sc->context || !sc->domain || !sc->prefix) + return isl_schedule_constraints_free(sc); + + return sc; +diff -Npur isl-0.22/isl_schedule_node.c isl/isl_schedule_node.c +--- isl-0.22/isl_schedule_node.c 2019-09-25 00:10:51.000000000 +0800 ++++ isl/isl_schedule_node.c 2022-01-27 20:56:35.663289083 +0800 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + + /* Create a new schedule node in the given schedule, point at the given + * tree with given ancestors and child positions. +@@ -2792,6 +2793,27 @@ __isl_give isl_schedule_node *isl_schedu + return node; + } + ++/* Replace the subtree that "pos" points to by the one that "node" points to. ++ */ ++__isl_give isl_schedule_node *isl_schedule_node_graft( ++ __isl_take isl_schedule_node *pos, __isl_take isl_schedule_node *node) ++{ ++ isl_schedule_tree *tree; ++ ++ if (!pos || !node) ++ goto error; ++ ++ tree = isl_schedule_tree_copy(node->tree); ++ pos = isl_schedule_node_graft_tree(pos, tree); ++ isl_schedule_node_free(node); ++ ++ return pos; ++error: ++ isl_schedule_node_free(pos); ++ isl_schedule_node_free(node); ++ return NULL; ++} ++ + /* Internal data structure for the group_ancestor callback. + * + * If "finished" is set, then we no longer need to modify +@@ -4852,6 +4874,85 @@ __isl_give isl_schedule_node *isl_schedu + return isl_schedule_node_ancestor(node1, n1 - i); + } + ++/* Reschedule the subtree that "node" points to using ++ * the schedule constraints "sc" in the case where "node" ++ * points to the root of the schedule tree. ++ * ++ * In this case, "node" does not contain any useful information and ++ * a schedule is constructed directly from "sc". ++ */ ++static __isl_give isl_schedule_node *complete_schedule( ++ __isl_take isl_schedule_node *node, ++ __isl_take isl_schedule_constraints *sc) ++{ ++ isl_schedule *schedule; ++ ++ isl_schedule_node_free(node); ++ ++ schedule = isl_schedule_constraints_compute_schedule(sc); ++ node = isl_schedule_get_root(schedule); ++ isl_schedule_free(schedule); ++ ++ return node; ++} ++ ++/* Restrict the domain of "sc" to the domain elements reaching "node". ++ * The original domain is required to include all those elements. ++ */ ++static __isl_give isl_schedule_constraints *restrict_domain( ++ __isl_take isl_schedule_constraints *sc, ++ __isl_keep isl_schedule_node *node) ++{ ++ isl_bool valid; ++ isl_union_set *node_domain, *sc_domain; ++ ++ node_domain = isl_schedule_node_get_domain(node); ++ sc_domain = isl_schedule_constraints_get_domain(sc); ++ valid = isl_union_set_is_subset(node_domain, sc_domain); ++ sc = isl_schedule_constraints_intersect_domain(sc, node_domain); ++ isl_union_set_free(sc_domain); ++ ++ if (valid < 0) ++ return isl_schedule_constraints_free(sc); ++ if (!valid) ++ isl_die(isl_schedule_node_get_ctx(node), isl_error_invalid, ++ "invalid schedule constraints domain", ++ return isl_schedule_constraints_free(sc)); ++ return sc; ++} ++ ++/* Reschedule the subtree that "node" points to using ++ * the schedule constraints "sc". ++ * ++ * Restrict the domain of "sc" to the domain elements at "node", ++ * compute a schedule taking into account the prefix schedule at "node" and ++ * replace the subtree by the result. ++ * ++ * If "node" points to the root of a schedule tree, then no part ++ * of the schedule tree survives and a fresh schedule is computed instead. ++ */ ++__isl_give isl_schedule_node *isl_schedule_node_schedule( ++ __isl_take isl_schedule_node *node, ++ __isl_take isl_schedule_constraints *sc) ++{ ++ isl_multi_union_pw_aff *prefix; ++ isl_schedule *schedule; ++ isl_schedule_node *root; ++ ++ if (isl_schedule_node_get_type(node) == isl_schedule_node_domain) ++ return complete_schedule(node, sc); ++ ++ sc = restrict_domain(sc, node); ++ ++ prefix = isl_schedule_node_get_prefix_schedule_multi_union_pw_aff(node); ++ sc = isl_schedule_constraints_set_prefix(sc, prefix); ++ schedule = isl_schedule_constraints_compute_schedule(sc); ++ root = isl_schedule_get_root(schedule); ++ isl_schedule_free(schedule); ++ ++ return isl_schedule_node_graft(node, isl_schedule_node_child(root, 0)); ++} ++ + /* Print "node" to "p". + */ + __isl_give isl_printer *isl_printer_print_schedule_node( +diff -Npur isl-0.22/isl_schedule_node_private.h isl/isl_schedule_node_private.h +--- isl-0.22/isl_schedule_node_private.h 2019-09-25 00:10:51.000000000 +0800 ++++ isl/isl_schedule_node_private.h 2022-01-27 20:56:35.655288968 +0800 +@@ -33,6 +33,8 @@ struct isl_schedule_node { + __isl_give isl_schedule_node *isl_schedule_node_alloc( + __isl_take isl_schedule *schedule, __isl_take isl_schedule_tree *tree, + __isl_take isl_schedule_tree_list *ancestors, int *child_pos); ++__isl_give isl_schedule_node *isl_schedule_node_graft( ++ __isl_take isl_schedule_node *pos, __isl_take isl_schedule_node *node); + __isl_give isl_schedule_node *isl_schedule_node_graft_tree( + __isl_take isl_schedule_node *pos, __isl_take isl_schedule_tree *tree); + diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c --- isl-0.22/isl_scheduler.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_scheduler.c 2021-10-08 14:53:44.000000000 +0800 -@@ -73,6 +73,7 @@ ++++ isl/isl_scheduler.c 2022-01-27 20:56:35.659289025 +0800 +@@ -50,6 +50,39 @@ + */ + + ++/* Extract the linear part, i.e., the coefficients of the input variables ++ * and the local variables (if any), from the affine expression "ma". ++ */ ++static __isl_give isl_mat *extract_linear(__isl_take isl_multi_aff *ma) ++{ ++ int i, n, n_param, n_var, n_div; ++ isl_ctx *ctx; ++ isl_mat *rows; ++ ++ ma = isl_multi_aff_align_divs(ma); ++ if (!ma) ++ return NULL; ++ ++ ctx = isl_multi_aff_get_ctx(ma); ++ n = isl_multi_aff_dim(ma, isl_dim_out); ++ n_param = isl_multi_aff_dim(ma, isl_dim_param); ++ n_var = isl_multi_aff_dim(ma, isl_dim_in); ++ n_div = 0; ++ if (n > 0) ++ n_div = isl_aff_dim(ma->u.p[0], isl_dim_div); ++ rows = isl_mat_alloc(ctx, n, n_var + n_div); ++ if (!rows) ++ goto error; ++ for (i = 0; i < n; ++i) ++ isl_seq_cpy(rows->row[i], ++ ma->u.p[i]->v->el + 1 + 1 + n_param, n_var + n_div); ++ isl_multi_aff_free(ma); ++ return rows; ++error: ++ isl_multi_aff_free(ma); ++ return NULL; ++} ++ + /* Internal information about a node that is used during the construction + * of a schedule. + * space represents the original space in which the domain lives; +@@ -73,6 +106,7 @@ * nvar is the dimension of the (compressed) domain * nparam is the number of parameters or 0 if we are not constructing * a parametric schedule @@ -2815,7 +3294,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c * * If compressed is set, then hull represents the constraints * that were used to derive the compression, while compress and -@@ -115,6 +116,7 @@ struct isl_sched_node { +@@ -115,6 +149,7 @@ struct isl_sched_node { int start; int nvar; int nparam; @@ -2823,7 +3302,345 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c int scc; int cluster; -@@ -1928,30 +1930,53 @@ static isl_size coef_var_offset(__isl_ke +@@ -316,6 +351,10 @@ static int is_multi_edge_type(struct isl + * is the original, uncompressed dependence relation, while + * the value is the dual of the compressed dependence relation. + * ++ * prefix is the schedule prefix specified by the user. ++ * This field may be NULL if no (non-trivial) schedule prefix ++ * was specified. ++ * + * n is the number of nodes + * node is the list of nodes + * maxvar is the maximal number of variables over all nodes +@@ -363,6 +402,8 @@ struct isl_sched_graph { + isl_map_to_basic_set *intra_hmap_param; + isl_map_to_basic_set *inter_hmap; + ++ isl_multi_union_pw_aff *prefix; ++ + struct isl_sched_node *node; + int n; + int maxvar; +@@ -755,6 +796,7 @@ static void graph_free(isl_ctx *ctx, str + isl_hash_table_free(ctx, graph->edge_table[i]); + isl_hash_table_free(ctx, graph->node_table); + isl_basic_set_free(graph->lp); ++ isl_multi_union_pw_aff_free(graph->prefix); + } + + /* For each "set" on which this function is called, increment +@@ -781,13 +823,17 @@ static isl_stat init_n_maxvar(__isl_take + * for each basic map in the dependences. + * Note that it is practically impossible to exhaust both + * the number of dependences and the number of variables. ++ * If any prefix schedule was specified, then the initial rows ++ * are initialized from this prefix. Since the prefix may be ++ * completely trivial, it needs to be taken into account separately. + */ + static isl_stat compute_max_row(struct isl_sched_graph *graph, + __isl_keep isl_schedule_constraints *sc) + { +- int n_edge; ++ int n_prefix, n_edge; + isl_stat r; + isl_union_set *domain; ++ isl_multi_union_pw_aff *prefix; + + graph->n = 0; + graph->maxvar = 0; +@@ -796,10 +842,15 @@ static isl_stat compute_max_row(struct i + isl_union_set_free(domain); + if (r < 0) + return isl_stat_error; ++ prefix = isl_schedule_constraints_get_prefix(sc); ++ n_prefix = isl_multi_union_pw_aff_dim(prefix, isl_dim_set); ++ isl_multi_union_pw_aff_free(prefix); ++ if (!prefix) ++ return isl_stat_error; + n_edge = isl_schedule_constraints_n_basic_map(sc); + if (n_edge < 0) + return isl_stat_error; +- graph->max_row = n_edge + graph->maxvar; ++ graph->max_row = n_prefix + n_edge + graph->maxvar; + + return isl_stat_ok; + } +@@ -1241,6 +1292,30 @@ error: + return isl_stat_error; + } + ++/* Return a map in the same space as that of "map" that relates ++ * the elements with equal schedule prefix. ++ * Use the original schedule prefix specified by the user and ++ * not the linear information extracted from it for the purpose ++ * of avoiding redundant rows in the generated schedule. ++ */ ++static __isl_give isl_map *extract_equal_prefix(struct isl_sched_graph *graph, ++ __isl_keep isl_map *map) ++{ ++ isl_space *space; ++ isl_union_map *umap; ++ isl_multi_union_pw_aff *prefix; ++ ++ space = isl_map_get_space(map); ++ map = isl_map_universe(isl_space_copy(space)); ++ umap = isl_union_map_from_map(map); ++ prefix = isl_multi_union_pw_aff_copy(graph->prefix); ++ umap = isl_union_map_eq_at_multi_union_pw_aff(umap, prefix); ++ map = isl_union_map_extract_map(umap, space); ++ isl_union_map_free(umap); ++ ++ return map; ++} ++ + /* Add a new node to the graph representing the given set. + * + * If any of the set variables is defined by an equality, then +@@ -1475,6 +1550,11 @@ static isl_stat skip_edge(__isl_take isl + * This ensures that there are no schedule constraints defined + * outside of these domains, while the scheduler no longer has + * any control over those outside parts. ++ * ++ * If a (non-trivial) prefix schedule was specified by the user, ++ * then only retain dependences between instances with equal ++ * prefix values. If the specified prefix schedule was incomplete, ++ * then this may result in the removal of all dependences. + */ + static isl_stat extract_edge(__isl_take isl_map *map, void *user) + { +@@ -1496,6 +1576,14 @@ static isl_stat extract_edge(__isl_take + } + } + ++ if (graph->prefix) { ++ isl_map *equal_prefix; ++ equal_prefix = extract_equal_prefix(graph, map); ++ if (tagged) ++ tagged = map_intersect_domains(tagged, equal_prefix); ++ map = isl_map_intersect(map, equal_prefix); ++ } ++ + src = find_domain_node(ctx, graph, map); + dst = find_range_node(ctx, graph, map); + +@@ -1551,6 +1639,206 @@ error: + return isl_stat_error; + } + ++/* Extract (a basis for) the purely linear part of "ma", ++ * i.e., the coefficients of the input variables but not the local variables. ++ * ++ * There may be linear combinations of the elements of "ma" ++ * that do not involve local variables, while the elements themselves ++ * do involve local variables. ++ * Perform Gaussian elimination to remove local variables from ++ * as many rows as possible and subsequently remove the remaining rows ++ * involving local variables as well as the columns corresponding ++ * to the local variables. ++ */ ++static __isl_give isl_mat *extract_pure_linear(__isl_take isl_multi_aff *ma) ++{ ++ int i, n, n_var, n_div; ++ isl_mat *rows; ++ ++ if (!ma) ++ return NULL; ++ ++ n_var = isl_multi_aff_dim(ma, isl_dim_in); ++ ++ rows = extract_linear(ma); ++ rows = isl_mat_reverse_gauss(rows); ++ ++ if (!rows) ++ return NULL; ++ ++ n = isl_mat_rows(rows); ++ n_div = isl_mat_cols(rows) - n_var; ++ for (i = n - 1; i >= 0; --i) ++ if (isl_seq_first_non_zero(rows->row[i] + n_var, n_div) == -1) ++ break; ++ rows = isl_mat_drop_rows(rows, i + 1, n - (i + 1)); ++ rows = isl_mat_drop_cols(rows, n_var, n_div); ++ ++ return rows; ++} ++ ++/* Extend "complement" with the complement of the purely linear part of "ma". ++ */ ++static isl_stat extend_prefix_complement(__isl_take isl_set *dom, ++ __isl_take isl_multi_aff *ma, void *user) ++{ ++ isl_mat *complement_ma; ++ isl_mat **complement = user; ++ ++ isl_set_free(dom); ++ ++ complement_ma = isl_mat_row_complement(extract_pure_linear(ma)); ++ ++ *complement = isl_mat_concat(*complement, complement_ma); ++ ++ return *complement ? isl_stat_ok : isl_stat_error; ++} ++ ++/* Extract a linear prefix schedule from "pma" that is valid ++ * for all pieces. ++ * In particular, if there are multiple pieces, then the result ++ * contains linear combinations that have a fixed value in all pieces. ++ * That is, if there is a direction that is not fixed in one or more pieces, ++ * then it is also not fixed by the entire piecewise expression. ++ * A direction that is not fixed needs to have a component along ++ * the orthogonal complement of the fixed directions. ++ * Collect these orthogonal complements over all pieces and ++ * compute the complement of the result to obtain the desired directions. ++ * ++ * If "pma" is empty (which indicates a missing, and therefore invalid, ++ * prefix schedule), then the result will contain a basis for all directions, ++ * being the complement of an empty complement. ++ */ ++static __isl_give isl_mat *extract_prefix_pw_multi_aff( ++ __isl_keep isl_pw_multi_aff *pma) ++{ ++ int nvar; ++ isl_ctx *ctx; ++ isl_mat *complement; ++ ++ if (!pma) ++ return NULL; ++ ++ ctx = isl_pw_multi_aff_get_ctx(pma); ++ nvar = isl_pw_multi_aff_dim(pma, isl_dim_in); ++ complement = isl_mat_alloc(ctx, 0, nvar); ++ ++ if (isl_pw_multi_aff_foreach_piece(pma, &extend_prefix_complement, ++ &complement) < 0) ++ complement = isl_mat_free(complement); ++ ++ return isl_mat_row_complement(complement); ++} ++ ++/* Extract a prefix schedule for "node" from "mupa" and add ++ * it to node->sched. ++ * ++ * "mupa" is formulated in terms of the original (uncompressed) spaces, ++ * while node->sched is formulated in terms of the potentially compressed ++ * space. If "node" is compressed, then the expression corresponding ++ * to "node" therefore needs to be transformed first. ++ * ++ * The prefix stored in node->sched is only used to avoid linearly ++ * dependent schedule rows from being generated. Only the linear ++ * part of the prefix is therefore relevant. Use zero for ++ * the coefficients of the constant term and the parameters. ++ * The extracted linear part may have fewer rows than "mupa", ++ * either because of linear dependences or because some element ++ * of "mupa" involve local variables. ++ * Extend the number of rows of the linear part to the number ++ * of elements in "mupa" to ensure that all nodes have the same ++ * number of rows. ++ * ++ * If "mupa" does not contain a prefix schedule for "node", ++ * then it is invalid. In the current implementation, this will ++ * cause the scheduler to not construct any further schedule rows ++ * for "node". ++ */ ++static isl_stat extract_prefix(struct isl_sched_node *node, ++ __isl_keep isl_multi_union_pw_aff *mupa) ++{ ++ isl_space *space; ++ isl_multi_pw_aff *mpa; ++ isl_pw_multi_aff *pma; ++ isl_mat *prefix; ++ int n, n_prefix; ++ ++ if (!mupa) ++ return isl_stat_error; ++ ++ space = isl_space_copy(node->space); ++ mpa = isl_multi_union_pw_aff_extract_multi_pw_aff(mupa, space); ++ ++ if (node->compress) ++ mpa = isl_multi_pw_aff_pullback_pw_multi_aff(mpa, ++ isl_pw_multi_aff_copy(node->decompress)); ++ ++ pma = isl_pw_multi_aff_from_multi_pw_aff(mpa); ++ ++ prefix = extract_prefix_pw_multi_aff(pma); ++ ++ prefix = isl_mat_insert_zero_cols(prefix, 0, 1 + node->nparam); ++ n_prefix = isl_multi_union_pw_aff_dim(mupa, isl_dim_set); ++ n = isl_mat_rows(prefix); ++ prefix = isl_mat_add_zero_rows(prefix, n_prefix - n); ++ node->sched = isl_mat_concat(node->sched, prefix); ++ ++ isl_pw_multi_aff_free(pma); ++ ++ if (!node->sched) ++ return isl_stat_error; ++ ++ return isl_stat_ok; ++} ++ ++/* Check if any (non-trivial) prefix schedule was specified in "sc". ++ * If so, store a copy in "graph" for later simplification ++ * of dependence relations and extract the linear parts ++ * in the respective nodes. ++ * These linear parts are considered as an initial outer band. ++ * Their only effect is to try and prevent rows in the generated schedule ++ * from being linear combinations of the prefix. ++ * ++ * Since the prefix schedule cannot be assumed to be linearly ++ * independent on all nodes, graph->n_row is not incremented. ++ * Note that the ranks of the nodes will get updated regardless and ++ * graph->maxvar is computed based on these ranks. The test for ++ * whether more schedule rows are required in compute_schedule_wcc ++ * therefore does take the prefix into account. ++ * ++ * The prefix schedule specified by the user is required to ++ * be complete on the domain. An invalid prefix will result ++ * in nodes being essentially removed from consideration. ++ */ ++static isl_stat handle_prefix(struct isl_sched_graph *graph, ++ __isl_keep isl_schedule_constraints *sc) ++{ ++ int i; ++ int n; ++ isl_multi_union_pw_aff *mupa; ++ ++ mupa = isl_schedule_constraints_get_prefix(sc); ++ if (!mupa) ++ return isl_stat_error; ++ n = isl_multi_union_pw_aff_dim(mupa, isl_dim_set); ++ if (n == 0) { ++ isl_multi_union_pw_aff_free(mupa); ++ return isl_stat_ok; ++ } ++ ++ graph->prefix = mupa; ++ ++ for (i = 0; i < graph->n; ++i) { ++ if (extract_prefix(&graph->node[i], mupa) < 0) ++ return isl_stat_error; ++ } ++ ++ graph->n_total_row = n; ++ graph->band_start = graph->n_total_row; ++ ++ return isl_stat_ok; ++} ++ + /* Initialize the schedule graph "graph" from the schedule constraints "sc". + * + * The context is included in the domain before the nodes of +@@ -1598,6 +1886,8 @@ static isl_stat graph_init(struct isl_sc + return isl_stat_error; + if (graph_init_table(ctx, graph) < 0) + return isl_stat_error; ++ if (handle_prefix(graph, sc) < 0) ++ return isl_stat_error; + for (i = isl_edge_first; i <= isl_edge_last; ++i) { + isl_size n; + +@@ -1928,30 +2218,53 @@ static isl_size coef_var_offset(__isl_ke return offset; } @@ -2881,7 +3698,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c } /* Return the offset of the coefficients of the variables of "node" -@@ -1968,14 +1993,57 @@ static int node_var_coef_offset(struct i +@@ -1968,14 +2281,57 @@ static int node_var_coef_offset(struct i } /* Return the position of the pair of variables encoding @@ -2941,7 +3758,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c } /* Construct an isl_dim_map for mapping constraints on coefficients -@@ -1991,9 +2059,6 @@ static int node_var_coef_pos(struct isl_ +@@ -1991,9 +2347,6 @@ static int node_var_coef_pos(struct isl_ * (0, -c_i_x^+ + c_i_x^-) if s = -1 or * (0, 0, c_i_x^+ - c_i_x^-) if s = 1 and * (0, 0, -c_i_x^+ + c_i_x^-) if s = -1. @@ -2951,7 +3768,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c * * The caller can extend the mapping to also map the other coefficients * (and therefore not plug in 0). -@@ -2002,7 +2067,6 @@ static __isl_give isl_dim_map *intra_dim +@@ -2002,7 +2355,6 @@ static __isl_give isl_dim_map *intra_dim struct isl_sched_graph *graph, struct isl_sched_node *node, int offset, int s) { @@ -2959,7 +3776,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c isl_size total; isl_dim_map *dim_map; -@@ -2010,10 +2074,8 @@ static __isl_give isl_dim_map *intra_dim +@@ -2010,10 +2362,8 @@ static __isl_give isl_dim_map *intra_dim if (!node || total < 0) return NULL; @@ -2971,7 +3788,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c return dim_map; } -@@ -2032,9 +2094,6 @@ static __isl_give isl_dim_map *intra_dim +@@ -2032,9 +2382,6 @@ static __isl_give isl_dim_map *intra_dim * -(c_i_x^+ - c_i_x^-), c_j_x^+ - c_j_x^-) if s = 1 and * (-c_j_0 + c_i_0, -c_j_n + c_i_n, * c_i_x^+ - c_i_x^-, -(c_j_x^+ - c_j_x^-)) if s = -1. @@ -2981,7 +3798,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c * * The caller can further extend the mapping. */ -@@ -2056,19 +2115,13 @@ static __isl_give isl_dim_map *inter_dim +@@ -2056,19 +2403,13 @@ static __isl_give isl_dim_map *inter_dim isl_dim_map_range(dim_map, pos, 0, 0, 0, 1, s); pos = node_par_coef_offset(dst); isl_dim_map_range(dim_map, pos, 1, 1, 1, dst->nparam, s); @@ -3003,7 +3820,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c return dim_map; } -@@ -2717,7 +2770,7 @@ static int count_bound_coefficient_const +@@ -2717,7 +3058,7 @@ static int count_bound_coefficient_const return 0; for (i = 0; i < graph->n; ++i) @@ -3012,7 +3829,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c return 0; } -@@ -2788,6 +2841,17 @@ static isl_stat node_add_coefficient_con +@@ -2788,6 +3129,17 @@ static isl_stat node_add_coefficient_con if (isl_int_is_neg(node->max->el[i])) continue; @@ -3030,7 +3847,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c isl_int_set_si(ineq->el[pos], 1); isl_int_set_si(ineq->el[pos + 1], -1); isl_int_set(ineq->el[0], node->max->el[i]); -@@ -2842,6 +2906,63 @@ static isl_stat add_bound_coefficient_co +@@ -2842,6 +3194,63 @@ static isl_stat add_bound_coefficient_co return isl_stat_ok; } @@ -3094,7 +3911,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c /* Add a constraint to graph->lp that equates the value at position * "sum_pos" to the sum of the "n" values starting at "first". */ -@@ -2916,7 +3037,7 @@ static isl_stat add_var_sum_constraint(s +@@ -2916,7 +3325,7 @@ static isl_stat add_var_sum_constraint(s struct isl_sched_node *node = &graph->node[i]; int pos = 1 + node_var_coef_offset(node); @@ -3103,7 +3920,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c isl_int_set_si(graph->lp->eq[k][pos + j], 1); } -@@ -2975,7 +3096,7 @@ static isl_stat setup_lp(isl_ctx *ctx, s +@@ -2975,7 +3384,7 @@ static isl_stat setup_lp(isl_ctx *ctx, s if (node_update_vmap(node) < 0) return isl_stat_error; node->start = total; @@ -3112,7 +3929,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c } if (count_constraints(graph, &n_eq, &n_ineq, use_coincidence) < 0) -@@ -2984,6 +3105,9 @@ static isl_stat setup_lp(isl_ctx *ctx, s +@@ -2984,6 +3393,9 @@ static isl_stat setup_lp(isl_ctx *ctx, s return isl_stat_error; if (count_bound_coefficient_constraints(ctx, graph, &n_eq, &n_ineq) < 0) return isl_stat_error; @@ -3122,7 +3939,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c space = isl_space_set_alloc(ctx, 0, total); isl_basic_set_free(graph->lp); -@@ -3001,6 +3125,8 @@ static isl_stat setup_lp(isl_ctx *ctx, s +@@ -3001,6 +3413,8 @@ static isl_stat setup_lp(isl_ctx *ctx, s return isl_stat_error; if (add_bound_coefficient_constraints(ctx, graph) < 0) return isl_stat_error; @@ -3131,7 +3948,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c if (add_all_validity_constraints(graph, use_coincidence) < 0) return isl_stat_error; if (add_all_proximity_constraints(graph, use_coincidence) < 0) -@@ -3059,14 +3185,16 @@ static int needs_row(struct isl_sched_gr +@@ -3059,14 +3473,16 @@ static int needs_row(struct isl_sched_gr /* Construct a non-triviality region with triviality directions * corresponding to the rows of "indep". @@ -3153,7 +3970,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c { isl_ctx *ctx; isl_mat *mat; -@@ -3079,14 +3207,19 @@ static __isl_give isl_mat *construct_tri +@@ -3079,14 +3495,19 @@ static __isl_give isl_mat *construct_tri return NULL; ctx = isl_mat_get_ctx(indep); @@ -3177,7 +3994,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c } } -@@ -3112,7 +3245,7 @@ static __isl_give isl_vec *solve_lp(isl_ +@@ -3112,7 +3533,7 @@ static __isl_give isl_vec *solve_lp(isl_ graph->region[i].pos = node_var_coef_offset(node); if (needs_row(graph, node)) @@ -3186,7 +4003,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c else trivial = isl_mat_zero(ctx, 0, 0); graph->region[i].trivial = trivial; -@@ -3130,8 +3263,9 @@ static __isl_give isl_vec *solve_lp(isl_ +@@ -3130,8 +3551,9 @@ static __isl_give isl_vec *solve_lp(isl_ * Each schedule coefficient c_i_x is represented as the difference * between two non-negative variables c_i_x^+ - c_i_x^-. * The c_i_x^- appear before their c_i_x^+ counterpart. @@ -3198,7 +4015,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c * * Return c_i_x = c_i_x^+ - c_i_x^- */ -@@ -3148,10 +3282,14 @@ static __isl_give isl_vec *extract_var_c +@@ -3148,10 +3570,14 @@ static __isl_give isl_vec *extract_var_c if (!csol) return NULL; @@ -3217,7 +4034,110 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c return csol; } -@@ -4412,7 +4550,7 @@ static isl_stat setup_carry_lp(isl_ctx * +@@ -3306,14 +3732,17 @@ static __isl_give isl_multi_aff *node_ex + * The result is defined over the uncompressed node domain. + */ + static __isl_give isl_multi_aff *node_extract_schedule_multi_aff( +- struct isl_sched_node *node) ++ struct isl_sched_graph *graph, struct isl_sched_node *node) + { + isl_size nrow; ++ int start; + + nrow = isl_mat_rows(node->sched); + if (nrow < 0) + return NULL; +- return node_extract_partial_schedule_multi_aff(node, 0, nrow); ++ start = graph->band_start; ++ nrow -= start; ++ return node_extract_partial_schedule_multi_aff(node, start, nrow); + } + + /* Convert node->sched into a map and return this map. +@@ -3322,12 +3751,13 @@ static __isl_give isl_multi_aff *node_ex + * whenever node->sched is updated. + * It is defined over the uncompressed node domain. + */ +-static __isl_give isl_map *node_extract_schedule(struct isl_sched_node *node) ++static __isl_give isl_map *node_extract_schedule( ++ struct isl_sched_graph *graph, struct isl_sched_node *node) + { + if (!node->sched_map) { + isl_multi_aff *ma; + +- ma = node_extract_schedule_multi_aff(node); ++ ma = node_extract_schedule_multi_aff(graph, node); + node->sched_map = isl_map_from_multi_aff(ma); + } + +@@ -3342,13 +3772,13 @@ static __isl_give isl_map *node_extract_ + * This is not the most efficient way, but this shouldn't be a critical + * operation. + */ +-static __isl_give isl_map *specializer(struct isl_sched_node *src, +- struct isl_sched_node *dst) ++static __isl_give isl_map *specializer(struct isl_sched_graph *graph, ++ struct isl_sched_node *src, struct isl_sched_node *dst) + { + isl_map *src_sched, *dst_sched; + +- src_sched = node_extract_schedule(src); +- dst_sched = node_extract_schedule(dst); ++ src_sched = node_extract_schedule(graph, src); ++ dst_sched = node_extract_schedule(graph, dst); + return isl_map_apply_range(src_sched, isl_map_reverse(dst_sched)); + } + +@@ -3386,7 +3816,7 @@ static isl_stat update_edge(isl_ctx *ctx + int empty; + isl_map *id; + +- id = specializer(edge->src, edge->dst); ++ id = specializer(graph, edge->src, edge->dst); + edge->map = isl_map_intersect(edge->map, isl_map_copy(id)); + if (!edge->map) + goto error; +@@ -3460,7 +3890,8 @@ static int range_intersects(__isl_keep i + * + * In other words, is the condition false? + */ +-static int is_condition_false(struct isl_sched_edge *edge) ++static int is_condition_false(struct isl_sched_graph *graph, ++ struct isl_sched_edge *edge) + { + isl_union_map *umap; + isl_map *map, *sched, *test; +@@ -3475,9 +3906,9 @@ static int is_condition_false(struct isl + umap = isl_union_set_unwrap(isl_union_map_domain(umap)); + map = isl_map_from_union_map(umap); + +- sched = node_extract_schedule(edge->src); ++ sched = node_extract_schedule(graph, edge->src); + map = isl_map_apply_domain(map, sched); +- sched = node_extract_schedule(edge->dst); ++ sched = node_extract_schedule(graph, edge->dst); + map = isl_map_apply_range(map, sched); + + test = isl_map_identity(isl_map_get_space(map)); +@@ -3561,7 +3992,7 @@ static int update_edges(isl_ctx *ctx, st + continue; + if (is_local(&graph->edge[i])) + continue; +- local = is_condition_false(&graph->edge[i]); ++ local = is_condition_false(graph, &graph->edge[i]); + if (local < 0) + goto error; + if (local) +@@ -3699,7 +4130,7 @@ static isl_stat copy_nodes(struct isl_sc + dst->node[j].nvar = src->node[i].nvar; + dst->node[j].nparam = src->node[i].nparam; + dst->node[j].sched = isl_mat_copy(src->node[i].sched); +- dst->node[j].sched_map = isl_map_copy(src->node[i].sched_map); ++ dst->node[j].sched_map = NULL; + dst->node[j].coincident = src->node[i].coincident; + dst->node[j].sizes = isl_multi_val_copy(src->node[i].sizes); + dst->node[j].bounds = isl_basic_set_copy(src->node[i].bounds); +@@ -4412,7 +4843,7 @@ static isl_stat setup_carry_lp(isl_ctx * for (i = 0; i < graph->n; ++i) { struct isl_sched_node *node = &graph->node[graph->sorted[i]]; node->start = total; @@ -3226,7 +4146,16 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c } if (count_all_constraints(intra, inter, &n_eq, &n_ineq) < 0) -@@ -5806,6 +5944,17 @@ static __isl_give isl_schedule_node *com +@@ -5676,7 +6107,7 @@ static int has_adjacent_true_conditions( + + set_local(&graph->edge[i]); + +- local = is_condition_false(&graph->edge[i]); ++ local = is_condition_false(graph, &graph->edge[i]); + if (local < 0) + return -1; + if (!local) +@@ -5806,6 +6237,17 @@ static __isl_give isl_schedule_node *com return isl_schedule_node_free(node); if (isl_options_get_schedule_outer_coincidence(ctx)) return carry_coincidence(node, graph); @@ -3244,7 +4173,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c return carry_dependences(node, graph); } -@@ -5846,7 +5995,7 @@ static __isl_give isl_schedule_node *com +@@ -5846,7 +6288,7 @@ static __isl_give isl_schedule_node *com * Since there are only a finite number of dependences, * there will only be a finite number of iterations. */ @@ -3253,7 +4182,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c struct isl_sched_graph *graph) { int has_coincidence; -@@ -5907,6 +6056,67 @@ static isl_stat compute_schedule_wcc_ban +@@ -5907,6 +6349,67 @@ static isl_stat compute_schedule_wcc_ban return isl_stat_ok; } @@ -3323,7 +4252,7 @@ diff -Npur isl-0.22/isl_scheduler.c isl/isl_scheduler.c * diff -Npur isl-0.22/isl_space.c isl/isl_space.c --- isl-0.22/isl_space.c 2019-11-02 02:05:27.000000000 +0800 -+++ isl/isl_space.c 2021-10-08 14:40:05.232984408 +0800 ++++ isl/isl_space.c 2022-01-27 20:56:35.663289083 +0800 @@ -599,6 +599,20 @@ error: return NULL; }