Ai
21 Star 49 Fork 0

Gitee 极速下载/julia-language

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
此仓库是为了提升国内下载速度的镜像仓库,每日同步一次。 原始仓库: https://github.com/JuliaLang/julia
克隆/下载
aotcompile.cpp 112.93 KB
一键复制 编辑 原始数据 按行查看 历史
Yichao Yu 提交于 2025-12-12 02:50 +08:00 . (Mostly) Trivial part of LLVM 21 support (#60356)
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660
// This file is a part of Julia. License is MIT: https://julialang.org/license
#include "llvm-version.h"
#include "platform.h"
// target support
#include <llvm/TargetParser/Triple.h>
#include "llvm/Support/CodeGen.h"
#include <llvm/ADT/Statistic.h>
#include <llvm/Analysis/TargetLibraryInfo.h>
#include <llvm/Analysis/TargetTransformInfo.h>
#include <llvm/IR/DataLayout.h>
#include <llvm/MC/TargetRegistry.h>
#include <llvm/Target/TargetMachine.h>
// analysis passes
#include <llvm/Analysis/Passes.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/PassManager.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Transforms/Utils/ModuleUtils.h>
#include <llvm/Passes/PassBuilder.h>
#include <llvm/Passes/PassPlugin.h>
#if defined(USE_POLLY)
#include <polly/RegisterPasses.h>
#include <polly/LinkAllPasses.h>
#include <polly/CodeGen/CodegenCleanup.h>
#if defined(USE_POLLY_ACC)
#include <polly/Support/LinkGPURuntime.h>
#endif
#endif
// for outputting code
#include <llvm/Bitcode/BitcodeWriter.h>
#include <llvm/Bitcode/BitcodeWriterPass.h>
#include <llvm/Bitcode/BitcodeReader.h>
#include "llvm/Object/ArchiveWriter.h"
#include <llvm/IR/IRPrintingPasses.h>
#include <llvm/IR/LegacyPassManagers.h>
#include <llvm/Transforms/Utils/Cloning.h>
#include <llvm/Support/FormatAdapters.h>
#include <llvm/Linker/Linker.h>
using namespace llvm;
#include <zstd.h>
#include "jitlayers.h"
#include "serialize.h"
#include "julia_assert.h"
#include "processor.h"
#define DEBUG_TYPE "julia_aotcompile"
STATISTIC(CreateNativeCalls, "Number of jl_create_native calls made");
STATISTIC(CreateNativeMethods, "Number of methods compiled for jl_create_native");
STATISTIC(CreateNativeMax, "Max number of methods compiled at once for jl_create_native");
STATISTIC(CreateNativeGlobals, "Number of globals compiled for jl_create_native");
// Mark a defined global as DLL-exported when targeting a COFF object format.
// Declarations, and every global on non-COFF targets, are left untouched.
//   G - the global value to (possibly) export; must have external linkage
//   T - the target triple that decides whether COFF rules apply
static void addComdat(GlobalValue *G, Triple &T)
{
    if (!T.isOSBinFormatCOFF() || G->isDeclaration())
        return;
    // this is the IR-level equivalent of __declspec(dllexport)
    assert(G->hasExternalLinkage() && "Cannot set DLLExport on non-external linkage!");
    G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
}
// Bundle of everything the jl_get_*_impl accessors below hand out for one
// native-code compilation result (passed around as an opaque `void *native_code`).
typedef struct {
// the module holding the generated code (exposed by jl_get_llvm_module_impl)
orc::ThreadSafeModule M;
// function table, indexed by the ids stored in jl_fvar_map (see jl_get_llvm_function_impl)
SmallVector<GlobalValue*, 0> jl_sysimg_fvars;
// global variables managed by the compiler (see jl_get_llvm_gvs_impl)
SmallVector<GlobalValue*, 0> jl_sysimg_gvars;
// code instance -> (func_idx, specfunc_idx) pair reported by jl_get_function_id_impl
std::map<jl_code_instance_t*, std::tuple<uint32_t, uint32_t>> jl_fvar_map;
// initializer values of the managed globals (see jl_get_llvm_gv_inits_impl)
SmallVector<void*, 0> jl_value_to_llvm;
// code instances copied out by jl_get_llvm_external_fns_impl
SmallVector<jl_code_instance_t*, 0> jl_external_to_llvm;
} jl_native_code_desc_t;
// Report the function-table indices recorded for `codeinst`.
//   native_code  - a jl_native_code_desc_t*, may be null
//   codeinst     - key to look up in the fvar map
//   func_idx     - receives the first element of the stored pair on a hit
//   specfunc_idx - receives the second element of the stored pair on a hit
// When `native_code` is null or `codeinst` has no entry, neither output is written,
// so callers must pre-initialize them.
extern "C" JL_DLLEXPORT_CODEGEN
void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
                             int32_t *func_idx, int32_t *specfunc_idx)
{
    auto *desc = static_cast<jl_native_code_desc_t*>(native_code);
    if (desc == nullptr)
        return;
    auto found = desc->jl_fvar_map.find(codeinst);
    if (found == desc->jl_fvar_map.end())
        return;
    *func_idx = std::get<0>(found->second);
    *specfunc_idx = std::get<1>(found->second);
}
// List the code instances that have an entry in the fvar map.
// Two-phase protocol: call with `data == NULL` to learn the element count via
// `*num_elements`, then call again with a buffer of exactly that many slots.
// Entries are written in the iteration order of the underlying std::map.
extern "C" JL_DLLEXPORT_CODEGEN void
jl_get_llvm_cis_impl(void *native_code, size_t *num_elements, jl_code_instance_t **data)
{
    auto *desc = static_cast<jl_native_code_desc_t *>(native_code);
    auto &fvar_map = desc->jl_fvar_map;
    if (data == NULL) {
        // size query only
        *num_elements = fvar_map.size();
        return;
    }
    assert(*num_elements == fvar_map.size());
    jl_code_instance_t **out = data;
    for (auto &entry : fvar_map)
        *out++ = entry.first;
}
// get the list of global variables managed by the compiler
// Two-phase protocol: with `data == NULL` only `*num_elements` is filled in;
// otherwise `*num_elements` pointers are copied into `data`.
extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_gvs_impl(void *native_code,
                                                          size_t *num_elements, void **data)
{
    auto *desc = static_cast<jl_native_code_desc_t *>(native_code);
    auto &managed = desc->jl_sysimg_gvars;
    if (data == NULL) {
        // size query only
        *num_elements = managed.size();
        return;
    }
    assert(*num_elements == managed.size());
    const size_t nbytes = *num_elements * sizeof(void *);
    memcpy(data, managed.data(), nbytes);
}
// get the initializer values (jl_value_t or jl_binding_t ptr) of managed global variables
// Two-phase protocol: with `data == NULL` only `*num_elements` is filled in;
// otherwise `*num_elements` pointers are copied into `data`.
extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_gv_inits_impl(void *native_code,
                                                               size_t *num_elements,
                                                               void **data)
{
    auto *desc = static_cast<jl_native_code_desc_t *>(native_code);
    auto &init_values = desc->jl_value_to_llvm;
    if (data == NULL) {
        // size query only
        *num_elements = init_values.size();
        return;
    }
    assert(*num_elements == init_values.size());
    const size_t nbytes = *num_elements * sizeof(void *);
    memcpy(data, init_values.data(), nbytes);
}
// Copy out the list of external code instances.
// Two-phase protocol: with `data == NULL` only `*num_elements` is filled in;
// otherwise `*num_elements` pointer-sized entries are copied into `data`.
// Note that although `data` is declared `jl_code_instance_t *`, what gets written
// to it is an array of `jl_code_instance_t *` values.
extern "C" JL_DLLEXPORT_CODEGEN void jl_get_llvm_external_fns_impl(void *native_code,
                                                                   size_t *num_elements,
                                                                   jl_code_instance_t *data)
{
    auto *desc = static_cast<jl_native_code_desc_t *>(native_code);
    auto &externals = desc->jl_external_to_llvm;
    if (data == NULL) {
        // size query only
        *num_elements = externals.size();
        return;
    }
    assert(*num_elements == externals.size());
    const size_t nbytes = *num_elements * sizeof(jl_code_instance_t *);
    memcpy((void *)data, (const void *)externals.data(), nbytes);
}
// Return a C-API handle to the module stored in `native_code`,
// or NULL when `native_code` itself is null.
extern "C" JL_DLLEXPORT_CODEGEN
LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code)
{
    auto *desc = static_cast<jl_native_code_desc_t*>(native_code);
    if (!desc)
        return NULL;
    return wrap(&desc->M);
}
// Return entry `idx` of the function table stored in `native_code`,
// or NULL when `native_code` itself is null.
// `idx` is not range-checked here beyond what the container's operator[] does.
extern "C" JL_DLLEXPORT_CODEGEN
GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx)
{
    auto *desc = static_cast<jl_native_code_desc_t*>(native_code);
    if (!desc)
        return NULL;
    return desc->jl_sysimg_fvars[idx];
}
// Extract the elements of the array-typed global `name` from `M` as a vector of T*,
// then delete that global from the module.
//   M               - module containing the global; it is modified (global erased)
//   name            - name of a global variable that must exist and have an initializer
//   allow_bad_fvars - when true, entries that are not a T, or that are Function
//                     declarations, are skipped instead of tripping the cast
// Returns the (possibly filtered) list of entries in their original relative order.
template<typename T>
static inline SmallVector<T*, 0> consume_gv(Module &M, const char *name, bool allow_bad_fvars)
{
    // Get information about sysimg export functions from the two global variables.
    // Strip them from the Module so that it's easier to handle the uses.
    GlobalVariable *gv = M.getGlobalVariable(name);
    assert(gv && gv->hasInitializer());
    ArrayType *Ty = cast<ArrayType>(gv->getInitializer()->getType());
    unsigned nele = Ty->getArrayNumElements();
    SmallVector<T*, 0> res(nele);
    ConstantArray *ary = nullptr;
    if (gv->getInitializer()->isNullValue()) {
        for (unsigned i = 0; i < nele; ++i)
            res[i] = cast<T>(Constant::getNullValue(Ty->getArrayElementType()));
    }
    else {
        ary = cast<ConstantArray>(gv->getInitializer());
        // Keep separate read (i) and write (nkept) positions: a rejected entry must
        // advance the read position, otherwise the same operand is re-examined
        // forever-until-empty and every entry after it is lost.
        unsigned nkept = 0;
        for (unsigned i = 0; i < nele; ++i) {
            llvm::Value *val = ary->getOperand(i)->stripPointerCasts();
            if (allow_bad_fvars && (!isa<T>(val) || (isa<Function>(val) && cast<Function>(val)->isDeclaration()))) {
                // Shouldn't happen in regular use, but can happen in bugpoint.
                continue;
            }
            res[nkept++] = cast<T>(val);
        }
        res.resize(nkept);
    }
    assert(gv->use_empty());
    gv->eraseFromParent();
    // the initializer array is uniqued by LLVM; free it once nothing else refers to it
    if (ary && ary->use_empty())
        ary->destroyConstant();
    return res;
}
// Build the constant expression `ptr - base`, narrowed to 32 bits.
//   T_size - the pointer-sized integer type used for the subtraction
//   ptr    - either a pointer constant (converted with ptrtoint) or an integer constant
//   base   - integer constant to subtract
// The result is truncated to i32 only when T_size is wider than 32 bits.
static Constant *get_ptrdiff32(Type *T_size, Constant *ptr, Constant *base)
{
    Constant *addr = ptr;
    if (addr->getType()->isPointerTy())
        addr = ConstantExpr::getPtrToInt(addr, T_size);
    Constant *delta = ConstantExpr::getSub(addr, base);
    if (T_size->getPrimitiveSizeInBits() > 32)
        return ConstantExpr::getTrunc(delta, Type::getInt32Ty(addr->getContext()));
    return delta;
}
// Emit a hidden, DSO-local constant i32 array named `<name>_offsets<suffix>`.
// Slot 0 holds the number of variables; slot i+1 holds the 32-bit difference
// between vars[i] and the address of the table itself.
// Returns the table's address as a T_size integer constant (the base of the offsets).
static Constant *emit_offset_table(Module &M, Type *T_size, ArrayRef<Constant*> vars,
                                   StringRef name, StringRef suffix)
{
    auto *T_int32 = Type::getInt32Ty(M.getContext());
    const uint32_t count = vars.size();
    ArrayType *table_ty = ArrayType::get(T_int32, count + 1);
    // The initializer is attached afterwards because every entry is expressed
    // relative to the address of this very global.
    auto *table = new GlobalVariable(M, table_ty, true,
                                     GlobalVariable::ExternalLinkage,
                                     nullptr,
                                     name + "_offsets" + suffix);
    auto *table_addr = ConstantExpr::getPtrToInt(table, T_size);
    SmallVector<Constant*, 0> entries;
    entries.reserve(count + 1);
    entries.push_back(ConstantInt::get(T_int32, count));
    for (uint32_t k = 0; k < count; k++)
        entries.push_back(get_ptrdiff32(T_size, vars[k], table_addr));
    table->setInitializer(ConstantArray::get(table_ty, entries));
    table->setVisibility(GlobalValue::HiddenVisibility);
    table->setDSOLocal(true);
    return table_addr;
}
// Emit a hidden, DSO-local constant array called `name` whose elements are the
// addresses of `vars`, each bitcast to `T_psize`.
static void emit_table(Module &mod, ArrayRef<GlobalValue*> vars,
                       StringRef name, Type *T_psize)
{
    SmallVector<Constant*, 0> addrs;
    addrs.reserve(vars.size());
    for (GlobalValue *var : vars)
        addrs.push_back(ConstantExpr::getBitCast(var, T_psize));
    ArrayType *table_ty = ArrayType::get(T_psize, addrs.size());
    auto *table = new GlobalVariable(mod, table_ty, true,
                                     GlobalVariable::ExternalLinkage,
                                     ConstantArray::get(table_ty, addrs),
                                     name);
    table->setVisibility(GlobalValue::HiddenVisibility);
    table->setDSOLocal(true);
}
// Decide whether byte `c` may appear unescaped in a mangled symbol name.
// Accepted: ASCII digits and letters, '_' and '$', and every byte in the
// range 128..254 (255 is rejected).
static bool is_safe_char(unsigned char c)
{
    if (c >= 128)
        return c != 255;
    if (c == '_' || c == '$')
        return true;
    const bool digit = c >= '0' && c <= '9';
    const bool upper = c >= 'A' && c <= 'Z';
    const bool lower = c >= 'a' && c <= 'z';
    return digit || upper || lower;
}
// Upper-case hexadecimal digits, indexed by nibble value (0..15).
// Used by makeSafeName to spell out bytes that have no entry in common_names.
static const char hexchars[16] = {
'0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
// Abbreviation table indexed by byte value: a non-null entry is the 2- or
// 3-letter replacement makeSafeName emits for that byte, a null entry means
// "no abbreviation" (the byte is either safe as-is or gets hex-escaped).
// Every non-null entry must be at least two characters long, because
// makeSafeName reads entry[0] and entry[1] unconditionally.
// Note the 0x20 and 0x28 rows hold 8 entries each; all other rows hold 16.
static const char *const common_names[256] = {
// 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, a, b, c, d, e, f
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10
"SP", "NOT", "DQT", "YY", 0, "REM", "AND", "SQT", // 0x20
"LPR", "RPR", "MUL", "SUM", 0, "SUB", "DOT", "DIV", // 0x28
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "COL", 0, "LT", "EQ", "GT", "QQ", // 0x30
"AT", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x40
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "LBR", "RDV", "RBR", "POW", 0, // 0x50
"TIC", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x60
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, "LCR", "OR", "RCR", "TLD", "DEL", // 0x70
0 }; // remainder is filled with zeros, though are also all safe characters
// Reversibly rewrites the name of a GlobalObject so that it contains no
// characters that LLVM or the system linker might treat specially.
// Bytes accepted by is_safe_char are copied through unchanged; every other
// byte becomes an escape terminated by '.':
//   - a "friendly" abbreviation from common_names when one exists,
//   - otherwise the byte's two upper-case hex digits.
// e.g. mangles "llvm.a≠a$a!a##" as "llvmDOT.a≠a$aNOT.aYY.YY."
// The object is only renamed when at least one byte was escaped.
static void makeSafeName(GlobalObject &G)
{
    StringRef Name = G.getName();
    SmallVector<char, 32> Mangled;
    for (unsigned char byte : Name.bytes()) {
        if (is_safe_char(byte)) {
            Mangled.push_back(byte);
            continue;
        }
        if (const char *abbrev = common_names[byte]) {
            // abbreviations are two or three characters long
            Mangled.push_back(abbrev[0]);
            Mangled.push_back(abbrev[1]);
            if (abbrev[2])
                Mangled.push_back(abbrev[2]);
        }
        else {
            Mangled.push_back(hexchars[(byte >> 4) & 0xF]);
            Mangled.push_back(hexchars[byte & 0xF]);
        }
        Mangled.push_back('.');
    }
    // an escape is always longer than the byte it replaces, so equal sizes
    // means nothing was changed
    if (Mangled.size() == Name.size())
        return;
    G.setName(StringRef(Mangled.data(), Mangled.size()));
}
namespace { // file-local namespace
class egal_set {
public:
jl_genericmemory_t *list = (jl_genericmemory_t*)jl_an_empty_memory_any;
jl_genericmemory_t *keyset = (jl_genericmemory_t*)jl_an_empty_memory_any;
egal_set(egal_set&) = delete;
egal_set(egal_set&&) = delete;
egal_set() = default;
void insert(jl_value_t *val)
{
jl_value_t *rval = jl_idset_get(list, keyset, val);
if (rval == NULL) {
ssize_t idx;
list = jl_idset_put_key(list, val, &idx);
keyset = jl_idset_put_idx(list, keyset, idx);
}
}
jl_value_t *get(jl_value_t *val)
{
return jl_idset_get(list, keyset, val);
}
};
}
using ::egal_set;
// The result of compiling one code instance: the module that holds its code
// and the names of the functions declared for it.
struct jl_compiled_function_t {
// module containing the emitted definition(s)
orc::ThreadSafeModule TSM;
// names of the emitted entry points (functionObject / specFunctionObject)
jl_llvm_functions_t decls;
};
// All compiled code instances, keyed by the code instance they were generated for.
typedef DenseMap<jl_code_instance_t*, jl_compiled_function_t> jl_compiled_functions_t;
// Add every root of the method behind `mi` that is not already globally rooted
// to `method_roots`, so that later lookups can reuse the method's own root
// for an equal value. Does nothing when `mi` is not defined by a method.
// The method's writelock is held while its roots array is read.
static void record_method_roots(egal_set &method_roots, jl_method_instance_t *mi)
{
    jl_method_t *meth = mi->def.method;
    if (!jl_is_method(meth))
        return;
    JL_LOCK(&meth->writelock);
    if (meth->roots) {
        const size_t nroots = jl_array_dim0(meth->roots);
        for (size_t idx = 0; idx < nroots; idx++) {
            jl_value_t *root = jl_array_ptr_ref(meth->roots, idx);
            if (!jl_is_globally_rooted(root))
                method_roots.insert(root);
        }
    }
    JL_UNLOCK(&meth->writelock);
}
// Canonicalize the values referenced through params.global_targets.
// For every temporary root that has a global variable slot, pick a canonical
// equivalent value (see get_global_root below). When that differs from the
// original value, the slot is either merged into the slot that already exists
// for the canonical value, or re-keyed under the canonical value.
//   params             - supplies temporary_roots and the global_targets map (mutated)
//   method_roots       - set of existing method roots consulted for an equivalent value
//   compiled_functions - modules whose globals are rewritten when two slots are merged
static void aot_optimize_roots(jl_codegen_params_t &params, egal_set &method_roots, jl_compiled_functions_t &compiled_functions)
{
for (size_t i = 0; i < jl_array_dim0(params.temporary_roots); i++) {
jl_value_t *val = jl_array_ptr_ref(params.temporary_roots, i);
auto ref = params.global_targets.find((void*)val);
// values without a global slot need no work
if (ref == params.global_targets.end())
continue;
// preference order: the value itself if already globally rooted, then an
// existing method root, then whatever jl_as_global_root returns
auto get_global_root = [val, &method_roots]() {
if (jl_is_globally_rooted(val))
return val;
jl_value_t *mval = method_roots.get(val);
if (mval)
return mval;
return jl_as_global_root(val, 1);
};
jl_value_t *mval = get_global_root();
if (mval != val) {
// read the slot out before erasing the entry it came from
GlobalVariable *GV = ref->second;
params.global_targets.erase(ref);
auto mref = params.global_targets.find((void*)mval);
if (mref != params.global_targets.end()) {
// replace ref with mref in all Modules
// copy the old name: GV may be erased or renamed inside the loop below
std::string OldName(GV->getName());
StringRef NewName(mref->second->getName());
for (auto &def : compiled_functions) {
orc::ThreadSafeModule &TSM = def.second.TSM;
Module &M = *TSM.getModuleUnlocked();
if (GlobalValue *GV2 = M.getNamedValue(OldName)) {
// remember that the original slot was found (checked by the assert below)
if (GV2 == GV)
GV = nullptr;
// either replace or rename the old value to use the other equivalent name
if (GlobalValue *GV3 = M.getNamedValue(NewName)) {
GV2->replaceAllUsesWith(GV3);
GV2->eraseFromParent();
}
else {
GV2->setName(NewName);
}
}
}
// the original slot must have lived in one of the compiled modules
assert(GV == nullptr);
}
else {
// no slot exists for the canonical value yet: reuse this one under the new key
params.global_targets[(void*)mval] = GV;
}
}
}
}
// Search `compiled_functions` for a different code instance that can stand in
// for `ci`: it must have inferred code, a world range that covers ci's range,
// and egal `def`, `owner` and `rettype` fields.
// Returns an iterator to the first such entry, or end() when there is none.
static jl_compiled_functions_t::iterator get_ci_equiv_compiled(jl_code_instance_t *ci JL_PROPAGATES_ROOT, jl_compiled_functions_t &compiled_functions) JL_NOTSAFEPOINT
{
    jl_value_t *want_def = ci->def;
    jl_value_t *want_owner = ci->owner;
    jl_value_t *want_rettype = ci->rettype;
    size_t lo_world = jl_atomic_load_relaxed(&ci->min_world);
    size_t hi_world = jl_atomic_load_relaxed(&ci->max_world);
    auto last = compiled_functions.end();
    for (auto cur = compiled_functions.begin(); cur != last; ++cur) {
        jl_code_instance_t *cand = cur->first;
        if (cand == ci)
            continue;
        if (jl_atomic_load_relaxed(&cand->inferred) == NULL)
            continue;
        // the candidate's validity range must enclose [lo_world, hi_world]
        if (jl_atomic_load_relaxed(&cand->min_world) > lo_world)
            continue;
        if (jl_atomic_load_relaxed(&cand->max_world) < hi_world)
            continue;
        if (!jl_egal(cand->def, want_def))
            continue;
        if (!jl_egal(cand->owner, want_owner))
            continue;
        if (!jl_egal(cand->rettype, want_rettype))
            continue;
        return cur;
    }
    return compiled_functions.end();
}
// Drain params.workqueue, replacing each prototype that was emitted for a call
// to another code instance with the best available target: the real compiled
// function when one exists in compiled_functions (or an equivalent one found
// by get_ci_equiv_compiled), a const-return thunk, or a trampoline that goes
// through the generic invoke path.
//   params             - codegen state; its workqueue is consumed and may be refilled
//   method_roots       - not referenced in this function body
//   compiled_functions - the set of code instances that have compiled definitions
static void resolve_workqueue(jl_codegen_params_t &params, egal_set &method_roots, jl_compiled_functions_t &compiled_functions)
{
// take ownership of the pending items; anything queued while processing is
// appended back at the bottom of the loop
jl_workqueue_t workqueue;
std::swap(params.workqueue, workqueue);
jl_code_instance_t *codeinst = NULL;
JL_GC_PUSH1(&codeinst);
assert(!params.cache);
while (!workqueue.empty()) {
auto it = workqueue.pop_back_val();
codeinst = it.first;
auto &proto = it.second;
// try to emit code for this item from the workqueue
StringRef invokeName = "";
StringRef preal_decl = "";
bool preal_specsig = false;
{
// look for an exact match first, then for an equivalent code instance
auto it = compiled_functions.find(codeinst);
if (it == compiled_functions.end())
it = get_ci_equiv_compiled(codeinst, compiled_functions);
if (it != compiled_functions.end()) {
auto &decls = it->second.decls;
invokeName = decls.functionObject;
if (decls.functionObject == "jl_fptr_args") {
// the spec function is the real target, but without a specialized signature
preal_decl = decls.specFunctionObject;
}
else if (decls.functionObject != "jl_fptr_sparam" && decls.functionObject != "jl_f_opaque_closure_call" && decls.functionObject != "jl_fptr_const_return") {
// any other invoke wrapper: the spec function has a specialized signature
preal_decl = decls.specFunctionObject;
preal_specsig = true;
}
}
}
// patch up the prototype we emitted earlier
Module *mod = proto.decl->getParent();
assert(proto.decl->isDeclaration());
Function *pinvoke = nullptr;
// no compiled target, but the code instance is a constant return: emit a thunk for it
if (preal_decl.empty() && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr) {
std::string gf_thunk_name = emit_abi_constreturn(mod, params, proto.specsig, codeinst);
preal_specsig = proto.specsig;
if (invokeName.empty())
invokeName = "jl_fptr_const_return";
// take the name from the module's value so the StringRef outlives gf_thunk_name
preal_decl = mod->getNamedValue(gf_thunk_name)->getName();
}
// still no target: fall back to a trampoline through the invoke path
if (preal_decl.empty()) {
pinvoke = emit_tojlinvoke(codeinst, invokeName, mod, params);
if (!proto.specsig) {
proto.decl->replaceAllUsesWith(pinvoke);
proto.decl->eraseFromParent();
proto.decl = pinvoke;
}
}
// the caller expects a specialized signature but the target does not have one:
// turn the prototype itself into an adapter
if (proto.specsig && !preal_specsig) {
// get or build an fptr1 that can invoke codeinst
if (pinvoke == nullptr)
pinvoke = get_or_emit_fptr1(preal_decl, mod);
// emit specsig-to-(jl)invoke conversion
proto.decl->setLinkage(GlobalVariable::InternalLinkage);
//protodecl->setAlwaysInline();
jl_init_function(proto.decl, params);
jl_method_instance_t *mi = jl_get_ci_mi(codeinst);
size_t nrealargs = jl_nparams(mi->specTypes); // number of actual arguments being passed
bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
// TODO: maybe this can be cached in codeinst->specfptr?
emit_specsig_to_fptr1(proto.decl, proto.cc, proto.return_roots, mi->specTypes, codeinst->rettype, is_opaque_closure, nrealargs, params, pinvoke);
preal_decl = ""; // no need to fixup the name
}
if (!preal_decl.empty()) {
// merge and/or rename this prototype to the real function
if (Function *specfun = cast_or_null<Function>(mod->getNamedValue(preal_decl))) {
if (proto.decl != specfun) {
proto.decl->replaceAllUsesWith(specfun);
proto.decl->eraseFromParent();
proto.decl = specfun;
}
}
else {
proto.decl->setName(preal_decl);
}
}
if (proto.oc) { // additionally, if we are dealing with an oc, then we might also need to fix up the fptr1 reference too
assert(proto.specsig);
StringRef ocinvokeDecl = invokeName;
// if OC expected a specialized specsig dispatch, but we don't have it, use the inner trampoline here too
// XXX: this invoke translation logic is supposed to exactly match new_opaque_closure
// NOTE(review): pinvoke is only assigned above when preal_decl was empty or when
// !preal_specsig; confirm it cannot still be null here when preal_specsig is true
// and invokeName is one of the names tested below.
if (!preal_specsig || ocinvokeDecl == "jl_f_opaque_closure_call" || ocinvokeDecl == "jl_fptr_interpret_call" || ocinvokeDecl == "jl_fptr_const_return")
ocinvokeDecl = pinvoke->getName();
assert(!ocinvokeDecl.empty());
assert(ocinvokeDecl != "jl_fptr_args");
assert(ocinvokeDecl != "jl_fptr_const_return");
assert(ocinvokeDecl != "jl_fptr_sparam");
// merge and/or rename this prototype to the real function
if (Function *specfun = cast_or_null<Function>(mod->getNamedValue(ocinvokeDecl))) {
if (proto.oc != specfun) {
proto.oc->replaceAllUsesWith(specfun);
proto.oc->eraseFromParent();
proto.oc = specfun;
}
}
else {
proto.oc->setName(ocinvokeDecl);
}
}
// pick up any work that was queued while handling this item
workqueue.append(params.workqueue);
params.workqueue.clear();
}
JL_GC_POP();
}
/// Make a function from another module available in the destination module,
/// returning the function to use inside \p DstM.
/// Similar to orc::cloneFunctionDecl, but more complete for greater correctness
///
/// \param DstM the module that needs to refer to the function
/// \param SF   the source function (possibly already in \p DstM)
/// \returns \p SF itself when it already lives in \p DstM, the existing value of
///          the same name when \p DstM has one, or a newly created prototype
///          carrying SF's type, linkage, address space, name and attributes.
Function *IRLinker_copyFunctionProto(Module *DstM, Function *SF) {
    // Nothing to link when the source already lives in the destination.
    if (DstM == SF->getParent())
        return SF;
    // Reuse a previously copied (or otherwise same-named) value.
    GlobalValue *existing = DstM->getNamedValue(SF->getName());
    if (existing)
        return cast<Function>(existing);
    Function *proto = Function::Create(SF->getFunctionType(), SF->getLinkage(),
                                       SF->getAddressSpace(), SF->getName(), DstM);
    proto->copyAttributesFrom(SF);
#if JL_LLVM_VERSION < 210000
    proto->IsNewDbgInfoFormat = SF->IsNewDbgInfoFormat;
#endif
    // Remove these copied constants since they point to the source module.
    proto->setPersonalityFn(nullptr);
    proto->setPrefixData(nullptr);
    proto->setPrologueData(nullptr);
    return proto;
}
// Emit (into `M`) a thunk that adapts calls made with `from_abi` to a compiled target,
// and return that thunk.
//   specfunc non-empty -> a converter that calls the spec function copied from `defM`
//   specfunc empty     -> a dispatcher; it targets `func` copied from `defM`, or has
//                         no fixed target (null) when `func` is empty as well
//   target_specsig     - only forwarded to the converter case
static Function *aot_abi_converter(jl_codegen_params_t &params, Module *M, jl_abi_t from_abi, jl_code_instance_t *codeinst, Module *defM, StringRef func, StringRef specfunc, bool target_specsig)
{
    std::string thunk_name;
    if (specfunc.empty()) {
        Value *target = nullptr;
        if (!func.empty())
            target = IRLinker_copyFunctionProto(M, defM->getFunction(func));
        thunk_name = emit_abi_dispatcher(M, params, from_abi, codeinst, target);
    }
    else {
        Value *target = IRLinker_copyFunctionProto(M, defM->getFunction(specfunc));
        thunk_name = emit_abi_converter(M, params, from_abi, codeinst, target, target_specsig);
    }
    // the emitters report the thunk by name; resolve it back to the Function
    Function *thunk = M->getFunction(thunk_name);
    assert(thunk);
    return thunk;
}
// Fill in the data record of every cfunction declared in params.cfuncs.
// For each one, the specialization for its signature is looked up in the latest
// world; when that specialization was compiled here, the record is pointed at
// the compiled code (directly, or through an ABI adapter), otherwise it falls
// back to an unspecialized dispatcher.
//   params             - codegen state; provides the cfuncs list
//   compiled_functions - code instances with compiled definitions in this session
static void generate_cfunc_thunks(jl_codegen_params_t &params, jl_compiled_functions_t &compiled_functions)
{
// index the compiled code instances by method instance, keeping only those with
// owner == nothing, an unbounded max_world, and whose def is the method instance itself
DenseMap<jl_method_instance_t*, jl_code_instance_t*> compiled_mi;
for (auto &def : compiled_functions) {
jl_code_instance_t *this_code = def.first;
jl_method_instance_t *mi = jl_get_ci_mi(this_code);
if (this_code->owner == jl_nothing && jl_atomic_load_relaxed(&this_code->max_world) == ~(size_t)0 && this_code->def == (jl_value_t*)mi)
compiled_mi[mi] = this_code;
}
size_t latestworld = jl_atomic_load_acquire(&jl_world_counter);
for (cfunc_decl_t &cfunc : params.cfuncs) {
Module *M = cfunc.cfuncdata->getParent();
jl_value_t *sigt = cfunc.abi.sigt;
JL_GC_PROMISE_ROOTED(sigt);
jl_value_t *declrt = cfunc.abi.rt;
JL_GC_PROMISE_ROOTED(declrt);
// dispatcher with no code instance and no fixed target; always stored in slot 4
Function *unspec = aot_abi_converter(params, M, cfunc.abi, nullptr, nullptr, "", "", false);
jl_code_instance_t *codeinst = nullptr;
// Rewrites the 8-element initializer of cfuncdata: slot 0 <- f, slot 4 <- unspec,
// and slot 2 <- the code instance when one was found. codeinst is captured by
// reference, so the value assigned further down is the one seen here.
auto assign_fptr = [&params, &cfunc, &codeinst, &unspec](Function *f) {
ConstantArray *init = cast<ConstantArray>(cfunc.cfuncdata->getInitializer());
SmallVector<Constant*,8> initvals;
for (unsigned i = 0; i < init->getNumOperands(); ++i)
initvals.push_back(init->getOperand(i));
assert(initvals.size() == 8);
assert(initvals[0]->isNullValue());
assert(initvals[2]->isNullValue());
if (codeinst) {
Constant *llvmcodeinst = literal_pointer_val_slot(params, f->getParent(), (jl_value_t*)codeinst);
initvals[2] = llvmcodeinst; // plast_codeinst
}
assert(initvals[4]->isNullValue());
initvals[4] = unspec;
initvals[0] = f;
cfunc.cfuncdata->setInitializer(ConstantArray::get(init->getType(), initvals));
};
Module *defM = nullptr;
StringRef func;
jl_method_instance_t *mi = (jl_method_instance_t*)jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0);
if ((jl_value_t*)mi != jl_nothing) {
auto it = compiled_mi.find(mi);
if (it != compiled_mi.end()) {
codeinst = it->second;
JL_GC_PROMISE_ROOTED(codeinst);
// codeinst came from compiled_functions' keys (via compiled_mi), so this lookup
// is used without an end() check
auto defs = compiled_functions.find(codeinst);
defM = defs->second.TSM.getModuleUnlocked();
const jl_llvm_functions_t &decls = defs->second.decls;
func = decls.functionObject;
StringRef specfunc = decls.specFunctionObject;
jl_value_t *astrt = codeinst->rettype;
if (astrt != (jl_value_t*)jl_bottom_type &&
jl_type_intersection(astrt, declrt) == jl_bottom_type) {
// Do not warn if the function never returns since it is
// occasionally required by the C API (typically error callbacks)
// even though we're likely to encounter memory errors in that case
jl_printf(JL_STDERR, "WARNING: cfunction: return type of %s does not match\n", name_from_method_instance(mi));
}
if (func == "jl_fptr_const_return") {
// constant result: emit a thunk that returns it under the cfunction's ABI
std::string gf_thunk_name = emit_abi_constreturn(M, params, cfunc.abi, codeinst->rettype_const);
auto F = M->getFunction(gf_thunk_name);
assert(F);
assign_fptr(F);
continue;
}
else if (func == "jl_fptr_args") {
assert(!specfunc.empty());
// use the spec function directly when the requested ABI is not specsig and the
// actual return type is a subtype of the declared one
if (!cfunc.abi.specsig && jl_subtype(astrt, declrt)) {
assign_fptr(IRLinker_copyFunctionProto(M, defM->getFunction(specfunc)));
continue;
}
assign_fptr(aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, specfunc, false));
continue;
}
else if (func == "jl_fptr_sparam" || func == "jl_f_opaque_closure_call") {
func = ""; // use jl_invoke instead for these, since we don't declare these prototypes
}
else {
assert(!specfunc.empty());
// use the spec function directly only when both signature and return type match exactly
if (jl_egal(mi->specTypes, sigt) && jl_egal(declrt, astrt)) {
assign_fptr(IRLinker_copyFunctionProto(M, defM->getFunction(specfunc)));
continue;
}
assign_fptr(aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, specfunc, true));
continue;
}
}
}
// reached when nothing above assigned a target: dispatch through the code
// instance if there is one, otherwise use the unspecialized dispatcher
Function *f = codeinst ? aot_abi_converter(params, M, cfunc.abi, codeinst, defM, func, "", false) : unspec;
assign_fptr(f);
}
}
// Link the module held by `srcTSM` into the destination owned by linker `L`.
// The source module is consumed (moved from) by this call.
// As stated by the original author, this assumes both modules were built for
// the same target (including the DataLayout and ModuleFlags, for example),
// that the source has no module-level assembly, and that Comdats are dropped
// here and need to be re-added later.
static void jl_merge_module(Linker &L, orc::ThreadSafeModule srcTSM) JL_NOTSAFEPOINT
{
    srcTSM.consumingModuleDo([&L](std::unique_ptr<Module> incoming) JL_NOTSAFEPOINT {
        const bool failed = L.linkInModule(std::move(incoming));
        // the link result is only checked in assert-enabled builds
        assert(!failed && "linking llvmcall modules failed");
        (void)failed;
    });
}
// Returns true unless `F` carries the AlwaysInline or InlineHint attribute.
static bool canPartition(const Function &F)
{
    if (F.hasFnAttribute(Attribute::AlwaysInline))
        return false;
    return !F.hasFnAttribute(Attribute::InlineHint);
}
// this builds the object file portion of the sysimage files for fast startup
// `external_linkage` creates linkages between pkgimages.
//
// Overview of what this function does:
//  1. calls the Julia-side `jl_compile_and_emit_func` with 7 arguments
//     (worlds, trim, external_linkage, worklist, mod_array, all, module_init_order),
//  2. checks that the result is an `array_any` and hands it to `jl_emit_native`,
//  3. gives every definition in the resulting module internal linkage (and, for
//     x86_64 Windows targets, the `__julia_personality` personality function).
// Returns the pointer produced by `jl_emit_native` (used here as a `jl_native_code_desc_t*`).
// Raises a Julia error if `jl_compile_and_emit_func` is not set.
extern "C" JL_DLLEXPORT_CODEGEN
void *jl_create_native_impl(LLVMOrcThreadSafeModuleRef llvmmod, int trim, int external_linkage, size_t world,
jl_array_t *mod_array, jl_array_t *worklist, int all, jl_array_t *module_init_order)
{
JL_TIMING(INFERENCE, INFERENCE);
auto ct = jl_current_task;
if (!jl_compile_and_emit_func) {
jl_error("inference not available for generating compiled output");
}
// `timed` is true when bit 0 of reentrant_timing was clear on entry; in that case this
// call sets the bit here and clears it again before returning.
bool timed = (ct->reentrant_timing & 1) == 0;
if (timed)
ct->reentrant_timing |= 1;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
// 8 GC-rooted slots: the callee in fargs[0] followed by its 7 arguments
jl_value_t **fargs;
JL_GC_PUSHARGS(fargs, 8);
#ifdef _P64
jl_value_t *jl_array_ulong_type = jl_array_uint64_type;
#else
jl_value_t *jl_array_ulong_type = jl_array_uint32_type;
#endif
// `worlds` is allocated with room for two entries and then trimmed:
//  - normally it holds [jl_typeinf_world, world] (length 2),
//  - if trimming, if jl_typeinf_world is 0, or if external_linkage is set, it holds only
//    [world] (length 1, overwriting slot 0).
jl_array_t *worlds = jl_alloc_array_1d(jl_array_ulong_type, 2);
fargs[0] = jl_compile_and_emit_func;
fargs[1] = (jl_value_t*)worlds;
jl_array_data(worlds, size_t)[0] = jl_typeinf_world;
int compiler_world = 1;
if (trim || jl_array_data(worlds, size_t)[0] == 0 || external_linkage)
compiler_world = 0;
jl_array_data(worlds, size_t)[compiler_world] = world; // might overwrite previous
worlds->dimsize[0] = 1 + compiler_world;
fargs[2] = jl_box_uint8(trim);
fargs[3] = jl_box_bool(external_linkage);
fargs[4] = worklist ? (jl_value_t*)worklist : jl_nothing; // worklist (or nothing)
fargs[5] = mod_array ? (jl_value_t*)mod_array : jl_nothing; // mod_array (or nothing)
fargs[6] = jl_box_bool(all);
fargs[7] = module_init_order ? (jl_value_t*)module_init_order : jl_nothing; // module_init_order (or nothing)
// run the call with the task's world age set to jl_typeinf_world, then restore it
size_t last_age = ct->world_age;
ct->world_age = jl_typeinf_world;
fargs[0] = jl_apply(fargs, 8);
// only the result (now in fargs[0]) stays rooted; the argument slots are cleared
fargs[1] = fargs[2] = fargs[3] = fargs[4] = fargs[5] = fargs[6] = fargs[7] = NULL;
ct->world_age = last_age;
jl_value_t *codeinfos = fargs[0];
JL_TYPECHK(jl_create_native, array_any, codeinfos);
// cgparams is passed as NULL; external_linkage is normalized to 0/1
void *data = jl_emit_native((jl_array_t*)codeinfos, llvmmod, NULL, external_linkage ? 1 : 0);
JL_GC_POP();
// move everything inside, now that we've merged everything
// (before adding the exported headers)
((jl_native_code_desc_t*)data)->M.withModuleDo([&](Module &M) {
auto TT = Triple(M.getTargetTriple());
Function *juliapersonality_func = nullptr;
if (TT.isOSWindows() && TT.getArch() == Triple::x86_64) {
// setting the function personality enables stack unwinding and catching exceptions
// so make sure everything has something set
Type *T_int32 = Type::getInt32Ty(M.getContext());
juliapersonality_func = Function::Create(FunctionType::get(T_int32, true),
Function::ExternalLinkage, "__julia_personality", M);
juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
}
// every global object that has a definition becomes internal + dso_local and gets a safe name
for (GlobalObject &G : M.global_objects()) {
if (!G.isDeclaration()) {
G.setLinkage(GlobalValue::InternalLinkage);
G.setDSOLocal(true);
makeSafeName(G);
if (Function *F = dyn_cast<Function>(&G)) {
if (juliapersonality_func) {
// Add unwind exception personalities to functions to handle async exceptions
F->setPersonalityFn(juliapersonality_func);
}
}
}
}
});
// account the elapsed time (if compile-time measurement is enabled) and release the timing bit
if (timed) {
if (measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
ct->reentrant_timing &= ~1ull;
}
return data;
}
// May also be used by external consumers like GPUCompiler.jl to obtain a module containing
// all reachable & inferrable functions.
//
// Parameters:
//  - codeinfos: array whose items are either a CodeInstance immediately followed by its
//    CodeInfo, or a simple vector (rettype, sig[, name]) describing a C-callable alias.
//  - llvmmod: optional destination module; when NULL a fresh module named "text" is created.
//  - cgparams: codegen parameters; NULL selects `jl_default_cgparams`. The sanitizer fields
//    are always overridden from `jl_options.target_sanitize_*`.
//  - external_linkage: stored into `params.external_linkage`; when set, code instances that
//    are already available from an image are not recorded as local function variables.
// Returns a `jl_native_code_desc_t` allocated with `new`, cast to `void*`.
extern "C" JL_DLLEXPORT_CODEGEN
void *jl_emit_native_impl(jl_array_t *codeinfos, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int external_linkage)
{
JL_TIMING(NATIVE_AOT, NATIVE_Create);
++CreateNativeCalls;
CreateNativeMax.updateMax(jl_array_nrows(codeinfos));
if (cgparams == NULL)
cgparams = &jl_default_cgparams;
// work on a private copy so the caller's parameters are not modified
jl_cgparams_t target_cgparams = *cgparams;
target_cgparams.sanitize_memory = jl_options.target_sanitize_memory;
target_cgparams.sanitize_thread = jl_options.target_sanitize_thread;
target_cgparams.sanitize_address = jl_options.target_sanitize_address;
jl_native_code_desc_t *data = new jl_native_code_desc_t;
orc::ThreadSafeContext ctx;
orc::ThreadSafeModule backing;
if (!llvmmod) {
ctx = jl_ExecutionEngine->makeContext();
backing = jl_create_ts_module("text", ctx, jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple());
}
// `clone` is the destination module: the caller's module if given, otherwise `backing`
orc::ThreadSafeModule &clone = llvmmod ? *unwrap(llvmmod) : backing;
auto ctxt = clone.getContext();
// compile all methods for the current world and type-inference world
auto target_info = clone.withModuleDo([&](Module &M) {
return std::make_pair(M.getDataLayout(), Triple(M.getTargetTriple()));
});
egal_set method_roots;
jl_codegen_params_t params(ctxt, std::move(target_info.first), std::move(target_info.second));
// value names are only discarded for the module created here, not for a caller-provided one
if (!llvmmod)
params.getContext().setDiscardValueNames(true);
params.params = &target_cgparams;
assert(params.imaging_mode); // `_imaging_mode` controls if broken features like code-coverage are disabled
params.external_linkage = external_linkage;
params.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
// remember the default so it can be restored after each code instance
bool safepoint_on_entry = params.safepoint_on_entry;
JL_GC_PUSH3(&params.temporary_roots, &method_roots.list, &method_roots.keyset);
jl_compiled_functions_t compiled_functions;
size_t i, l;
for (i = 0, l = jl_array_nrows(codeinfos); i < l; i++) {
// each item in this list is either a CodeInstance followed by a CodeInfo indicating something
// to compile, or a rettype followed by a sig describing a C-callable alias to create.
jl_value_t *item = jl_array_ptr_ref(codeinfos, i);
if (jl_is_code_instance(item)) {
// now add it to our compilation results
jl_code_instance_t *codeinst = (jl_code_instance_t*)item;
// the CodeInfo is the next array element, so this consumes two entries per iteration
jl_code_info_t *src = (jl_code_info_t*)jl_array_ptr_ref(codeinfos, ++i);
assert(jl_is_code_info(src));
if (compiled_functions.count(codeinst))
continue; // skip any duplicates that accidentally made their way in here (or make this an error?)
if (jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX)
params.safepoint_on_entry = false; // ensure we don't block ExpandAtomicModifyPass from inlining this code if applicable
orc::ThreadSafeModule result_m = jl_create_ts_module(name_from_method_instance(jl_get_ci_mi(codeinst)),
params.tsctx, clone.getModuleUnlocked()->getDataLayout(),
Triple(clone.getModuleUnlocked()->getTargetTriple()));
jl_llvm_functions_t decls;
// unless force_emit_all is set, a constant-returning code instance is recorded by name only
if (!(params.params->force_emit_all) && jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
decls.functionObject = "jl_fptr_const_return";
else
decls = jl_emit_codeinst(result_m, codeinst, src, params);
params.safepoint_on_entry = safepoint_on_entry;
record_method_roots(method_roots, jl_get_ci_mi(codeinst));
if (result_m)
compiled_functions[codeinst] = {std::move(result_m), std::move(decls)};
}
else {
// C-callable alias request: (rettype, sig) with an optional third element used as the name
assert(jl_is_simplevector(item));
jl_value_t *rt = jl_svecref(item, 0);
jl_value_t *sig = jl_svecref(item, 1);
jl_value_t *nameval = jl_svec_len(item) == 2 ? jl_nothing : jl_svecref(item, 2);
assert(jl_is_type(rt) && jl_is_type(sig));
jl_generate_ccallable(clone.getModuleUnlocked(), nameval, rt, sig, params);
}
}
// finally, make sure all referenced methods get fixed up, particularly if the user declined to compile them
resolve_workqueue(params, method_roots, compiled_functions);
// including generating cfunction thunks
generate_cfunc_thunks(params, compiled_functions);
aot_optimize_roots(params, method_roots, compiled_functions);
params.temporary_roots = nullptr;
params.temporary_roots_set.clear();
JL_GC_POP();
// process the globals array, before jl_merge_module destroys them
// `gvars` collects global names: first the value globals, then (appended below) the external functions
SmallVector<std::string, 0> gvars(params.global_targets.size());
data->jl_value_to_llvm.resize(params.global_targets.size());
// NOTE: gvars_names and gvars_set are only filled inside assert(), so they are only
// populated in assert-enabled builds; within this function they are used for nothing else.
StringSet<> gvars_names;
DenseSet<GlobalValue *> gvars_set;
size_t idx = 0;
for (auto &global : params.global_targets) {
gvars[idx] = global.second->getName().str();
assert(gvars_set.insert(global.second).second && "Duplicate gvar in params!");
assert(gvars_names.insert(gvars[idx]).second && "Duplicate gvar name in params!");
data->jl_value_to_llvm[idx] = global.first;
idx++;
}
CreateNativeMethods += compiled_functions.size();
// entries of `gvars` at index >= offset belong to external functions
size_t offset = gvars.size();
data->jl_external_to_llvm.resize(params.external_fns.size());
for (auto &extern_fn : params.external_fns) {
jl_code_instance_t *this_code = std::get<0>(extern_fn.first);
bool specsig = std::get<1>(extern_fn.first);
assert(specsig && "Error external_fns doesn't handle non-specsig yet");
(void) specsig;
GlobalVariable *F = extern_fn.second;
size_t idx = gvars.size() - offset;
// idx is a size_t, so the following check is always true
assert(idx >= 0);
assert(idx < data->jl_external_to_llvm.size());
data->jl_external_to_llvm[idx] = this_code;
assert(gvars_set.insert(F).second && "Duplicate gvar in params!");
assert(gvars_names.insert(F->getName()).second && "Duplicate gvar name in params!");
gvars.push_back(std::string(F->getName()));
}
// clones the contents of the module `m` to the shadow_output collector
// while examining and recording what kind of function pointer we have
{
Linker L(*clone.getModuleUnlocked());
for (auto &def : compiled_functions) {
jl_code_instance_t *this_code = def.first;
JL_GC_PROMISE_ROOTED(this_code);
jl_llvm_functions_t &decls = def.second.decls;
StringRef func = decls.functionObject;
StringRef cfunc = decls.specFunctionObject;
orc::ThreadSafeModule &M = def.second.TSM;
if (external_linkage) {
uint8_t specsigflags;
jl_callptr_t invoke;
void *fptr;
jl_read_codeinst_invoke(this_code, &specsigflags, &invoke, &fptr, 0);
if (invoke != NULL && (specsigflags & JL_CI_FLAGS_FROM_IMAGE)) {
// this codeinst is already available externally: keep it only if canPartition demands it for local use
// TODO: for performance, avoid generating the src code when we know it would reach here anyways?
// NOTE(review): this looks up `cfunc` unconditionally; confirm it cannot be empty on this path
if (M.withModuleDo([&](Module &M) { return !canPartition(*cast<Function>(M.getNamedValue(cfunc))); })) {
jl_merge_module(L, std::move(M));
}
// either way, no entry is added to jl_fvar_map for this code instance
continue;
}
}
jl_merge_module(L, std::move(M));
// Ids recorded in jl_fvar_map: the well-known entry points get negative constants
// (stored wrapped in a uint32_t); anything else gets the size of jl_sysimg_fvars
// after its push_back, i.e. a 1-based index. 0 means "none".
uint32_t func_id = 0;
uint32_t cfunc_id = 0;
if (func == "jl_fptr_args") {
func_id = -1;
}
else if (func == "jl_fptr_sparam") {
func_id = -2;
}
else if (func == "jl_f_opaque_closure_call") {
func_id = -4;
}
else if (func == "jl_fptr_const_return") {
func_id = -5;
}
else {
//Safe b/c context is locked by params
data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(func)));
func_id = data->jl_sysimg_fvars.size();
}
if (!cfunc.empty()) {
//Safe b/c context is locked by params
data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(cfunc)));
cfunc_id = data->jl_sysimg_fvars.size();
}
data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id);
}
// iterate to a fixed point: merging one module may introduce references to another
bool Changed = true;
while (Changed) {
Changed = false;
// make sure everything referenced got included though, since some functions aren't
// correctly implemented by staticdata for external use, and so codegen won't emit
// an external reference but expects a private copy here instead
for (auto &def : compiled_functions) {
orc::ThreadSafeModule &M = def.second.TSM;
// modules that were already merged have been moved from and test as empty
if (!M)
continue;
jl_llvm_functions_t &decls = def.second.decls;
StringRef func = decls.functionObject;
StringRef cfunc = decls.specFunctionObject;
if (func != "jl_fptr_args" &&
func != "jl_fptr_sparam" &&
func != "jl_f_opaque_closure_call" &&
clone.getModuleUnlocked()->getNamedValue(func)) {
jl_merge_module(L, std::move(M));
Changed = true;
continue;
}
if (!cfunc.empty() && clone.getModuleUnlocked()->getNamedValue(cfunc)) {
Changed = true;
jl_merge_module(L, std::move(M));
}
}
}
#ifndef NDEBUG
// make sure we didn't forget anything that we promised to include in here
for (auto &def : compiled_functions) {
jl_llvm_functions_t &decls = def.second.decls;
StringRef func = decls.functionObject;
StringRef cfunc = decls.specFunctionObject;
if (func != "jl_fptr_args" &&
func != "jl_fptr_sparam" &&
func != "jl_f_opaque_closure_call") {
GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(func);
assert(!F || !F->isDeclaration());
}
if (!cfunc.empty()) {
GlobalValue *F = clone.getModuleUnlocked()->getNamedValue(cfunc);
assert(!F || !F->isDeclaration());
}
}
#endif
compiled_functions.clear();
if (params._shared_module) {
bool error = L.linkInModule(std::move(params._shared_module));
assert(!error && "Error linking in shared module");
(void)error;
}
}
// now get references to the globals in the merged module
// and set them to be internalized and initialized at startup
// filter out any gvars that got optimized away
// Bookkeeping for the compaction below:
//  - idx walks the original `gvars` list; newidx counts the globals that survived,
//  - newoffset is the number of surviving value globals (those with idx < offset),
//  - surviving entries are moved down in place in jl_value_to_llvm / jl_external_to_llvm.
idx = 0;
size_t newoffset = 0;
size_t newidx = 0;
for (auto &global : gvars) {
//Safe b/c context is locked by params
GlobalVariable *G = cast_or_null<GlobalVariable>(clone.getModuleUnlocked()->getNamedValue(global));
if (G != nullptr) {
assert(!G->hasInitializer());
G->setInitializer(Constant::getNullValue(G->getValueType()));
G->setLinkage(GlobalValue::InternalLinkage);
G->setDSOLocal(true);
assert(newidx == data->jl_sysimg_gvars.size());
if (idx < offset) {
data->jl_value_to_llvm[newidx] = data->jl_value_to_llvm[idx];
newoffset = newidx + 1;
}
else {
data->jl_external_to_llvm[newidx - newoffset] = data->jl_external_to_llvm[idx - offset];
}
data->jl_sysimg_gvars.push_back(G);
newidx++;
}
idx++;
}
// drop the tail entries left over after compaction
data->jl_value_to_llvm.resize(newoffset);
data->jl_external_to_llvm.resize(newidx - newoffset);
gvars.clear();
CreateNativeGlobals += idx;
// the destination module is moved into the returned descriptor
data->M = std::move(clone);
return (void*)data;
}
// Pick the archive flavor for `triple`: Darwin-style archives when the triple's
// OS is Darwin, GNU-style archives for everything else.
static object::Archive::Kind getDefaultForHost(Triple &triple)
{
    return triple.isOSDarwin() ? object::Archive::K_DARWIN
                               : object::Archive::K_GNU;
}
typedef Error ArchiveWriterError;
static void reportWriterError(const ErrorInfoBase &E)
{
std::string err = E.message();
jl_safe_printf("ERROR: failed to emit output file %s\n", err.c_str());
}
// Define an internal function `name` of type `FT` in `M` whose body simply
// forwards all of its arguments to the function `alias` and returns the result.
// `alias` is declared with external linkage if `M` does not already contain it.
// The new function is added to llvm.compiler.used via appendToCompilerUsed.
static void injectCRTAlias(Module &M, StringRef name, StringRef alias, FunctionType *FT)
{
    Function *callee = M.getFunction(alias);
    if (callee == nullptr)
        callee = Function::Create(FT, Function::ExternalLinkage, alias, M);

    Function *forwarder = Function::Create(FT, Function::InternalLinkage, name, M);
    appendToCompilerUsed(M, {forwarder});

    BasicBlock *entry = BasicBlock::Create(M.getContext(), "top", forwarder);
    llvm::IRBuilder<> irb(entry);

    // pass every incoming argument straight through, in order
    SmallVector<Value *, 4> passthrough;
    for (auto &param : forwarder->args())
        passthrough.push_back(&param);

    auto result = irb.CreateCall(callee, passthrough);
    irb.CreateRet(result);
}
void multiversioning_preannotate(Module &M);
// See src/processor.h for documentation about this table. Corresponds to jl_image_shard_t.
//
// For each of the `threads` shards this declares one external, hidden, dso_local
// constant global of type `T_size` per jl_image_shard_t field, named
// `<field symbol>_<shard index>`, and stores it in the slot matching the field's
// offset. The collected pointers are emitted as the global `jl_shard_tables`.
static GlobalVariable *emit_shard_table(Module &M, Type *T_size, Type *T_psize, unsigned threads) {
    SmallVector<Constant *, 0> tables(sizeof(jl_image_shard_t) / sizeof(void *) * threads);

    // field offset within jl_image_shard_t paired with the base symbol name,
    // listed in the order the globals are created
    struct ShardField {
        size_t offset;
        const char *symbol;
    };
    const ShardField fields[] = {
        {offsetof(jl_image_shard_t, fvar_count), "jl_fvar_count"},
        {offsetof(jl_image_shard_t, fvar_ptrs), "jl_fvar_ptrs"},
        {offsetof(jl_image_shard_t, fvar_idxs), "jl_fvar_idxs"},
        {offsetof(jl_image_shard_t, gvar_offsets), "jl_gvar_offsets"},
        {offsetof(jl_image_shard_t, gvar_idxs), "jl_gvar_idxs"},
        {offsetof(jl_image_shard_t, clone_slots), "jl_clone_slots"},
        {offsetof(jl_image_shard_t, clone_ptrs), "jl_clone_ptrs"},
        {offsetof(jl_image_shard_t, clone_idxs), "jl_clone_idxs"},
    };

    for (unsigned shard = 0; shard < threads; shard++) {
        std::string suffix = "_" + std::to_string(shard);
        Constant **row = tables.data() + shard * sizeof(jl_image_shard_t) / sizeof(void *);
        for (const ShardField &field : fields) {
            StringRef base = field.symbol;
            auto slot = new GlobalVariable(M, T_size, true,
                                           GlobalValue::ExternalLinkage, nullptr, base + suffix);
            slot->setVisibility(GlobalValue::HiddenVisibility);
            slot->setDSOLocal(true);
            row[field.offset / sizeof(void*)] = slot;
        }
    }

    auto table_init = ConstantArray::get(ArrayType::get(T_psize, tables.size()), tables);
    auto table_gv = new GlobalVariable(M, table_init->getType(), false,
                                       GlobalValue::ExternalLinkage, table_init, "jl_shard_tables");
    table_gv->setVisibility(GlobalValue::HiddenVisibility);
    table_gv->setDSOLocal(true);
    return table_gv;
}
// Create the internal function `pgcstack_default_func` in `M`: it takes no
// arguments and returns the null value of type `T_ptr`.
static Function *emit_pgcstack_default_func(Module &M, Type *T_ptr) {
    FunctionType *signature = FunctionType::get(T_ptr, false);
    Function *stub = Function::Create(signature, GlobalValue::InternalLinkage, "pgcstack_default_func", &M);
    BasicBlock *entry = BasicBlock::Create(M.getContext(), "top", stub);
    llvm::IRBuilder<> irb(entry);
    irb.CreateRet(Constant::getNullValue(T_ptr));
    return stub;
}
// See src/processor.h for documentation about this table. Corresponds to jl_image_ptls_t.
//
// Emits three hidden, dso_local globals (`jl_pgcstack_func_slot` initialized to the
// default pgcstack function, and `jl_pgcstack_key_slot` / `jl_tls_offset` initialized
// to zero) and returns the global `jl_ptls_table` that holds pointers to them.
static GlobalVariable *emit_ptls_table(Module &M, Type *T_size, Type *T_ptr) {
    Function *default_getter = emit_pgcstack_default_func(M, T_ptr);
    auto func_slot = new GlobalVariable(M, T_ptr, false, GlobalValue::ExternalLinkage,
                                        default_getter, "jl_pgcstack_func_slot");
    auto key_slot = new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage,
                                       Constant::getNullValue(T_size), "jl_pgcstack_key_slot");
    auto tls_offset = new GlobalVariable(M, T_size, false, GlobalValue::ExternalLinkage,
                                         Constant::getNullValue(T_size), "jl_tls_offset");

    std::array<Constant *, 3> entries{func_slot, key_slot, tls_offset};
    for (Constant *entry : entries) {
        auto slot = cast<GlobalVariable>(entry);
        slot->setVisibility(GlobalValue::HiddenVisibility);
        slot->setDSOLocal(true);
    }

    auto table_init = ConstantArray::get(ArrayType::get(T_ptr, entries.size()), entries);
    auto table_gv = new GlobalVariable(M, table_init->getType(), false,
                                       GlobalValue::ExternalLinkage, table_init, "jl_ptls_table");
    table_gv->setVisibility(GlobalValue::HiddenVisibility);
    table_gv->setDSOLocal(true);
    return table_gv;
}
// See src/processor.h for documentation about this table. Corresponds to jl_image_header_t.
//
// Emits the internal global `jl_image_header`, an array of four 32-bit values:
// the header version (1), the thread count, the fvar count and the gvar count.
static GlobalVariable *emit_image_header(Module &M, unsigned threads, unsigned nfvars, unsigned ngvars) {
    constexpr uint32_t header_version = 1;
    std::array<uint32_t, 4> fields{header_version, threads, nfvars, ngvars};
    auto init = ConstantDataArray::get(M.getContext(), fields);
    return new GlobalVariable(M, init->getType(), false,
                              GlobalValue::InternalLinkage, init, "jl_image_header");
}
// Grab fvars and gvars data from the module
//
// Reads the initializers of the `jl_fvars` and `jl_gvars` globals in `M` and records,
// for every referenced global, its position in the table (`fvars[gv] = index`,
// `gvars[gv] = index`). Afterwards `jl_fvars`, `jl_gvars`, `jl_fvar_idxs` and
// `jl_gvar_idxs` are erased from the module. All four globals must exist.
static void get_fvars_gvars(Module &M, DenseMap<GlobalValue *, unsigned> &fvars, DenseMap<GlobalValue *, unsigned> &gvars) {
    auto fvars_gv = M.getGlobalVariable("jl_fvars");
    auto gvars_gv = M.getGlobalVariable("jl_gvars");
    auto fvars_idxs = M.getGlobalVariable("jl_fvar_idxs");
    auto gvars_idxs = M.getGlobalVariable("jl_gvar_idxs");
    assert(fvars_gv);
    assert(gvars_gv);
    assert(fvars_idxs);
    assert(gvars_idxs);
    // ConstantArray::get canonicalizes a zero-length array to ConstantAggregateZero, so an
    // empty table is not a ConstantArray. Treat that case as "no entries" instead of
    // performing an invalid cast.
    auto fvars_init = dyn_cast<ConstantArray>(fvars_gv->getInitializer());
    auto gvars_init = dyn_cast<ConstantArray>(gvars_gv->getInitializer());
    assert((fvars_init || cast<ArrayType>(fvars_gv->getValueType())->getNumElements() == 0) &&
           "jl_fvars must be an array of globals");
    assert((gvars_init || cast<ArrayType>(gvars_gv->getValueType())->getNumElements() == 0) &&
           "jl_gvars must be an array of globals");
    const unsigned nfvars = fvars_init ? fvars_init->getNumOperands() : 0;
    const unsigned ngvars = gvars_init ? gvars_init->getNumOperands() : 0;
    for (unsigned i = 0; i < nfvars; ++i) {
        auto gv = cast<GlobalValue>(fvars_init->getOperand(i)->stripPointerCasts());
        assert(gv && gv->hasName() && "fvar must be a named global");
        assert(!fvars.count(gv) && "Duplicate fvar");
        fvars[gv] = i;
    }
    assert(fvars.size() == nfvars);
    for (unsigned i = 0; i < ngvars; ++i) {
        auto gv = cast<GlobalValue>(gvars_init->getOperand(i)->stripPointerCasts());
        assert(gv && gv->hasName() && "gvar must be a named global");
        assert(!gvars.count(gv) && "Duplicate gvar");
        gvars[gv] = i;
    }
    assert(gvars.size() == ngvars);
    // the tables are no longer needed; erasing them also drops their uses of the globals
    fvars_gv->eraseFromParent();
    gvars_gv->eraseFromParent();
    fvars_idxs->eraseFromParent();
    gvars_idxs->eraseFromParent();
}
// Weight computation
// It is important for multithreaded image building to be able to split work up
// among the threads equally. The weight calculated here is an estimation of
// how expensive a particular function is going to be to compile.
// Per-function statistics as filled in by getFunctionWeight.
struct FunctionInfo {
// estimated cost: (1 + insts + bbs) * clones
size_t weight;
// number of basic blocks in the function
size_t bbs;
// total number of instructions over all basic blocks
size_t insts;
// 1, plus the number of bits set in the "julia.mv.clones" attribute when present
size_t clones;
};
// Estimate how expensive `F` is to compile.
// The weight is (1 + instruction count + basic block count) multiplied by the
// number of clones; basic blocks are counted on top of the instructions because
// more blocks means more complexity than the plain instruction sum suggests.
static FunctionInfo getFunctionWeight(const Function &F)
{
    size_t ninsts = 0;
    for (const BasicBlock &block : F)
        ninsts += block.size();

    size_t nclones = 1;
    if (F.hasFnAttribute("julia.mv.clones")) {
        StringRef mask = F.getFnAttribute("julia.mv.clones").getValueAsString();
        // the mask is written in base 16, so it needs at most 4 bits per character;
        // each set bit is one clone, in addition to the original function
        nclones = APInt(mask.size() * 4, mask, 16).popcount() + 1;
    }

    FunctionInfo result;
    result.bbs = F.size();
    result.insts = ninsts;
    result.clones = nclones;
    result.weight = (1 + result.insts + result.bbs) * result.clones;
    return result;
}
// Aggregate statistics over all defined global values of a module,
// as filled in by compute_module_info.
struct ModuleInfo {
// number of global values that have a definition
size_t globals;
// number of those that are functions
size_t funcs;
// sum of basic block counts over all defined functions
size_t bbs;
// sum of instruction counts over all defined functions
size_t insts;
// sum of clone counts over all defined functions
size_t clones;
// sum of function weights, plus 1 for every defined non-function global
size_t weight;
};
// Summarize the defined global values of `M`: declarations are ignored, each
// defined function contributes its getFunctionWeight statistics, and every other
// defined global contributes a weight of 1.
ModuleInfo compute_module_info(Module &M) {
    ModuleInfo totals{}; // every counter starts at zero
    for (auto &GV : M.global_values()) {
        if (!GV.isDeclaration()) {
            ++totals.globals;
            auto fn = dyn_cast<Function>(&GV);
            if (!fn) {
                totals.weight += 1;
            }
            else {
                ++totals.funcs;
                const FunctionInfo stats = getFunctionWeight(*fn);
                totals.bbs += stats.bbs;
                totals.insts += stats.insts;
                totals.clones += stats.clones;
                totals.weight += stats.weight;
            }
        }
    }
    return totals;
}
// One output shard produced by partitionModule.
struct Partition {
    // names of all global values assigned to this partition
    StringMap<bool> globals;
    // name -> index in the fvars table, for members that are fvars
    StringMap<unsigned> fvars;
    // name -> index in the gvars table, for members that are gvars
    StringMap<unsigned> gvars;
    // accumulated weight of the roots assigned here; starts at zero so that
    // `weight += ...` is well-defined however the Partition was constructed
    size_t weight = 0;
};
// Debug check of a partitioning of `M`.
// Returns true if no inconsistency was found; every problem found is reported on dbgs().
// When JL_NDEBUG is defined all checks are compiled out and the result is always true.
// Checked properties:
//  - no global name, fvar index or gvar index appears in more than one partition,
//  - declarations are in no partition and every definition is in some partition,
//  - each global value reported by ConstantUses for a definition is in the same
//    partition as that definition,
//  - every fvar and gvar index is covered by some partition.
static inline bool verify_partitioning(const SmallVectorImpl<Partition> &partitions, const Module &M, DenseMap<GlobalValue *, unsigned> &fvars, DenseMap<GlobalValue *, unsigned> &gvars) {
bool bad = false;
#ifndef JL_NDEBUG
size_t fvars_size = fvars.size();
size_t gvars_size = gvars.size();
// per table index: 0 while unassigned, otherwise (partition number + 1)
SmallVector<uint32_t, 0> fvars_partition(fvars_size);
SmallVector<uint32_t, 0> gvars_partition(gvars_size);
// global name -> partition number
StringMap<uint32_t> GVNames;
for (uint32_t i = 0; i < partitions.size(); i++) {
for (auto &name : partitions[i].globals) {
if (GVNames.count(name.getKey())) {
bad = true;
dbgs() << "Duplicate global name " << name.getKey() << " in partitions " << i << " and " << GVNames[name.getKey()] << "\n";
}
GVNames[name.getKey()] = i;
}
for (auto &fvar : partitions[i].fvars) {
if (fvars_partition[fvar.second] != 0) {
bad = true;
dbgs() << "Duplicate fvar " << fvar.first() << " in partitions " << i << " and " << fvars_partition[fvar.second] - 1 << "\n";
}
fvars_partition[fvar.second] = i+1;
}
for (auto &gvar : partitions[i].gvars) {
if (gvars_partition[gvar.second] != 0) {
bad = true;
dbgs() << "Duplicate gvar " << gvar.first() << " in partitions " << i << " and " << gvars_partition[gvar.second] - 1 << "\n";
}
gvars_partition[gvar.second] = i+1;
}
}
for (auto &GV : M.global_values()) {
if (GV.isDeclaration()) {
if (GVNames.count(GV.getName())) {
bad = true;
dbgs() << "Global " << GV.getName() << " is a declaration but is in partition " << GVNames[GV.getName()] << "\n";
}
} else {
// Local global values are not partitioned
if (!GVNames.count(GV.getName())) {
bad = true;
dbgs() << "Global " << GV << " not in any partition\n";
}
// the const_casts are needed because ConstantUses is constructed from non-const arguments
for (ConstantUses<GlobalValue> uses(const_cast<GlobalValue*>(&GV), const_cast<Module&>(M)); !uses.done(); uses.next()) {
auto val = uses.get_info().val;
if (!GVNames.count(val->getName())) {
bad = true;
dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is not in any partition\n";
continue;
}
// note: operator[] inserts a zero entry if GV itself was missing from GVNames above
if (GVNames[val->getName()] != GVNames[GV.getName()]) {
bad = true;
dbgs() << "Global " << val->getName() << " used by " << GV.getName() << ", which is in partition " << GVNames[GV.getName()] << " but " << val->getName() << " is in partition " << GVNames[val->getName()] << "\n";
}
}
}
}
for (uint32_t i = 0; i < fvars_size; i++) {
if (fvars_partition[i] == 0) {
// look up which global has table index i, for the diagnostic only
auto gv = find_if(fvars.begin(), fvars.end(), [i](auto var) { return var.second == i; });
bad = true;
dbgs() << "fvar " << gv->first->getName() << " at " << i << " not in any partition\n";
}
}
for (uint32_t i = 0; i < gvars_size; i++) {
if (gvars_partition[i] == 0) {
bad = true;
dbgs() << "gvar " << i << " not in any partition\n";
}
}
#endif
return !bad;
}
// Chop a module up as equally as possible by weight into threads partitions
//
// Steps:
//  1. strip the fvars/gvars tables from `M` and remember each global's table index,
//  2. give local-linkage definitions external linkage with hidden visibility,
//  3. union all global values that use each other (as reported by ConstantUses) so
//     they land in the same partition,
//  4. hand the resulting groups, heaviest first, to whichever partition is currently
//     the lightest,
//  5. verify the result.
// Returns `threads` partitions.
static SmallVector<Partition, 32> partitionModule(Module &M, unsigned threads) {
    //Start by stripping fvars and gvars, which helpfully removes their uses as well
    DenseMap<GlobalValue *, unsigned> fvars, gvars;
    get_fvars_gvars(M, fvars, gvars);

    // Partition by union-find, since we only have def->use traversal right now
    struct Partitioner {
        struct Node {
            GlobalValue *GV;
            unsigned parent;
            // number of nodes in the set while this node is a root; reused later to
            // store the index of the partition the node was assigned to
            unsigned size;
            // accumulated weight of the set while this node is a root; reset to 0
            // once the node has been assigned to a partition
            size_t weight;
        };
        SmallVector<Node, 0> nodes;
        DenseMap<GlobalValue *, unsigned> node_map;
        // number of successful merges; must start at zero because merge() increments it
        unsigned merged = 0;

        // add a new singleton set for GV and return its index
        unsigned make(GlobalValue *GV, size_t weight) {
            unsigned idx = nodes.size();
            nodes.push_back({GV, idx, 1, weight});
            node_map[GV] = idx;
            return idx;
        }

        // return the root of idx's set, shortening the path along the way
        unsigned find(unsigned idx) {
            while (nodes[idx].parent != idx) {
                nodes[idx].parent = nodes[nodes[idx].parent].parent;
                idx = nodes[idx].parent;
            }
            return idx;
        }

        // join the sets of x and y (smaller set under the larger) and return the root
        unsigned merge(unsigned x, unsigned y) {
            x = find(x);
            y = find(y);
            if (x == y)
                return x;
            if (nodes[x].size < nodes[y].size)
                std::swap(x, y);
            nodes[y].parent = x;
            nodes[x].size += nodes[y].size;
            nodes[x].weight += nodes[y].weight;
            merged++;
            return x;
        }
    };

    Partitioner partitioner;

    for (auto &G : M.global_values()) {
        if (G.isDeclaration())
            continue;
        // Currently ccallable global aliases have extern linkage, we only want to make the
        // internally linked functions/global variables extern+hidden
        if (G.hasLocalLinkage()) {
            G.setLinkage(GlobalValue::ExternalLinkage);
            G.setVisibility(GlobalValue::HiddenVisibility);
        }
        if (auto F = dyn_cast<Function>(&G)) {
            partitioner.make(&G, getFunctionWeight(*F).weight);
        }
        else {
            partitioner.make(&G, 1);
        }
    }

    // Merge all uses to go together into the same partition
    for (unsigned i = 0; i < partitioner.nodes.size(); ++i) {
        for (ConstantUses<GlobalValue> uses(partitioner.nodes[i].GV, M); !uses.done(); uses.next()) {
            auto val = uses.get_info().val;
            auto idx = partitioner.node_map.find(val);
            // This can fail if we can't partition a global, but it uses something we can partition
            // This should be fixed by altering canPartition to not permit partitioning this global
            assert(idx != partitioner.node_map.end());
            partitioner.merge(i, idx->second);
        }
    }

    SmallVector<Partition, 32> partitions(threads);
    // always get the smallest partition first
    auto pcomp = [](const Partition *p1, const Partition *p2) {
        return p1->weight > p2->weight;
    };
    std::priority_queue<Partition *, SmallVector<Partition *, 0>, decltype(pcomp)> pq(pcomp);
    for (unsigned i = 0; i < threads; ++i) {
        pq.push(&partitions[i]);
    }

    SmallVector<unsigned, 0> idxs(partitioner.nodes.size());
    std::iota(idxs.begin(), idxs.end(), 0);
    std::sort(idxs.begin(), idxs.end(), [&](unsigned a, unsigned b) {
        //because roots have more weight than their children,
        //we can sort by weight and get the roots first
        return partitioner.nodes[a].weight > partitioner.nodes[b].weight;
    });

    // Assign the root of each partition to a partition, then assign its children to the same one
    for (unsigned idx = 0; idx < idxs.size(); ++idx) {
        auto i = idxs[idx];
        auto root = partitioner.find(i);
        assert(root == i || partitioner.nodes[root].weight == 0);
        if (partitioner.nodes[root].weight) {
            // root not assigned yet: give the whole set's weight to the lightest partition
            auto &node = partitioner.nodes[root];
            auto &P = *pq.top();
            pq.pop();
            auto name = node.GV->getName();
            P.globals.insert({name, true});
            if (fvars.count(node.GV))
                P.fvars[name] = fvars[node.GV];
            if (gvars.count(node.GV))
                P.gvars[name] = gvars[node.GV];
            P.weight += node.weight;
            node.weight = 0;
            // from here on `size` holds the partition index
            node.size = &P - partitions.data();
            pq.push(&P);
        }
        if (root != i) {
            auto &node = partitioner.nodes[i];
            assert(node.weight != 0);
            // we assigned its root already, so just add it to the root's partition
            // don't touch the priority queue, since we're not changing the weight
            auto &P = partitions[partitioner.nodes[root].size];
            auto name = node.GV->getName();
            P.globals.insert({name, true});
            if (fvars.count(node.GV))
                P.fvars[name] = fvars[node.GV];
            if (gvars.count(node.GV))
                P.gvars[name] = gvars[node.GV];
            node.weight = 0;
            node.size = partitioner.nodes[root].size;
        }
    }

    bool verified = verify_partitioning(partitions, M, fvars, gvars);
    if (!verified)
        llvm_dump(&M);
    assert(verified && "Partitioning failed to partition globals correctly");
    (void) verified;

    return partitions;
}
// A single named stopwatch based on jl_hrtime().
// `elapsed` holds the start timestamp between startTimer() and stopTimer(), and
// the measured duration afterwards; a value of 0 means "nothing recorded".
struct ImageTimer {
    uint64_t elapsed = 0;
    std::string name;
    std::string desc;

    // remember the current time as the start of the measurement
    void startTimer() {
        const uint64_t now = jl_hrtime();
        elapsed = now;
    }

    // turn the stored start time into the duration since startTimer()
    void stopTimer() {
        const uint64_t now = jl_hrtime();
        elapsed = now - elapsed;
    }

    // set the label and description used by print()
    void init(const Twine &name, const Twine &desc) {
        this->desc = desc.str();
        this->name = name.str();
    }

    // true if something has been recorded
    operator bool() const {
        return elapsed != 0;
    }

    // write one line (time divided by 1e9, name, description) to `out`, skipping
    // timers with nothing recorded; optionally reset the timer afterwards
    void print(raw_ostream &out, bool clear=false) {
        if (elapsed == 0)
            return;
        out << llvm::formatv("{0:F3} ", elapsed / 1e9) << name << " " << desc << "\n";
        if (clear)
            elapsed = 0;
    }
};
struct ShardTimers {
ImageTimer deserialize;
ImageTimer materialize;
ImageTimer construct;
// impl timers
ImageTimer unopt;
ImageTimer optimize;
ImageTimer opt;
ImageTimer obj;
ImageTimer asm_;
std::string name;
std::string desc;
void print(raw_ostream &out, bool clear=false) {
StringRef sep = "===-------------------------------------------------------------------------===";
out << formatv("{0}\n{1}\n{0}\n", sep, fmt_align(name + " : " + desc, AlignStyle::Center, sep.size()));
auto total = deserialize.elapsed + materialize.elapsed + construct.elapsed +
unopt.elapsed + optimize.elapsed + opt.elapsed + obj.elapsed + asm_.elapsed;
out << "Time (s) Name Description\n";
deserialize.print(out, clear);
materialize.print(out, clear);
construct.print(out, clear);
unopt.print(out, clear);
optimize.print(out, clear);
opt.print(out, clear);
obj.print(out, clear);
asm_.print(out, clear);
out << llvm::formatv("{0:F3} total Total time taken\n", total / 1e9);
}
};
// Byte buffers for the outputs produced for one module; each buffer
// corresponds to the flag of the same name passed to add_output_impl.
struct AOTOutputs {
SmallVector<char, 0> unopt, opt, obj, asm_;
};
// Perform the actual optimization and emission of the output files
//
// Parameters:
//   M        - module to emit. It is optimized in place as soon as any of
//              opt/obj/asm_ is requested.
//   SourceTM - target machine whose configuration (triple, CPU, features, options,
//              relocation model, code model, opt level) is copied into the
//              TargetMachines created here.
//   timers   - timers of the current shard; the unopt, optimize, opt, obj and asm_
//              phases are timed.
//   unopt    - emit bitcode of M as it is on entry (before optimization).
//   opt      - emit bitcode of M after optimization.
//   obj      - emit an object file.
//   asm_     - emit an assembly file.
// Returns the buffers for the requested outputs; the others stay empty.
// At least one output must be requested.
static AOTOutputs add_output_impl(Module &M, TargetMachine &SourceTM, ShardTimers &timers,
                bool unopt, bool opt, bool obj, bool asm_) {
    assert((unopt || opt || obj || asm_) && "no output requested");
    AOTOutputs out;
    // Copy of SourceTM used for the bitcode writers and the code emitters below
    auto TM = std::unique_ptr<TargetMachine>(
        SourceTM.getTarget().createTargetMachine(
#if JL_LLVM_VERSION < 210000
            SourceTM.getTargetTriple().str(),
#else
            SourceTM.getTargetTriple(),
#endif
            SourceTM.getTargetCPU(),
            SourceTM.getTargetFeatureString(),
            SourceTM.Options,
            SourceTM.getRelocationModel(),
            SourceTM.getCodeModel(),
            SourceTM.getOptLevel()));
    fixupTM(*TM);

    if (unopt) {
        timers.unopt.startTimer();
        raw_svector_ostream OS(out.unopt);
        PassBuilder PB;
        AnalysisManagers AM{*TM, PB, OptimizationLevel::O0};
        ModulePassManager MPM;
        MPM.addPass(BitcodeWriterPass(OS));
        MPM.run(M, AM.MAM);
        timers.unopt.stopTimer();
    }

    // Nothing below is needed when only the unoptimized bitcode was requested
    if (!opt && !obj && !asm_) {
        return out;
    }

    assert(!verifyLLVMIR(M));

    {
        timers.optimize.startTimer();

        // Second copy of SourceTM; ownership of this one is handed to the optimizer
        auto PMTM = std::unique_ptr<TargetMachine>(
            SourceTM.getTarget().createTargetMachine(
#if JL_LLVM_VERSION < 210000
                SourceTM.getTargetTriple().str(),
#else
                SourceTM.getTargetTriple(),
#endif
                SourceTM.getTargetCPU(),
                SourceTM.getTargetFeatureString(),
                SourceTM.Options,
                SourceTM.getRelocationModel(),
                SourceTM.getCodeModel(),
                SourceTM.getOptLevel()));
        fixupTM(*PMTM);
        auto options = OptimizationOptions::defaults(true, true);
        options.sanitize_memory = jl_options.target_sanitize_memory;
        options.sanitize_thread = jl_options.target_sanitize_thread;
        options.sanitize_address = jl_options.target_sanitize_address;
        NewPM optimizer{std::move(PMTM), getOptLevel(jl_options.opt_level), options};
        optimizer.run(M);
        assert(!verifyLLVMIR(M));

        // Aliases are only injected if the module defines at least one function
        // other than the _DllMainCRTStartup stub
        bool inject_aliases = false;
        for (auto &F : M.functions()) {
            if (!F.isDeclaration() && F.getName() != "_DllMainCRTStartup") {
                inject_aliases = true;
                break;
            }
        }
        // no need to inject aliases if we have no functions

        if (inject_aliases) {
            // We would like to emit an alias or an weakref alias to redirect these symbols
            // but LLVM doesn't let us emit a GlobalAlias to a declaration...
            // So for now we inject a definition of these functions that calls our runtime
            // functions. We do so after optimization to avoid cloning these functions.

            // Float16 conversion routines
#if defined(_CPU_X86_64_) && defined(_OS_DARWIN_)
            // LLVM 16 reverted to soft-float ABI for passing half on x86_64 Darwin
            // https://github.com/llvm/llvm-project/commit/2bcf51c7f82ca7752d1bba390a2e0cb5fdd05ca9
            injectCRTAlias(M, "__gnu_h2f_ieee", "julia_half_to_float",
                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getInt16Ty(M.getContext()) }, false));
            injectCRTAlias(M, "__extendhfsf2", "julia_half_to_float",
                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getInt16Ty(M.getContext()) }, false));
            injectCRTAlias(M, "__gnu_f2h_ieee", "julia_float_to_half",
                    FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
            injectCRTAlias(M, "__truncsfhf2", "julia_float_to_half",
                    FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
            injectCRTAlias(M, "__truncdfhf2", "julia_double_to_half",
                    FunctionType::get(Type::getInt16Ty(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
#else
            injectCRTAlias(M, "__gnu_h2f_ieee", "julia__gnu_h2f_ieee",
                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
            injectCRTAlias(M, "__extendhfsf2", "julia__gnu_h2f_ieee",
                    FunctionType::get(Type::getFloatTy(M.getContext()), { Type::getHalfTy(M.getContext()) }, false));
            injectCRTAlias(M, "__gnu_f2h_ieee", "julia__gnu_f2h_ieee",
                    FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
            injectCRTAlias(M, "__truncsfhf2", "julia__gnu_f2h_ieee",
                    FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
            injectCRTAlias(M, "__truncdfhf2", "julia__truncdfhf2",
                    FunctionType::get(Type::getHalfTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
#endif

            // BFloat16 conversion routines
            injectCRTAlias(M, "__truncsfbf2", "julia__truncsfbf2",
                    FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getFloatTy(M.getContext()) }, false));
            // The double -> bfloat16 libcall is spelled "df" (double float), matching
            // __truncdfhf2 above and the julia__truncdfbf2 runtime function.
            injectCRTAlias(M, "__truncdfbf2", "julia__truncdfbf2",
                    FunctionType::get(Type::getBFloatTy(M.getContext()), { Type::getDoubleTy(M.getContext()) }, false));
        }
        timers.optimize.stopTimer();
    }

    if (opt) {
        timers.opt.startTimer();
        raw_svector_ostream OS(out.opt);
        PassBuilder PB;
        AnalysisManagers AM{*TM, PB, OptimizationLevel::O0};
        ModulePassManager MPM;
        MPM.addPass(BitcodeWriterPass(OS));
        MPM.run(M, AM.MAM);
        timers.opt.stopTimer();
    }

    if (obj) {
        timers.obj.startTimer();
        raw_svector_ostream OS(out.obj);
        legacy::PassManager emitter;
        addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
        // A true return from addPassesToEmitFile is treated as "unsupported" and is
        // only reported; the pass manager is still run afterwards.
#if JL_LLVM_VERSION >= 180000
        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::ObjectFile, false))
#else
        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_ObjectFile, false))
#endif
            jl_safe_printf("ERROR: target does not support generation of object files\n");
        emitter.run(M);
        timers.obj.stopTimer();
    }

    if (asm_) {
        timers.asm_.startTimer();
        raw_svector_ostream OS(out.asm_);
        legacy::PassManager emitter;
        addTargetPasses(&emitter, TM->getTargetTriple(), TM->getTargetIRAnalysis());
#if JL_LLVM_VERSION >= 180000
        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CodeGenFileType::AssemblyFile, false))
#else
        if (TM->addPassesToEmitFile(emitter, OS, nullptr, CGFT_AssemblyFile, false))
#endif
            jl_safe_printf("ERROR: target does not support generation of assembly files\n");
        emitter.run(M);
        timers.asm_.stopTimer();
    }

    return out;
}
// serialize module to bitcode
static auto serializeModule(const Module &M) {
assert(!verifyLLVMIR(M) && "Serializing invalid module!");
SmallVector<char, 0> ClonedModuleBuffer;
BitcodeWriter BCWriter(ClonedModuleBuffer);
BCWriter.writeModule(M);
BCWriter.writeSymtab();
BCWriter.writeStrtab();
return ClonedModuleBuffer;
}
// Modules are deserialized lazily by LLVM, to avoid deserializing
// unnecessary functions. We take advantage of this by serializing
// the entire module once, then deleting the bodies of functions
// that are not in this partition. Once unnecessary functions are
// deleted, we then materialize the entire module to make use-lists
// consistent.
//
// M is modified in place. partition.globals is iterated as (name, flag) entries:
// a false flag internalizes the named global, a true flag keeps its definition
// in this module (the "Preserve" set). All other non-local definitions are
// turned into declarations, or into available_externally copies for functions
// that canPartition() rejects.
static void materializePreserved(Module &M, Partition &partition) {
// Definitions that must be kept as-is in this shard
DenseSet<GlobalValue *> Preserve;
for (auto &Name : partition.globals) {
auto *GV = M.getNamedValue(Name.first());
assert(GV && !GV->isDeclaration() && !GV->hasLocalLinkage());
if (!Name.second) {
// We skip partitioning for internal variables, so this has
// the same effect as putting it in preserve.
// This just avoids a hashtable lookup.
GV->setLinkage(GlobalValue::InternalLinkage);
assert(GV->hasDefaultVisibility());
}
else {
Preserve.insert(GV);
}
}
// Functions: keep declarations, local-linkage and preserved functions untouched
for (auto &F : M.functions()) {
if (F.isDeclaration())
continue;
if (F.hasLocalLinkage())
continue;
if (Preserve.contains(&F))
continue;
if (!canPartition(F)) {
// Body is kept, but only as a hidden, dso_local available_externally copy
F.setLinkage(GlobalValue::AvailableExternallyLinkage);
F.setVisibility(GlobalValue::HiddenVisibility);
F.setDSOLocal(true);
continue;
}
// Everything else becomes a hidden, dso_local external declaration
F.deleteBody();
F.setLinkage(GlobalValue::ExternalLinkage);
F.setVisibility(GlobalValue::HiddenVisibility);
F.setDSOLocal(true);
}
// Global variables: same idea, dropping the initializer turns them into declarations
for (auto &GV : M.globals()) {
if (GV.isDeclaration())
continue;
if (Preserve.contains(&GV))
continue;
if (GV.hasLocalLinkage())
continue;
GV.setInitializer(nullptr);
GV.setLinkage(GlobalValue::ExternalLinkage);
GV.setVisibility(GlobalValue::HiddenVisibility);
if (GV.getDLLStorageClass() != GlobalValue::DLLStorageClassTypes::DefaultStorageClass)
continue; // Don't mess with exported or imported globals
GV.setDSOLocal(true);
}
// Global aliases are a pain to deal with. It is illegal to have an alias to a declaration,
// so we need to replace them with either a function or a global variable declaration. However,
// we can't just delete the alias, because that would break the users of the alias. Therefore,
// we do a dance where we point each global alias to a dummy function or global variable,
// then materialize the module to access use-lists, then replace all the uses, and finally commit
// to deleting the old alias.
// Each entry pairs an alias with the placeholder global that will replace it
SmallVector<std::pair<GlobalAlias *, GlobalValue *>> DeletedAliases;
for (auto &GA : M.aliases()) {
assert(!GA.isDeclaration() && "Global aliases can't be declarations!"); // because LLVM says so
if (Preserve.contains(&GA))
continue;
if (GA.hasLocalLinkage())
continue;
if (GA.getValueType()->isFunctionTy()) {
// Unnamed placeholder function; it receives the alias's name after materialization
auto F = Function::Create(cast<FunctionType>(GA.getValueType()), GlobalValue::ExternalLinkage, "", &M);
// This is an extremely sad hack to make sure the global alias never points to an extern function
auto BB = BasicBlock::Create(M.getContext(), "", F);
new UnreachableInst(M.getContext(), BB);
GA.setAliasee(F);
DeletedAliases.push_back({ &GA, F });
}
else {
// Unnamed placeholder variable with a null initializer so that it is a definition for now.
// NOTE(review): unlike the function branch, the alias is not repointed at this
// placeholder (no GA.setAliasee(GV)) -- confirm this is intended.
auto GV = new GlobalVariable(M, GA.getValueType(), false, GlobalValue::ExternalLinkage, Constant::getNullValue(GA.getValueType()));
DeletedAliases.push_back({ &GA, GV });
}
}
// Materialize everything that is left so the replacements below can update all uses
cantFail(M.materializeAll());
for (auto &Deleted : DeletedAliases) {
// The placeholder takes over the alias's name and all of its uses; the alias is then removed
Deleted.second->takeName(Deleted.first);
Deleted.first->replaceAllUsesWith(Deleted.second);
Deleted.first->eraseFromParent();
// undo our previous sad hack
if (auto F = dyn_cast<Function>(Deleted.second)) {
F->deleteBody();
} else {
cast<GlobalVariable>(Deleted.second)->setInitializer(nullptr);
}
}
}
// Rebuild this shard's function/global tables from the partition:
// emits "jl_fvars", the "jl_gvar" offset table (with `suffix`), "jl_fvar_idxs"
// and "jl_gvar_idxs" + suffix. Entries are ordered by their index in the partition.
static void construct_vars(Module &M, Partition &partition, StringRef suffix) {
    auto &Ctx = M.getContext();

    // ---- functions: collect (index, function), order by index, then split ----
    SmallVector<std::pair<uint32_t, GlobalValue *>> indexed_funcs;
    indexed_funcs.reserve(partition.fvars.size());
    for (auto &entry : partition.fvars) {
        auto func = M.getFunction(entry.first());
        assert(func);
        assert(!func->isDeclaration());
        indexed_funcs.push_back({ entry.second, func });
    }
    std::sort(indexed_funcs.begin(), indexed_funcs.end());
    SmallVector<GlobalValue *, 0> fvars;
    SmallVector<uint32_t, 0> fvar_idxs;
    fvars.reserve(indexed_funcs.size());
    fvar_idxs.reserve(indexed_funcs.size());
    for (auto &item : indexed_funcs) {
        fvars.push_back(item.second);
        fvar_idxs.push_back(item.first);
    }

    // ---- global variables: same procedure ----
    SmallVector<std::pair<uint32_t, GlobalValue *>, 0> indexed_globals;
    indexed_globals.reserve(partition.gvars.size());
    for (auto &entry : partition.gvars) {
        auto global = M.getNamedGlobal(entry.first());
        assert(global);
        assert(!global->isDeclaration());
        indexed_globals.push_back({ entry.second, global });
    }
    std::sort(indexed_globals.begin(), indexed_globals.end());
    SmallVector<Constant*, 0> gvars;
    SmallVector<uint32_t, 0> gvar_idxs;
    gvars.reserve(indexed_globals.size());
    gvar_idxs.reserve(indexed_globals.size());
    for (auto &item : indexed_globals) {
        gvars.push_back(item.second);
        gvar_idxs.push_back(item.first);
    }

    // Now commit the fvars, gvars, and idxs
    auto T_size = M.getDataLayout().getIntPtrType(Ctx);
    emit_table(M, fvars, "jl_fvars", PointerType::getUnqual(T_size->getContext()));
    emit_offset_table(M, T_size, gvars, "jl_gvar", suffix);

    // Index arrays are constant, hidden and dso_local
    auto fidxs_init = ConstantDataArray::get(Ctx, fvar_idxs);
    auto fidxs_global = new GlobalVariable(M, fidxs_init->getType(), true,
                                           GlobalVariable::ExternalLinkage,
                                           fidxs_init, "jl_fvar_idxs");
    fidxs_global->setVisibility(GlobalValue::HiddenVisibility);
    fidxs_global->setDSOLocal(true);

    auto gidxs_init = ConstantDataArray::get(Ctx, gvar_idxs);
    auto gidxs_global = new GlobalVariable(M, gidxs_init->getType(), true,
                                           GlobalVariable::ExternalLinkage,
                                           gidxs_init, "jl_gvar_idxs" + suffix);
    gidxs_global->setVisibility(GlobalValue::HiddenVisibility);
    gidxs_global->setDSOLocal(true);
}
// Start a libuv thread on `worker` that invokes `cb` exactly once.
// The callable is moved to the heap here and freed by the new thread after it has run.
template<typename CB>
static inline void schedule_uv_thread(uv_thread_t *worker, CB &&cb)
{
    CB *heap_cb = new CB(std::move(cb));
    // Thread entry point: recover the callable, run it, then release it
    auto entry = [] (void *opaque) {
        CB *callable = static_cast<CB*>(opaque);
        (*callable)();
        delete callable;
    };
    // Use libuv thread to avoid issues with stack sizes
    uv_thread_create(worker, entry, heap_cb);
}
// Entrypoint to optionally-multithreaded image compilation. This handles global coordination of the threading,
// as well as partitioning, serialization, and deserialization.
//
// Parameters:
//   M               - module to emit
//   TM              - target machine passed on to add_output_impl
//   name            - module name, only used in timer descriptions
//   threads         - number of shards/worker threads; must be nonzero
//   unopt_out, opt_out, obj_out, asm_out - which outputs to produce (at least one)
//   module_released - callback invoked with M once this function no longer needs it
// Returns one AOTOutputs per shard (`threads` entries).
template<typename ModuleReleasedFunc>
static SmallVector<AOTOutputs, 16> add_output(Module &M, TargetMachine &TM, StringRef name, unsigned threads,
bool unopt_out, bool opt_out, bool obj_out, bool asm_out, ModuleReleasedFunc module_released) {
SmallVector<AOTOutputs, 16> outputs(threads);
assert(threads);
assert(unopt_out || opt_out || obj_out || asm_out);
// Timers for timing purposes
TimerGroup timer_group("add_output", ("Time to optimize and emit LLVM module " + name).str());
// One set of phase timers per shard
SmallVector<ShardTimers, 1> timers(threads);
for (unsigned i = 0; i < threads; ++i) {
auto idx = std::to_string(i);
timers[i].name = "shard_" + idx;
timers[i].desc = ("Timings for " + name + " module shard " + idx).str();
timers[i].deserialize.init("deserialize_" + idx, "Deserialize module");
timers[i].materialize.init("materialize_" + idx, "Materialize declarations");
timers[i].construct.init("construct_" + idx, "Construct partitioned definitions");
timers[i].unopt.init("unopt_" + idx, "Emit unoptimized bitcode");
timers[i].optimize.init("optimize_" + idx, "Optimize shard");
timers[i].opt.init("opt_" + idx, "Emit optimized bitcode");
timers[i].obj.init("obj_" + idx, "Emit object file");
timers[i].asm_.init("asm_" + idx, "Emit assembly file");
}
Timer partition_timer("partition", "Partition module", timer_group);
Timer serialize_timer("serialize", "Serialize module", timer_group);
Timer output_timer("output", "Add outputs", timer_group);
// JULIA_IMAGE_TIMINGS accepts 0/1 or true/false (case-insensitive); anything else warns
// and leaves reporting off
bool report_timings = false;
if (auto env = getenv("JULIA_IMAGE_TIMINGS")) {
char *endptr;
unsigned long val = strtoul(env, &endptr, 10);
if (endptr != env && !*endptr && val <= 1) {
report_timings = val;
} else {
if (StringRef("true").compare_insensitive(env) == 0)
report_timings = true;
else if (StringRef("false").compare_insensitive(env) == 0)
report_timings = false;
else
errs() << "WARNING: Invalid value for JULIA_IMAGE_TIMINGS: " << env << "\n";
}
}
// Single-threaded case
if (threads == 1) {
output_timer.startTimer();
{
JL_TIMING(NATIVE_AOT, NATIVE_Opt);
// convert gvars to the expected offset table format for shard 0
if (M.getGlobalVariable("jl_gvars")) {
auto gvars = consume_gv<Constant>(M, "jl_gvars", false);
Type *T_size = M.getDataLayout().getIntPtrType(M.getContext());
emit_offset_table(M, T_size, gvars, "jl_gvar", "_0"); // module flag "julia.mv.suffix"
M.getGlobalVariable("jl_gvar_idxs")->setName("jl_gvar_idxs_0");
}
// M itself is compiled on the calling thread; no partitioning or serialization
outputs[0] = add_output_impl(M, TM, timers[0], unopt_out, opt_out, obj_out, asm_out);
}
output_timer.stopTimer();
// Don't need M anymore
module_released(M);
if (!report_timings) {
timer_group.clear();
} else {
timer_group.print(dbgs(), true);
for (auto &t : timers) {
t.print(dbgs(), true);
}
}
return outputs;
}
partition_timer.startTimer();
uint64_t counter = 0;
// Partitioning requires all globals to have names.
// We use a prefix to avoid name conflicts with user code.
for (auto &G : M.global_values()) {
if (!G.isDeclaration() && !G.hasName()) {
G.setName("jl_ext_" + Twine(counter++));
}
}
auto partitions = partitionModule(M, threads);
partition_timer.stopTimer();
serialize_timer.startTimer();
auto serialized = serializeModule(M);
serialize_timer.stopTimer();
// Don't need M anymore, since we'll only read from serialized from now on
module_released(M);
output_timer.startTimer();
// Start all of the worker threads
{
JL_TIMING(NATIVE_AOT, NATIVE_Opt);
std::vector<uv_thread_t> workers(threads);
for (unsigned i = 0; i < threads; i++) {
// Everything except i is captured by reference; the workers are joined below,
// before any of the captured locals go out of scope
schedule_uv_thread(&workers[i], [&, i]() {
// Each worker deserializes into its own LLVMContext
LLVMContext ctx;
ctx.setDiscardValueNames(true);
// Lazily deserialize the entire module
timers[i].deserialize.startTimer();
auto EM = getLazyBitcodeModule(MemoryBufferRef(StringRef(serialized.data(), serialized.size()), "Optimized"), ctx);
// Make sure this also fails with only julia, but not LLVM assertions enabled,
// otherwise, the first error we hit is the LLVM module verification failure,
// which will look very confusing, because the module was partially deserialized.
bool deser_succeeded = (bool)EM;
auto M = cantFail(std::move(EM), "Error loading module");
assert(deser_succeeded); (void)deser_succeeded;
timers[i].deserialize.stopTimer();
timers[i].materialize.startTimer();
materializePreserved(*M, partitions[i]);
timers[i].materialize.stopTimer();
timers[i].construct.startTimer();
// Shard i uses the suffix "_<i>" for its tables and its "julia.mv.suffix" module flag
std::string suffix = "_" + std::to_string(i);
construct_vars(*M, partitions[i], suffix);
M->setModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(M->getContext(), suffix));
// The DICompileUnit file is not used for anything, but ld64 requires it be a unique string per object file
// or it may skip emitting debug info for that file. Here set it to ./julia#N
DIFile *topfile = DIFile::get(M->getContext(), "julia#" + std::to_string(i), ".");
if (M->getNamedMetadata("llvm.dbg.cu"))
for (auto CU: M->getNamedMetadata("llvm.dbg.cu")->operands())
CU->replaceOperandWith(0, topfile);
timers[i].construct.stopTimer();
// Each worker writes only its own slot of outputs
outputs[i] = add_output_impl(*M, TM, timers[i], unopt_out, opt_out, obj_out, asm_out);
});
}
// Wait for all of the worker threads to finish
for (unsigned i = 0; i < threads; i++)
uv_thread_join(&workers[i]);
}
output_timer.stopTimer();
if (!report_timings) {
timer_group.clear();
} else {
timer_group.print(dbgs(), true);
for (auto &t : timers) {
t.print(dbgs(), true);
}
// Also list the weight of every partition as a comma-separated list
dbgs() << "Partition weights: [";
bool comma = false;
for (auto &p : partitions) {
if (comma)
dbgs() << ", ";
else
comma = true;
dbgs() << p.weight;
}
dbgs() << "]\n";
}
return outputs;
}
// Defined outside this chunk. A nonzero value makes compute_image_thread_count()
// below return a single thread.
extern int jl_is_timing_passes;
// Decide how many threads (shards) to use for emitting a module with the given stats.
//
// Always returns at least 1. The result is 1 on 32-bit builds, while LLVM pass timing
// is enabled, and for small modules (weight < 1000). Otherwise the default is half of
// jl_effective_threads(), capped at one thread per 100 globals.
// JULIA_IMAGE_THREADS, when set to a valid positive number, overrides that default.
// JULIA_CPU_THREADS can only lower the count and is consulted only if
// JULIA_IMAGE_THREADS did not take effect.
static unsigned compute_image_thread_count(const ModuleInfo &info) {
    // 32-bit systems are very memory-constrained
#ifdef _P32
    LLVM_DEBUG(dbgs() << "32-bit systems are restricted to a single thread\n");
    return 1;
#endif
    if (jl_is_timing_passes) // LLVM isn't thread safe when timing the passes https://github.com/llvm/llvm-project/issues/44417
        return 1;
    // This is not overridable because empty modules do occasionally appear, but they'll be very small and thus exit early to
    // known easy behavior. Plus they really don't warrant multiple threads
    if (info.weight < 1000) {
        LLVM_DEBUG(dbgs() << "Small module, using a single thread\n");
        return 1;
    }

    unsigned threads = std::max(jl_effective_threads() / 2, 1);

    auto max_threads = info.globals / 100;
    if (max_threads < threads) {
        LLVM_DEBUG(dbgs() << "Low global count limiting threads to " << max_threads << " (" << info.globals << " globals)\n");
        threads = max_threads;
    }

    // environment variable override
    const char *env_threads = getenv("JULIA_IMAGE_THREADS");
    bool env_threads_set = false;
    if (env_threads) {
        char *endptr;
        unsigned long requested = strtoul(env_threads, &endptr, 10);
        // Reject trailing garbage, zero, and values that do not fit in `unsigned`;
        // assigning such a value to `threads` would silently truncate it.
        bool fits = requested == (unsigned long)(unsigned)requested;
        if (*endptr || !requested || !fits) {
            jl_safe_printf("WARNING: invalid value '%s' for JULIA_IMAGE_THREADS\n", env_threads);
        } else {
            LLVM_DEBUG(dbgs() << "Overriding threads to " << requested << " due to JULIA_IMAGE_THREADS\n");
            threads = requested;
            env_threads_set = true;
        }
    }

    // more defaults
    if (!env_threads_set && threads > 1) {
        if (auto fallbackenv = getenv("JULIA_CPU_THREADS")) {
            char *endptr;
            unsigned long requested = strtoul(fallbackenv, &endptr, 10);
            if (*endptr || !requested) {
                jl_safe_printf("WARNING: invalid value '%s' for JULIA_CPU_THREADS\n", fallbackenv);
            } else if (requested < threads) {
                // requested < threads, so this assignment cannot truncate
                LLVM_DEBUG(dbgs() << "Overriding threads to " << requested << " due to JULIA_CPU_THREADS\n");
                threads = requested;
            }
        }
    }

    // The global-count cap above can yield 0
    threads = std::max(threads, 1u);

    return threads;
}
// Emission parameters used by jl_dump_native_impl when its `params` argument is null.
// NOTE(review): the single initializer sets the first field to 1 -- presumably
// emit_metadata; confirm against the jl_emission_params_t definition.
jl_emission_params_t default_emission_params = { 1 };
// takes the running content that has collected in the shadow module and dump it to disk
// this builds the object file portion of the sysimage files for fast startup
//
// Parameters:
//   native_code    - a jl_native_code_desc_t*; it is always deleted by this function
//   bc_fname       - archive path for optimized bitcode, or NULL to skip
//   unopt_bc_fname - archive path for unoptimized bitcode, or NULL to skip
//   obj_fname      - archive path for object files, or NULL to skip
//   asm_fname      - archive path for assembly files, or NULL to skip
//   z              - if non-NULL, its buffer is embedded as "jl_system_image_data"
//                    (zstd-compressed when jl_options.compress_sysimage is set) and the
//                    stream is closed and freed here
//   s              - if non-NULL and metadata is emitted, receives the size and bytes
//                    of the dispatch target data
//   params         - emission parameters; NULL selects default_emission_params
// If no output file name is given, nothing is emitted.
extern "C" JL_DLLEXPORT_CODEGEN
void jl_dump_native_impl(void *native_code,
        const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname,
        const char *asm_fname,
        ios_t *z, ios_t *s,
        jl_emission_params_t *params)
{
    JL_TIMING(NATIVE_AOT, NATIVE_Dump);
    jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
    if (!bc_fname && !unopt_bc_fname && !obj_fname && !asm_fname) {
        LLVM_DEBUG(dbgs() << "No output requested, skipping native code dump?\n");
        delete data;
        return;
    }

    if (!params) {
        params = &default_emission_params;
    }

    // We don't want to use MCJIT's target machine because
    // it uses the large code model and we may potentially
    // want less optimizations there.
    // make sure to emit the native object format, even if FORCE_ELF was set in codegen
    Triple TheTriple(data->M.withModuleDo([](Module &M) { return M.getTargetTriple(); }));
    if (TheTriple.isOSWindows()) {
        TheTriple.setObjectFormat(Triple::COFF);
    } else if (TheTriple.isOSDarwin()) {
        TheTriple.setObjectFormat(Triple::MachO);
        SmallString<16> Str;
        Str += "macosx";
        if (TheTriple.isAArch64())
            Str += "11.0.0"; // Update this if MACOSX_VERSION_MIN changes
        else
            Str += "10.14.0";
        TheTriple.setOSName(Str);
    }
    std::optional<Reloc::Model> RelocModel;
    if (TheTriple.isOSLinux() || TheTriple.isOSFreeBSD() || TheTriple.isOSOpenBSD()) {
        RelocModel = Reloc::PIC_;
    }

    CodeModel::Model CMModel = CodeModel::Small;
    if (TheTriple.isPPC() || TheTriple.isRISCV() ||
        (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())) {
        // On PPC the small model is limited to 16bit offsets. For very large images the small
        // code model isn't good enough on x86 so use Medium, it has no cost because only the
        // image goes in .ldata
        CMModel = CodeModel::Medium;
    }
    std::unique_ptr<TargetMachine> SourceTM(
        jl_ExecutionEngine->getTarget().createTargetMachine(
#if JL_LLVM_VERSION < 210000
            TheTriple.getTriple(),
#else
            TheTriple,
#endif
            jl_ExecutionEngine->getTargetCPU(),
            jl_ExecutionEngine->getTargetFeatureString(),
            jl_ExecutionEngine->getTargetOptions(),
            RelocModel,
            CMModel,
#if JL_LLVM_VERSION >= 180000
            CodeGenOptLevel::Aggressive // -O3 TODO: respect command -O0 flag?
#else
            CodeGenOpt::Aggressive // -O3 TODO: respect command -O0 flag?
#endif
            ));
    fixupTM(*SourceTM);
    auto DL = jl_create_datalayout(*SourceTM);

    // Copied from the data module so the sysimg and metadata modules use the same settings
    std::string StackProtectorGuard;
    unsigned OverrideStackAlignment;
    data->M.withModuleDo([&](Module &M) {
        StackProtectorGuard = M.getStackProtectorGuard().str();
        OverrideStackAlignment = M.getOverrideStackAlignment();
    });

    // Shared front end to add_output: the requested outputs follow from which file names were given
    auto compile = [&](Module &M, StringRef name, unsigned threads, auto module_released) {
        return add_output(M, *SourceTM, name, threads, !!unopt_bc_fname, !!bc_fname, !!obj_fname, !!asm_fname, module_released);
    };

    SmallVector<AOTOutputs, 16> sysimg_outputs;
    SmallVector<AOTOutputs, 16> data_outputs;
    SmallVector<AOTOutputs, 16> metadata_outputs;

    if (z) {
        JL_TIMING(NATIVE_AOT, NATIVE_Sysimg);
        LLVMContext Context;
        Context.setDiscardValueNames(true);
        Module sysimgM("sysimg", Context);
#if JL_LLVM_VERSION < 210000
        sysimgM.setTargetTriple(TheTriple.str());
#else
        sysimgM.setTargetTriple(TheTriple);
#endif
        sysimgM.setDataLayout(DL);
        sysimgM.setStackProtectorGuard(StackProtectorGuard);
        sysimgM.setOverrideStackAlignment(OverrideStackAlignment);

        int compression = jl_options.compress_sysimage ? 15 : 0;
        ArrayRef<char> sysimg_data{z->buf, (size_t)z->size};
        SmallVector<char, 0> compressed_data;
        if (compression) {
            compressed_data.resize(ZSTD_compressBound(z->size));
            size_t comp_size = ZSTD_compress(compressed_data.data(), compressed_data.size(),
                                             z->buf, z->size, compression);
            compressed_data.resize(comp_size);
            sysimg_data = compressed_data;
            // The uncompressed stream is no longer needed once it has been compressed
            ios_close(z);
            free(z);
        }

        // Note: this local `data` shadows the outer native code descriptor within this scope
        Constant *data = ConstantDataArray::get(Context, sysimg_data);
        auto sysdata = new GlobalVariable(sysimgM, data->getType(), false,
                                          GlobalVariable::ExternalLinkage,
                                          data, "jl_system_image_data");
        sysdata->setAlignment(Align(jl_page_size));
#if JL_LLVM_VERSION >= 180000
        sysdata->setCodeModel(CodeModel::Large);
#else
        if (TheTriple.isX86() && TheTriple.isArch64Bit() && TheTriple.isOSLinux())
            sysdata->setSection(".ldata");
#endif
        addComdat(sysdata, TheTriple);
        Constant *len = ConstantInt::get(sysimgM.getDataLayout().getIntPtrType(Context), sysimg_data.size());
        addComdat(new GlobalVariable(sysimgM, len->getType(), true,
                                     GlobalVariable::ExternalLinkage,
                                     len, "jl_system_image_size"), TheTriple);

        // "jl_image_unpack" holds the address of the unpack routine matching the storage format
        const char *unpack_func = compression ? "jl_image_unpack_zstd" : "jl_image_unpack_uncomp";
        auto unpack = new GlobalVariable(sysimgM, DL.getIntPtrType(Context), true,
                                         GlobalVariable::ExternalLinkage, nullptr,
                                         unpack_func);
        addComdat(new GlobalVariable(sysimgM, PointerType::getUnqual(Context), true,
                                     GlobalVariable::ExternalLinkage, unpack,
                                     "jl_image_unpack"),
                  TheTriple);

        if (!compression) {
            // Free z here, since we've copied out everything into data
            // Results in serious memory savings
            ios_close(z);
            free(z);
        }
        compressed_data.clear();
        // Note that we don't set z to null, this allows the check in WRITE_ARCHIVE
        // to function as expected
        // no need to free the module/context, destructor handles that
        sysimg_outputs = compile(sysimgM, "sysimg", 1, [](Module &) {});
    }

    const bool imaging_mode = true;

    unsigned threads = 1;
    unsigned nfvars = 0;
    unsigned ngvars = 0;

    // Reset the target triple to make sure it matches the new target machine

    bool has_veccall = false;

    data->M.withModuleDo([&](Module &dataM) {
        JL_TIMING(NATIVE_AOT, NATIVE_Setup);
#if JL_LLVM_VERSION < 210000
        dataM.setTargetTriple(TheTriple.str());
#else
        dataM.setTargetTriple(TheTriple);
#endif
        dataM.setDataLayout(DL);
        dataM.setPICLevel(PICLevel::BigPIC);
        auto &Context = dataM.getContext();

        Type *T_psize = PointerType::getUnqual(Context);

        // This should really be in jl_create_native, but we haven't
        // yet set the target triple binary format correctly at that
        // point. This should be resolved when we start JITting for
        // COFF when we switch over to JITLink.
        for (auto &GA : dataM.aliases()) {
            // Global aliases are only used for ccallable things, so we should
            // mark them as dllexport
            addComdat(&GA, TheTriple);
        }

        // add metadata information
        if (imaging_mode) {
            multiversioning_preannotate(dataM);
            {
                // Append functions carrying a "julia.mv.reloc" or "julia.mv.fvar" attribute
                // to the fvar list, skipping those that are already in it
                DenseSet<GlobalValue *> fvars(data->jl_sysimg_fvars.begin(), data->jl_sysimg_fvars.end());
                for (auto &F : dataM) {
                    if (F.hasFnAttribute("julia.mv.reloc") || F.hasFnAttribute("julia.mv.fvar")) {
                        if (fvars.insert(&F).second) {
                            data->jl_sysimg_fvars.push_back(&F);
                        }
                    }
                }
            }
            ModuleInfo module_info = compute_module_info(dataM);
            LLVM_DEBUG(dbgs()
                << "Dumping module with stats:\n"
                << " globals: " << module_info.globals << "\n"
                << " functions: " << module_info.funcs << "\n"
                << " basic blocks: " << module_info.bbs << "\n"
                << " instructions: " << module_info.insts << "\n"
                << " clones: " << module_info.clones << "\n"
                << " weight: " << module_info.weight << "\n"
            );
            threads = compute_image_thread_count(module_info);
            LLVM_DEBUG(dbgs() << "Using " << threads << " to emit aot image\n");
            nfvars = data->jl_sysimg_fvars.size();
            ngvars = data->jl_sysimg_gvars.size();
            emit_table(dataM, data->jl_sysimg_gvars, "jl_gvars", T_psize);
            emit_table(dataM, data->jl_sysimg_fvars, "jl_fvars", T_psize);

            // Index arrays start out as the identity mapping 0..n-1
            SmallVector<uint32_t, 0> idxs;
            idxs.resize(data->jl_sysimg_gvars.size());
            std::iota(idxs.begin(), idxs.end(), 0);
            auto gidxs = ConstantDataArray::get(Context, idxs);
            auto gidxs_var = new GlobalVariable(dataM, gidxs->getType(), true,
                                                GlobalVariable::ExternalLinkage,
                                                gidxs, "jl_gvar_idxs");
            gidxs_var->setVisibility(GlobalValue::HiddenVisibility);
            gidxs_var->setDSOLocal(true);
            idxs.clear();
            idxs.resize(data->jl_sysimg_fvars.size());
            std::iota(idxs.begin(), idxs.end(), 0);
            auto fidxs = ConstantDataArray::get(Context, idxs);
            auto fidxs_var = new GlobalVariable(dataM, fidxs->getType(), true,
                                                GlobalVariable::ExternalLinkage,
                                                fidxs, "jl_fvar_idxs");
            fidxs_var->setVisibility(GlobalValue::HiddenVisibility);
            fidxs_var->setDSOLocal(true);
            dataM.addModuleFlag(Module::Error, "julia.mv.suffix", MDString::get(Context, "_0"));

            // let the compiler know we are going to internalize a copy of this,
            // if it has a current usage with ExternalLinkage
            auto jl_small_typeof_copy = dataM.getGlobalVariable("jl_small_typeof");
            if (jl_small_typeof_copy) {
                jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
                jl_small_typeof_copy->setDSOLocal(true);
                jl_small_typeof_copy->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DefaultStorageClass);
            }
        }

        has_veccall = !!dataM.getModuleFlag("julia.mv.veccall");
    });

    {
        // Don't use withModuleDo here since we delete the TSM midway through
        auto TSCtx = data->M.getContext();
        auto lock = TSCtx.getLock();
        auto dataM = data->M.getModuleUnlocked();

        data_outputs = compile(*dataM, "text", threads, [data, &lock, &TSCtx](Module &) {
            // Delete data when add_output thinks it's done with it
            // Saves memory for use when multithreading
            auto lock2 = std::move(lock);
            delete data;
            // Drop last reference to shared LLVM::Context
            auto TSCtx2 = std::move(TSCtx);
        });
    }

    if (params->emit_metadata) {
        JL_TIMING(NATIVE_AOT, NATIVE_Metadata);
        LLVMContext Context;
        Context.setDiscardValueNames(true);
        Module metadataM("metadata", Context);
#if JL_LLVM_VERSION < 210000
        metadataM.setTargetTriple(TheTriple.str());
#else
        metadataM.setTargetTriple(TheTriple);
#endif
        metadataM.setDataLayout(DL);
        metadataM.setStackProtectorGuard(StackProtectorGuard);
        metadataM.setOverrideStackAlignment(OverrideStackAlignment);

        // reflect the address of the jl_RTLD_DEFAULT_handle variable
        // back to the caller, so that we can check for consistency issues
        GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(&metadataM);

        Type *T_size = DL.getIntPtrType(Context);
        Type *T_psize = PointerType::getUnqual(T_size->getContext());
        Type *T_ptr = PointerType::get(Context, 0);

        auto FT = FunctionType::get(PointerType::getUnqual(Context), {}, false);
        auto F = Function::Create(FT, Function::ExternalLinkage, "get_jl_RTLD_DEFAULT_handle_addr", metadataM);
        llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F));
        builder.CreateRet(jlRTLD_DEFAULT_var);
        F->setLinkage(GlobalValue::ExternalLinkage);
        if (TheTriple.isOSBinFormatCOFF())
            F->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::DLLExportStorageClass);

        if (TheTriple.isOSWindows()) {
            // Windows expect that the function `_DllMainCRTStartup` is present in an dll.
            // Normal compilers use something like Zig's crtdll.c instead we provide a
            // a stub implementation.
            auto T_pvoid = PointerType::getUnqual(Context);
            auto T_int32 = Type::getInt32Ty(Context);
            auto FT = FunctionType::get(T_int32, {T_pvoid, T_int32, T_pvoid}, false);
            auto F = Function::Create(FT, Function::ExternalLinkage, "_DllMainCRTStartup", metadataM);
            F->setCallingConv(CallingConv::X86_StdCall);

            llvm::IRBuilder<> builder(BasicBlock::Create(Context, "top", F));
            builder.CreateRet(ConstantInt::get(T_int32, 1));
        }
        if (imaging_mode) {
            auto specs = jl_get_llvm_clone_targets(jl_options.cpu_target);
            const uint32_t base_flags = has_veccall ? JL_TARGET_VEC_CALL : 0;
            // Serialized target list: count, then per target a 32-bit flags word followed by its data
            SmallVector<uint8_t, 0> data;
            auto push_i32 = [&] (uint32_t v) {
                uint8_t buff[4];
                memcpy(buff, &v, 4);
                data.insert(data.end(), buff, buff + 4);
            };
            push_i32(specs.size());
            for (uint32_t i = 0; i < specs.size(); i++) {
                push_i32(base_flags | (specs[i].flags & JL_TARGET_UNKNOWN_NAME));
                auto &specdata = specs[i].data;
                data.insert(data.end(), specdata.begin(), specdata.end());
            }
            auto value = ConstantDataArray::get(Context, data);
            auto target_ids = new GlobalVariable(metadataM, value->getType(), true,
                                                 GlobalVariable::InternalLinkage,
                                                 value, "jl_dispatch_target_ids");
            auto shards = emit_shard_table(metadataM, T_size, T_psize, threads);
            auto ptls = emit_ptls_table(metadataM, T_size, T_ptr);
            auto header = emit_image_header(metadataM, threads, nfvars, ngvars);
            auto AT = ArrayType::get(T_size, sizeof(jl_small_typeof) / sizeof(void*));
            auto jl_small_typeof_copy = new GlobalVariable(metadataM, AT, false,
                                                           GlobalVariable::ExternalLinkage,
                                                           Constant::getNullValue(AT),
                                                           "jl_small_typeof");
            jl_small_typeof_copy->setVisibility(GlobalValue::HiddenVisibility);
            jl_small_typeof_copy->setDSOLocal(true);

            // Create CPU target string constant
            auto cpu_target_str = jl_options.cpu_target ? jl_options.cpu_target : "native";
            auto cpu_target_data = ConstantDataArray::getString(Context, cpu_target_str, true);
            auto cpu_target_global = new GlobalVariable(metadataM, cpu_target_data->getType(), true,
                                                        GlobalVariable::InternalLinkage,
                                                        cpu_target_data, "jl_cpu_target_string");

            // "jl_image_pointers" collects the six tables above in a fixed order
            AT = ArrayType::get(T_psize, 6);
            auto pointers = new GlobalVariable(metadataM, AT, false,
                                               GlobalVariable::ExternalLinkage,
                                               ConstantArray::get(AT, {
                                                    ConstantExpr::getBitCast(header, T_psize),
                                                    ConstantExpr::getBitCast(shards, T_psize),
                                                    ConstantExpr::getBitCast(ptls, T_psize),
                                                    ConstantExpr::getBitCast(jl_small_typeof_copy, T_psize),
                                                    ConstantExpr::getBitCast(target_ids, T_psize),
                                                    ConstantExpr::getBitCast(cpu_target_global, T_psize)
                                               }),
                                               "jl_image_pointers");
            addComdat(pointers, TheTriple);
            if (s) {
                write_int32(s, data.size());
                ios_write(s, (const char *)data.data(), data.size());
            }
        }

        // no need to free module/context, destructor handles that
        metadata_outputs = compile(metadataM, "data", 1, [](Module &) {});
    }

    {
        JL_TIMING(NATIVE_AOT, NATIVE_Write);

        object::Archive::Kind Kind = getDefaultForHost(TheTriple);
#if JL_LLVM_VERSION >= 180000
#define WritingMode SymtabWritingMode::NormalSymtab
#else
#define WritingMode true
#endif
// Writes one archive per requested output kind: one member per text shard, then the
// metadata member (only when metadata was emitted, since metadata_outputs is empty
// otherwise), then the sysimg member (only when z was given).
#define WRITE_ARCHIVE(fname, field, prefix, suffix) \
        if (fname) {\
            SmallVector<NewArchiveMember, 0> archive; \
            SmallVector<std::string, 16> filenames; \
            SmallVector<StringRef, 16> buffers; \
            for (size_t i = 0; i < threads; i++) { \
                filenames.push_back((StringRef("text") + prefix + "#" + Twine(i) + suffix).str()); \
                buffers.push_back(StringRef(data_outputs[i].field.data(), data_outputs[i].field.size())); \
            } \
            if (params->emit_metadata) { \
                filenames.push_back("metadata" prefix suffix); \
                buffers.push_back(StringRef(metadata_outputs[0].field.data(), metadata_outputs[0].field.size())); \
            } \
            if (z) { \
                filenames.push_back("sysimg" prefix suffix); \
                buffers.push_back(StringRef(sysimg_outputs[0].field.data(), sysimg_outputs[0].field.size())); \
            } \
            for (size_t i = 0; i < filenames.size(); i++) { \
                archive.push_back(NewArchiveMember(MemoryBufferRef(buffers[i], filenames[i]))); \
            } \
            handleAllErrors(writeArchive(fname, archive, WritingMode, Kind, true, false), reportWriterError); \
        }

        WRITE_ARCHIVE(unopt_bc_fname, unopt, "_unopt", ".bc");
        WRITE_ARCHIVE(bc_fname, opt, "_opt", ".bc");
        WRITE_ARCHIVE(obj_fname, obj, "", ".o");
        WRITE_ARCHIVE(asm_fname, asm_, "", ".s");
#undef WRITE_ARCHIVE
    }
}
// Debugger convenience entry point: call this from GDB to see what code would
// be generated for the method instance `mi`.
// Prints `mi`, followed by its unoptimized IR, optimized IR and assembly, to
// stderr, and returns the code info obtained from jl_gdbcodetyped1 for the
// current task's world age.
extern "C" JL_DLLEXPORT_CODEGEN jl_code_info_t *jl_gdbdumpcode(jl_method_instance_t *mi)
{
    JL_STREAM *out = (JL_STREAM*)STDERR_FILENO;
    size_t task_world = jl_current_task->world_age;
    jl_llvmf_dump_t dump;
    jl_code_info_t *src = jl_gdbcodetyped1(mi, task_world);
    JL_GC_PUSH1(&src);

    // Write a dump result to the stream, but only if it is a non-null Julia string.
    auto print_text = [out](jl_value_t *text) {
        if (text == NULL || !jl_is_string(text))
            return;
        jl_printf(out, "%s", jl_string_data(text));
    };
    // Regenerate the function (optionally optimized) and print its LLVM IR.
    auto print_ir = [&](bool optimized) {
        jl_get_llvmf_defn(&dump, mi, src, 0, optimized, jl_default_cgparams);
        if (!dump.F)
            return;
        print_text(jl_dump_function_ir(&dump, 0, 1, "source"));
    };
    // Regenerate the optimized function and print its native assembly.
    auto print_asm = [&]() {
        jl_get_llvmf_defn(&dump, mi, src, 0, true, jl_default_cgparams);
        if (!dump.F)
            return;
        print_text(jl_dump_function_asm(&dump, 0, "", "source", 0, true));
    };

    jl_printf(out, "---- dumping IR for ----\n");
    jl_static_show(out, (jl_value_t*)mi);
    jl_printf(out, "\n----\n");

    jl_printf(out, "\n---- unoptimized IR ----\n");
    print_ir(false);
    jl_printf(out, "\n----\n");

    jl_printf(out, "\n---- optimized IR ----\n");
    print_ir(true);
    jl_printf(out, "\n----\n");

    jl_printf(out, "\n---- assembly ----\n");
    print_asm();
    jl_printf(out, "\n----\n");

    JL_GC_POP();
    return src;
}
// --- native code info, and dump function to IR and ASM ---
// Get pointer to llvm::Function instance, compiling if necessary
// for use in reflection from Julia.
// This is paired with jl_dump_function_ir and jl_dump_function_asm, either of which will free all memory allocated here
//
// Parameters:
//   dump       - output; dump->F and dump->TSM are reset to nullptr on entry and are
//                only filled in when a function was located in the emitted module
//   mi         - method instance to emit code for
//   src        - source to compile; nothing is emitted unless it is non-null and a code info
//   getwrapper - when nonzero, return the function named by decls.functionObject instead of
//                decls.specFunctionObject (forced to false when functionObject is
//                "jl_fptr_args" or "jl_fptr_sparam")
//   optimize   - when nonzero, run the NewPM pipeline over the module before returning it
//   params     - codegen parameters; also supplies the sanitizer flags used when optimizing
extern "C" JL_DLLEXPORT_CODEGEN
void jl_get_llvmf_defn_impl(jl_llvmf_dump_t *dump, jl_method_instance_t *mi, jl_code_info_t *src, char getwrapper, char optimize, const jl_cgparams_t params)
{
// emit this function into a new llvm module
dump->F = nullptr;
dump->TSM = nullptr;
if (src && jl_is_code_info(src)) {
// Create a fresh context and module using the execution engine's data layout and target triple
auto ctx = jl_ExecutionEngine->makeContext();
const auto &DL = jl_ExecutionEngine->getDataLayout();
const auto &TT = jl_ExecutionEngine->getTargetTriple();
orc::ThreadSafeModule m = jl_create_ts_module(name_from_method_instance(mi), ctx, DL, TT);
Function *F = nullptr;
{
// When compile-time measurement is enabled, the time spent in this scope is
// added to jl_cumulative_compile_time at the end of the scope
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
compiler_start_time = jl_hrtime();
jl_codegen_params_t output(ctx, DL, TT);
output.params = &params;
output.imaging_mode = jl_options.image_codegen;
// Keep the temporary_roots array rooted until the matching JL_GC_POP below
output.temporary_roots = jl_alloc_array_1d(jl_array_any_type, 0);
JL_GC_PUSH1(&output.temporary_roots);
jl_llvm_functions_t decls = jl_emit_code(m, mi, src, mi->specTypes, src->rettype, output);
// while not required, also emit the cfunc thunks, based on the
// inferred ABIs of their targets in the current latest world,
// since otherwise it is challenging to see all relevant codes
jl_compiled_functions_t compiled_functions;
size_t latestworld = jl_atomic_load_acquire(&jl_world_counter);
for (cfunc_decl_t &cfunc : output.cfuncs) {
jl_value_t *sigt = cfunc.abi.sigt;
JL_GC_PROMISE_ROOTED(sigt);
// NOTE: this `mi` shadows the function parameter of the same name for the rest of the loop body
jl_value_t *mi = jl_get_specialization1((jl_tupletype_t*)sigt, latestworld, 0);
// presumably jl_nothing means no specialization was found for this signature; skip it
if (mi == jl_nothing)
continue;
jl_code_instance_t *codeinst = jl_type_infer((jl_method_instance_t*)mi, latestworld, SOURCE_MODE_NOT_REQUIRED, jl_options.trim);
// Skip targets without a code instance, and ones already recorded in compiled_functions
if (codeinst == nullptr || compiled_functions.count(codeinst))
continue;
orc::ThreadSafeModule decl_m = jl_create_ts_module("extern", ctx, DL, TT);
// NOTE: this `decls` shadows the outer `decls` returned by jl_emit_code above
jl_llvm_functions_t decls;
// Code instances invoked through jl_fptr_const_return_addr only record that entry point's name;
// all others get declarations emitted into the separate "extern" module
if (jl_atomic_load_relaxed(&codeinst->invoke) == jl_fptr_const_return_addr)
decls.functionObject = "jl_fptr_const_return";
else
decls = jl_emit_codedecls(decl_m, codeinst, output);
compiled_functions[codeinst] = {std::move(decl_m), std::move(decls)};
}
generate_cfunc_thunks(output, compiled_functions);
emit_always_inline(m, output);
// Discard the remaining codegen bookkeeping before releasing the GC root
output.workqueue.clear();
compiled_functions.clear();
output.temporary_roots = nullptr;
JL_GC_POP(); // GC the global_targets array contents now since reflection doesn't need it
if (m) {
// if compilation succeeded, prepare to return the result
// Similar to jl_link_global from jitlayers.cpp,
// so that code_llvm shows similar codegen to the jit
for (auto &global : output.global_targets) {
if (jl_options.image_codegen) {
// In image codegen mode the globals are only given external linkage
global.second->setLinkage(GlobalValue::ExternalLinkage);
}
else {
// Otherwise initialize each global with a private alias (named "<global>.jit")
// to the literal pointer value, and mark the global as a private constant
auto p = literal_static_pointer_val(global.first, global.second->getValueType());
Type *elty = PointerType::get(p->getContext(), 0);
// For pretty printing, when LLVM inlines the global initializer into its loads
auto alias = GlobalAlias::create(elty, 0, GlobalValue::PrivateLinkage, global.second->getName() + ".jit", p, global.second->getParent());
global.second->setInitializer(ConstantExpr::getBitCast(alias, global.second->getValueType()));
global.second->setConstant(true);
global.second->setLinkage(GlobalValue::PrivateLinkage);
global.second->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
global.second->setVisibility(GlobalValue::DefaultVisibility);
}
}
if (!jl_options.image_codegen) {
optimizeDLSyms(*m.getModuleUnlocked());
}
// The IR verifier only runs inside assert(), so it is skipped when assertions are compiled out
assert(!verifyLLVMIR(*m.getModuleUnlocked()));
if (optimize) {
// Default optimization options, with the sanitizer flags taken from the caller's params
auto opts = OptimizationOptions::defaults();
opts.sanitize_memory = params.sanitize_memory;
opts.sanitize_thread = params.sanitize_thread;
opts.sanitize_address = params.sanitize_address;
NewPM PM{jl_ExecutionEngine->cloneTargetMachine(), getOptLevel(jl_options.opt_level), opts};
//Safe b/c context lock is held by output
PM.run(*m.getModuleUnlocked());
assert(!verifyLLVMIR(*m.getModuleUnlocked()));
}
// Select which emitted function to return: the specialized function object by default,
// or the generic wrapper when the caller asked for it
const std::string *fname;
if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam")
getwrapper = false;
if (!getwrapper)
fname = &decls.specFunctionObject;
else
fname = &decls.functionObject;
// cast<> (unlike dyn_cast<>) expects the named value to exist and to be a Function
F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
}
if (measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
}
if (F) {
// Move the module into a heap-allocated ThreadSafeModule and hand both it and the
// function back through `dump`
dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m)));
dump->F = wrap(F);
return;
}
}
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C++
1
https://gitee.com/mirrors/julia-language.git
git@gitee.com:mirrors/julia-language.git
mirrors
julia-language
julia-language
master

搜索帮助