Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support external linkage in "sysimages" #44527

Merged
merged 2 commits into from
Nov 30, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Replace the .ji serialization with sysimage format
This unifies two serializers, `dump.c` (used for packages)
and `staticdata.c` (used for system images). It adopts the
`staticdata` strategy, adding support for external linkage,
uniquing of MethodInstances & types, method extensions,
external specializations, and invalidation. This lays the
groundwork for native code caching as done with system images.

Co-authored-by: Valentin Churavy <[email protected]>
Co-authored-by: Jameson Nash <[email protected]>
Co-authored-by: Tim Holy <[email protected]>
  • Loading branch information
3 people committed Nov 29, 2022
commit cbfdb3facd0f2ece4088f43ef97533e9e0921081
8 changes: 4 additions & 4 deletions base/compiler/typeinfer.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# This file is a part of Julia. License is MIT: https://julialang.org/license

# Tracking of newly-inferred MethodInstances during precompilation
# Tracking of newly-inferred CodeInstances during precompilation
const track_newly_inferred = RefValue{Bool}(false)
const newly_inferred = MethodInstance[]
const newly_inferred = CodeInstance[]

# build (and start inferring) the inference frame for the top-level MethodInstance
function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
Expand Down Expand Up @@ -403,11 +403,11 @@ function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
# TODO: also don't store inferred code if we've previously decided to interpret this function
if !already_inferred
inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result)
code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
code_cache(interp)[linfo] = ci = CodeInstance(result, inferred_result, valid_worlds)
if track_newly_inferred[]
m = linfo.def
if isa(m, Method) && m.module != Core
ccall(:jl_push_newly_inferred, Cvoid, (Any,), linfo)
ccall(:jl_push_newly_inferred, Cvoid, (Any,), ci)
end
end
end
Expand Down
83 changes: 48 additions & 35 deletions base/loading.jl
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any}
end

@debug "Loading cache file $path for $pkg"
sv = ccall(:jl_restore_incremental, Any, (Cstring, Any), path, depmods)
sv = ccall(:jl_restore_incremental, Any, (Cstring, Any, Cint), path, depmods, false)
if isa(sv, Exception)
return sv
end
Expand Down Expand Up @@ -973,7 +973,7 @@ function run_package_callbacks(modkey::PkgId)
end

# loads a precompile cache file, after checking stale_cachefile tests
function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64)
function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt128)
assert_havelock(require_lock)
loaded = nothing
if root_module_exists(modkey)
Expand Down Expand Up @@ -1021,7 +1021,7 @@ function _tryrequire_from_serialized(modkey::PkgId, path::String, sourcepath::St
for i in 1:length(depmods)
dep = depmods[i]
dep isa Module && continue
_, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt64}
_, depkey, depbuild_id = dep::Tuple{String, PkgId, UInt128}
@assert root_module_exists(depkey)
dep = root_module(depkey)
depmods[i] = dep
Expand Down Expand Up @@ -1052,7 +1052,7 @@ function _tryrequire_from_serialized(pkg::PkgId, path::String)
local depmodnames
io = open(path, "r")
try
isvalid_cache_header(io) || return ArgumentError("Invalid header in cache file $path.")
iszero(isvalid_cache_header(io)) && return ArgumentError("Invalid header in cache file $path.")
depmodnames = parse_cache_header(io)[3]
isvalid_file_crc(io) || return ArgumentError("Invalid checksum in cache file $path.")
finally
Expand All @@ -1074,7 +1074,7 @@ end

# returns `nothing` if require found a precompile cache for this sourcepath, but couldn't load it
# returns the set of modules restored if the cache load succeeded
@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt64)
@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, build_id::UInt128)
assert_havelock(require_lock)
paths = find_all_in_cache_path(pkg)
for path_to_try in paths::Vector{String}
Expand All @@ -1087,7 +1087,7 @@ end
for i in 1:length(staledeps)
dep = staledeps[i]
dep isa Module && continue
modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt64}
modpath, modkey, modbuild_id = dep::Tuple{String, PkgId, UInt128}
modpaths = find_all_in_cache_path(modkey)
modfound = false
for modpath_to_try in modpaths::Vector{String}
Expand All @@ -1101,7 +1101,7 @@ end
break
end
if !modfound
@debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $modbuild_id is missing from the cache."
@debug "Rejecting cache file $path_to_try because required dependency $modkey with build ID $(UUID(modbuild_id)) is missing from the cache."
staledeps = true
break
end
Expand Down Expand Up @@ -1153,7 +1153,7 @@ const package_callbacks = Any[]
const include_callbacks = Any[]

# used to optionally track dependencies when requiring a module:
const _concrete_dependencies = Pair{PkgId,UInt64}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
const _concrete_dependencies = Pair{PkgId,UInt128}[] # these dependency versions are "set in stone", and the process should try to avoid invalidating them
const _require_dependencies = Any[] # a list of (mod, path, mtime) tuples that are the file dependencies of the module currently being precompiled
const _track_dependencies = Ref(false) # set this to true to track the list of file dependencies
function _include_dependency(mod::Module, _path::AbstractString)
Expand Down Expand Up @@ -1406,7 +1406,7 @@ function _require(pkg::PkgId, env=nothing)

# attempt to load the module file via the precompile cache locations
if JLOptions().use_compiled_modules != 0
m = _require_search_from_serialized(pkg, path, UInt64(0))
m = _require_search_from_serialized(pkg, path, UInt128(0))
if m isa Module
return m
end
Expand All @@ -1416,7 +1416,7 @@ function _require(pkg::PkgId, env=nothing)
# but it was not handled by the precompile loader, complain
for (concrete_pkg, concrete_build_id) in _concrete_dependencies
if pkg == concrete_pkg
@warn """Module $(pkg.name) with build ID $concrete_build_id is missing from the cache.
@warn """Module $(pkg.name) with build ID $((UUID(concrete_build_id))) is missing from the cache.
This may mean $pkg does not support precompilation but is imported by a module that does."""
if JLOptions().incremental != 0
# during incremental precompilation, this should be fail-fast
Expand Down Expand Up @@ -1785,9 +1785,13 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
close(tmpio)
p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout)
if success(p)
# append checksum to the end of the .ji file:
open(tmppath, "a+") do f
write(f, _crc32c(seekstart(f)))
# append extra crc to the end of the .ji file:
open(tmppath, "r+") do f
if iszero(isvalid_cache_header(f))
error("Invalid header for $pkg in new cache file $(repr(tmppath)).")
end
seekstart(f)
write(f, _crc32c(f))
end
# inherit permission from the source file (and make them writable)
chmod(tmppath, filemode(path) & 0o777 | 0o200)
Expand All @@ -1807,7 +1811,7 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
end
end

# this is atomic according to POSIX:
# this is atomic according to POSIX (not Win32):
rename(tmppath, cachefile; force=true)
return cachefile
end
Expand All @@ -1817,13 +1821,16 @@ function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, in
if p.exitcode == 125
return PrecompilableError()
else
error("Failed to precompile $pkg to $tmppath.")
error("Failed to precompile $pkg to $(repr(tmppath)).")
end
end

module_build_id(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), m)
function module_build_id(m::Module)
hi, lo = ccall(:jl_module_build_id, NTuple{2,UInt64}, (Any,), m)
return (UInt128(hi) << 64) | lo
end

isvalid_cache_header(f::IOStream) = (0 != ccall(:jl_read_verify_header, Cint, (Ptr{Cvoid},), f.ios))
isvalid_cache_header(f::IOStream) = ccall(:jl_read_verify_header, UInt64, (Ptr{Cvoid},), f.ios) # returns checksum id or zero
isvalid_file_crc(f::IOStream) = (_crc32c(seekstart(f), filesize(f) - 4) == read(f, UInt32))

struct CacheHeaderIncludes
Expand Down Expand Up @@ -1897,13 +1904,14 @@ function parse_cache_header(f::IO)
totbytes -= 8
@assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))"
# read the list of modules that are required to be present during loading
required_modules = Vector{Pair{PkgId, UInt64}}()
required_modules = Vector{Pair{PkgId, UInt128}}()
while true
n = read(f, Int32)
n == 0 && break
sym = String(read(f, n)) # module name
uuid = UUID((read(f, UInt64), read(f, UInt64))) # pkg UUID
build_id = read(f, UInt64) # build id
build_id = UInt128(read(f, UInt64)) << 64
build_id |= read(f, UInt64)
push!(required_modules, PkgId(uuid, sym) => build_id)
end
return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash
Expand All @@ -1912,29 +1920,29 @@ end
function parse_cache_header(cachefile::String; srcfiles_only::Bool=false)
io = open(cachefile, "r")
try
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
ret = parse_cache_header(io)
srcfiles_only || return ret
modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = ret
_, (includes, _), _, srctextpos, _... = ret
srcfiles = srctext_files(io, srctextpos)
delidx = Int[]
for (i, chi) in enumerate(includes)
chi.filename ∈ srcfiles || push!(delidx, i)
end
deleteat!(includes, delidx)
return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash
return ret
finally
close(io)
end
end



preferences_hash(f::IO) = parse_cache_header(f)[end]
preferences_hash(f::IO) = parse_cache_header(f)[6]
function preferences_hash(cachefile::String)
io = open(cachefile, "r")
try
if !isvalid_cache_header(io)
if iszero(isvalid_cache_header(io))
throw(ArgumentError("Invalid header in cache file $cachefile."))
end
return preferences_hash(io)
Expand All @@ -1945,22 +1953,22 @@ end


function cache_dependencies(f::IO)
defs, (includes, requires), modules, srctextpos, prefs, prefs_hash = parse_cache_header(f)
_, (includes, _), modules, _... = parse_cache_header(f)
return modules, map(chi -> (chi.filename, chi.mtime), includes) # return just filename and mtime
end

function cache_dependencies(cachefile::String)
io = open(cachefile, "r")
try
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
return cache_dependencies(io)
finally
close(io)
end
end

function read_dependency_src(io::IO, filename::AbstractString)
modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io)
srctextpos = parse_cache_header(io)[4]
srctextpos == 0 && error("no source-text stored in cache file")
seek(io, srctextpos)
return _read_dependency_src(io, filename)
Expand All @@ -1983,7 +1991,7 @@ end
function read_dependency_src(cachefile::String, filename::AbstractString)
io = open(cachefile, "r")
try
!isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
iszero(isvalid_cache_header(io)) && throw(ArgumentError("Invalid header in cache file $cachefile."))
return read_dependency_src(io, filename)
finally
close(io)
Expand Down Expand Up @@ -2173,12 +2181,13 @@ get_compiletime_preferences(::Nothing) = String[]
# returns true if "cachefile.ji" is stale relative to "modpath.jl" and build_id for modkey
# otherwise returns the list of dependencies to also check
@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false)
return stale_cachefile(PkgId(""), UInt64(0), modpath, cachefile; ignore_loaded)
return stale_cachefile(PkgId(""), UInt128(0), modpath, cachefile; ignore_loaded)
end
@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt64, modpath::String, cachefile::String; ignore_loaded::Bool = false)
@constprop :none function stale_cachefile(modkey::PkgId, build_id::UInt128, modpath::String, cachefile::String; ignore_loaded::Bool = false)
io = open(cachefile, "r")
try
if !isvalid_cache_header(io)
checksum = isvalid_cache_header(io)
if iszero(checksum)
@debug "Rejecting cache file $cachefile due to it containing an invalid cache header"
return true # invalid cache file
end
Expand All @@ -2191,9 +2200,12 @@ end
@debug "Rejecting cache file $cachefile for $modkey since it is for $id instead"
return true
end
if build_id != UInt64(0) && id.second != build_id
@debug "Ignoring cache file $cachefile for $modkey since it is does not provide desired build_id"
return true
if build_id != UInt128(0)
id_build = (UInt128(checksum) << 64) | id.second
if id_build != build_id
@debug "Ignoring cache file $cachefile for $modkey ($((UUID(id_build)))) since it is does not provide desired build_id ($((UUID(build_id))))"
return true
end
end
id = id.first
modules = Dict{PkgId, UInt64}(modules)
Expand Down Expand Up @@ -2233,11 +2245,12 @@ end
for (req_key, req_build_id) in _concrete_dependencies
build_id = get(modules, req_key, UInt64(0))
if build_id !== UInt64(0)
build_id |= UInt128(checksum) << 64
if build_id === req_build_id
skip_timecheck = true
break
end
@debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $build_id) for $req_key (want $req_build_id)"
@debug "Rejecting cache file $cachefile because it provides the wrong build_id (got $((UUID(build_id)))) for $req_key (want $(UUID(req_build_id)))"
return true # cachefile doesn't provide the required version of the dependency
end
end
Expand Down
2 changes: 1 addition & 1 deletion deps/llvm.mk
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,8 @@ LLVM_TOOLS_JLL_TAGS := -llvm_version+$(LLVM_VER_MAJ)
endif

$(eval $(call bb-install,llvm,LLVM,false,true))
$(eval $(call bb-install,clang,CLANG,false,true))
$(eval $(call bb-install,lld,LLD,false,true))
vchuravy marked this conversation as resolved.
Show resolved Hide resolved
$(eval $(call bb-install,clang,CLANG,false,true))
$(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true))

endif # USE_BINARYBUILDER_LLVM
Expand Down
7 changes: 3 additions & 4 deletions src/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ endif

SRCS := \
jltypes gf typemap smallintset ast builtins module interpreter symbol \
dlload sys init task array dump staticdata toplevel jl_uv datatype \
dlload sys init task array staticdata toplevel jl_uv datatype \
simplevector runtime_intrinsics precompile jloptions \
threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
jlapi signal-handling safepoint timing subtype rtutils gc-heap-snapshot \
Expand Down Expand Up @@ -291,7 +291,6 @@ $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
$(BUILDDIR)/datatype.o $(BUILDDIR)/datatype.dbg.obj: $(SRCDIR)/support/htable.h $(SRCDIR)/support/htable.inc
$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h)
$(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
$(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h)
$(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h
Expand All @@ -317,7 +316,7 @@ $(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj:
$(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
$(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
$(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/staticdata.o $(BUILDDIR)/staticdata.dbg.obj: $(SRCDIR)/staticdata_utils.c $(SRCDIR)/processor.h $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/toplevel.o $(BUILDDIR)/toplevel.dbg.obj: $(SRCDIR)/builtin_proto.h
$(BUILDDIR)/ircode.o $(BUILDDIR)/ircode.dbg.obj: $(SRCDIR)/serialize.h
$(BUILDDIR)/pipeline.o $(BUILDDIR)/pipeline.dbg.obj: $(SRCDIR)/passes.h $(SRCDIR)/jitlayers.h
Expand Down Expand Up @@ -453,7 +452,7 @@ SA_EXCEPTIONS-jloptions.c := -Xanalyzer -analyzer-config -Xana
SA_EXCEPTIONS-subtype.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult"
SA_EXCEPTIONS-codegen.c := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core"
# these need to be annotated (and possibly fixed)
SKIP_IMPLICIT_ATOMICS := dump.c module.c staticdata.c codegen.cpp
SKIP_IMPLICIT_ATOMICS := module.c staticdata.c codegen.cpp
# these need to be annotated (and possibly fixed)
SKIP_GC_CHECK := codegen.cpp rtutils.c

Expand Down
Loading