Last active
July 28, 2024 19:11
-
-
Save rrbutani/da6f9ab08a84c3610fbff6774929d6b2 to your computer and use it in GitHub Desktop.
An alternative approach to `mitm-cache` and `buildGradleApplication`'s use of verification-metadata.xml (https://github.com/NixOS/nixpkgs/tree/master/pkgs/development/tools/build-managers/gradle, https://github.com/raphiz/buildGradleApplication)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ stdenvNoCC
, writeScript
, lib
, gradle
, python312
}:

# NOTE: prior art re: packaging Gradle projects + dealing with external
# dependencies:
#   - https://github.com/NixOS/nixpkgs/blob/99dec1f6b06290f64a8d1711c41e5e13653a14c7/pkgs/development/tools/build-managers/gradle/README.md
#   - https://github.com/NixOS/nixpkgs/blob/c12b2a0b196a42a20949cf648c86acfbe6418ad3/doc/languages-frameworks/gradle.section.md
#   - https://github.com/raphiz/buildGradleApplication
#
# Our use case is not as general so we just use the gradle dependency cache as
# our mechanism for feeding pre-fetched external deps into the build:
#   - https://docs.gradle.org/6.1.1/userguide/dependency_resolution.html#sec:dependency_cache
#
# We follow the same pattern used for many dependency managers modeled in
# nixpkgs: have a FOD that fetches the external deps and produces the cache
# and then a main build that consumes the cache.
#
# What we persist is `$GRADLE_USER_HOME/caches/modules-*` (see `installPhase`
# below).
#
# Corresponding main derivations should add this derivation to their
# `nativeBuildInputs`; the setup + configure hooks below will take care of the
# rest.
{ pname
, src
, gradleArgs ? []
, extraNativeBuildInputs ? []
, extraBuildInputs ? []

  # Expected output hash, in SRI form.
, hash

  # Task to run to get gradle to fetch the dependencies that will be used.
, task ? "properties"

  # Whether to make a copy of `src` in this derivation or not.
, needsMutableSrc ? false

  # Unix epoch milliseconds (64 bits) to use in gradle's metadata cache entries.
  #
  # Note that this does not appear to need to be newer than the current time;
  # for cache hits there doesn't seem to be a TTL.. to be safe we default to
  # using a far-in-the-future timestamp anyways.
, replacementTimestamp ? 8000000000000 # July 6th, 2223

  # Whether to have `make-gradle-dependency-cache-metadata-reproducible.py`
  # print information about the replacements it is making.
, debugFixups ? true
}: stdenvNoCC.mkDerivation (finalAttrs: let
  # The fixed-output derivation that actually runs gradle (with network
  # access) and captures the resulting dependency cache.
  drv = stdenvNoCC.mkDerivation {
    # Having a different name but with an existing hash will still cause the
    # derivation to be rebuilt.
    #
    # We intentionally place the gradle version in this derivation's name so
    # that it'll be refetched if the gradle version changes — as per the docs,
    # different gradle versions have different module/file/metadata cache
    # versions: https://docs.gradle.org/current/userguide/dependency_resolution.html#sub:cache_copy
    #
    # We could technically model the table in the docs linked above so that we're
    # only sensitive to differences in gradle version that actually alter the
    # dependency cache schema versions but this is currently not worth the effort.
    #
    # Note that we do not place the main derivation's name in this derivation's
    # name: the idea is that mismatches in the fetched deps vs. what the build
    # needs will quickly result in loud and visible errors.
    name = "gradle-${lib.getVersion gradle}-deps-for-" + pname;

    nativeBuildInputs = [ gradle python312 ] ++ extraNativeBuildInputs;
    buildInputs = extraBuildInputs;

    inherit src;
    # When the source does not need to be mutable we skip unpacking and point
    # gradle at `src` directly via `--project-dir` (below).
    dontUnpack = !needsMutableSrc;

    GRADLE_USER_HOME = "./gradle-user-home";
    gradleFlags = [
      "--no-daemon"
      "--no-parallel" # see ./make-gradle-dependency-cache-metadata-reproducible.py
      # "--quiet"
    ] ++ lib.optional (!needsMutableSrc) "--project-dir ${src}";

    # When building straight out of `src`, give gradle a scratch project cache
    # directory (presumably because `src` is not writable — TODO confirm).
    buildPhase = lib.optionalString (!needsMutableSrc) ''
      gradleFlagsArray+=(--project-cache-dir "$(mktemp -d)")
    '' +
    # Remove `--offline` from the default gradle setup hook.
    #
    # (we want the other flags from the setup hook though)
    ''
      gradleFlagsArrayCopy=("''${gradleFlagsArray[@]}")
      gradleFlagsArray=()
      for flag in "''${gradleFlagsArrayCopy[@]}"; do
        if [[ "$flag" == "--offline" ]]; then continue; fi
        gradleFlagsArray+=("$flag")
      done
    '' + ''
      set -x
      gradle ${task} ${lib.strings.escapeShellArgs gradleArgs}
      set +x
    '';

    # Only the `modules-*` directories of the gradle user home are kept.
    installPhase = ''
      mkdir -p "$out"
      mv "$GRADLE_USER_HOME"/caches/modules-* "$out"/
    '';

    # Rewrite the timestamps gradle embeds in its metadata so that the output
    # is reproducible; when `replacementTimestamp` is null the script's own
    # default is used.
    fixupPhase = lib.optionalString (replacementTimestamp != null) ''
      export REPLACEMENT_TIMESTAMP="${builtins.toString replacementTimestamp}"
    '' + lib.optionalString debugFixups ''
      export DEBUG=true
    '' + ''
      python3 ${./make-gradle-dependency-cache-metadata-reproducible.py} \
        --patch "$out"/*
    '' +
    # .lock files aren't reproducible and also aren't required; `-f` so that a
    # cache without any lock file does not fail the build:
    ''
      rm -f "$out"/modules-*/*.lock
    '';

    # https://nix.dev/manual/nix/2.22/language/advanced-attributes.html?highlight=outputHash#adv-attr-outputHash
    outputHashMode = "recursive";
    outputHashAlgo = null; # users must provide hashes in SRI form
    outputHash = hash;
  };
in {
  inherit (drv) name;

  # Depends on what's being fetched of course. Assuming it's typically JARs +
  # scripts.
  meta.sourceProvenance = with lib.sourceTypes; [
    # binaryNativeCode
    binaryBytecode
    fromSource
  ];

  passthru = {
    depsDrv = drv;
    inner = drv;
  };

  dontUnpack = true; dontBuild = true;

  # the main derivation should include this drv in `nativeBuildInputs` so that
  # the following hook can make `GRADLE_USER_HOME` have the dependency cache we
  # produced symlinked into it:
  setupHook = writeScript "gradle-deps-setup-hook" ''
    gradleDepsConfigureHook() {
      if [ -z "''${GRADLE_USER_HOME-}" ]; then
        GRADLE_USER_HOME="$(mktemp -d)"
      fi
      export GRADLE_USER_HOME
      mkdir -p "''${GRADLE_USER_HOME}/caches/"

      for m in ${drv}/*; do
        echo "gradle-deps: symlinking in dependency cache at '$m'" >&2
        ln -s "$m" "''${GRADLE_USER_HOME}/caches/"
      done
    }

    if [ -z "''${dontUseGradleDepsConfigure-}" ]; then
      preConfigureHooks+=(gradleDepsConfigureHook)
    fi
  '';
})
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# NOTE: there's a reproducibility issue with the metadata stored in: | |
# - modules-<ver>/metadata-<ver>/module-artifact.bin | |
# - modules-<ver>/metadata-<ver>/module-metadata.bin | |
# - modules-<ver>/metadata-<ver>/resource-at-url.bin | |
# | |
# This stems from Gradle encoding timestamps in these files: | |
# - https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntrySerializer.java#L25-L55 | |
# | |
# Unfortunately there does not appear to be a way to inhibit/override this at | |
# the application level; the closest thing available is | |
# `org.gradle.internal.test.clockoffset` but this doesn't help in this case: | |
# https://github.com/gradle/gradle/blob/dc6e12baff8fbfbe75ee3f5a238831e594d95cb8/subprojects/core/src/main/java/org/gradle/util/internal/BuildCommencedTimeProvider.java#L23 | |
# | |
# Intercepting at a level below (e.g. using libfaketime:
# https://github.com/wolfcw/libfaketime) also does not work in this case because
# there's network I/O involved; TLS/cert verification complains if system time
# isn't approximately correct.
# | |
# So, rather than try to get gradle to produce reproducible metadata files we | |
# just fix them up after the fact. | |
# | |
# This script implements a parser for the metadata files and patches timestamps | |
# to fix the reproducibility issue. | |
# | |
# NOTE: there's another source of reproducibility issues: non-determinism about | |
# which repository dependencies are fetched from... | |
# - for now, running with `--no-parallel` seems to sufficiently mitigate this | |
# | |
# NOTE: there's actually a third source of reproducibility issues that I | |
# haven't gotten to the bottom of yet: sometimes some entries are listed | |
# multiple times... | |
# - in ~100 gradle runs I've only observed this once | |
# - to mitigate, perhaps it's worth having this script error if it sees | |
# duplicated entries? | |
# - "resolving" the issue would entail actually removing the duplicates but | |
# that's harder; would require having a real encoding flow rather than just | |
# decode + patch at offsets | |
from abc import ABC, abstractmethod | |
from dataclasses import dataclass | |
from datetime import datetime | |
from io import BufferedRandom, BufferedReader | |
import os | |
from pathlib import Path | |
import sys | |
from typing import Any, ClassVar, Optional, Self, override | |
# Verbose tracing is enabled whenever a `DEBUG` variable is present in the
# environment, regardless of its value.
DEBUG = os.environ.get("DEBUG") is not None
# NOTE: the layout of the metadata files is not stable; metadata versions are | |
# listed here: | |
# - https://github.com/gradle/gradle/blob/497dffa06e9a0769c5e0e2ba866d0fbd88f7a1d5/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/CacheLayout.java#L37-L89 | |
# | |
# For now we only support the latest version as of this writing (106): | |
# Keyed by metadata layout version string; no value is attached to a version.
SUPPORTED_METADATA_LAYOUT_VERSIONS = {"106": None}
################################################################################ | |
################################################################################ | |
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/Block.java#L19-L21 | |
class Block:
    """Byte widths of the fixed-size integers read by the decoders below."""

    SHORT_SIZE: int = 2
    INT_SIZE: int = 4
    LONG_SIZE: int = 8


# Alias used in annotations for the binary stream the decoders consume.
Stream = BufferedReader
def read_byte(stream: Stream) -> int:
    """Consume one byte from `stream` and return it as an unsigned int.

    At end of stream `read` yields no bytes, which converts to 0.
    """
    raw = stream.read(1)
    return int.from_bytes(raw, "big")
def read_short(stream: Stream) -> int:
    """Consume `Block.SHORT_SIZE` bytes and return them as a big-endian unsigned int.

    A short read (fewer bytes available) is not detected; the bytes that were
    read are converted as-is.
    """
    raw = stream.read(Block.SHORT_SIZE)
    return int.from_bytes(raw, "big")
def read_int(stream: Stream) -> int:
    """Consume `Block.INT_SIZE` bytes and return them as a big-endian unsigned int.

    A short read (fewer bytes available) is not detected; the bytes that were
    read are converted as-is.
    """
    raw = stream.read(Block.INT_SIZE)
    return int.from_bytes(raw, "big")
def read_long(stream: Stream) -> int:
    """Consume `Block.LONG_SIZE` bytes and return them as a big-endian unsigned int.

    A short read (fewer bytes available) is not detected; the bytes that were
    read are converted as-is.
    """
    raw = stream.read(Block.LONG_SIZE)
    return int.from_bytes(raw, "big")
def read_boolean(stream: Stream) -> bool:
    """Consume one byte and interpret it as a boolean (0 -> False, 1 -> True).

    Raises:
        ValueError: if the byte is neither 0 nor 1. The offset in the message
            is the stream position after the byte was consumed.
    """
    byte = read_byte(stream)
    if byte in (0, 1):
        return byte == 1
    raise ValueError(f"Got {hex(byte)} for boolean at {hex(stream.tell())}")
# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/AbstractDecoder.java#L40-L46 | |
def read_byte_array(stream: Stream) -> bytes:
    """Read a length-prefixed byte array: a 4-byte length, then that many bytes."""
    length = read_int(stream)
    payload = stream.read(length)
    return payload
# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/InputStreamBackedDecoder.java#L76-L79 | |
# DataInputStream.readUTF: https://stackoverflow.com/a/59691602 | |
# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/OutputStreamBackedEncoder.java#L61-L67 | |
# DataOutputStream.writeUTF: https://docs.oracle.com/javase/8/docs/api/java/io/DataOutputStream.html#writeUTF-java.lang.String- | |
def read_string(stream: Stream) -> str:
    """Read a string stored as a 2-byte byte count followed by its encoded bytes.

    The payload is decoded with Python's default codec (UTF-8).
    NOTE(review): the links above point at Java's `writeUTF`; whether its
    encoding ever differs from plain UTF-8 for this data has not been checked.
    """
    byte_count = read_short(stream)
    return stream.read(byte_count).decode()
# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/AbstractDecoder.java#L68-L75 | |
def read_nullable_string(stream: Stream) -> Optional['DecodedString']:
    """Read a presence flag and, if it is set, the `DecodedString` that follows.

    Returns None when the flag byte is 0.
    """
    if not read_boolean(stream):
        return None
    return DecodedString().decode(stream)
# https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L524-L578 | |
# https://github.com/gradle/gradle/blob/e035e3763e7f954eb155a18fb8bf86972df6a57c/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/kryo/KryoBackedEncoder.java#L67-L70 | |
def read_kryo_varint_positive(stream: Stream) -> int:
    """Read a variable-length non-negative integer of at most five bytes.

    The first byte contributes its low 6 bits and signals continuation with
    bit 0x40; each following byte contributes 7 bits and signals continuation
    with bit 0x80. No continuation is checked after the fifth byte.

    NOTE(review): besides string lengths, this is also called for byte-array
    and list sizes in this file; confirm the writer uses the same
    6-bit-first-byte layout there.
    """
    first = read_byte(stream)
    value = first & 0x3F
    more = (first & 0x40) != 0
    for shift in (6, 13, 20, 27):
        if not more:
            break
        nxt = read_byte(stream)
        value |= (nxt & 0x7F) << shift
        more = (nxt & 0x80) != 0
    return value
# https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L829-L848 | |
def read_kryo_string(stream: Stream) -> Optional[str]:
    """Decode one Kryo-encoded string from `stream`.

    Two layouts are distinguished by the high bit of the first byte:

    - clear: an ASCII run in which the last character has its high bit set;
    - set: a variable-length count (char count + 1) followed by that many
      UTF-8 encoded characters. A count of 0 means null (returns None) and a
      count of 1 means the empty string.

    Raises:
        EOFError: if the stream ends before or inside an ASCII run.
    """
    # Read raw bytes here rather than via `read_byte`: that helper turns end of
    # stream into 0, which is indistinguishable from a NUL character and would
    # make the ASCII loop below spin forever on a truncated file.
    head = stream.peek(1)[:1]
    if not head:
        raise EOFError("unexpected end of stream while reading a Kryo string")

    # https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L850-L941
    if not head[0] & 0x80:
        # ascii
        chars = bytearray()
        while True:
            chunk = stream.read(1)
            if not chunk:
                raise EOFError(
                    "unexpected end of stream inside a Kryo ASCII string"
                )
            byte = chunk[0]
            # Keep the 7 payload bits of every byte; the high bit only marks
            # the final character.
            chars.append(byte & 0x7F)
            if byte & 0x80:
                break
        return chars.decode()

    num_chars = read_kryo_varint_positive(stream)
    payload_start = stream.tell()
    if num_chars == 0:
        return None
    if num_chars == 1:
        return ""
    num_chars -= 1

    # UTF-8: walk over `num_chars` characters to find the payload's byte length,
    # then rewind and decode the whole span at once.
    for _ in range(num_chars):
        hi = read_byte(stream) >> 4
        if hi in (12, 13):
            read_byte(stream)
        elif hi == 14:
            read_byte(stream)
            read_byte(stream)
    payload_len = stream.tell() - payload_start
    stream.seek(payload_start)
    return stream.read(payload_len).decode()
# https://github.com/EsotericSoftware/kryo/blob/b9101fbd67b3943a74af5bef6781d6be29affee8/src/com/esotericsoftware/kryo/io/Input.java#L651-L697 | |
def read_kryo_varlong_positive(stream: Stream) -> int:
    """Read a variable-length non-negative integer of at most nine bytes.

    Every byte contributes its low 7 bits (least-significant group first);
    bit 0x80 signals that another byte follows.
    """
    value = 0
    for index in range(9):
        byte = read_byte(stream)
        value |= (byte & 0x7F) << (7 * index)
        if not byte & 0x80:
            break
    return value
# https://github.com/gradle/gradle/blob/7abeef04cae2eb77d08f00f01cdb625482351189/platforms/core-runtime/serialization/src/main/java/org/gradle/internal/serialize/AbstractEncoder.java#L44-L48 | |
def read_kryo_byte_array(stream: Stream) -> bytes:
    """Read a byte array prefixed by a `read_kryo_varint_positive` length."""
    length = read_kryo_varint_positive(stream)
    payload = stream.read(length)
    return payload
def peek_byte(stream: Stream) -> int:
    """Return the next byte of `stream` as an int without consuming it.

    Yields 0 when nothing is left to peek at.
    """
    head = stream.peek(1)
    return int.from_bytes(head[:1], "big")
def peek_short(stream: Stream) -> int:
    """Return the next `Block.SHORT_SIZE` bytes as a big-endian unsigned int
    without advancing `stream`.

    `BufferedReader.peek` may hand back fewer bytes than requested (it returns
    whatever is currently buffered), which would silently produce a wrong
    value, so the bytes are read for real and the position is restored.
    Requires a seekable stream.
    """
    start = stream.tell()
    try:
        raw = stream.read(Block.SHORT_SIZE)
    finally:
        stream.seek(start)
    return int.from_bytes(raw, byteorder="big")
def peek_int(stream: Stream) -> int:
    """Return the next `Block.INT_SIZE` bytes as a big-endian unsigned int
    without advancing `stream`.

    `BufferedReader.peek` may hand back fewer bytes than requested (it returns
    whatever is currently buffered), which would silently produce a wrong
    value, so the bytes are read for real and the position is restored.
    Requires a seekable stream.
    """
    start = stream.tell()
    try:
        raw = stream.read(Block.INT_SIZE)
    finally:
        stream.seek(start)
    return int.from_bytes(raw, byteorder="big")
def peek_long(stream: Stream) -> int:
    """Return the next `Block.LONG_SIZE` bytes as a big-endian unsigned int
    without advancing `stream`.

    `BufferedReader.peek` may hand back fewer bytes than requested (it returns
    whatever is currently buffered), which would silently produce a wrong
    value, so the bytes are read for real and the position is restored.
    Requires a seekable stream.
    """
    start = stream.tell()
    try:
        raw = stream.read(Block.LONG_SIZE)
    finally:
        stream.seek(start)
    return int.from_bytes(raw, byteorder="big")
def eq(a, b):
    """Compare two ints for use inside `assert`, reporting mismatches.

    Returns True when the values are equal. Otherwise both values are printed
    to stderr (in decimal and hex) and False is returned.
    """
    differs = a != b
    if differs:
        print(f"a: {a} ({hex(a)}), b: {b} ({hex(b)})", file=sys.stderr)
    return not differs
class Decodable(ABC): | |
at_offset: Optional[int] = None | |
decoded_len: Optional[int] = None | |
already_decoded_into: bool = False | |
@abstractmethod | |
def size(self) -> int: | |
pass | |
def decode(self, stream: Stream) -> 'Self': | |
self.at_offset = stream.tell() | |
if self.already_decoded_into: raise "uh-oh" | |
self.already_decoded_into = True | |
self._decode(stream) | |
offset_after = stream.tell() | |
assert offset_after >= self.at_offset | |
self.decoded_len = offset_after - self.at_offset | |
return self | |
@abstractmethod | |
def _decode(self, stream: Stream): pass | |
class DecodableBlock(Decodable):
    """A `Decodable` wrapped in block framing.

    Layout read by `_decode`: one type byte, a 4-byte declared payload size,
    the payload itself, and a 4-byte trailer.
    """

    HEADER_SIZE: int = 1 + Block.INT_SIZE
    TAIL_SIZE: int = Block.INT_SIZE

    @abstractmethod
    def type_byte(self) -> Optional[int]:
        """Return the type byte expected at the start of this block."""

    @abstractmethod
    def inner_size(self) -> int:
        """Return the payload size in bytes (excluding header and trailer)."""

    @abstractmethod
    def _inner_decode(self, stream: Stream):
        """Read the payload from `stream`."""

    def size(self) -> int:
        framing = self.HEADER_SIZE + self.TAIL_SIZE
        return framing + self.inner_size()

    # https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/FileBackedBlockStore.java#L209-L229
    def _decode(self, stream: Stream):
        block_start = stream.tell()

        found_type = read_byte(stream)
        assert eq(found_type, self.type_byte())
        declared_size = read_int(stream)

        self._inner_decode(stream)
        payload_end = stream.tell()

        # The trailer holds the number of bytes between the start of the block
        # and the trailer itself; it is checked against what was consumed.
        self.actual_payload_length = read_int(stream)
        assert eq(payload_end - block_start, self.actual_payload_length)
        assert eq(self.inner_size(), declared_size)

        # Leave the stream at `size()` bytes past the block start, irrespective
        # of how many bytes the payload decoder consumed.
        stream.seek(block_start + self.size())
class Patchable(ABC):  # NOTE: assumes implementors are also `Decodable`
    """Mixin for decoded values whose bytes can be rewritten in place.

    `patch` re-reads the span recorded by `Decodable.decode`, asks `_patch`
    for replacement bytes of the same length and writes them back.
    """

    def fixed_timestamp(self, override: Optional[Any] = None) -> bytes:
        """Return the replacement timestamp as `Block.LONG_SIZE` big-endian bytes.

        The first truthy value of `override`, the `REPLACEMENT_TIMESTAMP`
        environment variable and a built-in default is used.
        """
        default = str(0x77777777777)  # 2230
        ts = override or os.environ.get("REPLACEMENT_TIMESTAMP") or default
        return int(ts).to_bytes(Block.LONG_SIZE, byteorder="big")

    def patch(self, writer: BufferedRandom, hint: Optional[Any] = None):
        """Overwrite this value's bytes in `writer` with what `_patch` returns.

        Nothing is written when `_patch` returns None. `hint` is forwarded to
        `_patch` unchanged.
        """
        assert self.already_decoded_into
        start, size = self.at_offset, self.decoded_len
        end = start + size

        if DEBUG:
            print(
                f"[PATCHING] {type(self)}: {size} ({hex(size)}) bytes at: "
                f"{start} to {end} ({hex(start)} to {hex(end)}):\n"
                f" - {self}",
                file=sys.stderr,
            )

        writer.seek(start)
        orig_bytes = writer.read(size)
        new_bytes = self._patch(orig_bytes, hint)
        if new_bytes is None:
            return

        # Patching happens at fixed offsets, so the replacement must occupy
        # exactly the same number of bytes as the original.
        assert len(orig_bytes) == len(new_bytes) == size

        if DEBUG:
            print(
                f" + making replacement:\n"
                f" * orig: {orig_bytes}\n"
                f" * new: {new_bytes}\n",
                file=sys.stderr,
            )

        writer.seek(start)
        writer.write(new_bytes)

    def _patch(self, _bytes: bytes, _hint: Optional[Any] = None) -> None | bytes:
        """Return replacement bytes for `_bytes`, or None to leave them as-is."""
        return None
@dataclass
class Timestamp(Decodable, Patchable):
    """A fixed-width timestamp that `patch` replaces with `fixed_timestamp`."""

    millis_since_unix_epoch: int = 0  # 64-bit int

    def size(self):
        return Block.LONG_SIZE

    def _decode(self, inp: Stream):
        self.millis_since_unix_epoch = read_long(inp)

    def __repr__(self):
        # Rendered at second granularity using the local timezone.
        seconds = self.millis_since_unix_epoch // 1000
        return repr(datetime.fromtimestamp(seconds))

    @override
    def _patch(self, _orig: bytes, hint: Optional[Any] = None) -> bytes:
        # `hint`, when given, takes the place of the replacement timestamp.
        return self.fixed_timestamp(hint)
@dataclass  # useful if you want to know the offset of a string
class DecodedString(Decodable):
    """A `read_string` result that also records its position in the stream."""

    inner: str = ""

    def size(self):
        # not useful pre-decode
        return len(self.inner)

    def _decode(self, inp: Stream):
        self.inner = read_string(inp)
@dataclass
class DecodedKryoString(Decodable):
    """A `read_kryo_string` result that also records its position in the stream.

    `inner` is None when the encoded string was null.
    """

    inner: Optional[str] = ""

    def size(self):
        """Not available: the encoded length is only known after decoding.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("encoded size of a Kryo string is unknown")

    def _decode(self, inp: Stream):
        self.inner = read_kryo_string(inp)
################################################################################ | |
################################################################################ | |
@dataclass
class FreeListEntry(Decodable):
    """One free-list record: an 8-byte position followed by a 4-byte size."""

    pos: int = 0
    size_: int = 0

    def size(self) -> int:
        return Block.LONG_SIZE + Block.INT_SIZE

    def _decode(self, stream: Stream):
        self.pos = read_long(stream)
        self.size_ = read_int(stream)
# https://github.com/gradle/gradle/blob/bb763da97066c47f0dc2d0f119286320b4382401/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/FreeListBlockStore.java#L128-L256 | |
@dataclass
class FreeListBlock(DecodableBlock):
    """Block with type byte 0x44 holding a list of `FreeListEntry` records.

    The payload size is computed from `max_free_list_entries`, not from the
    number of entries actually stored.
    """

    max_free_list_entries: int
    next_block: Optional[int] = None
    largest_in_next_block: Optional[int] = None
    entries: Optional[list[FreeListEntry]] = None

    def type_byte(self) -> int:
        return 0x44

    def inner_size(self) -> int:
        fixed_part = Block.LONG_SIZE + Block.INT_SIZE + Block.INT_SIZE
        entry_size = FreeListEntry().size()
        return fixed_part + self.max_free_list_entries * entry_size

    def _inner_decode(self, stream: Stream):
        self.next_block = read_long(stream)
        self.largest_in_next_block = read_int(stream)
        count = read_int(stream)
        self.entries = [FreeListEntry().decode(stream) for _ in range(count)]
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L328-L364 | |
@dataclass
class BTreeHeaderBlock(DecodableBlock):
    """Block with type byte 0x55: an 8-byte root position and a 2-byte count.

    Decoding asserts that the stored count equals `max_child_index_entries`.
    """

    max_child_index_entries: int

    def type_byte(self) -> int:
        return 0x55

    def inner_size(self) -> int:
        return Block.LONG_SIZE + Block.SHORT_SIZE

    def _inner_decode(self, stream: Stream):
        self.root_pos = read_long(stream)
        stored_max_entries = read_short(stream)
        assert eq(stored_max_entries, self.max_child_index_entries)
@dataclass
class IndexEntry(Decodable):
    """One index record: three consecutive 8-byte unsigned values."""

    hash_code: int = 0
    data_block: int = 0
    child_index_block: int = 0

    def size(self) -> int:
        return Block.LONG_SIZE * 3

    def _decode(self, stream: Stream):
        self.hash_code = read_long(stream)
        self.data_block = read_long(stream)
        self.child_index_block = read_long(stream)

    def __repr__(self):
        # An all-ones child pointer is displayed as None.
        if self.child_index_block == 2 ** 64 - 1:
            ch = None
        else:
            ch = self.child_index_block
        return f"IndexEntry(hash={hex(self.hash_code)}, data_block={hex(self.data_block)}, child_index_block = {ch})"
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L366-L396 | |
@dataclass
class BTreeIndexBlock(DecodableBlock):
    """Block with type byte 0x77: a counted list of `IndexEntry` plus a tail position.

    The payload size is computed from `max_child_index_entries`, not from the
    number of entries actually stored.
    """

    max_child_index_entries: int
    entries: Optional[list[IndexEntry]] = None
    tail_pos: Optional[int] = None

    def type_byte(self) -> int:
        return 0x77

    def inner_size(self) -> int:
        entries_size = IndexEntry().size() * self.max_child_index_entries
        return Block.INT_SIZE + Block.LONG_SIZE + entries_size

    def _inner_decode(self, stream: Stream):
        count = read_int(stream)
        self.entries = [IndexEntry().decode(stream) for _ in range(count)]
        self.tail_pos = read_long(stream)
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L657-L707 | |
@dataclass | |
class BTreeDataBlock[V: Decodable](DecodableBlock, Patchable): | |
value: V | |
size_: int = 0 | |
byte_len: int = 0 | |
def type_byte(self) -> int: return 0x33 | |
def inner_size(self): return 2 * Block.INT_SIZE + self.size_ | |
def _inner_decode(self, stream: Stream): | |
self.size_ = read_int(stream) | |
self.byte_len = read_int(stream) | |
# We only model and deserialize values (not keys) because.. that's all | |
# that's actually stored; key values aren't serialized, only hashes: | |
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L141-L164 | |
# previously: | |
# self.value = stream.read(self.byte_len) | |
pos_before = stream.tell() | |
self.value.decode(stream) | |
pos_after = stream.tell() | |
read = pos_after - pos_before | |
if read != self.byte_len: | |
msg = ( | |
f"expected {type(self.value)} to read {self.byte_len} " | |
f"({hex(self.byte_len)}) bytes ({hex(pos_before)} to " | |
f"{hex(pos_before + self.byte_len)}); actually read {read} " | |
f"({hex(read)}) bytes (up to {hex(pos_after)})" | |
f"\n\nvalue: {self.value}" | |
) | |
if read > self.byte_len: | |
print(ValueError(msg), file=sys.stderr) | |
raise ValueError(msg) # NOTE: choosing to not exit for now.. | |
stream.seek(pos_before + self.byte_len) | |
elif DEBUG: | |
print(f"warning: {msg}", file=sys.stderr) | |
stream.seek(pos_before + self.byte_len) | |
@override | |
def patch(self, writer: BufferedRandom, hint: Any | None = None): | |
if issubclass(type(self.value), Patchable): | |
self.value.patch(writer, hint) | |
@dataclass | |
class BTreePersistentIndexedCache[V: Decodable](Decodable): | |
value_type: type[V] | |
# https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L60-L62 | |
max_child_index_entries: int = 512 | |
max_free_list_entries: int = 512 | |
entries: Optional[list[Decodable]] = None | |
def size(self): raise "unknown" | |
def _decode(self, inp: Stream): | |
# technically the cache is always supposed to start with a | |
# `FreeListBlock` but... it's fine if we're overly permissive | |
self.entries = [] | |
dbg = lambda *a, **kw: print(*a, **kw) if DEBUG else None | |
while inp.peek(): | |
byte = peek_byte(inp) | |
dbg(f"[{hex(inp.tell())}] {hex(byte)}", end=": ", file=sys.stderr) | |
if byte == 0x33: | |
x = BTreeDataBlock[V]((self.value_type())).decode(inp) | |
elif byte == 0x44: | |
x = FreeListBlock(self.max_free_list_entries).decode(inp) | |
elif byte == 0x55: | |
x = BTreeHeaderBlock(self.max_child_index_entries).decode(inp) | |
elif byte == 0x77: | |
x = BTreeIndexBlock(self.max_child_index_entries).decode(inp) | |
else: | |
raise f"uh-oh: {hex(byte)}" | |
dbg(x, end="\n\n", file=sys.stderr) | |
self.entries.append(x) | |
################################################################################ | |
################################################################################ | |
# metadata bin files layout information: | |
# - module-artifact.bin: | |
# + https://github.com/gradle/gradle/blob/26101a599b44b7fd49b2db9e0cf3b475058be3a6/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/DependencyManagementBuildTreeScopeServices.java#L232-L238 | |
# + https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L45-L48 | |
# + key: ArtifactAtRepositoryKey | |
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L84-L103 | |
# + value: CachedArtifact | |
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L105-L181 | |
# - module-metadata.bin: | |
# + https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/PersistentModuleMetadataCache.java#L67 | |
# + key: RevisionKey | |
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/PersistentModuleMetadataCache.java#L106-L136 | |
# + value: ModuleMetadataCacheEntry | |
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntrySerializer.java#L23-L55 | |
# - resource-at-url.bin: | |
# + https://github.com/gradle/gradle/blob/26101a599b44b7fd49b2db9e0cf3b475058be3a6/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/DependencyManagementBuildTreeScopeServices.java#L126-L134 | |
# + https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/ByUrlCachedExternalResourceIndex.java#L28 | |
# + key: String | |
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/ByUrlCachedExternalResourceIndex.java#L28 | |
# + value: CachedExternalResource | |
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/DefaultCachedExternalResourceIndex.java#L40 | |
# * https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/DefaultCachedExternalResourceIndex.java#L86-L135 | |
# | |
# We only care about modeling the value types; key contents actually aren't | |
# saved, only their hashes: https://github.com/gradle/gradle/blob/6548746755a464cd473900cb9545d944470deee0/platforms/core-execution/persistent-cache/src/main/java/org/gradle/cache/internal/btree/BTreePersistentIndexedCache.java#L141-L164 | |
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/artifacts/DefaultModuleArtifactCache.java#L105-L181 | |
@dataclass
class CachedArtifact(Decodable, Patchable):
    """Value type of `module-artifact.bin`; only `cached_at` gets patched."""

    is_missing: bool = False
    cached_at: Optional[Timestamp] = None  # timestamp, UNIX epoch milliseconds
    encoded_hash: Optional[bytes] = None

    # one of these will be present, depending on `is_missing`
    artifact_file: Optional[DecodedKryoString] = None  # if is_missing == False
    attempted: Optional[list[DecodedKryoString]] = None  # if is_missing == True

    def size(self):
        """Not available: the encoded size varies per entry.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError("size is not known ahead of time")

    def _decode(self, inp: Stream):
        self.is_missing = read_boolean(inp)
        self.cached_at = Timestamp().decode(inp)
        self.encoded_hash = read_kryo_byte_array(inp)

        if self.is_missing:
            # A counted list of strings follows for missing artifacts.
            self.attempted = []
            size = read_kryo_varint_positive(inp)
            for _ in range(size):
                self.attempted.append(DecodedKryoString().decode(inp))
        else:
            self.artifact_file = DecodedKryoString().decode(inp)

    @override
    def patch(self, writer: BufferedRandom, _: Optional[Any] = None):
        """Rewrite the `cached_at` timestamp in place; the hint is ignored."""
        self.cached_at.patch(writer)
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntrySerializer.java#L23-L55 | |
@dataclass | |
class ModuleMetadataCacheEntry(Decodable, Patchable): | |
type: int = 0 | |
create_timestamp: Optional[Timestamp] = None | |
is_changing: Optional[bool] = None # only present if type == TYPE_PRESENT | |
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/api/internal/artifacts/ivyservice/modulecache/ModuleMetadataCacheEntry.java#L22-L23 | |
TYPE_MISSING: ClassVar[int] = 0 | |
TYPE_PRESENT: ClassVar[int] = 1 | |
def is_missing(self) -> bool: self.type == ModuleMetadataCacheEntry.TYPE_MISSING | |
def is_present(self) -> bool: self.type == ModuleMetadataCacheEntry.TYPE_PRESENT | |
def size(self) -> int: raise "not static" | |
def _decode(self, inp: Stream): | |
self.type = read_byte(inp) | |
match self.type: | |
case ModuleMetadataCacheEntry.TYPE_MISSING: | |
self.create_timestamp = Timestamp().decode(inp) | |
case ModuleMetadataCacheEntry.TYPE_PRESENT: | |
self.is_changing = read_boolean(inp) | |
self.create_timestamp = Timestamp().decode(inp) | |
case other: | |
raise ValueError(f"invalid type: {hex(other)}") | |
@override | |
def patch(self, writer: BufferedRandom, _: Optional[Any] = None): | |
# NOTE: we're adjusting missing entry cache entries to point to a date | |
# in the future... might be problematic if we want to actually attempt | |
# a refetch? | |
self.create_timestamp.patch(writer) | |
@dataclass
class ExternalResourceMetadata(Decodable, Patchable):
    """Metadata record nested inside a `CachedExternalResource` entry.

    `last_modified` and `sha1` are each preceded by a presence boolean in the
    encoded form and stay `None` when that boolean is false.
    """

    uri: Optional[DecodedKryoString] = None
    last_modified: Optional[Timestamp] = None
    content_type: Optional[DecodedKryoString] = None
    content_length: int = 0  # long
    etag: Optional[DecodedKryoString] = None
    sha1: Optional[DecodedKryoString] = None

    def size(self) -> int:
        """Always raises: the encoded length depends on the record's contents.

        Raises:
            TypeError: unconditionally.
        """
        # A bare string is not a valid exception in Python 3; raising one
        # surfaces as a generic TypeError and drops this message. TypeError is
        # kept as the exception type so existing handlers are unaffected.
        raise TypeError("not known before-hand")

    def _decode(self, inp: Stream):
        """Populate this record from `inp`, field by field in encoded order."""
        self.uri = DecodedKryoString().decode(inp)

        has_last_modified = read_boolean(inp)
        if has_last_modified:
            self.last_modified = Timestamp().decode(inp)

        self.content_type = DecodedKryoString().decode(inp)
        self.content_length = read_kryo_varlong_positive(inp)
        self.etag = DecodedKryoString().decode(inp)

        has_sha1 = read_boolean(inp)
        if has_sha1:
            self.sha1 = DecodedKryoString().decode(inp)

    @override
    def patch(self, writer: BufferedRandom, _: Optional[Any] = None):
        """Patch `last_modified` through `writer`, if one was decoded."""
        # TODO: I think we may be able to leave these timestamps as is? they're
        # provided by the repository IIUC
        if self.last_modified:
            self.last_modified.patch(writer)
# https://github.com/gradle/gradle/blob/5bb3182cf38a901dbffbacc0cb9c8efec9f87e9a/platforms/software/dependency-management/src/main/java/org/gradle/internal/resource/cached/DefaultCachedExternalResourceIndex.java#L86-L135 | |
@dataclass
class CachedExternalResource(Decodable, Patchable):
    """Decoded entry type used for the `resource-at-url.bin` index.

    `cached_file` and `metadata` are each preceded by a presence boolean in the
    encoded form and stay `None` when that boolean is false; `cached_at` is
    always read.
    """

    cached_file: Optional[DecodedKryoString] = None
    cached_at: Optional[Timestamp] = None
    metadata: Optional[ExternalResourceMetadata] = None

    def size(self) -> int:
        """Always raises: the encoded length depends on the entry's contents.

        Raises:
            TypeError: unconditionally.
        """
        # A bare string is not a valid exception in Python 3; raising one
        # surfaces as a generic TypeError and drops this message. TypeError is
        # kept as the exception type so existing handlers are unaffected.
        raise TypeError("unknown")

    def _decode(self, inp: Stream):
        """Populate this entry from `inp`, field by field in encoded order."""
        has_cached_file = read_boolean(inp)
        if has_cached_file:
            self.cached_file = DecodedKryoString().decode(inp)

        self.cached_at = Timestamp().decode(inp)

        has_metadata = read_boolean(inp)
        if has_metadata:
            self.metadata = ExternalResourceMetadata().decode(inp)

    @override
    def patch(self, writer: BufferedRandom, _: Optional[Any] = None):
        """Patch `cached_at`, then the nested metadata if it was decoded.

        Only valid after `_decode` has run.
        """
        self.cached_at.patch(writer)
        if self.metadata:
            self.metadata.patch(writer)
################################################################################ | |
################################################################################ | |
def patch(metadata_file: BufferedRandom, decoded: BTreePersistentIndexedCache):
    """Call `patch(metadata_file)` on every patchable entry of `decoded`.

    Entries in `decoded.entries` that are not `Patchable` instances are
    skipped.
    """
    patchable_entries = (
        entry for entry in decoded.entries if isinstance(entry, Patchable)
    )
    for entry in patchable_entries:
        entry.patch(metadata_file)
################################################################################ | |
################################################################################ | |
def process_metadata_files_in_dir(dir: Path | str, do_patch: bool = True) -> int: | |
metadata_dirs = [ | |
d for d in Path(dir).iterdir() | |
if d.name.startswith("metadata-") | |
if d.is_dir() | |
] | |
if not metadata_dirs: | |
print(f"No metadata directories found under {dir}; skipping..", file=sys.stderr) | |
return 0 | |
for m in metadata_dirs: | |
version = m.name.removeprefix("metadata-") | |
if not version.startswith("2."): | |
print( | |
f"Error processing metadata dir {m}: don't know how to handle " | |
f"metadata version: {version}; expect metadata versions to " | |
f"start with '2.'", | |
file=sys.stderr, | |
) | |
return 1 | |
version = version.removeprefix("2.") | |
if not version in SUPPORTED_METADATA_LAYOUT_VERSIONS: | |
print( | |
f"Error processing metadata dir {m}: metadata version " | |
f"{version} is unsupported. We only know how to handle these " | |
f"metadata versions: " | |
f"{SUPPORTED_METADATA_LAYOUT_VERSIONS.keys()}", | |
file=sys.stderr, | |
) | |
return 2 | |
for file in sorted(m.iterdir()): | |
if not file.is_file(): continue | |
print(f"Processing '{file}'...", file=sys.stderr) | |
decoded = None | |
match file.name: | |
case "module-artifact.bin": | |
decoded = BTreePersistentIndexedCache(CachedArtifact) | |
case "module-metadata.bin": | |
decoded = BTreePersistentIndexedCache(ModuleMetadataCacheEntry) | |
case "resource-at-url.bin": | |
decoded = BTreePersistentIndexedCache(CachedExternalResource) | |
case _: | |
print( | |
f"Warning: unexpected file in metadata dir: {file}; " | |
f"ignoring...", | |
file=sys.stderr | |
) | |
continue | |
decoded.decode(open(file, "rb")) | |
if do_patch: | |
with open(file, "rb+") as w: | |
patch(w, decoded) | |
return 0 | |
def process(dirs: list[str], patch: bool):
    """Process each directory in `dirs`, exiting on the first failure.

    Every directory is handed to `process_metadata_files_in_dir` along with
    the `patch` flag. A non-zero result ends the program with that value as
    the exit status.
    """
    for metadata_root in dirs:
        status = process_metadata_files_in_dir(metadata_root, patch)
        if status != 0:
            sys.exit(status)
if __name__ == "__main__": | |
match sys.argv[1:]: | |
case ["--dump", *dirs]: | |
DEBUG = True | |
process(dirs, False) | |
case ["--patch", *dirs]: process(dirs, True) | |
case _: | |
print("usage: [--dump | --patch] [... module metadata directories]") | |
sys.exit(1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment