GoogleContainerTools / rules_distroless

Apache License 2.0
31 stars 15 forks source link

Supporting non-standard package sources #56

Open ericlchen1 opened 2 weeks ago

ericlchen1 commented 2 weeks ago

Hey, I ran into some issues when trying to use the NVIDIA repo (https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64) as a source URL. This is due to the way rules_distroless parses URLs and uses them to locate the Packages file. Also, there doesn't seem to be much support for packages that are not multiarch; in this case, the NVIDIA packages only support amd64 or i386.

For now, I've put together a fairly nonstandard patch that works around this and supports my use case. However, I was wondering whether this will be supported in the future. Thanks!

rules_distroless.patch

diff --git a/apt/private/package_index.bzl b/apt/private/package_index.bzl
index 620a021..c441d4d 100644
--- a/apt/private/package_index.bzl
+++ b/apt/private/package_index.bzl
@@ -2,19 +2,24 @@

 load(":util.bzl", "util")

-def _fetch_package_index(rctx, url, dist, comp, arch, integrity):
+def _fetch_package_index(rctx, url, dist, comp, arch, integrity, override_url = None):
     target_triple = "{dist}/{comp}/{arch}".format(dist = dist, comp = comp, arch = arch)
     output = "{}/Packages.xz".format(target_triple)
+    url = "{}/dists/{}/{}/binary-{}/Packages.xz".format(url, dist, comp, arch)
+    if override_url:
+        url = override_url
+        output = "{}/Packages".format(target_triple)
     r = rctx.download(
-        url = "{}/dists/{}/{}/binary-{}/Packages.xz".format(url, dist, comp, arch),
+        url = url,
         output = output,
         integrity = integrity,
     )
-    rctx.execute([
-        "xz",
-        "--decompress",
-        output,
-    ])
+    if not override_url:
+        rctx.execute([
+            "xz",
+            "--decompress",
+            output,
+        ])
     return ("{}/Packages".format(target_triple), r.integrity)

 def _parse_package_index(state, contents, arch, root):
@@ -59,9 +64,11 @@ def _create(rctx, sources, archs):
     )

     for arch in archs:
-        for (url, dist, comp) in sources:
+        for (url, dist, comp, override_url, override_arch) in sources:
+            if override_arch and override_arch != arch:
+                continue
             rctx.report_progress("Fetching package index: {}/{}".format(dist, arch))
-            (output, _) = _fetch_package_index(rctx, url, dist, comp, arch, "")
+            (output, _) = _fetch_package_index(rctx, url, dist, comp, arch, "", override_url)

             # TODO: this is expensive to perform.
             rctx.report_progress("Parsing package index: {}/{}".format(dist, arch))
diff --git a/apt/private/resolve.bzl b/apt/private/resolve.bzl
index ea443d2..b14b4ff 100644
--- a/apt/private/resolve.bzl
+++ b/apt/private/resolve.bzl
@@ -66,9 +66,11 @@ def _deb_resolve_impl(rctx):
         if not dist:
             dist = distr
         sources.append((
-            src["url"],
+            src.get("url"),
             distr,
             comp,
+            src.get("override_url"),
+            src.get("override_arch"),
         ))

     pkgindex = package_index.new(rctx, sources = sources, archs = manifest["archs"])
@@ -76,7 +78,11 @@ def _deb_resolve_impl(rctx):
     lockf = lockfile.empty(rctx)

     for arch in manifest["archs"]:
-        for dep_constraint in manifest["packages"]:
+        packages = manifest["packages"]
+        arch_packages = manifest.get("arch_packages", {})
+        if arch in arch_packages:
+            packages = packages + arch_packages[arch]
+        for dep_constraint in packages:
             constraint = package_resolution.parse_depends(dep_constraint).pop()

             rctx.report_progress("Resolving %s" % dep_constraint)

diff --git a/apt/private/package_index.bzl b/apt/private/package_index.bzl
index c441d4d..c344442 100644
--- a/apt/private/package_index.bzl
+++ b/apt/private/package_index.bzl
@@ -42,6 +42,11 @@ def _parse_package_index(state, contents, arch, root):

         if len(pkg.keys()) != 0:
             pkg["Root"] = root
+            if "Filename" in pkg:
+                pkg["Filename"] = pkg["Filename"].strip("./")
+            if "Architecture" in pkg and pkg["Architecture"] != "all" and pkg["Architecture"] != arch:
+                pkg = {}
+                continue
             util.set_dict(state.packages, value = pkg, keys = (arch, pkg["Package"], pkg["Version"]))
             last_key = ""
             pkg = {}
@@ -72,7 +74,7 @@ def _create(rctx, sources, archs):

             # TODO: this is expensive to perform.
             rctx.report_progress("Parsing package index: {}/{}".format(dist, arch))
-            _parse_package_index(state, rctx.read(output), arch, url)
+            _parse_package_index(state, rctx.read(output), arch, override_url.split("/Packages")[0] if override_url else url)

     return struct(
         package_versions = lambda **kwargs: _package_versions(state, **kwargs),

debian12.yaml

version: 1
sources:
- channel: bookworm main
  url: https://snapshot-cloudflare.debian.org/archive/debian/20240612T080215Z
- channel: bookworm-security main
  url: https://snapshot-cloudflare.debian.org/archive/debian-security/20240612T080215Z
- channel: bookworm-updates main
  url: https://snapshot-cloudflare.debian.org/archive/debian/20240612T080215Z
- channel: nvidia main
  override_url: https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/Packages
  override_arch: amd64
archs:
- amd64
- arm64
...
bunch of packages here
...
arch_packages:
  amd64:
  - libcuda1
ericlchen1 commented 2 weeks ago

This seems to be related to https://github.com/GoogleContainerTools/rules_distroless/pull/55, but the question of how architecture is handled remains open.