Open mkditto opened 2 months ago
I was able to accomplish 90% of what's needed here, though not without some jank. I was able to generate the embedcfg with the following setup:
//builddefs/go:embeds.bzl
def _embedcfg_generator():
return attrs.default_only(
attrs.exec_dep(default = "//builddefs/go:embeds"),
)
def go_embedcfg_impl(ctx: AnalysisContext) -> list[Provider]:
embedcfg = ctx.actions.declare_output("embedcfg.json")
srcs = {src.short_path: src for src in ctx.attrs.srcs}
srcs.update({k: v for v, k in ctx.attrs.mapped_srcs.items()})
ctx.actions.run(
cmd_args([
ctx.attrs._embedcfg_generator[RunInfo],
"--srcdir", ctx.actions.symlinked_dir("__srcs", srcs),
"--patterns", ",".join(ctx.attrs.patterns),
embedcfg.as_output(),
]),
category = "embeds",
)
return [DefaultInfo(default_output = embedcfg)]
go_embedcfg = rule(
impl = go_embedcfg_impl,
attrs = dict(
patterns = attrs.list(attrs.string(), default = []),
srcs = attrs.list(attrs.source(), default = []),
mapped_srcs = attrs.dict(
key = attrs.source(),
value = attrs.string(),
default = {},
),
_embedcfg_generator = _embedcfg_generator(),
),
)
//builddefs/go:embeds
package main
import (
"encoding/json"
"flag"
"fmt"
"log"
"os"
"path/filepath"
"strings"
)
var (
srcDir = flag.String("srcdir", "", "the source directory for all embeds")
patterns = flag.String("patterns", "", "the patterns to match against for embeds")
)
// EmbedCfg represents embed patterns to absolute file paths in the codebase.
// This is required to embed files or directories into go binaries at compile
// time.
type EmbedCfg struct {
// Patterns maps a pattern to a list of existing relative paths that
// match the pattern.
Patterns map[string][]string `json:"Patterns"`
// Files maps a relative path to a fully qualified path, following symlinks.
Files map[string]string `json:"Files"`
}
// NewEmbedCfg creates a new EmbedCfg with initialized zero values.
func NewEmbedCfg() *EmbedCfg {
return &EmbedCfg{
Patterns: make(map[string][]string),
Files: make(map[string]string),
}
}
// Push a pattern to the list of managed patterns in the embedcfg.
func (e *EmbedCfg) Push(directory, pattern string) error {
globs, err := filepath.Glob(fmt.Sprintf("%s/%s", directory, pattern))
if err != nil {
return err
}
matches := make([]string, 0)
files := make(map[string]string)
for _, glob := range globs {
strippedMatch, ok := strings.CutPrefix(glob, directory+"/")
if !ok {
return fmt.Errorf("failed to cut prefix %q from path %q", directory, glob)
}
matches = append(matches, strippedMatch)
dest, err := os.Readlink(glob)
if err != nil {
return fmt.Errorf("failed to read link for glob match %q: %w", glob, err)
}
var fullPath string
if fullPath, err = filepath.Abs(
filepath.Clean(
fmt.Sprintf("%s/%s", filepath.Dir(glob), dest),
),
); err != nil {
return fmt.Errorf("failed to get absolute path: %w", err)
}
files[strippedMatch] = fullPath
}
e.Patterns[pattern] = matches
for key, value := range files {
e.Files[key] = value
}
return nil
}
// Write the EmbedCfg to the given output path.
func (e *EmbedCfg) Write(path string) error {
output, err := os.Create(path)
if err != nil {
return fmt.Errorf("failed to create output: %w", err)
}
defer output.Close()
bytes, err := json.MarshalIndent(e, "", " ")
if err != nil {
return fmt.Errorf("failed to marshal embedcfg to json: %w", err)
}
if _, err := output.Write(bytes); err != nil {
return fmt.Errorf("failed to write to output: %w", err)
}
return nil
}
func main() {
flag.Parse()
exec, err := os.Executable()
if err != nil {
log.Fatal(err)
}
log.Printf("executable path: %s", filepath.Dir(exec))
cwd, err := os.Getwd()
if err != nil {
log.Fatal(err)
}
log.Printf("working directory: %s", cwd)
if flag.NArg() != 1 {
log.Fatal("output expected, none received")
}
entries, err := os.ReadDir(*srcDir)
if err != nil {
log.Fatalf("failed to read source directory: %v", err)
}
embedcfg := NewEmbedCfg()
for _, pattern := range strings.Split(*patterns, ",") {
if err := embedcfg.Push(*srcDir, pattern); err != nil {
log.Fatal(err)
}
}
output := flag.Arg(0)
if err := embedcfg.Write(output); err != nil {
log.Fatalf("failed to write %q: %v", output, err)
}
}
The following rule inside of a BUCK
file ...
load("//builddefs/go:embeds.bzl", "go_embedcfg")
go_embedcfg(
name = "embedcfg",
srcs = [".version"],
patterns = [".version"],
)
... generates the following embedcfg.json
artifact.
{
"Patterns": {
".version": [
".version"
]
},
"Files": {
".version": "/home/mkditto/Git/htmx-go/vendor/github.com/a-h/templ/.version"
}
}
It seems to work just fine for my local builds (even with mapped_srcs
, though that's not included in my example above), but I'm not sure whether this approach will work with proper build isolation with RBE since I'm doing a bunch of link following and resolving some absolute paths inside the little tool I wrote. Ideally I'd like to expose the sources described in the embedcfg as a target that could be included in the srcs
of a go_library
or go_binary
, but this was just something I threw together.
I wonder if some other people more familiar with writing rules and build isolation had any suggestions for improvement, it'd be much appreciated! If something like this could be integrated with the prelude, I'd be happy to fix this up and put up a PR for that.
@mkditto This is good! I did a very similar thing but using regex matching to execute the patterns in starlark instead of running a helper binary, which was a good 90% solution. This was for a slightly different use case, which was generating a BUCK file from go list -json
output. go list -json
gives you EmbedPatterns
and EmbedFiles
but, somewhat unhelpfully, doesn't actually do the glob matching, leaving us to reverse engineer what it does. Very hacky script but I got it to buckify a pretty large dependency tree.
Some next steps:
go_library
rule will be able to see. The only way to be 100% sure you've done this right is to configure remote execution and build with that.go_embedcfg
and the go_library
that uses it, I think this should actually be part of the go_library
/ go_binary
rules as an alternative to supplying an embedcfg file, rather than a separate rule.
go_library(embed_patterns = [ "*.xml" ], embed_files = glob(["*.xml"]))
would be ideal usage I think.
go list -json
output, except you have the exact list of EmbedFiles to work with.srcs
list. What if you want to embed a .go file! Can't be done today, because buck2 will try to compile it.package_root
arg to go_library
. You don't want parts of the github.com/golang/...
URL to be matched in the glob.all:images/jpegs
that need special treatment.go_bootstrap_binary
tool targets in it for this kind of thing these days. So this would be perfectly fine I think.buck2
repo even for the prelude.
I've been experimenting with building go with
buck2
, and up to this point I've had a pretty good experience. The one drawback I've had so far is using to goembed
package. Right now when building go with buck I have to manually create theembedcfg
for my embedded resources, and this comes with a few drawbacks:The documentation on the format of the embedcfg can be hard to find, so it takes a good bit of time to write one the first time. Since this is something that you don't have to do with standard go tooling, it's not something most regular go devs are familiar with doing.
The embedcfg needs to be a json source which you can either get with a genrule or by writing it to your repo directly.
If you choose to write a
genrule
, you have to write it for each library that requires embeds. Optionally, you can make this easier by writing your own macro wrapping thegenrule
, but now you have to maintain your own macro for something that feels like it should be supported by the native rules.If you choose to write the json sources to your repo directly, you'll pretty quickly run into issues because the embedcfg requires the absolute paths of the embedded sources, which is obviously not portable.
Abstracting away the generation of the embedcfg inside the standard
go_*
rules would be great and would save devs some effort learning how to write proper embedcfgs and maintaining their owngenrules
to generate them. This is fairly similar to what bazel already does with theembedsrcs
attribute on its go rules, and would probably look very much the same:Another option would be to add a macro to the prelude called
go_embedcfg
that can generate the embedcfg for you, which you can then depend on with the current ruleset. It would maybe be little bit more verbose than the above option, but would also work. Ergonomically I'd imagine it would look something like:(I think that by adding a
filegroup
to the second example it's no longer equivalent to the first, but I think it still demonstrates the point)