bazelbuild / bazel-buildfarm

Bazel remote caching and execution service
https://bazel.build
Apache License 2.0
635 stars 199 forks source link

Updating from Java 17 to Java 21 to use the new server and worker causes the JVM to shut down #1777

Closed Sowmya-Iyer closed 2 weeks ago

Sowmya-Iyer commented 2 weeks ago
# Bundles the shard worker deploy jar and the logging properties file into a
# tar rooted at the `app` package directory; :x86_64_worker_config is pulled
# in through `deps`.
pkg_tar(
    name = "x86_64_worker_tars",
    testonly = False,
    srcs = [
        "@build_buildfarm//src/main/java/build/buildfarm:buildfarm-shard-worker_deploy.jar",
        "debug.logging.properties",
    ],
    package_dir = "app",
    visibility = ["//visibility:public"],
    deps = [":x86_64_worker_config"],
)

# Container image for the x86_64 shard worker: the entrypoint launches the
# worker deploy jar with the JVM at /usr/bin/java, and `cmd` supplies the
# worker config path as its argument.
oci_image(
    name = "x86_64_worker_container",
    # Base image with java11.
    # NOTE(review): the report describes moving to Java 21 — confirm this base
    # image ships the Java runtime the worker jar is built for.
    base = "@ard_x86_64_base",
    cmd = [
        "/app/config/worker.yaml",
    ],
    entrypoint = [
        "/usr/bin/java",
        "-jar",
        "/app/buildfarm-shard-worker_deploy.jar",
    ],
    exposed_ports = [
        "8980",
        "8981",
    ],
    tars = [
        ":x86_64_worker_tars",
    ],
    visibility = ["//visibility:public"],
    workdir = "/app",
)

# Tarball form of the worker image, tagged `x86_64_worker:latest` (the tag
# used by the `docker run` invocation).
oci_tarball(
    name = "x86_64_worker",
    image = "x86_64_worker_container",
    repo_tags = ["x86_64_worker:latest"],
)

Ran:

docker run --network=host x86_64_worker:latest 

Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs loadConfigs INFO: BuildfarmConfigs(digestFunction=SHA256, defaultActionTimeout=600, maximumActionTimeout=3600, maxEntrySizeBytes=2147483648, prometheusPort=9090, allowSymlinkTargetAbsolute=false, server=Server(instanceType=SHARD, name=shard, actionCacheReadOnly=false, bindAddress=, port=8980, grpcMetrics=GrpcMetrics(enabled=false, provideLatencyHistograms=false, latencyBuckets=null, labelsToReport=null), casWriteTimeout=3600, bytestreamTimeout=3600, sslCertificatePath=null, sslPrivateKeyPath=null, runDispatchedMonitor=true, dispatchedMonitorIntervalSeconds=1, runFailsafeOperation=true, runOperationQueuer=true, ensureOutputsPresent=false, maxRequeueAttempts=5, useDenyList=true, grpcTimeout=3600, executeKeepaliveAfterSeconds=60, recordBesEvents=false, admin=Admin(deploymentEnvironment=null, clusterEndpoint=null, enableGracefulShutdown=false), metrics=Metrics(publisher=LOG, logLevel=FINEST, topic=null, topicMaxConnections=0, secretName=null), maxCpu=0, clusterId=, cloudRegion=null, publicName=null, maxInboundMessageSizeBytes=0, maxInboundMetadataSize=0, caches=ServerCacheConfigs(directoryCacheMaxEntries=65536, commandCacheMaxEntries=65536, digestToActionCacheMaxEntries=65536, recentServedExecutionsCacheMaxEntries=65536), findMissingBlobsViaBackplane=false, gracefulShutdownSeconds=0), backplane=Backplane(type=SHARD, redisUri=redis://localhost:6379, jedisPoolMaxTotal=4000, workersHashName=Workers, workerChannel=WorkerChannel, actionCachePrefix=ActionCache, actionCacheExpire=2419200, actionBlacklistPrefix=ActionBlacklist, actionBlacklistExpire=3600, invocationBlacklistPrefix=InvocationBlacklist, operationPrefix=Operation, operationExpire=604800, preQueuedOperationsListName={Arrival}:PreQueuedOperations, processingListName={Arrival}:ProcessingOperations, processingPrefix=Processing, processingTimeoutMillis=20000, queuedOperationsListName={Execution}:QueuedOperations, dispatchingPrefix=Dispatching, 
dispatchingTimeoutMillis=10000, dispatchedOperationsHashName=DispatchedOperations, operationChannelPrefix=OperationChannel, casPrefix=ContentAddressableStorage, casExpire=604800, maxInvocationIdTimeout=604800, subscribeToBackplane=true, runFailsafeOperation=true, maxQueueDepth=100000, maxPreQueueDepth=1000000, priorityQueue=false, queues=[Queue(name=x86_64, allowUnmatched=true, properties=[Property(name=host, value=x86_64), Property(name=target, value=x86_64)]), Queue(name=aarch64_native, allowUnmatched=true, properties=[Property(name=host, value=aarch64), Property(name=target, value=aarch64)]), Queue(name=aarch64_cross, allowUnmatched=true, properties=[Property(name=host, value=x86_64), Property(name=target, value=aarch64)]), Queue(name=aarch64_dhu, allowUnmatched=true, properties=[Property(name=host, value=x86_64), Property(name=target, value=dhu)])], redisCredentialFile=null, redisUsername=null, redisCertificateAuthorityFile=null, timeout=10000, redisNodes=[], maxAttempts=20, cacheCas=false, priorityPollIntervalMillis=100, resources=[]), worker=Worker(port=8981, grpcMetrics=GrpcMetrics(enabled=false, provideLatencyHistograms=false, latencyBuckets=null, labelsToReport=null), publicName=localhost:8981, capabilities=Capabilities(cas=true, execution=true), root=/tmp/worker, inlineContentLimit=1048567, operationPollPeriod=1, dequeueMatchSettings=DequeueMatchSettings(acceptEverything=false, allowUnmatched=true, properties=[Property(name=host, value=x86_64), Property(name=target, value=x86_64)]), storages=[Cas(type=FILESYSTEM, path=cache, hexBucketLevels=0, maxSizeBytes=0, fileDirectoriesIndexInMemory=false, skipLoad=false, execRootCopyFallback=false, target=null, readonly=false, publishTtlMetric=false)], executeStageWidth=0, executeStageWidthOffset=0, inputFetchStageWidth=0, inputFetchDeadline=60, linkExecFileSystem=true, linkInputDirectories=true, linkedInputDirectories=[(?!external)[^/]+], execOwner=null, defaultMaxCores=0, limitGlobalExecution=false, 
onlyMulticoreTests=false, allowBringYourOwnContainer=false, errorOperationRemainingResources=false, gracefulShutdownSeconds=0, executionPolicies=[], sandboxSettings=SandboxSettings(alwaysUseSandbox=false, alwaysUseAsNobody=false, alwaysUseCgroups=true, alwaysUseTmpFs=false, additionalWritePaths=[], tmpFsPaths=[], selectForBlockNetwork=false, selectForTmpFs=false), createSymlinkOutputs=false, zstdBufferPoolSize=2048, resources=[], errorOperationOutputSizeExceeded=false), executionWrappers=ExecutionWrappers(cgroups=/usr/bin/cgexec, unshare=/usr/bin/unshare, linuxSandbox=/app/build_buildfarm/linux-sandbox, asNobody=/app/build_buildfarm/as-nobody, processWrapper=/app/build_buildfarm/process-wrapper, skipSleep=/app/build_buildfarm/skip_sleep, skipSleepPreload=/app/build_buildfarm/skip_sleep_preload.so, delay=/app/build_buildfarm/delay.sh)) Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs deriveCasStorage INFO: CAS size changed to 1770585794150 Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs adjustExecuteStageWidth INFO: executeStageWidth modified to 16 Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs adjustInputFetchStageWidth INFO: executeInputFetchWidth modified to 3 Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs lambda$checkExecutionWrapperAvailability$0 WARNING: the execution wrapper /app/build_buildfarm/skip_sleep is missing and therefore the following features will not be available: skip-sleep, time-shift Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs lambda$checkExecutionWrapperAvailability$0 WARNING: the execution wrapper /app/build_buildfarm/skip_sleep_preload.so is missing and therefore the following features will not be available: skip-sleep, time-shift Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs lambda$checkExecutionWrapperAvailability$0 WARNING: the execution wrapper /app/build_buildfarm/delay.sh is missing and 
therefore the following features will not be available: skip-sleep, time-shift Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs lambda$checkExecutionWrapperAvailability$0 WARNING: the execution wrapper /app/build_buildfarm/linux-sandbox is missing and therefore the following features will not be available: linux-sandbox, block-network, tmpfs Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs lambda$checkExecutionWrapperAvailability$0 WARNING: the execution wrapper /app/build_buildfarm/as-nobody is missing and therefore the following features will not be available: as-nobody Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs lambda$checkExecutionWrapperAvailability$0 WARNING: the execution wrapper /app/build_buildfarm/process-wrapper is missing and therefore the following features will not be available: process-wrapper Jun 18, 2024 5:46:14 PM build.buildfarm.common.config.BuildfarmConfigs lambda$checkExecutionWrapperAvailability$0 WARNING: the execution wrapper /usr/bin/cgexec is missing and therefore the following features will not be available: limit_execution, cores, min-cores, max-cores, min-mem, max-mem Jun 18, 2024 5:46:14 PM build.buildfarm.worker.shard.Worker main SEVERE: io.grpc.StatusRuntimeException: UNKNOWN Jun 18, 2024 5:46:14 PM build.buildfarm.worker.shard.Worker shutdown INFO: shutting down gRPC server since JVM is shutting down Jun 18, 2024 5:46:14 PM build.buildfarm.worker.shard.Worker prepareWorkerForGracefulShutdown INFO: Graceful Shutdown is not enabled. Worker is shutting down without finishing executions in progress. Jun 18, 2024 5:46:14 PM build.buildfarm.worker.shard.Worker shutdown INFO: server shut down

This worked fine with the java17 base and buildfarm worker JAR files. (Server image with java21-debian12 works fine.)

Dependency difference:

bazel_dep(name = "aspect_bazel_lib", version = "2.7.7")
bazel_dep(name = "rules_oci", version = "1.7.6")

# Take rules_oci from a pinned upstream commit rather than the registry release.
git_override(
    module_name = "rules_oci",
    commit = "8ac0e84f56c68f53c688108e9ad4a891da868be7",
    remote = "https://github.com/bazel-contrib/rules_oci.git",
)

# Take buildfarm from a pinned upstream commit.
git_override(
    module_name = "build_buildfarm",
    commit = "02602ce08dd2fc3dc84da2901d99b5f6e357449f",
    remote = "https://github.com/bazelbuild/bazel-buildfarm.git",
)

# remote-apis is fetched as a commit archive (see archive_override below); the
# "version" here is that same commit hash.
bazel_dep(name = "remoteapis", version = "eb433accc6a666b782ea4b787eb598e5c3d27c93")
archive_override(
    module_name = "remoteapis",
    integrity = "sha256-68wzxNAkPZ49/zFwPYQ5z9MYbgxoeIEazKJ24+4YqIQ=",
    strip_prefix = "remote-apis-eb433accc6a666b782ea4b787eb598e5c3d27c93",
    urls = [
        "https://github.com/bazelbuild/remote-apis/archive/eb433accc6a666b782ea4b787eb598e5c3d27c93.zip",
    ],
)

# googleapis is exposed under the legacy repository name com_google_googleapis.
bazel_dep(name = "googleapis", version = "0.0.0-20240326-1c8d509c5", repo_name = "com_google_googleapis")
bazel_dep(name = "grpc-java", version = "1.62.2")

googleapis_switched_rules = use_extension("@com_google_googleapis//:extensions.bzl", "switched_rules")

# Request the gRPC and Java language rules from the googleapis extension.
googleapis_switched_rules.use_languages(
    grpc = True,
    java = True,
)
use_repo(googleapis_switched_rules, "com_google_googleapis_imports")

bazel.rc:

# Module registries, consulted in the order given: the registry checked into
# the workspace first, then the Bazel Central Registry.
common --registry=file:///%workspace%/registry
common --registry=https://bcr.bazel.build

# Java 21 language level and the remote JDK 21 runtime for target code...
common --java_language_version=21
common --java_runtime_version=remotejdk_21

# ...and the same for build tools.
common --tool_java_language_version=21
common --tool_java_runtime_version=remotejdk_21

# Lets OS-specific config sections such as build:linux apply automatically.
common --enable_platform_specific_config

build:fuse --define=fuse=true

# Workaround for https://github.com/bazelbuild/bazel/issues/3236
build:linux --sandbox_tmpfs_path=/tmp

build --nojava_header_compilation

test --nojava_header_compilation
test --enable_runfiles
test --test_tag_filters=-redis,-integration
# --test_output takes a single value and the last occurrence wins, so a
# preceding `--test_output=all` had no effect; only `streamed` is kept.
test --test_output=streamed
# Ensure buildfarm is compatible with future versions of bazel.
# https://buildkite.com/bazel/bazelisk-plus-incompatible-flags
common --incompatible_disallow_empty_glob
# With no `=value`, the variable's value is taken from the invoking environment.
# common --repo_env=OCI_ENABLE_OAUTH2_SUPPORT=1
common --repo_env=OCI_ENABLE_OAUTH2_SUPPORT

common --enable_bzlmod
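# See also https://bazel.build/external/lockfile.
# The flag below is currently commented out, so the lockfile mode is left at
# Bazel's default. When enabled, it turns the lockfile off because we have
# mac/windows/linux developers who may not have access to all three platforms
# to update the platform-specific bits of the lockfile.
# common --lockfile_mode=off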