huggingface / dataset-viewer

Backend that powers the dataset viewer on Hugging Face dataset pages through a public API.
https://huggingface.co/docs/dataset-viewer
Apache License 2.0
688 stars 76 forks source link

Refine blocked datasets for open llm leaderboard #2869

Closed lhoestq closed 4 months ago

github-actions[bot] commented 4 months ago

ArgoCD Diff for commit 85a10b5

Updated at 5/29/2024, 4:24:57 PM CEST

App: datasets-server-prod
YAML generation: Success 🟢
App sync status: Out of Sync ⚠️

```diff ===== apps/Deployment datasets-server/prod-datasets-server-admin ====== --- /tmp/argocd-diff1973429398/prod-datasets-server-admin-live.yaml 2024-05-29 14:24:55.208432270 +0000 +++ /tmp/argocd-diff1973429398/prod-datasets-server-admin 2024-05-29 14:24:55.208432270 +0000 @@ -408,7 +408,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -457,7 +457,7 @@ value: "9" - name: ADMIN_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-admin:sha-3294b8d + image: huggingface/datasets-server-services-admin:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-api ====== --- /tmp/argocd-diff1320773827/prod-datasets-server-api-live.yaml 2024-05-29 14:24:55.232432424 +0000 +++ /tmp/argocd-diff1320773827/prod-datasets-server-api 2024-05-29 14:24:55.228432398 +0000 @@ -409,7 +409,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: 
'{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -470,7 +470,7 @@ value: "9" - name: API_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-api:sha-3294b8d + image: huggingface/datasets-server-services-api:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-rows ====== --- /tmp/argocd-diff3673508185/prod-datasets-server-rows-live.yaml 2024-05-29 14:24:55.248432527 +0000 +++ /tmp/argocd-diff3673508185/prod-datasets-server-rows 2024-05-29 14:24:55.248432527 +0000 @@ -451,7 +451,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -502,7 +502,7 @@ value: "8080" - name: ROWS_INDEX_MAX_ARROW_DATA_IN_MEMORY value: "300_000_000" - image: huggingface/datasets-server-services-rows:sha-3294b8d + image: huggingface/datasets-server-services-rows:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-search ====== --- 
/tmp/argocd-diff1709289221/prod-datasets-server-search-live.yaml 2024-05-29 14:24:55.272432681 +0000 +++ /tmp/argocd-diff1709289221/prod-datasets-server-search 2024-05-29 14:24:55.268432656 +0000 @@ -419,7 +419,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -478,7 +478,7 @@ value: /tmp/duckdb-extensions - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - image: huggingface/datasets-server-services-search:sha-3294b8d + image: huggingface/datasets-server-services-search:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-sse-api ====== --- /tmp/argocd-diff780101573/prod-datasets-server-sse-api-live.yaml 2024-05-29 14:24:55.284432759 +0000 +++ /tmp/argocd-diff780101573/prod-datasets-server-sse-api 2024-05-29 14:24:55.280432733 +0000 @@ -273,7 +273,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: 
'{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -316,7 +316,7 @@ value: "1" - name: API_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-sse-api:sha-3294b8d + image: huggingface/datasets-server-services-sse-api:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-webhook ====== --- /tmp/argocd-diff1121696937/prod-datasets-server-webhook-live.yaml 2024-05-29 14:24:55.300432862 +0000 +++ /tmp/argocd-diff1121696937/prod-datasets-server-webhook 2024-05-29 14:24:55.296432836 +0000 @@ -388,7 +388,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -449,7 +449,7 @@ value: "9" - name: API_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-webhook:sha-3294b8d + image: huggingface/datasets-server-services-webhook:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-worker-heavy ====== --- 
/tmp/argocd-diff1763590210/prod-datasets-server-worker-heavy-live.yaml 2024-05-29 14:24:55.324433016 +0000 +++ /tmp/argocd-diff1763590210/prod-datasets-server-worker-heavy 2024-05-29 14:24:55.320432990 +0000 @@ -542,7 +542,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -683,7 +683,7 @@ value: "1" - name: WORKER_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-worker:sha-3294b8d + image: huggingface/datasets-server-services-worker:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-worker-light ====== --- /tmp/argocd-diff4163611528/prod-datasets-server-worker-light-live.yaml 2024-05-29 14:24:55.352433196 +0000 +++ /tmp/argocd-diff4163611528/prod-datasets-server-worker-light 2024-05-29 14:24:55.348433171 +0000 @@ -541,7 +541,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: 
'{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -682,7 +682,7 @@ value: "1" - name: WORKER_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-worker:sha-3294b8d + image: huggingface/datasets-server-services-worker:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/prod-datasets-server-worker-medium ====== --- /tmp/argocd-diff3694318239/prod-datasets-server-worker-medium-live.yaml 2024-05-29 14:24:55.380433376 +0000 +++ /tmp/argocd-diff3694318239/prod-datasets-server-worker-medium 2024-05-29 14:24:55.372433325 +0000 @@ -541,7 +541,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -682,7 +682,7 @@ value: "1" - name: WORKER_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-worker:sha-3294b8d + image: huggingface/datasets-server-services-worker:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== batch/CronJob datasets-server/prod-datasets-server-job-backfill ====== --- 
/tmp/argocd-diff2673921950/prod-datasets-server-job-backfill-live.yaml 2024-05-29 14:24:55.404433531 +0000 +++ /tmp/argocd-diff2673921950/prod-datasets-server-job-backfill 2024-05-29 14:24:55.404433531 +0000 @@ -214,7 +214,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -257,7 +257,7 @@ value: backfill - name: LOG_LEVEL value: debug - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: IfNotPresent name: prod-datasets-server-backfill resources: ===== batch/CronJob datasets-server/prod-datasets-server-job-backfill-retryable-errors ====== --- /tmp/argocd-diff4291373426/prod-datasets-server-job-backfill-retryable-errors-live.yaml 2024-05-29 14:24:55.416433608 +0000 +++ /tmp/argocd-diff4291373426/prod-datasets-server-job-backfill-retryable-errors 2024-05-29 14:24:55.416433608 +0000 @@ -214,7 +214,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: 
COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -257,7 +257,7 @@ value: backfill-retryable-errors - name: LOG_LEVEL value: debug - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: IfNotPresent name: prod-datasets-server-backfill-retryable-errors resources: ===== batch/CronJob datasets-server/prod-datasets-server-job-cache-metrics-collector ====== --- /tmp/argocd-diff3451131569/prod-datasets-server-job-cache-metrics-collector-live.yaml 2024-05-29 14:24:55.428433685 +0000 +++ /tmp/argocd-diff3451131569/prod-datasets-server-job-cache-metrics-collector 2024-05-29 14:24:55.424433659 +0000 @@ -174,7 +174,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -189,7 +189,7 @@ optional: false - name: CACHE_MAINTENANCE_ACTION value: collect-cache-metrics - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: 
IfNotPresent name: prod-datasets-server-cache-metrics-collector resources: ===== batch/CronJob datasets-server/prod-datasets-server-job-post-messages ====== --- /tmp/argocd-diff441292658/prod-datasets-server-job-post-messages-live.yaml 2024-05-29 14:24:55.436433736 +0000 +++ /tmp/argocd-diff441292658/prod-datasets-server-job-post-messages 2024-05-29 14:24:55.432433711 +0000 @@ -185,7 +185,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -212,7 +212,7 @@ value: post-messages - name: LOG_LEVEL value: info - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: IfNotPresent name: prod-datasets-server-post-messages resources: ===== batch/CronJob datasets-server/prod-datasets-server-job-queue-metrics-collector ====== --- /tmp/argocd-diff35807508/prod-datasets-server-job-queue-metrics-collector-live.yaml 2024-05-29 14:24:55.444433788 +0000 +++ /tmp/argocd-diff35807508/prod-datasets-server-job-queue-metrics-collector 2024-05-29 14:24:55.440433762 +0000 @@ -174,7 +174,7 @@ name: datasets-server-prod-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: 
huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -189,7 +189,7 @@ optional: false - name: CACHE_MAINTENANCE_ACTION value: collect-queue-metrics - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: IfNotPresent name: prod-datasets-server-queue-metrics-collector resources: ```

App: datasets-server-staging
YAML generation: Success 🟢
App sync status: Out of Sync ⚠️

```diff ===== apps/Deployment datasets-server/staging-datasets-server-admin ====== --- /tmp/argocd-diff1591636670/staging-datasets-server-admin-live.yaml 2024-05-29 14:24:57.576447495 +0000 +++ /tmp/argocd-diff1591636670/staging-datasets-server-admin 2024-05-29 14:24:57.572447469 +0000 @@ -400,7 +400,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -449,7 +449,7 @@ value: "1" - name: ADMIN_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-admin:sha-3294b8d + image: huggingface/datasets-server-services-admin:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/staging-datasets-server-api ====== --- /tmp/argocd-diff1001020640/staging-datasets-server-api-live.yaml 2024-05-29 14:24:57.592447597 +0000 +++ /tmp/argocd-diff1001020640/staging-datasets-server-api 2024-05-29 14:24:57.588447572 +0000 @@ -397,7 +397,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: 
COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -453,7 +453,7 @@ value: "1" - name: API_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-api:sha-3294b8d + image: huggingface/datasets-server-services-api:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/staging-datasets-server-rows ====== --- /tmp/argocd-diff286206308/staging-datasets-server-rows-live.yaml 2024-05-29 14:24:57.612447726 +0000 +++ /tmp/argocd-diff286206308/staging-datasets-server-rows 2024-05-29 14:24:57.612447726 +0000 @@ -461,7 +461,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -507,7 +507,7 @@ value: "8080" - name: ROWS_INDEX_MAX_ARROW_DATA_IN_MEMORY value: "300_000_000" - image: huggingface/datasets-server-services-rows:sha-3294b8d + image: huggingface/datasets-server-services-rows:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/staging-datasets-server-search ====== --- 
/tmp/argocd-diff2121155462/staging-datasets-server-search-live.yaml 2024-05-29 14:24:57.632447855 +0000 +++ /tmp/argocd-diff2121155462/staging-datasets-server-search 2024-05-29 14:24:57.628447829 +0000 @@ -428,7 +428,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -482,7 +482,7 @@ value: /tmp/duckdb-extensions - name: HF_HUB_ENABLE_HF_TRANSFER value: "1" - image: huggingface/datasets-server-services-search:sha-3294b8d + image: huggingface/datasets-server-services-search:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/staging-datasets-server-sse-api ====== --- /tmp/argocd-diff3943095159/staging-datasets-server-sse-api-live.yaml 2024-05-29 14:24:57.644447932 +0000 +++ /tmp/argocd-diff3943095159/staging-datasets-server-sse-api 2024-05-29 14:24:57.644447932 +0000 @@ -281,7 +281,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: 
'{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -319,7 +319,7 @@ value: "1" - name: API_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-sse-api:sha-3294b8d + image: huggingface/datasets-server-services-sse-api:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/staging-datasets-server-webhook ====== --- /tmp/argocd-diff1018965711/staging-datasets-server-webhook-live.yaml 2024-05-29 14:24:57.660448035 +0000 +++ /tmp/argocd-diff1018965711/staging-datasets-server-webhook 2024-05-29 14:24:57.656448009 +0000 @@ -383,7 +383,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -439,7 +439,7 @@ value: "1" - name: API_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-webhook:sha-3294b8d + image: huggingface/datasets-server-services-webhook:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/staging-datasets-server-worker-all ====== --- 
/tmp/argocd-diff394728035/staging-datasets-server-worker-all-live.yaml 2024-05-29 14:24:57.688448215 +0000 +++ /tmp/argocd-diff394728035/staging-datasets-server-worker-all 2024-05-29 14:24:57.684448190 +0000 @@ -539,7 +539,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -680,7 +680,7 @@ value: "1" - name: WORKER_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-worker:sha-3294b8d + image: huggingface/datasets-server-services-worker:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== apps/Deployment datasets-server/staging-datasets-server-worker-light ====== --- /tmp/argocd-diff1711473049/staging-datasets-server-worker-light-live.yaml 2024-05-29 14:24:57.716448395 +0000 +++ /tmp/argocd-diff1711473049/staging-datasets-server-worker-light 2024-05-29 14:24:57.712448370 +0000 @@ -539,7 +539,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: 
'{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -680,7 +680,7 @@ value: "1" - name: WORKER_UVICORN_PORT value: "8080" - image: huggingface/datasets-server-services-worker:sha-3294b8d + image: huggingface/datasets-server-services-worker:sha-20f9c87 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 30 ===== batch/CronJob datasets-server/staging-datasets-server-job-cache-metrics-collector ====== --- /tmp/argocd-diff2081034681/staging-datasets-server-job-cache-metrics-collector-live.yaml 2024-05-29 14:24:57.732448498 +0000 +++ /tmp/argocd-diff2081034681/staging-datasets-server-job-cache-metrics-collector 2024-05-29 14:24:57.732448498 +0000 @@ -171,7 +171,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -186,7 +186,7 @@ optional: false - name: CACHE_MAINTENANCE_ACTION value: collect-cache-metrics - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: IfNotPresent name: staging-datasets-server-cache-metrics-collector resources: ===== 
batch/CronJob datasets-server/staging-datasets-server-job-post-messages ====== --- /tmp/argocd-diff217701664/staging-datasets-server-job-post-messages-live.yaml 2024-05-29 14:24:57.740448550 +0000 +++ /tmp/argocd-diff217701664/staging-datasets-server-job-post-messages 2024-05-29 14:24:57.740448550 +0000 @@ -183,7 +183,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -210,7 +210,7 @@ value: post-messages - name: LOG_LEVEL value: info - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: IfNotPresent name: staging-datasets-server-post-messages resources: ===== batch/CronJob datasets-server/staging-datasets-server-job-queue-metrics-collector ====== --- /tmp/argocd-diff885166423/staging-datasets-server-job-queue-metrics-collector-live.yaml 2024-05-29 14:24:57.752448627 +0000 +++ /tmp/argocd-diff885166423/staging-datasets-server-job-queue-metrics-collector 2024-05-29 14:24:57.752448627 +0000 @@ -172,7 +172,7 @@ name: datasets-server-staging-secrets optional: false - name: COMMON_BLOCKED_DATASETS - value: huggingface-leaderboard/*,open-llm-leaderboard/*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* + value: 
open-llm-leaderboard/details_*,lunaluan/*,atom-in-the-universe/*,cot-leaderboard/cot-eval-traces,mitermix/yt-links,mcding-org/* - name: COMMON_DATASET_SCRIPTS_ALLOW_LIST value: '{{ALL_DATASETS_WITH_NO_NAMESPACE}},hf-internal-testing/dataset_with_script,togethercomputer/RedPajama-Data-1T,togethercomputer/RedPajama-Data-V2,gaia-benchmark/GAIA,poloclub/diffusiondb,mozilla-foundation/common_voice_*,google/fleurs,speechcolab/gigaspeech,espnet/yodas' - name: COMMON_HF_ENDPOINT @@ -187,7 +187,7 @@ optional: false - name: CACHE_MAINTENANCE_ACTION value: collect-queue-metrics - image: huggingface/datasets-server-jobs-cache_maintenance:sha-3294b8d + image: huggingface/datasets-server-jobs-cache_maintenance:sha-20f9c87 imagePullPolicy: IfNotPresent name: staging-datasets-server-queue-metrics-collector resources: ```

Legend Status
✅ The app is synced in ArgoCD, and diffs you see are solely from this PR.
⚠️ The app is out-of-sync in ArgoCD, and the diffs you see include those changes plus any from this PR.
🛑 There was an error generating the ArgoCD diffs due to changes in this PR.