Don't show a "Location of duplicates: digitised work" note if there's a METS work on the page

This is the code I vaguely ended up gravitating towards:

import collections
from pprint import pprint

import tqdm
from elasticsearch.helpers import scan
from weco_datascience.reporting import get_es_client

es = get_es_client()

index = "calm_catalog"

# Scroll through every document in the index, asking Elasticsearch to return
# only the "Copies" field of each document's source.
scanner = scan(es, index=index, query={"_source": "Copies"}, scroll="1m")

# Map document id -> Copies value, keeping only documents whose Copies value
# is present and non-empty.
items = {
    hit["_id"]: copies
    for hit, copies in (
        (raw_hit, raw_hit["_source"].get("Copies"))
        for raw_hit in tqdm.tqdm(scanner)
    )
    if copies
}

# Frequency count of the distinct Copies notes.
tally = collections.Counter()

def tidy(v):
    """Strip the boilerplate "digitised copy" sentences from a Copies note.

    The original body began with an unconditional ``return v``, which made
    every replacement below it unreachable; that early return is removed so
    the function actually tidies its input.

    Args:
        v: The note text. Non-string values are returned unchanged.

    Returns:
        The note with every known boilerplate sentence removed and
        surrounding whitespace stripped. The result is an empty string when
        the note consisted only of boilerplate.
    """
    if not isinstance(v, str):
        return v

    # Order matters: a variant ending in a full stop must be removed before
    # the same sentence without one, otherwise a stray "." is left behind.
    boilerplate = (
        "A digitised copy is held by the Wellcome Library as part of Codebreakers: Makers of Modern Genetics.",
        "A digitised copy is held by the Wellcome Library as part of the Codebreakers: Makers of Modern Genetics programme.",
        "A digitised copy is held by the Wellcome Library as part of The Mental Health Archives digitisation project.",
        "A digitised copy is held by the Wellcome Library.",
        "A digitised copy is held by the Wellcome Library as part of The Mental Health Archives digitisation project",
        "A digitised copy is held by the Wellcome Library as part of Codebreakers: Makers of Modern Genetics",
        "This material has been digitised and can be freely accessed online through the Wellcome Library catalogue.",
    )
    for phrase in boilerplate:
        v = v.replace(phrase, "")
    return v.strip()

import termcolor

# Count every Copies note; a record holds either one value or a list of them.
for copies in items.values():
    entries = copies if isinstance(copies, list) else [copies]
    tally.update(tidy(entry) for entry in entries)

# Bare expression: only shows the ranked counts in a notebook/REPL.
tally.most_common()

from weco_datascience.reporting import get_es_client
from elasticsearch.helpers import scan
import tqdm
from pprint import pprint

es = get_es_client()

index = "sierra_varfields"

# Exact-match criteria: MARC tag 535 with first indicator "2".
# NOTE(review): in MARC 21, 535 is the "Location of Originals/Duplicates"
# note and indicator 2 marks the holder of duplicates - confirm this is the
# field behind the "Location of duplicates" note.
terms = {
    "varField.marcTag": "535",
    "varField.ind1": "2",
}

# One "term" filter per criterion; a document must satisfy all of them.
term_filters = [{"term": {field: value}} for field, value in terms.items()]
search_body = {"query": {"bool": {"filter": term_filters}}}

scanner = scan(es, index=index, query=search_body, scroll="1m")

# Pull every matching hit into memory, with a progress bar.
notes = list(tqdm.tqdm(scanner))

But it only affects ~50k works, so I'm not sure it's worth doing right now.

wellcomecollection / platform

Don't show a "Location of duplicates: digitised work" note if there's a METS work on the page #5318