Closed maudetes closed 9 months ago
A correction was made because some domains registered in harvest sources redirect to a different domain than the one appearing in the DCAT results. Here is the sample code written for this correction:
from datetime import datetime
from udata.app import create_app
from udata.models import Dataset
from udata.harvest.models import HarvestSource
# Lookup table: domain as registered on the harvest source (key) -> domain
# that replaces it (value).
source_mapping = {
    "https://sdem.opendatasoft.com": "https://www.opendata56.fr",
    "https://paysdelaloire.opendatasoft.com": "https://data.paysdelaloire.fr",
    "https://enedis.opendatasoft.com": "https://data.enedis.fr",
    "http://data.haute-garonne.fr": "https://data.haute-garonne.fr",
    "http://breizh.opendatasoft.com": "https://data.bretagne.bzh",
    "http://data.ratp.fr": "https://data.ratp.fr",
    "http://data.laregion.fr": "https://data.laregion.fr",
    "https://datainfogreffe.fr": "https://opendata.datainfogreffe.fr",
    "http://opendata.stif.info": "https://data.iledefrance-mobilites.fr",
}
def rename(url, domain):
    """Return *url* with every occurrence of *domain* replaced by its new domain.

    The new domain is looked up in ``source_mapping``; a *domain* that is not
    a key of that mapping raises ``KeyError``.
    """
    new_domain = source_mapping[domain]
    return url.replace(domain, new_domain)
app = create_app()

with app.app_context():
    # Pivot date used by both dataset queries below: datasets created on or
    # after it under the new domain are treated as duplicates, datasets
    # created on or before it under the old domain are the ones to rename.
    cutoff = "2024-01-08"

    for domain in source_mapping:
        new_domain = source_mapping[domain]
        matching_sources = HarvestSource.objects(
            url__contains=domain,
            validation__state="accepted",
        )
        for source in matching_sources:
            print(source.id)
            source_id = str(source.id)

            # Delete new datasets duplicate created with new domain
            duplicates = Dataset.objects(
                harvest__source_id=source_id,
                harvest__remote_id__contains=new_domain,
                created_at_internal__gte=cutoff,
            )
            for dat in duplicates:
                # NOTE(review): datetime.now() is naive local time - confirm
                # this matches how the project timestamps `deleted`.
                dat.deleted = datetime.now()
                dat.save()

            # Rename old datasets and resources with new domain
            outdated = Dataset.objects(
                harvest__source_id=source_id,
                harvest__remote_id__contains=domain,
                created_at_internal__lte=cutoff,
            )
            for dat in outdated:
                dat.harvest.remote_id = rename(dat.harvest.remote_id, domain)
                for res in dat.resources:
                    res.url = rename(res.url, domain)
                dat.save()

            # Rename the source last. The source is selected by its old URL,
            # so persisting the new URL only once its datasets are migrated
            # lets an interrupted run be restarted: the source still matches
            # `url__contains=domain` until all of its datasets are done.
            source.url = rename(source.url, domain)
            source.save()
Fix https://github.com/etalab/data.gouv.fr/issues/1086. Update HarvestSource as well as harvested Datasets to migrate to the DCAT catalog endpoint.