// TODO: determine schema for this
// Draft configuration for the "extract-selectors" plugin: a URL filter, an
// optional database name, and a set of named extractors built from CSS selectors.
"extract-selectors": {
  // TODO: flag to indicate whether we should mark up the source to
  //       make it more amenable to CSS extraction.
  // TODO: how to handle invariant values, e.g. if we wanted to build
  //       out a join table of categories, we'd extract the category
  //       from the same root HTML element... how to infer that?
  // TODO: how to do many-to-many, if such a thing exists? Maybe it's
  //       an advanced case and can be ignored.
  "url-regex": "/garments/",
  // optional
  "database": "dbname",
  // Extractors are keyed by name, so this is an object and must be closed
  // with "}" (not "]").
  // NOTE(review): each key presumably names an output table - TODO confirm.
  "extractors": {
    "patterns": {
      "selector": ".w-full.container",
      // Each attribute is a two-element array: a CSS selector, then an object
      // naming what to read from the match (an HTML attribute, or its text).
      // NOTE(review): the trailing "!" on a key presumably marks it as a
      // required/identifying column - TODO confirm once the schema is settled.
      "attributes": {
        "url!": [
          "h3 a",
          { "attribute": "href" }
        ],
        "name": [
          "h3 a",
          { "text": true }
        ],
        "designer_name": [
          "h3 + p a",
          { "text": true }
        ]
      }
    },
    "pattern_categories": {
      "selector": ".w-full.container",
      "attributes": {
        "url!": [
          "h3 a",
          { "attribute": "href" }
        ],
        "category!": [
          "html h1",
          { "text": true }
        ]
      }
    }
  }
}
}
TODO: the name and schema are still up in the air.
Needs the `extract_from_response` hook: https://github.com/cldellow/datasette-scraper#extract_from_responsescraper-config-url-response