Open kijung-iM opened 1 year ago
@kijung-iM I just published typesense/docsearch-scraper:0.9.0.rc1,
which adds support for custom field definitions. With these, you can set the locale of each field as needed.
You should now be able to do something like this in the scraper config:
{
"index_name": "typesense_docs",
"start_urls": [
{
"url": "https://typesense.org/docs/(?P<version>.*?)/",
"variables": {
"version": [
"0.21.0"
]
}
}
],
"selectors": {
"default": {
"lvl0": ".content__default h1",
"lvl1": ".content__default h2",
"lvl2": ".content__default h3",
"lvl3": ".content__default h4",
"lvl4": ".content__default h5",
"text": ".content__default p, .content__default ul li, .content__default table tbody tr"
}
},
"custom_settings": {
"field_definitions": [ <==== You can set the `locale` field inside each field now.
{"name": "anchor", "type": "string", "optional": true},
{"name": "content", "type": "string", "locale": "ko", "optional": true},
{"name": "url", "type": "string", "facet": true},
{"name": "url_without_anchor", "type": "string", "facet": true, "optional": true},
{"name": "version", "type": "string[]", "facet": true, "optional": true},
{"name": "hierarchy.lvl0", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "hierarchy.lvl1", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "hierarchy.lvl2", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "hierarchy.lvl3", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "hierarchy.lvl4", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "hierarchy.lvl5", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "hierarchy.lvl6", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "type", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": ".*_tag", "type": "string", "facet": true, "locale": "ko", "optional": true},
{"name": "language", "type": "string", "facet": true, "optional": true},
{"name": "tags", "type": "string[]", "facet": true, "locale": "ko", "optional": true},
{"name": "item_priority", "type": "int64"}
]
}
}
Description
Hello, how do I set the 'locale'? Can I set the locale using docsearch-scraper?