Open rexwangcc opened 2 years ago
FYI, currently our Crawler is using the follwoing config:
new Crawler({
appId: "***",
apiKey: "***",
rateLimit: 8,
startUrls: ["https://docs.taichi.graphics/"],
renderJavaScript: false,
sitemaps: ["https://docs.taichi.graphics/sitemap.xml"],
exclusionPatterns: ["**/community/members/**", "**/acknowledgments/**"],
ignoreCanonicalTo: true,
discoveryPatterns: ["https://docs.taichi.graphics/**"],
schedule: "at 19:00 on Friday",
actions: [
{
indexName: "taichi",
pathsToMatch: [
"https://docs.taichi.graphics/**",
"https://docs.taichi.graphics/api/**",
],
recordExtractor: ({ $, helpers }) => {
return helpers.docsearch({
recordProps: {
lvl1: "header h1, section h1",
content: "article p, article li, article td:last-child, dt",
lvl0: {
selectors: "",
defaultValue: "Docs",
},
lvl2: "article h2",
lvl3: "article h3",
lvl4: "article h4",
lvl5: "article h5, article td:first-child",
lvl6: "article h6",
},
indexHeadings: true,
});
},
},
],
initialIndexSettings: {
taichi: {
attributesForFaceting: [
"type",
"lang",
"language",
"version",
"docusaurus_tag",
],
attributesToRetrieve: [
"hierarchy",
"content",
"anchor",
"url",
"url_without_anchor",
"type",
],
attributesToHighlight: ["hierarchy", "hierarchy_camel", "content"],
attributesToSnippet: ["content:10"],
camelCaseAttributes: ["hierarchy", "hierarchy_radio", "content"],
searchableAttributes: [
"unordered(hierarchy_radio_camel.lvl0)",
"unordered(hierarchy_radio.lvl0)",
"unordered(hierarchy_radio_camel.lvl1)",
"unordered(hierarchy_radio.lvl1)",
"unordered(hierarchy_radio_camel.lvl2)",
"unordered(hierarchy_radio.lvl2)",
"unordered(hierarchy_radio_camel.lvl3)",
"unordered(hierarchy_radio.lvl3)",
"unordered(hierarchy_radio_camel.lvl4)",
"unordered(hierarchy_radio.lvl4)",
"unordered(hierarchy_radio_camel.lvl5)",
"unordered(hierarchy_radio.lvl5)",
"unordered(hierarchy_radio_camel.lvl6)",
"unordered(hierarchy_radio.lvl6)",
"unordered(hierarchy_camel.lvl0)",
"unordered(hierarchy.lvl0)",
"unordered(hierarchy_camel.lvl1)",
"unordered(hierarchy.lvl1)",
"unordered(hierarchy_camel.lvl2)",
"unordered(hierarchy.lvl2)",
"unordered(hierarchy_camel.lvl3)",
"unordered(hierarchy.lvl3)",
"unordered(hierarchy_camel.lvl4)",
"unordered(hierarchy.lvl4)",
"unordered(hierarchy_camel.lvl5)",
"unordered(hierarchy.lvl5)",
"unordered(hierarchy_camel.lvl6)",
"unordered(hierarchy.lvl6)",
"content",
],
distinct: true,
attributeForDistinct: "url",
customRanking: [
"desc(weight.pageRank)",
"desc(weight.level)",
"asc(weight.position)",
],
ranking: [
"words",
"filters",
"typo",
"attribute",
"proximity",
"exact",
"custom",
],
highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
highlightPostTag: "</span>",
minWordSizefor1Typo: 3,
minWordSizefor2Typos: 7,
allowTyposOnNumericTokens: false,
minProximity: 1,
ignorePlurals: true,
advancedSyntax: true,
attributeCriteriaComputedByMinProximity: true,
removeWordsIfNoResults: "allOptional",
separatorsToIndex: "_",
},
},
});
All of the search results are categorized under
docs
, such as https://docs.taichi.graphics/search?q=field or https://docs.taichi.graphics/search?q=high%20performance%20computing, which is confusing. Reported by @qiao-bo