Closed eunjae-lee closed 3 years ago
related to my post at #1
I've actually asked this on sanity's slack channel, and just as a reference this was the serverless function I ended up creating:
(There's a lot of "garbage" there, but I'm saving a dictionary of references so I can access them later... might be a good starting point.) Ideally, every time there's a reference, we should recursively fetch the data.
/**
 * This is a serverless method. It goes through all data in sanity and passes it on into algolia
 */
// https://www.sanity.io/blog/indexing-in-algolia-using-serverless-functions-and-observables
const algoliasearch = require('algoliasearch');
const request = require('request');
const ndjson = require('ndjson');
const ndjsonParser = require('ndjson-parse');
const { bindNodeCallback } = require('rxjs');
const { streamToRx } = require('rxjs-stream');
const {
bufferCount,
map,
filter,
mergeMap,
toArray,
} = require('rxjs/operators');
module.exports = async (req, res) => {
const { token } = req.query;
// if there's no query params then return earlier
if (!token || token !== process.env.SANITY_WEBHOOK) {
return res.json({
timestap: Date.now(),
message: 'bypass',
sanityWebhook: process.env.SANITY_WEBHOOK,
danityProjectId: process.env.SANITY_PROJECT_ID,
sanityDataset: process.env.SANITY_DATASET,
algoliaAppId: process.env.ALGOLIA_APP_ID,
algoliaToken: process.env.ALGOLIA_TOKEN,
algoliaIndex: process.env.ALGOLIA_INDEX,
});
}
// if there's query params then run it through
const sanityExportURL = `https://${process.env.SANITY_PROJECT_ID}.api.sanity.io/v1/data/export/${process.env.SANITY_DATASET}`;
const client = algoliasearch(
process.env.ALGOLIA_APP_ID,
process.env.ALGOLIA_TOKEN
);
const index = client.initIndex(process.env.ALGOLIA_INDEX);
const partialUpdateObjects = bindNodeCallback((...args) => {
index.saveObjects(...args);
});
// 1) get ndjson string
const ndjsonString = await fetch(sanityExportURL).then((e) => e.text());
// 2) parse string and filter by tag.
const parsed = ndjsonParser(ndjsonString);
const _tags = parsed.filter((e) => e._type === 'tag');
const _images = parsed.filter((e) => e._type === 'sanity.imageAsset');
// 3) save all tags in a dictionary
const tags = [];
_tags.forEach((e) => {
tags[e._id] = { key: e._id, title: e.title, slug: e.slug };
});
const images = [];
_images.forEach((e) => {
images[e._id] = {
key: e._id,
url: e.url,
palette: e.metadata.palette,
};
});
// 4) parse data for algolia as in the rest of the tutorial
streamToRx(request(sanityExportURL).pipe(ndjson.parse()))
.pipe(
map(function sanityToAlgolia(doc) {
const obj = {
objectID: doc._id, // required by algolia
key: doc._id,
type: doc._type,
title: doc.title,
slug: doc.slug, // this needs to be resolved to a string
description: doc.description,
content: blocksToText(doc.content || []),
};
// if tags
if (doc.tags) {
obj.tags = doc.tags.map((e) => {
return tags[e._ref];
});
}
// if coverImage
if (doc.coverImage) {
obj.image = {
url: images[doc.coverImage.asset._ref].url,
palette: images[doc.coverImage.asset._ref].palette,
};
}
return obj;
}),
filter((e) => e.type === 'spotlight' || e.type === 'post'),
bufferCount(100),
mergeMap((docs) => partialUpdateObjects(docs), 1),
toArray()
)
.subscribe((batchResults) => {
const totalLength = batchResults.reduce(
(count, batchResult) => count + batchResult.length,
0
);
console.log(
`Updated ${totalLength} documents in ${batchResults.length} batches`
);
});
res.json({
timestamp: Date.now(),
message: 'Algolia indices updated',
index: process.env.ALGOLIA_INDEX,
dataset: process.env.SANITY_DATASET,
});
};
// Fallback options: non-text blocks are dropped from the output entirely.
const defaults = { nonTextBehavior: 'remove' };

/**
 * Converts an array of Sanity Portable Text blocks into a plain string.
 * Text blocks have their children's text joined together; any other block
 * type is either dropped ('remove', the default) or rendered as a
 * "[<type> block]" placeholder, depending on `opts.nonTextBehavior`.
 *
 * @param {Array<object>} blocks - Portable Text blocks.
 * @param {object} [opts] - Overrides for `defaults` (e.g. `nonTextBehavior`).
 * @returns {string} Block texts separated by blank lines.
 */
function blocksToText(blocks, opts = {}) {
  const { nonTextBehavior } = { ...defaults, ...opts };
  const paragraphs = blocks.map((block) => {
    const isTextBlock = block._type === 'block' && block.children;
    if (!isTextBlock) {
      return nonTextBehavior === 'remove' ? '' : `[${block._type} block]`;
    }
    return block.children.map((span) => span.text).join('');
  });
  return paragraphs.join('\n\n');
}
Oh @andrevenancio, you're right. I missed your issue :)
Thanks for reporting, @andrevenancio and @eunjae-lee! I've given this some thought, and since the actual querying of documents is automated here, how about a way to specify your own projection for the document types you are handling? Or do you simply want some nice way of running arbitrary queries?
I was thinking of something like just specifying the projection, as part of the mapping you already do to index. So in @eunjae-lee's example, you could specify
{
post: {
index: algoliaIndex,
projection: `{
title,
isKorean,
categories[]->{title}
}`
}
}
How does that look?
Version 1.0.0
is now released to address this limitation. Resolving references is now possible with custom projections. See the updated README and the release notes for 1.0.0.
I'd like to have a helper method in this library to retrieve references.
This is one of my documents, and I want to get the actual `categories`.
Although I can do everything with `@sanity/client`, I'd appreciate a little
helper method in this library, just like `flattenBlocks`. What do you think?