sanity-io / sanity-algolia

Utilities for indexing Sanity documents in Algolia
MIT License
67 stars 16 forks source link

Additional tool to retrieve references #4

Closed eunjae-lee closed 3 years ago

eunjae-lee commented 3 years ago

I'd like to have a helper method in this library to retrieve references.

image

This is one of my documents, and I want to get the actual categories.

Although I can do everything with @sanity/client, I'd appreciate a little helper method in this library just like flattenBlocks.

What do you think?

andrevenancio commented 3 years ago

related to my post at #1

I've actually asked this on sanity's slack channel, and just as a reference this was the serverless function I ended up creating:

(There's a lot of "garbage" in there, but I'm saving a dictionary of references so I can access them later... it might be a good starting point. Ideally, every time there's a reference we should recursively fetch the data.)

/**
 * This is a serverless method. It goes through all data in Sanity and passes it on to Algolia.
 */
// https://www.sanity.io/blog/indexing-in-algolia-using-serverless-functions-and-observables
const algoliasearch = require('algoliasearch');
const request = require('request');
const ndjson = require('ndjson');
const ndjsonParser = require('ndjson-parse');
const { bindNodeCallback } = require('rxjs');
const { streamToRx } = require('rxjs-stream');
const {
    bufferCount,
    map,
    filter,
    mergeMap,
    toArray,
} = require('rxjs/operators');

module.exports = async (req, res) => {
    const { token } = req.query;
    // if there's no query params then return earlier
    if (!token || token !== process.env.SANITY_WEBHOOK) {
        return res.json({
            timestap: Date.now(),
            message: 'bypass',
            sanityWebhook: process.env.SANITY_WEBHOOK,
            danityProjectId: process.env.SANITY_PROJECT_ID,
            sanityDataset: process.env.SANITY_DATASET,
            algoliaAppId: process.env.ALGOLIA_APP_ID,
            algoliaToken: process.env.ALGOLIA_TOKEN,
            algoliaIndex: process.env.ALGOLIA_INDEX,
        });
    }

    // if there's query params then run it through
    const sanityExportURL = `https://${process.env.SANITY_PROJECT_ID}.api.sanity.io/v1/data/export/${process.env.SANITY_DATASET}`;

    const client = algoliasearch(
        process.env.ALGOLIA_APP_ID,
        process.env.ALGOLIA_TOKEN
    );
    const index = client.initIndex(process.env.ALGOLIA_INDEX);

    const partialUpdateObjects = bindNodeCallback((...args) => {
        index.saveObjects(...args);
    });

    // 1) get ndjson string
    const ndjsonString = await fetch(sanityExportURL).then((e) => e.text());

    // 2) parse string and filter by tag.
    const parsed = ndjsonParser(ndjsonString);
    const _tags = parsed.filter((e) => e._type === 'tag');
    const _images = parsed.filter((e) => e._type === 'sanity.imageAsset');

    // 3) save all tags in a dictionary
    const tags = [];
    _tags.forEach((e) => {
        tags[e._id] = { key: e._id, title: e.title, slug: e.slug };
    });

    const images = [];
    _images.forEach((e) => {
        images[e._id] = {
            key: e._id,
            url: e.url,
            palette: e.metadata.palette,
        };
    });

    // 4) parse data for algolia as in the rest of the tutorial
    streamToRx(request(sanityExportURL).pipe(ndjson.parse()))
        .pipe(
            map(function sanityToAlgolia(doc) {
                const obj = {
                    objectID: doc._id, // required by algolia
                    key: doc._id,
                    type: doc._type,
                    title: doc.title,
                    slug: doc.slug, // this needs to be resolved to a string
                    description: doc.description,
                    content: blocksToText(doc.content || []),
                };

                // if tags
                if (doc.tags) {
                    obj.tags = doc.tags.map((e) => {
                        return tags[e._ref];
                    });
                }

                // if coverImage
                if (doc.coverImage) {
                    obj.image = {
                        url: images[doc.coverImage.asset._ref].url,
                        palette: images[doc.coverImage.asset._ref].palette,
                    };
                }

                return obj;
            }),

            filter((e) => e.type === 'spotlight' || e.type === 'post'),
            bufferCount(100),
            mergeMap((docs) => partialUpdateObjects(docs), 1),
            toArray()
        )
        .subscribe((batchResults) => {
            const totalLength = batchResults.reduce(
                (count, batchResult) => count + batchResult.length,
                0
            );
            console.log(
                `Updated ${totalLength} documents in ${batchResults.length} batches`
            );
        });

    res.json({
        timestamp: Date.now(),
        message: 'Algolia indices updated',
        index: process.env.ALGOLIA_INDEX,
        dataset: process.env.SANITY_DATASET,
    });
};

// Fallback options for blocksToText: non-text blocks contribute an empty string.
const defaults = { nonTextBehavior: 'remove' };

/**
 * Flatten an array of Portable Text-style blocks into plain text.
 *
 * Each block becomes one paragraph; paragraphs are separated by a blank
 * line. A block counts as text when its `_type` is 'block' and it has
 * `children`; its paragraph is the concatenation of every child's `text`.
 * Any other block yields '' when `nonTextBehavior` is 'remove', otherwise
 * the placeholder `[<_type> block]`.
 *
 * @param {Array<object>} blocks - blocks to flatten.
 * @param {object} [opts] - overrides for `defaults`.
 * @returns {string} the joined paragraphs.
 */
function blocksToText(blocks, opts = {}) {
    const { nonTextBehavior } = Object.assign({}, defaults, opts);

    const renderBlock = (block) => {
        const isTextBlock = block._type === 'block' && block.children;
        if (isTextBlock) {
            const pieces = block.children.map((child) => child.text);
            return pieces.join('');
        }
        if (nonTextBehavior === 'remove') {
            return '';
        }
        return `[${block._type} block]`;
    };

    const paragraphs = blocks.map(renderBlock);
    return paragraphs.join('\n\n');
}
eunjae-lee commented 3 years ago

Oh @andrevenancio, you're right. I missed your issue :)

runeb commented 3 years ago

Thanks for reporting, @andrevenancio and @eunjae-lee! I've given this some thought, and since the actual querying of documents is automated here, how about a way to specify your own projection for the document types you are handling? Or do you simply want some nice way of running arbitrary queries?

I was thinking of something like just specifying the projection as part of the mapping you already do to index. So in @eunjae-lee's example, you could specify

{
    post: {
      index: algoliaIndex,
      projection: `{
        title,
        isKorean,
        categories[]->{title}
      }`
    }
}

How does that look?

runeb commented 3 years ago

Version 1.0.0 is now released to address this limitation. Resolving references is now possible with custom projections. See the updated README and the release notes for 1.0.0.