TBD54566975 / dwn-sdk-js

Decentralized Web Node (DWN) Reference implementation
https://identity.foundation/decentralized-web-node/spec/
Apache License 2.0
325 stars 100 forks source link

(optimization) reduce # of underlying db queries needed to fulfill a `RecordsQuery` #453

Closed mistermoe closed 1 year ago

mistermoe commented 1 year ago

The storage architecture of DWNs separates record metadata from record data by storing each in entirely separate data stores. the design rationale behind this is motivated by DWN’s aim to support small and large amounts application data (e.g. images) within an individual RecordsWrite.

The cost of this architectural decision is incurred at query time. First, all records that match a given query are found. Then, an individual query is executed to fetch the data for each individual record. This means that a RecordsQuery matching 1000 records requires 1001 underlying sql queries.

repro:

/* eslint-disable @typescript-eslint/no-unused-vars */

import ms from 'ms';
import { createPool } from 'mysql2';
import { faker } from '@faker-js/faker';
import { Dwn, DataStream, DidKeyResolver, Jws, RecordsWrite, RecordsQuery } from '@tbd54566975/dwn-sdk-js';
import { MysqlDialect, MessageStoreSql, DataStoreSql, EventLogSql } from '@tbd54566975/dwn-sql-store';

const mysqlDialect = new MysqlDialect({
  pool: async () => createPool({
    host     : 'localhost',
    port     : 3306,
    database : 'dwn',
    user     : 'root',
    password : 'dwn',
    debug    : ['ComQueryPacket', 'RowDataPacket']
  })
});

const messageStore = new MessageStoreSql(mysqlDialect);
const dataStore = new DataStoreSql(mysqlDialect);
const eventLog = new EventLogSql(mysqlDialect);

const dwn = await Dwn.create({ messageStore, dataStore, eventLog });
const didKey = {
  'did'     : 'did:key:z6MkfC8pVLTxqGa7UXPs3JLxVSyUwoch3bALhzRSdo9xFYm6',
  'keyId'   : 'did:key:z6MkfC8pVLTxqGa7UXPs3JLxVSyUwoch3bALhzRSdo9xFYm6#z6MkfC8pVLTxqGa7UXPs3JLxVSyUwoch3bALhzRSdo9xFYm6',
  'keyPair' : {
    'publicJwk': {
      'alg' : 'EdDSA',
      'kty' : 'OKP',
      'crv' : 'Ed25519',
      'x'   : 'CvvN8BqLXSVkUf7Ek89Z5j1HxTgG9bqrQo3xQu2cQXk'
    },
    'privateJwk': {
      'alg' : 'EdDSA',
      'kty' : 'OKP',
      'crv' : 'Ed25519',
      'x'   : 'CvvN8BqLXSVkUf7Ek89Z5j1HxTgG9bqrQo3xQu2cQXk',
      'd'   : '237GyDsodQdIV0XLsHRRNa_bsN81ihkJ-Vf--16XjRA'
    }
  }
};

function generateRandomPost() {
  return {
    id       : faker.string.uuid(),
    userId   : faker.number.int(),
    content  : faker.lorem.sentences(),
    username : faker.internet.userName(),
    location : `${faker.location.city()}, ${faker.location.country()}`,
    tags     : faker.lorem.words().split(' ')
  };
}

async function generateDid() {
  return await DidKeyResolver.generate();
}

async function seedRecords(numRecords) {
  const encoder = new TextEncoder();
  const startTime = Date.now();

  for (let i = 0; i < numRecords; i += 1) {
  // create some data
    const post = generateRandomPost();
    const data = encoder.encode(JSON.stringify(post));

    // create a RecordsWrite message
    const recordsWrite = await RecordsWrite.create({
      data,
      dataFormat                  : 'application/json',
      published                   : true,
      schema                      : 'yeeter/post',
      authorizationSignatureInput : Jws.createSignatureInput(didKey as any)
    });

    // get the DWN to process the RecordsWrite
    const dataStream = DataStream.fromBytes(data);
    const result = await dwn.processMessage(didKey.did, recordsWrite.message, dataStream);

    if (result.status.code !== 202) {
      console.warn(`yikes result: ${JSON.stringify(result, null, 2)}`);
    }

    if (i % 1_000 === 0) {
      console.log(`(${i} / ${numRecords}): ${ms(Date.now() - startTime)}`);
    }
  }
}

async function queryRecords() {
  const query = await RecordsQuery.create({
    filter: {
      schema: 'yeeter/post'
    },
    authorizationSignatureInput: Jws.createSignatureInput(didKey as any)
  });

  const result = await dwn.processMessage(didKey.did, query.message);

  if (result.status.code !== 200) {
    throw new Error(`yikes result: ${JSON.stringify(result, null, 2)}`);
  }
}

await seedRecords(1000);
await queryRecords();

await dwn.close();

debug mode is enabled for the sql connection in the script above which prints out every query sent to the underlying DB.

thehenrytsai commented 1 year ago

This is done.