cheminfo / sdf-parser

Parse an SDF file and convert it to an array of objects
http://cheminfo.github.io/sdf-parser/
MIT License
11 stars 7 forks source link

Improve parser for iterator #15

Open lpatiny opened 3 weeks ago

lpatiny commented 3 weeks ago

The parser still needs to handle both \n and \r\n line endings.

/**
 * Asynchronously iterate over the molecule records of an SDF text stream.
 *
 * The stream is piped through the transform returned by
 * `createMoleculeStream()` and every chunk that transform emits is yielded
 * unchanged.
 *
 * @param {ReadableStream<string>} readStream - Stream of SDF text; it must
 *   expose `pipeThrough()`.
 * @yields {string} One chunk per molecule record.
 */
export async function* getSDFIterator(readStream) {
  const moleculeStream = readStream.pipeThrough(createMoleculeStream());

  // No logging here: the consumer decides what to do with each record.
  for await (const molecule of moleculeStream) {
    yield molecule;
  }
}

/**
 * Create a TransformStream that splits incoming SDF text into one chunk per
 * molecule record.
 *
 * Records are separated by a `$$$$` marker followed by a line break; both
 * `\n` and `\r\n` are accepted. The marker and its line break are not part of
 * the emitted chunks. Text after the last delimiter is kept in an internal
 * buffer until more input arrives, so a delimiter split across two incoming
 * chunks is still recognised. Any non-empty remainder is emitted when the
 * stream is closed.
 *
 * @returns {TransformStream<string, string>} Transform from raw text chunks to
 *   molecule records.
 */
function createMoleculeStream() {
  // `$$$$` followed by LF or CRLF. A trailing `$$$$` or `$$$$\r` without the
  // final `\n` does not match, so it stays buffered until the next chunk.
  const recordDelimiter = /\$\$\$\$\r?\n/g;
  let buffer = "";
  return new TransformStream({
    transform(chunk, controller) {
      buffer += chunk;
      let begin = 0;
      for (const match of buffer.matchAll(recordDelimiter)) {
        controller.enqueue(buffer.slice(begin, match.index));
        begin = match.index + match[0].length;
      }
      // Keep the incomplete tail for the next chunk (or for flush).
      buffer = buffer.slice(begin);
    },
    flush(controller) {
      if (buffer) {
        controller.enqueue(buffer);
      }
    },
  });
}
targos commented 2 weeks ago
/**
 * TransformStream that splits SDF text into one chunk per molecule record.
 *
 * Records are separated by a `$$$$` marker followed by a line break; both
 * `\n` and `\r\n` are accepted. The marker and its line break are not part of
 * the emitted chunks. Text after the last delimiter is buffered until more
 * input arrives, and any non-empty remainder is emitted when the stream is
 * closed.
 */
class MoleculeStream extends TransformStream {
  // `$$$$` followed by LF or CRLF. `matchAll` works on a clone of the regex,
  // so sharing one global regex across instances carries no `lastIndex` state.
  static #recordDelimiter = /\$\$\$\$\r?\n/g;

  // Text received so far that does not yet end with a complete delimiter.
  #buffer = '';

  constructor() {
    super({
      transform: (chunk, controller) => {
        this.#buffer += chunk;
        let begin = 0;
        for (const match of this.#buffer.matchAll(
          MoleculeStream.#recordDelimiter,
        )) {
          controller.enqueue(this.#buffer.slice(begin, match.index));
          begin = match.index + match[0].length;
        }
        // Keep the incomplete tail for the next chunk (or for flush).
        this.#buffer = this.#buffer.slice(begin);
      },
      flush: (controller) => {
        if (this.#buffer) {
          controller.enqueue(this.#buffer);
        }
      },
    });
  }
}

// Benchmark: stream a gzip-compressed SDF file from a local server and count
// the molecule records it contains.
const response = await fetch('http://localhost:8080');
// fetch() resolves even for HTTP error statuses; fail early instead of
// feeding an error page into the gzip decompressor.
if (!response.ok) {
  throw new Error(
    `Request failed with status ${response.status} ${response.statusText}`,
  );
}
const byteStream = response.body;

// Bytes -> decompressed bytes.
const decompressionStream = byteStream.pipeThrough(
  new DecompressionStream('gzip'),
);

// Decompressed bytes -> text.
const textStream = decompressionStream.pipeThrough(new TextDecoderStream());

// Text -> one chunk per molecule record.
const moleculeStream = textStream.pipeThrough(new MoleculeStream());

console.time('Process time');
let count = 0;
// Only the number of records matters here, so the chunk itself is unused.
for await (const molecule of moleculeStream) {
  count++;
}
console.log('Processed %i molecules', count);
console.timeEnd('Process time');