langchain-ai / langchainjs

🦜🔗 Build context-aware reasoning applications 🦜🔗
https://js.langchain.com/docs/
MIT License
12.73k stars 2.2k forks source link

PineconeStore: Null pageContent: Cannot read properties of undefined (reading 'toString') #6919

Closed sam-trost closed 1 month ago

sam-trost commented 1 month ago

Checked other resources

Example Code

/* eslint-disable @typescript-eslint/no-explicit-any */
import { expect, jest, test } from "@jest/globals";
import { FakeEmbeddings } from "@langchain/core/utils/testing";
import { PineconeStore } from "../vectorstores.js";

// Reproduction for the reported crash: a Pinecone match that carries no usable
// text value should come back as a Document with an empty `pageContent` instead
// of throwing inside `_formatMatches`.
describe("PineconeStore with null pageContent", () => {
  it("should handle null pageContent correctly in _formatMatches", async () => {
    // Canned query response containing a single scored match whose metadata
    // holds `textKey: null` plus one unrelated key.
    // NOTE(review): the metadata key here is the literal string "textKey". No
    // `textKey` option is passed to the constructor below, so whether this key
    // is the one the store reads depends on PineconeStore's default, which is
    // not visible here. The reported error mentions `undefined`, not `null` —
    // confirm which case this actually exercises.
    const mockQueryResponse = {
      matches: [
        {
          id: "1",
          score: 0.9,
          metadata: { textKey: null, otherKey: "value" },
        },
      ],
    };

    // Minimal stand-in for the Pinecone index: `namespace()` returns an object
    // whose `query()` resolves to the canned response above.
    const client = {
      namespace: jest.fn<any>().mockReturnValue({
        query: jest.fn<any>().mockResolvedValue(mockQueryResponse),
      }),
    };
    const embeddings = new FakeEmbeddings();

    // The stub only exposes `namespace`, hence the `as any` cast.
    const store = new PineconeStore(embeddings, {
      pineconeIndex: client as any,
    });

    // The query vector and `k` are placeholders; the mocked `query()` returns
    // the same response regardless of its arguments. Per the stack trace in
    // this report, this call reaches `_formatMatches`.
    const results = await store.similaritySearchVectorWithScore([], 0);
    // Expected behavior once fixed: missing text becomes an empty string.
    expect(results[0][0].pageContent).toEqual("");
  });
});

Error Message and Stack Trace (if applicable)

TypeError: Cannot read properties of undefined (reading 'toString')

  426 |           new Document({
  427 |             id,
> 428 |             pageContent: pageContent.toString(),
      |                                      ^
  429 |             metadata,
  430 |           }),
  431 |           score,

  at PineconeStore.toString [as _formatMatches] (src/vectorstores.ts:428:38)
  at PineconeStore._formatMatches [as similaritySearchVectorWithScore] (src/vectorstores.ts:453:26)
  at Object.<anonymous> (src/tests/vectorstores.test.ts:145:21)

Description

The current version of the pinecone vector store assumes a property named pageContent will be set on each record. This was not previously required when ingesting content.

System Info

└─ langchain@workspace:langchain ├─ Instances: 1 ├─ Version: 0.3.2 │ └─ Dependencies ├─ @faker-js/faker@npm:^7.6.0 → npm:7.6.0 ├─ @jest/globals@npm:^29.5.0 → npm:29.5.0 ├─ @langchain/core@workspace: → workspace:langchain-core ├─ @langchain/scripts@>=0.1.0 <0.2.0 → workspace:libs/langchain-scripts ├─ @tsconfig/recommended@npm:^1.0.2 → npm:1.0.2 ├─ @types/handlebars@npm:^4.1.0 → npm:4.1.0 ├─ @types/html-to-text@npm:^9 → npm:9.0.0 ├─ @types/js-yaml@npm:^4 → npm:4.0.5 ├─ @types/jsdom@npm:^21.1.1 → npm:21.1.1 ├─ @types/uuid@npm:^9 → npm:9.0.1 ├─ @types/ws@npm:^8 → npm:8.5.8 ├─ axios@npm:^0.26.0 → npm:0.26.1 ├─ cheerio@npm:1.0.0-rc.12 → npm:1.0.0-rc.12 ├─ dotenv@npm:^16.0.3 → npm:16.0.3 ├─ dpdm@patch:dpdm@npm%3A3.12.0#./.yarn/patches/dpdm-npm-3.12.0-0dfdd8e3b8.patch::locator=langchainjs%40workspace%3A. → patch:dpdm@npm%3A3.12.0#./.yarn/patches/dpdm-npm-3.12.0-0dfdd8e3b8.patch::version=3.12.0&hash=2406a8&locator=langchainjs%40workspace%3A.
├─ eslint-plugin-no-instanceof@npm:^1.0.1 → npm:1.0.1 ├─ eslint@npm:^8.33.0 → npm:8.35.0 ├─ handlebars@npm:^4.7.8 → npm:4.7.8 ├─ jest-environment-node@npm:^29.6.4 → npm:29.6.4 ├─ js-tiktoken@npm:^1.0.12 → npm:1.0.12 ├─ js-yaml@npm:^4.1.0 → npm:4.1.0 ├─ jsonpointer@npm:^5.0.1 → npm:5.0.1 ├─ openai@npm:^4.41.1 → npm:4.42.0 ├─ openapi-types@npm:^12.1.3 → npm:12.1.3 ├─ p-retry@npm:4 → npm:4.6.2 ├─ peggy@npm:^3.0.2 → npm:3.0.2 ├─ prettier@npm:^2.8.3 → npm:2.8.4 ├─ release-it@npm:^17.6.0 → npm:17.6.0 ├─ rimraf@npm:^5.0.1 → npm:5.0.1 ├─ rollup@npm:^3.19.1 → npm:3.19.1 ├─ typescript@patch:typescript@~5.1.6#~builtin<compat/typescript> → patch:typescript@npm%3A5.1.6#~builtin<compat/typescript>::version=5.1.6&hash=77c9e2 ├─ uuid@npm:^10.0.0 → npm:10.0.0 ├─ wikipedia@npm:^2.1.2 → npm:2.1.2 ├─ yaml@npm:^2.2.1 → npm:2.2.1 ├─ zod@npm:3.23.8 → npm:3.23.8 ├─ @langchain/anthropic@workspace: → workspace:libs/langchain-anthropic [773c0] ├─ @langchain/aws@workspace: → workspace:libs/langchain-aws [773c0] ├─ @langchain/cohere@workspace: → workspace:libs/langchain-cohere [773c0] ├─ @langchain/google-genai@workspace: → workspace:libs/langchain-google-genai [773c0] ├─ @langchain/google-vertexai@workspace: → workspace:libs/langchain-google-vertexai [773c0] ├─ @langchain/groq@workspace: → workspace:libs/langchain-groq [773c0] ├─ @langchain/mistralai@workspace: → workspace:libs/langchain-mistralai [773c0] ├─ @langchain/ollama@workspace: → workspace:libs/langchain-ollama [773c0] ├─ @langchain/openai@workspace: → workspace:libs/langchain-openai [773c0] ├─ @langchain/textsplitters@workspace:* → workspace:libs/langchain-textsplitters [773c0] ├─ @swc/core@npm:^1.3.90 → npm:1.3.90 [6dbfd] ├─ @swc/jest@npm:^0.2.29 → npm:0.2.29 [6dbfd] ├─ @typescript-eslint/eslint-plugin@npm:^5.58.0 → npm:5.58.0 [6d556]
├─ @typescript-eslint/parser@npm:^5.58.0 → npm:5.58.0 [6d556] ├─ eslint-config-airbnb-base@npm:^15.0.0 → npm:15.0.0 [6d556] ├─ eslint-config-prettier@npm:^8.6.0 → npm:8.7.0 [6dbfd] ├─ eslint-plugin-import@npm:^2.27.5 → npm:2.27.5 [6d556] ├─ eslint-plugin-jest@npm:^27.6.0 → npm:27.6.0 [6d556] ├─ eslint-plugin-prettier@npm:^4.2.1 → npm:4.2.1 [6dbfd] ├─ jest@npm:^29.5.0 → npm:29.7.0 [6dbfd] ├─ langsmith@npm:^0.1.56 → npm:0.1.56 [0b9fc] ├─ ts-jest@npm:^29.1.0 → npm:29.1.0 [221f6] ├─ typeorm@npm:^0.3.20 → npm:0.3.20 [3a972] └─ zod-to-json-schema@npm:^3.22.3 → npm:3.22.3 [221f6]

dosubot[bot] commented 1 month ago

Hey there, @sam-trost! I'm here to help you with any bugs, questions, or contributions. Let's work on solving the issue together.

I found a similar discussion regarding the "TypeError: Cannot read properties of undefined (reading 'toString')" error in the PineconeStore when handling null pageContent. The issue is that the current implementation assumes pageContent is always defined, which can lead to this error if it is null.

To resolve this, you can modify the code in the _formatMatches method to handle null pageContent by providing a default value. Here's a possible solution:

/**
 * Converts raw Pinecone query matches into `[Document, score]` tuples.
 *
 * For each match, the value stored under `this.textKey` in the record's
 * metadata becomes the document's `pageContent`; every other metadata entry
 * is passed through as the document's `metadata`.
 *
 * @param matches - Scored records returned by a Pinecone query. Defaults to
 *   an empty array.
 * @returns One `[Document, score]` tuple per match that has a truthy score,
 *   in the same order as `matches`.
 */
private _formatMatches(
  matches: ScoredPineconeRecord<RecordMetadata>[] = []
): [Document, number][] {
  const documentsWithScores: [Document, number][] = [];

  for (const record of matches) {
    // Split the text field out of the metadata. The destructuring default only
    // applies when `metadata` itself is undefined; a present metadata object
    // that lacks the text key (or holds null there) still yields a nullish
    // `pageContent`, which is handled below.
    const {
      id,
      score,
      metadata: { [this.textKey]: pageContent, ...metadata } = {
        [this.textKey]: "",
      },
    } = record;

    // NOTE(review): this truthiness check skips matches whose score is
    // undefined and also those whose score is exactly 0 — confirm the latter
    // is intended.
    if (score) {
      documentsWithScores.push([
        new Document({
          id,
          // Fall back to "" only for null/undefined. A nullish check (rather
          // than a truthiness check) keeps falsy-but-valid values such as 0 or
          // false, which are stringified instead of being blanked out.
          pageContent: pageContent?.toString() ?? "",
          metadata,
        }),
        score,
      ]);
    }
  }

  return documentsWithScores;
}

This modification ensures that if pageContent is null or undefined (for example, when the record's metadata has no entry under the configured text key), an empty string is used instead, preventing the toString method from being called on a missing value [1].

To continue talking to Dosu, mention @dosu.