langchain-ai / langchain

🦜🔗 Build context-aware reasoning applications
https://python.langchain.com
MIT License
92.62k stars 14.83k forks source link

Vector store: MongoDB #26772

Open j4kaizen opened 8 hours ago

j4kaizen commented 8 hours ago

Checked other resources

Example Code

I am getting an error with MongoDB (I use MongoDB Compass). I also tried following the default example from the documentation guide, but I still get an error: https://js.langchain.com/docs/integrations/vectorstores/mongodb_atlas/ I can connect to the database and to the collection, create a new collection, and add data to it, but I cannot query it with similaritySearch. Code: import { OnApplicationBootstrap } from '@nestjs/common' import { MongoClient } from 'mongodb' import { OpenAIEmbeddings } from '@langchain/openai' import { MongoDBAtlasVectorSearch } from '@langchain/mongodb'

/**
 * NestJS service that wraps a LangChain `MongoDBAtlasVectorSearch` store:
 * it seeds documents into a collection and runs similarity queries against it.
 *
 * NOTE(review): `MongoDBAtlasVectorSearch` is understood to query through the
 * `$vectorSearch` aggregation stage, which is an Atlas feature. A plain
 * self-hosted MongoDB (e.g. a local server browsed with Compass) will accept
 * the inserts done by `seedData` but cannot answer `queryVector` — confirm the
 * deployment is Atlas.
 *
 * NOTE(review): the collection presumably needs an Atlas Vector Search index
 * named `vector_index` on the `embedding` field whose `numDimensions` matches
 * the embedding model (believed to be 3072 for `text-embedding-3-large`) —
 * verify the index definition, otherwise searches return nothing or fail.
 */
export class MongodbService implements OnApplicationBootstrap {
    /** Lazily created client, shared by every call so connections are not leaked. */
    private client: MongoClient | undefined

    /**
     * Reads a required environment variable.
     *
     * @param name - Name of the variable in `process.env`.
     * @returns The non-empty value.
     * @throws Error when the variable is unset or empty.
     */
    private static requireEnv(name: string): string {
        const value = process.env[name]
        if (!value) {
            throw new Error(`Missing required environment variable: ${name}`)
        }
        return value
    }

    /** Returns the shared `MongoClient`, creating it from `DATABASE_URL` on first use. */
    private getClient(): MongoClient {
        if (!this.client) {
            this.client = new MongoClient(MongodbService.requireEnv('DATABASE_URL'))
        }
        return this.client
    }

    /**
     * Builds a vector store bound to `collectionName` in the `DATABASE_NAME` database.
     *
     * @param collectionName - Collection that holds the text and embedding fields.
     * @returns A vector store using index `vector_index`, text field `text`
     *          and embedding field `embedding`.
     * @throws Error when `OPEN_AI_API_KEY` or `DATABASE_URL` is not configured.
     */
    async initializeMongodb(collectionName: string): Promise<MongoDBAtlasVectorSearch> {
        // The API key is stored base64-encoded in the environment; decode it before use.
        // Validate first: Buffer.from(undefined) would otherwise throw an opaque TypeError.
        const encodedApiKey = MongodbService.requireEnv('OPEN_AI_API_KEY')
        const embeddingModel = new OpenAIEmbeddings({
            modelName: process.env.EMBEDDING_MODEL_NAME || 'text-embedding-3-large',
            openAIApiKey: Buffer.from(encodedApiKey, 'base64').toString('utf-8'),
        })

        const collection = this.getClient().db(process.env.DATABASE_NAME).collection(collectionName)

        return new MongoDBAtlasVectorSearch(embeddingModel, {
            collection,
            indexName: 'vector_index',
            textKey: 'text',
            embeddingKey: 'embedding',
        })
    }

    /**
     * Embeds `documents` and inserts them into `collectionName`, tagging each
     * document's metadata with `docName`.
     *
     * @param collectionName - Target collection.
     * @param docName - Value stored under `metadata.docName` on every document.
     * @param documents - Documents to store; an empty list is a no-op.
     */
    async seedData(
        collectionName: string,
        docName: string,
        documents: Array<{ pageContent: string; metadata?: Record<string, unknown> }> = [],
    ): Promise<void> {
        // Nothing to embed or insert; skip the round trips to OpenAI and MongoDB.
        if (documents.length === 0) {
            return
        }

        const vectorStore = await this.initializeMongodb(collectionName)
        const data = documents.map((doc) => ({
            ...doc,
            metadata: {
                ...doc.metadata,
                docName,
            },
        }))
        await vectorStore.addDocuments(data)
    }

    /**
     * Returns the 2 documents in `collectionName` most similar to `question`.
     *
     * @param collectionName - Collection to search.
     * @param question - Natural-language query that is embedded and compared.
     */
    async queryVector(collectionName: string, question: string) {
        const vectorStore = await this.initializeMongodb(collectionName)
        return vectorStore.similaritySearch(question, 2)
    }

    /** Startup hook; currently a no-op (the manual smoke test below is disabled). */
    async onApplicationBootstrap() {
        // await this.initializeMongodb()
        // console.log('111')
        // await this.seedData()
        // console.log('2222')
        // console.log('as: ', await this.queryVector('', ''))
    }

    /**
     * Closes the shared MongoDB client, if one was created.
     *
     * NOTE(review): NestJS is expected to pick this hook up by method name on
     * shutdown; add `implements OnModuleDestroy` once that interface is imported.
     */
    async onModuleDestroy(): Promise<void> {
        const client = this.client
        this.client = undefined
        await client?.close()
    }
}

Error Message and Stack Trace (if applicable)

No response

Description

I have tried many times

System Info

Package.json file: { "name": "be-ai-chatbot", "version": "0.0.1", "description": "", "author": "", "private": true, "license": "UNLICENSED", "scripts": { "build": "nest build", "format": "prettier --write \"src//*.ts\" \"test/*/.ts\"", "start": "nest start", "start:dev": "nest start --watch", "start:debug": "nest start --debug --watch", "start:prod": "node dist/main", "lint": "eslint \"{src,apps,libs,test}//.ts\" --fix", "test": "jest", "test:watch": "jest --watch", "test:cov": "jest --coverage", "test:debug": "node --inspect-brk -r tsconfig-paths/register -r ts-node/register node_modules/.bin/jest --runInBand", "test:e2e": "jest --config ./test/jest-e2e.json", "migrate:dev": "prisma migrate dev", "generate:prisma": "prisma generate", "make-migration": "yarn prisma migrate dev --create-only --schema prisma/schema.prisma", "generate:view": "yarn tsc prisma/generate_view.ts", "migrate": "yarn prisma migrate deploy --schema prisma/schema.prisma" }, "dependencies": { "@langchain/community": "^0.3.0", "@langchain/core": "^0.2.32", "@langchain/mongodb": "^0.1.0", "@langchain/openai": "^0.3.0", "@nestjs/common": "^10.0.0", "@nestjs/config": "^3.2.3", "@nestjs/core": "^10.0.0", "@nestjs/event-emitter": "^2.0.4", "@nestjs/jwt": "^10.2.0", "@nestjs/passport": "^10.0.3", "@nestjs/platform-express": "^10.0.0", "@nestjs/platform-socket.io": "^10.4.1", "@nestjs/schedule": "^4.1.0", "@nestjs/swagger": "^7.4.0", "@nestjs/websockets": "^10.4.1", "@prisma/client": "5.18.0", "@zilliz/milvus2-sdk-node": "^2.4.8", "axios": "^1.7.4", "bcrypt": "^5.1.1", "bcryptjs": "^2.4.3", "class-transformer": "^0.5.1", "class-validator": "^0.14.1", "dotenv": "^16.4.5", "html-to-text": "^9.0.5", "jsdom": "^25.0.0", "langchain": "^0.2.19", "lodash": "^4.17.21", "moment": "^2.30.1", "mongodb": "^6.9.0", "officeparser": "^4.1.1", "passport": "^0.7.0", "passport-google-oauth20": "^2.0.0", "passport-jwt": "^4.0.1", "passport-oauth2": "^1.8.0", "passport-twitter": "^1.0.4", "puppeteer": "^23.3.0", 
"reflect-metadata": "^0.2.0", "rxjs": "^7.8.1", "socket.io": "^4.7.5" }, "devDependencies": { "@nestjs/cli": "^10.0.0", "@nestjs/schematics": "^10.0.0", "@nestjs/testing": "^10.0.0", "@types/bcrypt": "^5.0.2", "@types/express": "^4.17.17", "@types/jest": "^29.5.2", "@types/lodash": "^4.17.7", "@types/multer": "^1.4.12", "@types/node": "^20.3.1", "@types/passport-google-oauth20": "^2.0.16", "@types/supertest": "^6.0.0", "@typescript-eslint/eslint-plugin": "^7.0.0", "@typescript-eslint/parser": "^7.0.0", "eslint": "^8.42.0", "eslint-config-prettier": "^9.0.0", "eslint-plugin-prettier": "^5.0.0", "jest": "^29.5.0", "prettier": "^3.0.0", "prisma": "^5.18.0", "source-map-support": "^0.5.21", "supertest": "^7.0.0", "ts-jest": "^29.1.0", "ts-loader": "^9.4.3", "ts-node": "^10.9.1", "tsconfig-paths": "^4.2.0", "typescript": "^5.1.3" }, "jest": { "moduleFileExtensions": [ "js", "json", "ts" ], "rootDir": "src", "testRegex": ".\.spec\.ts$", "transform": { "^.+\.(t|j)s$": "ts-jest" }, "collectCoverageFrom": [ "*/.(t|j)s" ], "coverageDirectory": "../coverage", "testEnvironment": "node" } }

j4kaizen commented 3 hours ago

I also tried using similaritySearchVectorWithScore, but it still fails. Code: async queryVector(collectionName: string, question?: string) { const vectorStore = await this.initializeMongodb(collectionName) const embeddingModel = new OpenAIEmbeddings({ modelName: process.env.EMBEDDING_MODEL_NAME || 'text-embedding-3-large', // openAIApiKey: process.env.OPEN_AI_API_KEY, openAIApiKey: Buffer.from(process.env.OPEN_AI_API_KEY, 'base64').toString('utf-8'), }) const vector = await embeddingModel.embedQuery(question) console.log('vector: ', vector) const response = await vectorStore.similaritySearchVectorWithScore(vector, 3) console.log('response: ', response) return response }