langchain-ai / langchain-postgres

LangChain abstractions backed by Postgres Backend
MIT License
133 stars 48 forks source link

Enable Deletion of Vectors by Metadata in PGVector #128

Open shamspias opened 1 month ago

shamspias commented 1 month ago

This pull request adds support for deleting vectors based on metadata filters in the PGVector class. It modifies the delete and adelete methods to accept an optional filter parameter, allowing users to specify conditions on metadata fields for deletion.

Changes

Code Changes

delete Method

def delete(
    self,
    ids: Optional[List[str]] = None,
    *,
    filter: Optional[Dict[str, Any]] = None,
    collection_only: bool = False,
    **kwargs: Any,
) -> None:
    """Delete stored embeddings selected by id and/or metadata filter.

    Args:
        ids: Ids of the rows to delete. ``None`` or an empty list
            contributes no id condition.
        filter: Metadata filter handed to ``_create_filter_clause``.
            ``None`` or an empty dict contributes no filter condition.
        collection_only: When true, additionally restrict the statement to
            rows whose ``collection_id`` matches this store's collection.
            When false, no collection condition is added.
        **kwargs: Accepted for signature compatibility; not used here.

    If neither ``ids`` nor ``filter`` yields a condition, a warning is
    logged and nothing is deleted.
    """
    # Guard on truthiness, not on ``is None``: the conditions below are
    # also added on truthiness, so an empty list/dict would otherwise slip
    # through and execute a DELETE with no id/filter restriction at all.
    if not ids and not filter:
        self.logger.warning("No ids or filter provided for deletion.")
        return

    with self._make_sync_session() as session:
        # Module-level ``delete`` construct (presumably sqlalchemy.delete),
        # not this method.
        stmt = delete(self.EmbeddingStore)
        if collection_only:
            collection = self.get_collection(session)
            if not collection:
                self.logger.warning("Collection not found.")
                return
            stmt = stmt.where(self.EmbeddingStore.collection_id == collection.uuid)

        # Each condition is attached with its own .where() call, so a row
        # must satisfy all of the conditions that were supplied.
        if ids:
            stmt = stmt.where(self.EmbeddingStore.id.in_(ids))

        if filter:
            filter_clause = self._create_filter_clause(filter)
            stmt = stmt.where(filter_clause)

        session.execute(stmt)
        session.commit()

adelete Method

async def adelete(
    self,
    ids: Optional[List[str]] = None,
    *,
    filter: Optional[Dict[str, Any]] = None,
    collection_only: bool = False,
    **kwargs: Any,
) -> None:
    """Asynchronously delete embeddings selected by id and/or metadata filter.

    Args:
        ids: Ids of the rows to delete. ``None`` or an empty list
            contributes no id condition.
        filter: Metadata filter handed to ``_create_filter_clause``.
            ``None`` or an empty dict contributes no filter condition.
        collection_only: When true, additionally restrict the statement to
            rows whose ``collection_id`` matches this store's collection.
            When false, no collection condition is added.
        **kwargs: Accepted for signature compatibility; not used here.

    If neither ``ids`` nor ``filter`` yields a condition, a warning is
    logged and nothing is deleted.
    """
    # Guard on truthiness, not on ``is None``: the conditions below are
    # also added on truthiness, so an empty list/dict would otherwise slip
    # through and execute a DELETE with no id/filter restriction at all.
    if not ids and not filter:
        self.logger.warning("No ids or filter provided for deletion.")
        return

    await self.__apost_init__()
    async with self._make_async_session() as session:
        # Module-level ``delete`` construct (presumably sqlalchemy.delete).
        stmt = delete(self.EmbeddingStore)
        if collection_only:
            collection = await self.aget_collection(session)
            if not collection:
                self.logger.warning("Collection not found.")
                return
            stmt = stmt.where(self.EmbeddingStore.collection_id == collection.uuid)

        # Each condition is attached with its own .where() call, so a row
        # must satisfy all of the conditions that were supplied.
        if ids:
            stmt = stmt.where(self.EmbeddingStore.id.in_(ids))

        if filter:
            filter_clause = self._create_filter_clause(filter)
            stmt = stmt.where(filter_clause)

        await session.execute(stmt)
        await session.commit()

Usage Example

# Delete vectors where the 'category' metadata field equals 'news'.
# `filter` is keyword-only. `collection_only` is left at its default (False),
# so no collection_id condition is added to the DELETE statement; pass
# collection_only=True to restrict the deletion to this store's collection.
vector_store.delete(filter={"category": {"$eq": "news"}})

Testing

Documentation

Reference Issues: