Closed uogbuji closed 5 months ago
Notes from testing:
Grab a history key via: select distinct history_key from oorichat_v0_7_2_chatlog;
Query to isolate the most recent 2 messages per a key. The logic will select for delete all other messages with that key.
SELECT ts
FROM oorichat_v0_7_5_chatlog
WHERE
history_key = 'af251429-ca09-4340-a477-cc82e7820ed8'
ORDER BY
ts DESC
LIMIT 2;
Naive & probably too slow approach:
DELETE FROM oorichat_v0_7_5_chatlog
WHERE
history_key = 'af251429-ca09-4340-a477-cc82e7820ed8'
AND
ts NOT IN (
SELECT ts
FROM oorichat_v0_7_2_chatlog
WHERE
history_key = 'af251429-ca09-4340-a477-cc82e7820ed8'
ORDER BY
ts DESC
LIMIT 10
);
Experimenting with the ctid
approach, which shoul dbe faster.
SELECT ctid, ts, content FROM oorichat_v0_7_5_chatlog WHERE history_key = '39d56d4a-a3d6-4cce-886f-9bece4ecfd0a' ORDER BY ts DESC LIMIT 2;
Attempt at the ctid
approach:
DELETE FROM oorichat_v0_7_5_chatlog
WHERE history_key = '39d56d4a-a3d6-4cce-886f-9bece4ecfd0a'
AND ctid NOT IN (
SELECT t_inner.ctid FROM oorichat_v0_7_5_chatlog t_inner
WHERE t_inner.history_key = '39d56d4a-a3d6-4cce-886f-9bece4ecfd0a'
ORDER BY t_inner.ts
DESC LIMIT 2);
Adminer was bugging out, so wrote some Python code to work some things out:
import os
import uuid
from ogbujipt.embedding.pgvector import DocDB, MessageDB
from sentence_transformers import SentenceTransformer
HOST = os.environ.get('PG_HOST', 'localhost')
DB_NAME = os.environ.get('PG_DATABASE', 'mock_db')
USER = os.environ.get('PG_USER', 'mock_user')
PASSWORD = os.environ.get('PG_PASSWORD', 'mock_password')
PORT = os.environ.get('PG_PORT', 5432)
e_model = SentenceTransformer('all-MiniLM-L6-v2') # Load the embedding model
MESSAGES = [ # Test data: history_key, role, content, timestamp, metadata
('00000000-0000-0000-0000-000000000000', 'ama', 'Hello Eme!', '2021-10-01 00:00:00+00:00', {'1': 'a'}),
('00000000-0000-0000-0000-000000000001', 'ugo', 'Greetings Ego', '2021-10-01 00:00:01+00:00', {'2': 'b'}),
('00000000-0000-0000-0000-000000000000', 'eme', 'How you dey, Ama!', '2021-10-01 00:00:02+00:00', {'3': 'c'}),
('00000000-0000-0000-0000-000000000001', 'ego', 'What a pleasant surprise', '2021-10-01 00:00:03+00:00', {'4': 'd'}), # noqa: E501
('00000000-0000-0000-0000-000000000000', 'ama', 'Not bad, not bad at all', '2021-10-01 00:00:04+00:00', {'5': 'e'}), # noqa: E501
('00000000-0000-0000-0000-000000000001', 'ugo', 'Glad you think so. I was planning to drop by later', '2021-10-01 00:00:05+00:00', {'6': 'f'}), # noqa: E501
('00000000-0000-0000-0000-000000000000', 'eme', 'Very good. Say hello to your family for me.', '2021-10-01 00:00:06+00:00', {'7': 'g'}), # noqa: E501
('00000000-0000-0000-0000-000000000001', 'ugo', 'An even better surprise, I hope!', '2021-10-01 00:00:07+00:00', {'8': 'h'}) # noqa: E501
]
return [(history_key, role, content, datetime.fromisoformat(timestamp), metadata)
for history_key, role, content, timestamp, metadata in messages
]
DELETE_OLDEST_MESSAGES = '''-- Delete oldest messages for given history key, such that only the newest N messages remain
DELETE FROM {table_name}
WHERE
history_key = {hk}
AND
ts NOT IN (
SELECT ts
FROM {table_name}
WHERE
history_key = {hk}
ORDER BY
ts DESC
LIMIT {window}
);
'''
async def trial1():
mdb = await MessageDB.from_conn_params(
embedding_model=e_model,
table_name='baseball',
user=USER,
password=PASSWORD,
db_name=DB_NAME,
host=HOST,
port=int(PORT),
window=2
)
for index, (row) in enumerate(MESSAGES):
await mdb.insert(*row)
hk = MESSAGES[0][0]
async with mdb.pool.acquire() as conn:
async with conn.transaction():
await conn.execute(
DELETE_OLDEST_MESSAGES.format(table_name=mdb.table_name, hk=hk, window=2))
Was a bit easier in the end to use sync psycopg2 rather than async for quick experiments.
# pip install psycopg2-binary
# https://www.psycopg.org/docs/usage.html
import psycopg2
conn = psycopg2.connect("dbname=PGv user=oori password=example host=localhost")
cur = conn.cursor()
cur.execute('SELECT DISTINCT history_key from oorichat_v0_7_5_chatlog;')
result = cur.fetchall()
cur.close()
cur = conn.cursor()
cur.execute("SELECT count(*) from oorichat_v0_7_5_chatlog WHERE history_key = '186f516b-9f47-4a24-bb38-f9d47a700f4d';")
result = cur.fetchall()
cur.close()
This is ready to review, @choccccy