Closed makkarss929 closed 6 months ago
import sentence_transformers
from superduperdb import Model, ObjectModel, vector
Use both `datatype` and `encoder`:
encoder = datatype = vector(shape=(1024,))
Both should be the same.
model = Model(
identifier='embedding',
object=sentence_transformers.SentenceTransformer('BAAI/bge-large-en-v1.5'),
encoder=vector(shape=(1024,)),
predict_method='encode', # Specify the prediction method
postprocess=lambda x: x.tolist(), # Define postprocessing function
batch_predict=True, # Generate predictions for a set of observations all at once
datatype=vector(shape=(1024,))
)
import sentence_transformers
from superduperdb import Model, ObjectModel, vector
Use both `datatype` and `encoder`:
encoder = datatype = vector(shape=(1024,))
Both should be the same.
model = Model(
identifier='embedding',
object=sentence_transformers.SentenceTransformer('BAAI/bge-large-en-v1.5'),
encoder=vector(shape=(1024,)),
predict_method='encode', # Specify the prediction method
postprocess=lambda x: x.tolist(), # Define postprocessing function
batch_predict=True, # Generate predictions for a set of observations all at once
datatype=vector(shape=(1024,))
)
Contact Details [Optional]
makkarss929@gmail.com
System Information
What happened?
ValueError Traceback (most recent call last) Cell In[10], line 5 2 from superduperdb import VectorIndex 4 # Add a VectorIndex to the SuperDuperDB database with the specified identifier and indexing listener ----> 5 _ = db.add( 6 VectorIndex( 7 identifier='my-index', # Unique identifier for the VectorIndex 8 indexing_listener=listener # Listener to be used for indexing documents 9 ) 10 )
File ~/Desktop/superduperDB/superduperdb/superduperdb/base/datalayer.py:478, in Datalayer.add(self, object, dependencies) 470 return type(object)( 471 self._add( 472 object=component, (...) 475 for component in object 476 ) 477 elif isinstance(object, Component): --> 478 return self._add(object=object, dependencies=dependencies), object 479 else: 480 return self._add(superduper(object)), object
File ~/Desktop/superduperDB/superduperdb/superduperdb/base/datalayer.py:861, in Datalayer._add(self, object, dependencies, parent) 859 object.post_create(self) 860 self._add_component_to_cache(object) --> 861 these_jobs = object.schedule_jobs(self, dependencies=dependencies) 862 jobs.extend(these_jobs) 863 return jobs
File ~/Desktop/superduperDB/superduperdb/superduperdb/components/vector_index.py:198, in VectorIndex.schedule_jobs(self, db, dependencies) 188 if not db.cdc.running: 189 job = FunctionJob( 190 callable=copy_vectors, 191 args=[], (...) 196 }, 197 ) --> 198 job(db, dependencies=dependencies) 199 return [job] 200 return []
File ~/Desktop/superduperDB/superduperdb/superduperdb/jobs/job.py:146, in FunctionJob.__call__(self, db, dependencies) 143 self.db = db 144 db.metadata.create_job(self.dict()) --> 146 self.submit(dependencies=dependencies) 147 return self
File ~/Desktop/superduperDB/superduperdb/superduperdb/jobs/job.py:124, in FunctionJob.submit(self, dependencies) 118 def submit(self, dependencies=()): 119 """ 120 Submit job for execution 121 122 :param dependencies: list of dependencies 123 """ --> 124 self.future = self.db.compute.submit( 125 callable_job, 126 cfg=s.CFG.dict(), 127 function_to_call=self.callable, 128 job_id=self.identifier, 129 args=self.args, 130 kwargs=self.kwargs, 131 dependencies=dependencies, 132 db=self.db if self.db.compute.type == 'local' else None, 133 ) 135 return
File ~/Desktop/superduperDB/superduperdb/superduperdb/backends/local/compute.py:35, in LocalComputeBackend.submit(self, function, compute_kwargs, *args, **kwargs) 29 """ 30 Submits a function for local execution. 31 32 :param function: The function to be executed. 33 """ 34 logging.info(f"Submitting job. function:{function}") ---> 35 future = function(*args, **kwargs) 37 future_key = str(uuid.uuid4()) 38 self.__outputs[future_key] = future
File ~/Desktop/superduperDB/superduperdb/superduperdb/jobs/tasks.py:107, in callable_job(cfg, function_to_call, args, kwargs, job_id, dependencies, db) 105 db.metadata.update_job(job_id, 'status', 'failed') 106 db.metadata.update_job(job_id, 'msg', tb) --> 107 raise e 108 else: 109 db.metadata.update_job(job_id, 'status', 'success')
File ~/Desktop/superduperDB/superduperdb/superduperdb/jobs/tasks.py:102, in callable_job(cfg, function_to_call, args, kwargs, job_id, dependencies, db) 100 output = None 101 try: --> 102 output = function_to_call(*args, db=db, **kwargs) 103 except Exception as e: 104 tb = traceback.format_exc()
File ~/Desktop/superduperDB/superduperdb/superduperdb/vector_search/update_tasks.py:85, in copy_vectors(vector_index, query, ids, db) 82 r['vector'] = r['vector'].numpy() 84 if vectors: ---> 85 db.fast_vector_searchers[vi.identifier].add( 86 [VectorItem(**vector) for vector in vectors] 87 )
File ~/Desktop/superduperDB/superduperdb/superduperdb/base/datalayer.py:1067, in LoadDict.__missing__(self, key) 1065 msg = f'callable is None for {key}' 1066 assert self.callable is not None, msg -> 1067 value = self[key] = self.callable(key) 1068 return value
File ~/Desktop/superduperDB/superduperdb/superduperdb/base/datalayer.py:132, in Datalayer.initialize_vector_searcher(self, identifier, searcher_type, backfill) 129 clt = vi.indexing_listener.select.table_or_collection 131 vector_search_cls = vector_searcher_implementations[searcher_type] --> 132 vector_comparison = vector_search_cls.from_component(vi) 134 assert isinstance(clt.identifier, str), 'clt.identifier must be a string' 136 self.backfill_vector_search(vi, vector_comparison)
File ~/Desktop/superduperDB/superduperdb/superduperdb/vector_search/base.py:19, in BaseVectorSearcher.from_component(cls, vi) 16 @classmethod 17 def from_component(cls, vi: 'VectorIndex'): 18 return cls( ---> 19 identifier=vi.identifier, dimensions=vi.dimensions, measure=vi.measure 20 )
File ~/Desktop/superduperDB/superduperdb/superduperdb/components/vector_index.py:173, in VectorIndex.dimensions(self) 171 if shape := getattr(self.indexing_listener.model.datatype, 'shape', None): 172 return shape[-1] --> 173 raise ValueError('Couldn\'t get shape of model outputs from model encoder')
ValueError: Couldn't get shape of model outputs from model encoder
Steps to reproduce
...
Relevant log output
No response