chrthomsen / pygrametl

Official repository for pygrametl - ETL programming in Python
http://pygrametl.org
BSD 2-Clause "Simplified" License
289 stars 41 forks source link

Cache optimization use sqlite3 #36

Closed qianxuanyon closed 3 years ago

qianxuanyon commented 3 years ago

SQLite supports both an in-memory mode and a file-based mode. When the cache is enabled, we can write the table to SQLite so that complete SQL queries can be run against the cached data.

This is my own implementation. Another thing to pay attention to is the conversion of data types.

class Dimension(table_basic):
    """Dimension table with an optional SQLite-backed lookup cache.

    When ``cache`` is true, every row of the source table is copied into a
    separate ``Cache_database`` at construction time and ``lookup`` consults
    that copy before falling back to the real connection.

    NOTE(review): ``table_basic``, ``Cache_database``, ``source_sql``,
    ``get_table_max`` and ``Tint`` are defined outside this snippet; the
    comments below describe only how they are used here.
    """

    def __init__(self, conn, table_name, pks=None, atts=None,
                 lookup_atts=None, cache=False, next_pk=False, nexts=None,
                 batch_size=1, default_values=None, **kwargs):
        """Set up the dimension and, if requested, populate its cache.

        Args:
            conn: Connection to the database holding the dimension table.
            table_name: Name of the dimension table.
            pks: Primary-key column names (defaults to an empty list).
            atts: Attribute column names (defaults to an empty list).
            lookup_atts: Columns used for lookups (defaults to an empty list).
            cache: If true, mirror the table into a cache database.
            next_pk: Falsy, or an indexable whose first element names the
                column used to compute ``next_id`` after a failed lookup.
            nexts: Column names whose next value is tracked in ``self.nexts``
                (defaults to an empty list).
            batch_size: Forwarded to ``table_basic``.
            default_values: Forwarded to ``table_basic`` (defaults to an
                empty dict).
            **kwargs: Forwarded to ``table_basic``.
        """
        # Fresh containers per call: mutable defaults in the signature would
        # be shared between every instance created without these arguments.
        pks = [] if pks is None else pks
        atts = [] if atts is None else atts
        lookup_atts = [] if lookup_atts is None else lookup_atts
        nexts = [] if nexts is None else nexts
        default_values = {} if default_values is None else default_values

        super().__init__(conn, table_name, columns=atts, pks=pks,
                         lookup_cols=lookup_atts, batch_size=batch_size,
                         default_values=default_values, **kwargs)
        self.cache = cache
        self._next_pk = next_pk
        self.nexts = {}
        self._nexts = nexts
        self.next_id = 0
        if cache:
            cache_conn = Cache_database(table_name=table_name, columns=atts)
            self.cache_conn = cache_conn
            self.cache_tag = table_basic(cache_conn, table_name=table_name,
                                         columns=atts, pks=pks,
                                         lookup_cols=lookup_atts)
            # Fixed: the original formatted the undefined name ``table``,
            # which raised NameError as soon as caching was enabled.
            src = source_sql(conn, 'select * from ({})'.format(table_name))
            for row in src:
                self.cache_tag.insert(row)
            self.cache_tag.endload()

    def lookup(self, row, rename=None):
        """Look up ``row``, trying the cache first when it is enabled.

        Args:
            row: The row to look up.
            rename: Optional mapping forwarded to the underlying lookups
                (defaults to an empty dict).

        Returns:
            The cached result when the cache produces a truthy hit;
            otherwise whatever ``table_basic.lookup`` returns.

        Side effects (only when the cache does not return a hit):
            ``self.next_id`` is refreshed if the lookup failed and
            ``next_pk`` was configured, and ``self.nexts`` is refreshed for
            every column listed in ``nexts``.
        """
        rename = {} if rename is None else rename

        # Fixed: ``res`` was only bound when the cache was enabled, so a
        # non-cached lookup raised UnboundLocalError.
        res = None
        if self.cache:
            res = self.cache_tag.lookup(row, rename)
        if res:
            return res
        res = super().lookup(row, rename)

        if not res and self._next_pk:
            # Row is unknown: record the next free key value.
            self.next_id = get_table_max(
                self.conn, self.table_name, self._next_pk[0]) + 1

        # NOTE(review): this loop runs on every cache miss, matching the
        # indentation of the original snippet; confirm it was not meant to
        # be nested under the ``if`` above.
        for n in self._nexts:
            next_id = get_table_max(self.conn, self.table_name, n)
            # Only advance the counter when Tint() yields an int.
            if isinstance(Tint(next_id), int):
                self.nexts[n] = next_id + 1
        return res