Open eunja511005 opened 7 months ago
!pip install langchain
!pip install chromadb
!pip install sentence-transformers
import os
from google.colab import userdata
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings, HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
# Read the Hugging Face access token from the Colab secret store and export
# it through the environment; previously the token was fetched but never
# used. NOTE(review): the HF_TOKEN variable is the one huggingface_hub is
# documented to read — confirm against the installed version.
hf_token = userdata.get('HF_TOKEN')
if hf_token:
    os.environ["HF_TOKEN"] = hf_token

# Download the source text once (the repository/file name indicates this is
# the Constitution of the Republic of Korea). The original script repeated
# the load/split step twice, fetching the same URL a second time for no gain.
loader = WebBaseLoader(
    web_paths=("https://raw.githubusercontent.com/puzzlet/constitution-kr/master/%EB%8C%80%ED%95%9C%EB%AF%BC%EA%B5%AD%20%ED%97%8C%EB%B2%95.txt",),
)
raw_documents = loader.load()

# Split the document into non-overlapping chunks with a target size of 1000
# characters; these chunks are what gets embedded and indexed later.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
splits = text_splitter.split_documents(raw_documents)

# Report how many chunks were produced (a bare expression shows nothing
# unless it is the last line of a notebook cell).
print(len(splits))
# Embed every chunk and index the vectors in an in-memory Chroma store.
# NOTE(review): "google/gemma-1.1-2b-it" looks like an instruction-tuned
# generative model rather than a sentence-embedding model, so retrieval
# quality may be poor — consider HuggingFaceEmbeddings() with its default
# model (the alternative the author had left commented out). TODO confirm.
embeddings = HuggingFaceEmbeddings(model_name="google/gemma-1.1-2b-it")
db = Chroma.from_documents(splits, embeddings)

# Retrieve the 5 chunks most similar to the query.
retriever = db.as_retriever(search_kwargs={"k": 5})

# Query: "What is the president's term of office?"
# `invoke` is the Runnable entry point that supersedes the deprecated
# `get_relevant_documents` method.
relevants = retriever.invoke("대통령 임기는?")
# Print each retrieved chunk with a 1-based rank header and a separator.
# The loop body must be indented (the pasted original was not, which is a
# syntax error); enumerate replaces the hand-maintained counter.
for i, relevant in enumerate(relevants, start=1):
    print(f"{i}번째 페이지 : ")
    print("***" * 20)
    print(relevant.page_content)
https://www.linkedin.com/pulse/exploring-googles-gemma-2b-new-era-language-models-rany-lxhlc
https://limitsinx.tistory.com/67 — ④ GPU 환경세팅 : 런타임 -> 런타임 유형 변경 -> T4 GPU 선택 후 저장 (GPU setup: Runtime -> Change runtime type -> select T4 GPU, then save)