Closed yhygta closed 1 year ago
finetuner.run(epochs=1, batch_size=6)
这是由于一个边界条件导致的 bug,因为你的微调数据集太小了,组不成一个 batch ,结果 drop_last 把这个 batch 直接 drop 掉了,手动调小 batch_size 可以解决这个问题,就像上面的代码一样。之后的版本我会增加更易懂的错误信息。
好的,感谢大佬,下面的这样可以用了
from datasets import load_dataset from uniem.finetuner import FineTuner dataset = load_dataset('/opt/dl/llm/langchain-ChatGLM/m3ef/ft4/',encoding="GBK") finetuner = FineTuner('/media/root/_data01/ai/embeddings/m3e-base/', dataset=dataset) finetuner.run(epochs=1, output_dir='finetuned-model-riddle', batch_size=5)
ft4.csvๆพๅจft4ๆไปถๅคนไธ๏ผๅ ๅฎนๅฆไธ๏ผ
sentence1,sentence2,label ้้็ด ๆไน็จ,็บข้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,ๅฆฅๅธ้็ด ็ไฝฟ็จๆนๆณ0 ้้็ด ๆไน็จ,็กซ้ ธๆฐ้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,็ฐ้ป้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,ๆฒ้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,ๅคๆนๆฐ้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,ๅบๅคง้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,ๅ ๆ้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,้ปๆฒ้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,้้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,ๅ้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,้ฟๅฅ้็ด ็ไฝฟ็จๆนๆณ,0 ้้็ด ๆไน็จ,ๆๅฏ้็ด ็ไฝฟ็จๆนๆณ,0
ไฝๆฏๆๆไธ้ข้ฃไธชcsvๆไปถๅญไธๆฅ๏ผไปฅ่ฟไธชไปฃ็ ๏ผhttps://github.com/wangyuxinwhy/uniem/issues/34#issuecomment-1623492087๏ผๆฅๅ๏ผไผๆฅ้TypeError: must be real number, not NoneType
能把报错的信息贴一下吗?现在这个报错看不出来啥。
另外,微调是基于对比学习的,但你的 label 都为 0 ,是没有办法进行对比的。需要添加一些正例
ๅฅฝ็๏ผๆ่ฐขๆ้ๅ๏ผๆๆฏๅ็ฐX้็ด ๅจLLMไผๆททๅจไธ่ตท๏ผๆฏๅธๆๅจembedding้้ข่ฝๅๅผ๏ผๆไปฅๅไบ0๏ผ่ฟ็งๆ ๅตไธๆๆณๅขๅคงๅบๅๅบฆ๏ผ่ฏท้ฎ่ฏฅๆไนๅค็ๅข๏ผ
报错信息如下:
0it [7:30:19, ?it/s]
╭────────────────────── Traceback (most recent call last) ──────────────────────╮
│ /home/epetai01/anaconda3/envs/cg2_310/lib/python3.10/code.py:90 in runcode │
│ │
│ 87 │ │ │
│ 88 │ │ """ │
│ 89 │ │ try: │
│ ❱ 90 │ │ │ exec(code, self.locals) │
│ 91 │ │ except SystemExit: │
│ 92 │ │ │ raise │
│ 93 │ │ except: │
│ :1 in
看起来是有一个样本的标签是 None ,你可以打印 dataset 中的全部样本,检查一下是不是有样本的标签是 None。
็กฎๅฎๆฏๆNone๏ผๆ่ฐขๅคงไฝฌ่งฃ็ญ
🐛 bug 说明
ๆจๅฅฝ๏ผ็ ง็ๅฎ็ฝ็ๆ็จ๏ผไฝฟ็จไบ่ชๅทฑ็ๆฐๆฎ้๏ผfinetuneๆถๆฅ้ argument after ** must be a mapping, not NoneType ๆ่ฐขๅคงไฝฌไปฌ็็๏ผๅไธไธชissueไธๅฐๅฟๅ ณ้ญไบ ไปฃ็ ๅฆไธ๏ผ
import pandas as pd from uniem.finetuner import FineTuner df3_raw = """{"sentence1":"้้็ด ", "sentence2":"็บข้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๅฆฅๅธ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"็กซ้ ธๆฐ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"็ฐ้ป้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๆฒ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๅคๆนๆฐ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๅบๅคง้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๅ ๆ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"้ปๆฒ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"้้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๅ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"้ฟๅฅ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๆๅฏ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"ๆฐฏ้็ด ", "label":"0.0"} {"sentence1":"้้็ด ", "sentence2":"่บๆ้็ด ", "label":"0.0"}""" df3 = pd.read_json(df3_raw, lines=True) finetuner = FineTuner('moka-ai3/m3e-base/', dataset=df3.to_dict('records')) finetuner.run(epochs=1)
Python Version
3.10