Open NoobPythoner opened 1 month ago
Hello, @NoobPythoner! I'm here to assist you with any bugs, questions, or contributions you may have. Let's work together to resolve the issue you're facing.
To resolve the ModuleNotFoundError
when trying to import JsonlDataset
from cohere.custom_model_dataset
in the CohereRerankerFinetuneEngine.finetune
method, you need to ensure that the cohere
package is installed in your environment. You can install the cohere
package using pip:
pip install cohere
After installing the package, the import statement should work correctly:
from cohere.custom_model_dataset import JsonlDataset
Make sure to run the above pip command in the same environment where your script is being executed. Additionally, ensure that the cohere
module is correctly imported in the CohereRerankerFinetuneEngine
class as shown below:
import importlib.util
import os
from typing import Optional
from llama_index.finetuning.types import BaseCohereRerankerFinetuningEngine
from llama_index.postprocessor.cohere_rerank import CohereRerank
class CohereRerankerFinetuneEngine(BaseCohereRerankerFinetuningEngine):
    """Cohere Reranker Finetune Engine.

    Builds a Cohere client, submits a custom-model (fine-tuning) job from
    JSONL train/validation files, and wraps the resulting model id in a
    ``CohereRerank`` postprocessor.
    """

    def __init__(
        self,
        train_file_name: str = "train.jsonl",
        val_file_name: Optional[str] = None,
        model_name: str = "exp_finetune",
        model_type: str = "RERANK",
        base_model: str = "english",
        api_key: Optional[str] = None,
    ) -> None:
        """Init params.

        Args:
            train_file_name: Training file handed to the dataset as ``train_file``.
            val_file_name: Optional evaluation file handed to the dataset as
                ``eval_file``; when falsy only the training file is used.
            model_name: Passed as ``name`` to ``create_custom_model``.
            model_type: Passed as ``model_type`` to ``create_custom_model``.
            base_model: Passed as ``base_model`` to ``create_custom_model``.
            api_key: Cohere API key; falls back to the ``COHERE_API_KEY``
                environment variable when not given.

        Raises:
            ImportError: If the ``cohere`` package is not installed.
            ValueError: If no API key is passed and ``COHERE_API_KEY`` is unset.
        """
        # find_spec returns None when the 'cohere' module is not available.
        if importlib.util.find_spec("cohere") is None:
            raise ImportError(
                "Cannot import cohere. Please install the package using `pip install cohere`."
            )
        import cohere

        try:
            self.api_key = api_key or os.environ["COHERE_API_KEY"]
        except KeyError as err:
            # A missing environment variable raises KeyError (not IndexError),
            # so this is the exception that must be translated for the caller.
            raise ValueError(
                "Must pass in cohere api key or "
                "specify via COHERE_API_KEY environment variable "
            ) from err

        self._model = cohere.Client(self.api_key, client_name="llama_index")
        self._train_file_name = train_file_name
        self._val_file_name = val_file_name
        self._model_name = model_name
        self._model_type = model_type
        self._base_model = base_model
        # Populated by finetune(); None means no job has been created yet.
        self._finetune_model = None

    def finetune(self) -> None:
        """Finetune model.

        Creates the dataset from the configured file(s) and stores the object
        returned by ``create_custom_model`` on ``self._finetune_model``.
        """
        # NOTE(review): cohere 5.2.5 is reported not to ship
        # `cohere.custom_model_dataset`; this import presumably needs an older
        # cohere SDK -- confirm the supported version before relying on it.
        from cohere.custom_model_dataset import JsonlDataset

        if self._val_file_name:
            # Uploading both train file and eval file
            dataset = JsonlDataset(
                train_file=self._train_file_name, eval_file=self._val_file_name
            )
        else:
            # Single Train File Upload:
            dataset = JsonlDataset(train_file=self._train_file_name)

        self._finetune_model = self._model.create_custom_model(
            name=self._model_name,
            dataset=dataset,
            model_type=self._model_type,
            base_model=self._base_model,
        )

    def get_finetuned_model(self, top_n: int = 5) -> CohereRerank:
        """Gets finetuned model id.

        Args:
            top_n: Forwarded to ``CohereRerank`` as ``top_n``.

        Returns:
            A ``CohereRerank`` built from the fine-tuned model's ``id``.

        Raises:
            RuntimeError: If ``finetune`` has not produced a model yet.
        """
        if self._finetune_model is None:
            raise RuntimeError(
                "Finetuned model is not set yet. Please run the finetune method first."
            )
        return CohereRerank(
            model=self._finetune_model.id, top_n=top_n, api_key=self.api_key
        )
Ensure you are using the correct version of the cohere
package, which should be ^5.2.5
for compatibility with LlamaIndex v0.10.55 [1][2][3][4].
To continue talking to Dosu, mention @dosu.
I have installed cohere 5.2.5 and still can't import cohere.custom_model_dataset. I checked the cohere source code, and indeed there is no custom_model_dataset module or JsonlDataset class...
I don't think I made such a low-level mistake here. Are you serious?
It appears that the cohere.custom_model_dataset
module and the JsonlDataset
class do not exist in the cohere
package. This is not an issue with your setup but rather a discrepancy in the code or documentation.
As an alternative, you can use the CohereRerankerFinetuneDataset
class within the llama_index
package, which provides similar functionality. Here is an example of how you can modify your code to use this class:
from llama_index.finetuning.rerankers.dataset_gen import CohereRerankerFinetuneDataset
class CohereRerankerFinetuneEngine(BaseCohereRerankerFinetuningEngine):
    """Cohere Reranker Finetune Engine.

    Variant that builds the dataset with ``CohereRerankerFinetuneDataset``
    before submitting the custom-model job to the Cohere client.
    """

    def __init__(
        self,
        train_file_name: str = "train.jsonl",
        val_file_name: Optional[str] = None,
        model_name: str = "exp_finetune",
        model_type: str = "RERANK",
        base_model: str = "english",
        api_key: Optional[str] = None,
    ) -> None:
        """Init params.

        Args:
            train_file_name: Training file handed to the dataset as ``train_file``.
            val_file_name: Optional evaluation file handed to the dataset as
                ``eval_file``; when falsy only the training file is used.
            model_name: Passed as ``name`` to ``create_custom_model``.
            model_type: Passed as ``model_type`` to ``create_custom_model``.
            base_model: Passed as ``base_model`` to ``create_custom_model``.
            api_key: Cohere API key; falls back to the ``COHERE_API_KEY``
                environment variable when not given.

        Raises:
            ImportError: If the ``cohere`` package is not installed.
            ValueError: If no API key is passed and ``COHERE_API_KEY`` is unset.
        """
        # find_spec returns None when the 'cohere' module is not available.
        if importlib.util.find_spec("cohere") is None:
            raise ImportError(
                "Cannot import cohere. Please install the package using `pip install cohere`."
            )
        import cohere

        try:
            self.api_key = api_key or os.environ["COHERE_API_KEY"]
        except KeyError as err:
            # A missing environment variable raises KeyError (not IndexError),
            # so this is the exception that must be translated for the caller.
            raise ValueError(
                "Must pass in cohere api key or "
                "specify via COHERE_API_KEY environment variable "
            ) from err

        self._model = cohere.Client(self.api_key, client_name="llama_index")
        self._train_file_name = train_file_name
        self._val_file_name = val_file_name
        self._model_name = model_name
        self._model_type = model_type
        self._base_model = base_model
        # Populated by finetune(); None means no job has been created yet.
        self._finetune_model = None

    def finetune(self) -> None:
        """Finetune model.

        Creates the dataset from the configured file(s) and stores the object
        returned by ``create_custom_model`` on ``self._finetune_model``.
        """
        # NOTE(review): the constructor signature of CohereRerankerFinetuneDataset
        # is not visible here; verify it accepts `train_file` / `eval_file`
        # before relying on this substitution.
        if self._val_file_name:
            dataset = CohereRerankerFinetuneDataset(
                train_file=self._train_file_name, eval_file=self._val_file_name
            )
        else:
            dataset = CohereRerankerFinetuneDataset(train_file=self._train_file_name)

        self._finetune_model = self._model.create_custom_model(
            name=self._model_name,
            dataset=dataset,
            model_type=self._model_type,
            base_model=self._base_model,
        )
This should resolve the issue with the missing JsonlDataset
class [1].
@logan-markewich, this one's a bit out of my wheelhouse, could you step in and provide your guidance?
To continue talking to Dosu, mention @dosu.
Bug Description
llama_index/finetuning/rerankers/cohere_reranker.py:51, in CohereRerankerFinetuneEngine.finetune(self) 49 def finetune(self) -> None: 50 """Finetune model.""" ---> 51 from cohere.custom_model_dataset import JsonlDataset 53 if self._val_file_name: 54 # Uploading both train file and eval file 55 dataset = JsonlDataset( 56 train_file=self._train_file_name, eval_file=self._val_file_name 57 )
ModuleNotFoundError: No module named 'cohere.custom_model_dataset'
Version
0.10.55
Steps to Reproduce
the demo code:
Reranker model with 0 hard negatives.
# Build the engine from the baseline train/validation files, then start the job.
finetune_model_no_hard_negatives = CohereRerankerFinetuneEngine(
    train_file_name="train.jsonl",
    val_file_name="val.jsonl",
    model_name="lyft_reranker_0_hard_negatives",
    model_type="RERANK",
    base_model="english",
)
finetune_model_no_hard_negatives.finetune()
Reranker model with 5 hard negatives selected at random
# Build the engine from the "5_random" train/validation files, then start the job.
finetune_model_random_hard_negatives = CohereRerankerFinetuneEngine(
    train_file_name="train_5_random.jsonl",
    val_file_name="val_5_random.jsonl",
    model_name="lyft_reranker_5_random_hard_negatives",
    model_type="RERANK",
    base_model="english",
)
finetune_model_random_hard_negatives.finetune()
Reranker model with 5 hard negatives selected based on cosine similarity
# Build the engine from the "5_cosine_similarity" train/validation files, then start the job.
finetune_model_cosine_hard_negatives = CohereRerankerFinetuneEngine(
    train_file_name="train_5_cosine_similarity.jsonl",
    val_file_name="val_5_cosine_similarity.jsonl",
    model_name="lyft_reranker_5_cosine_hard_negatives",
    model_type="RERANK",
    base_model="english",
)
finetune_model_cosine_hard_negatives.finetune()
Relevant Logs/Tracebacks
No response