simonw opened 1 year ago
`max_tokens` seems like the right approach, this change gets me better results:
```diff
diff --git a/llm_llama_cpp.py b/llm_llama_cpp.py
index f2fc977..09c00f6 100644
--- a/llm_llama_cpp.py
+++ b/llm_llama_cpp.py
@@ -234,7 +234,7 @@ class LlamaModel(llm.Model):
                 response._prompt_json = {"prompt_bits": prompt_bits}
             else:
                 prompt_text = prompt.prompt
-            stream = llm_model(prompt_text, stream=True)
+            stream = llm_model(prompt_text, stream=True, max_tokens=4000)
             for item in stream:
                 # Each item looks like this:
                 # {'id': 'cmpl-00...', 'object': 'text_completion', 'created': .., 'model': '/path', 'choices': [
```
Using `llm -m l2c '400 names for a cat'` I get the following:
> I'm glad you're interested in finding a unique name for your feline friend! Here are 400 creative and fun name suggestions for cats:
Only 98 names for some reason from Llama 2, but many more tokens than the ~110 I was getting before.
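For anyone experimenting outside the plugin, here's a minimal standalone sketch of the llama-cpp-python call being patched above. The model path and prompt are placeholders; llama-cpp-python's default `max_tokens` is small, which is consistent with the ~110-token cutoff:

```python
from llama_cpp import Llama

# Placeholder model path -- point this at any local GGML model file.
llm_model = Llama(model_path="/path/to/llama-2-7b-chat.bin", n_ctx=4000, verbose=False)

# Without max_tokens, generation stops at the library's small default
# budget, which is what truncates long completions like a 400-item list.
stream = llm_model("400 names for a cat", stream=True, max_tokens=4000)

for item in stream:
    # Each item is a dict shaped like the comment in the diff above:
    # {'id': 'cmpl-...', 'object': 'text_completion', ..., 'choices': [{'text': ...}]}
    print(item["choices"][0]["text"], end="", flush=True)
```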
Also running into this -- responses getting truncated; this is an amazing tool already though :)
This seems to help:
```diff
diff --git a/llm_llama_cpp.py b/llm_llama_cpp.py
index f2fc977..62f716b 100644
--- a/llm_llama_cpp.py
+++ b/llm_llama_cpp.py
@@ -226,7 +226,9 @@ class LlamaModel(llm.Model):
     def execute(self, prompt, stream, response, conversation):
         with SuppressOutput(verbose=prompt.options.verbose):
             llm_model = Llama(
-                model_path=self.path, verbose=prompt.options.verbose, n_ctx=4000
+                model_path=self.path,
+                verbose=prompt.options.verbose,
+                n_ctx=4000,
             )
             if self.is_llama2_chat:
                 prompt_bits = self.build_llama2_chat_prompt(prompt, conversation)
@@ -234,7 +236,7 @@ class LlamaModel(llm.Model):
                 response._prompt_json = {"prompt_bits": prompt_bits}
             else:
                 prompt_text = prompt.prompt
-            stream = llm_model(prompt_text, stream=True)
+            stream = llm_model(prompt_text, stream=True, max_tokens=4000)
             for item in stream:
                 # Each item looks like this:
                 # {'id': 'cmpl-00...', 'object': 'text_completion', 'created': .., 'model': '/path', 'choices': [
```
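One caveat worth noting (this is general llama.cpp behavior, not something the plugin enforces): `n_ctx` covers the prompt and the completion together, so a hard-coded `max_tokens=4000` can still run out of room on long prompts. A rough sketch of budgeting the completion against the prompt length, using llama-cpp-python's `tokenize()`:

```python
from llama_cpp import Llama

N_CTX = 4000
llm_model = Llama(model_path="/path/to/model.bin", n_ctx=N_CTX, verbose=False)
prompt_text = "400 names for a cat"

# tokenize() takes bytes and returns token ids; counting the prompt's
# tokens keeps prompt + completion inside the shared context window.
prompt_tokens = len(llm_model.tokenize(prompt_text.encode("utf-8")))

stream = llm_model(
    prompt_text, stream=True, max_tokens=max(N_CTX - prompt_tokens, 1)
)
for item in stream:
    print(item["choices"][0]["text"], end="", flush=True)
```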
I ran into this problem immediately with local models and this fixed it FWIW.
https://twitter.com/mullinsms/status/1686480711211945984
Solution may be the `max_tokens` parameter.
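Rather than hard-coding 4000, it might be cleaner to surface this as a plugin option. Here's a rough, untested sketch building on the plugin's existing pydantic `Options` class (the `max_tokens` field doesn't exist there yet, and the default is my own guess):

```python
import llm
from pydantic import Field


class LlamaModel(llm.Model):
    class Options(llm.Options):
        # Proposed option (not in the plugin yet); would allow:
        #   llm -m l2c -o max_tokens 4000 '400 names for a cat'
        max_tokens: int = Field(
            default=4000,
            description="Maximum number of tokens to generate",
        )

    # ...and execute() would pass it through, e.g.:
    #   stream = llm_model(prompt_text, stream=True, max_tokens=prompt.options.max_tokens)
```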