abetlen closed this 1 week ago
You probably know this already, but the llama_context_params.seed
field should be removed everywhere:
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index efec065..fa53135 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -850,11 +850,10 @@ class llama_model_params(ctypes.Structure):
 # };
 class llama_context_params(ctypes.Structure):
     """Parameters for llama_context
 
     Attributes:
-        seed (int): RNG seed, -1 for random
         n_ctx (int): text context, 0 = from model
         n_batch (int): logical maximum batch size that can be submitted to llama_decode
         n_ubatch (int): physical maximum batch size
         n_seq_max (int): max number of sequences (i.e. distinct states for recurrent models)
         n_threads (int): number of threads to use for generation
@@ -881,11 +880,10 @@ class llama_context_params(ctypes.Structure):
         abort_callback (ggml_abort_callback): abort callback if it returns true, execution of llama_decode() will be aborted
         abort_callback_data (ctypes.ctypes.c_void_p): data for abort_callback
     """
 
     if TYPE_CHECKING:
-        seed: int
         n_ctx: int
         n_batch: int
         n_ubatch: int
         n_seq_max: int
         n_threads: int
@@ -911,11 +909,10 @@ class llama_context_params(ctypes.Structure):
         flash_attn: bool
         abort_callback: Callable[[ctypes.c_void_p], bool]
         abort_callback_data: ctypes.c_void_p
 
     _fields_ = [
-        ("seed", ctypes.c_uint32),
         ("n_ctx", ctypes.c_uint32),
         ("n_batch", ctypes.c_uint32),
         ("n_ubatch", ctypes.c_uint32),
         ("n_seq_max", ctypes.c_uint32),
         ("n_threads", ctypes.c_int32),
@e-c-d yes, thank you, I forgot to push this!
Updates llama-cpp-python to use the new llama.cpp sampler API.
Known Issues: