diff --git a/python/server.py b/python/server.py
index 0efa17b..0322341 100644
--- a/python/server.py
+++ b/python/server.py
@@ -94,6 +94,7 @@ class LLMStepRequestHandler(BaseHTTPRequestHandler):
response = result
self.wfile.write(json.dumps(response).encode('utf-8'))
except Exception as e:
+ print("Error", e)
error_response = {'error': str(e)}
self.wfile.write(json.dumps(error_response).encode('utf-8'))
And the error shown is:
Error "LayerNormKernelImpl" not implemented for 'Half'
The error goes away if I revert 92990596f91aad7e1323a8985d46a488ce8aef57.
For additional context:
I'm running `python python/server.py` with no arguments. Additionally, I've disabled cuda because my card does not have enough memory:
diff --git a/python/server.py b/python/server.py
index 0efa17b..0c2c8b5 100644
--- a/python/server.py
+++ b/python/server.py
@@ -18,7 +18,7 @@ def load_hf(hf_model):
model = transformers.AutoModelForCausalLM.from_pretrained(hf_model)
tokenizer = transformers.AutoTokenizer.from_pretrained(hf_model)
- if torch.cuda.is_available():
+ if False:
model.cuda()
model.eval()
print("Done.")
After https://github.com/wellecks/llmstep/commit/92990596f91aad7e1323a8985d46a488ce8aef57, I get this error whenever I try to use the
llmstep
tactic. I tried adding this error logging:
And the error shown is:
The error goes away if I revert 92990596f91aad7e1323a8985d46a488ce8aef57.
For additional context: I'm running
python python/server.py
with no arguments. Additionally, I've disabled cuda because my card does not have enough memory: