ahyatt / llm

A package abstracting llm capabilities for emacs.
GNU General Public License v3.0
142 stars 19 forks source link

Error using ollama through a proxy #48

Closed theasp closed 1 month ago

theasp commented 1 month ago

When this library is used with a proxy (http_proxy and https_proxy env variables defined), it encounters an error dealing with the HTTP header regex. This does not happen when not using the proxy, and plz on its own seems to have no problem accessing the server through the proxy. It looks like an issue with plz-media-type to me.

ELISP> emacs-version
"30.0.50"

ELISP> ellama-provider
#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens
              nil :default-chat-non-standard-params nil :scheme
              "https" :host "xxxxx" :port 443
              :chat-model "DEFAULT/llama3-8b:latest" :embedding-model
              "DEFAULT/llama3-8b:latest")

ELISP> (plz 'post "https://xxxxx:443/api/chat" :body
"{\"stream\":false,\"model\":\"DEFAULT/llama3-8b:latest\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe elisp in one word?\"}]}" :decode t :headers
'(("Content-Type" . "application/json")))

"{\"model\":\"DEFAULT/llama3-8b:latest\",\"created_at\":\"2024-06-06T14:27:25.339586897Z\",\"message\":{\"role\":\"assistant\",\"content\":\"**Esoteric**\"},\"done_reason\\
":\"stop\",\"done\":true,\"total_duration\":292413786,\"load_duration\":2318982,\"prompt_eval_duration\":72714000,\"eval_count\":5,\"eval_duration\":85079000}"

ELISP> (llm-chat ellama-provider (llm-make-chat-prompt "Describe elisp in one word?"))
*** Eval error ***  Wrong type argument: number-or-marker-p, nil
Debugger entered--Lisp error: (search-failed "\\(?:\\(?:\n\\|\15\n\15\\)\n\\)")
  plz-media-type--parse-headers()
  plz-media-type--parse-response()
  plz-media-type-process-filter(#<process plz-request-curl> ((application/json . #<plz-media-type:application/json plz-media-type:application/json-1033f28d3632>) (application/octet-stream . #<plz-media-type:application/octet-stream plz-media-type:application/octet-stream-1033f28d360b>) (application/xml . #<plz-media-type:application/xml plz-media-type:application/xml-1033f28d3604>) (text/html . #<plz-media-type:text/html plz-media-type:text/html-1033f28d39c5>) (text/xml . #<plz-media-type:text/xml plz-media-type:text/xml-1033f28d39ca>) (t . #<plz-media-type:application/octet-stream plz-media-type:application/octet-stream-1033f28d39db>)) "HTTP/1.1 200 Connection established\15\n\15\n")
  #f(compiled-function (process chunk) #<bytecode 0x2ad5e9ab453d307>)(#<process plz-request-curl> "HTTP/1.1 200 Connection established\15\n\15\n")
  accept-process-output(#<process plz-request-curl>)
  plz(post "https://xxxxxx:443/api/chat" :as buffer :body "{\"stream\":false,\"model\":\"DEFAULT/llama3-8b:latest\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe elisp in one word?\"}]}" :body-type text :connect-timeout 10 :decode t :else #f(compiled-function (error) #<bytecode -0xa1acaa5e53cb633>) :finally #f(compiled-function () #<bytecode 0x9130a714e227dfa>) :headers (("Content-Type" . "application/json")) :noquery nil :filter #f(compiled-function (process chunk) #<bytecode 0x2ad5e9ab453d307>) :timeout nil :then sync)
  plz-media-type-request(post "https://xxxxxx:443/api/chat" :as (media-types ((application/json . #<plz-media-type:application/json plz-media-type:application/json-1033f28d3632>) (application/octet-stream . #<plz-media-type:application/octet-stream plz-media-type:application/octet-stream-1033f28d360b>) (application/xml . #<plz-media-type:application/xml plz-media-type:application/xml-1033f28d3604>) (text/html . #<plz-media-type:text/html plz-media-type:text/html-1033f28d39c5>) (text/xml . #<plz-media-type:text/xml plz-media-type:text/xml-1033f28d39ca>) (t . #<plz-media-type:application/octet-stream plz-media-type:application/octet-stream-1033f28d39db>))) :body "{\"stream\":false,\"model\":\"DEFAULT/llama3-8b:latest\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe elisp in one word?\"}]}" :connect-timeout 10 :headers (("Content-Type" . "application/json")) :timeout nil)
  llm-request-plz-sync-raw-output("https://xxxxxx:443/api/chat" :headers nil :data (("stream" . :json-false) ("model" . "DEFAULT/llama3-8b:latest") ("messages" (("role" . "user") ("content" . "Describe elisp in one word?")))) :timeout nil)
  llm-request-plz-sync("https://xxxxxx:443/api/chat" :headers nil :data (("stream" . :json-false) ("model" . "DEFAULT/llama3-8b:latest") ("messages" (("role" . "user") ("content" . "Describe elisp in one word?")))))
  #f(compiled-function (provider prompt) #<bytecode -0x10962013be9b5add>)(#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil))
  apply(#f(compiled-function (provider prompt) #<bytecode -0x10962013be9b5add>) (#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil)))
  #f(compiled-function (&rest args) #<bytecode -0x32dba651725b8fb>)(#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil))
  apply(#f(compiled-function (&rest args) #<bytecode -0x32dba651725b8fb>) (#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil)))
  #f(compiled-function (&rest args) #<bytecode -0x15d83c6ffa66ce2f>)()
  #f(compiled-function (cl--cnm provider prompt) #<bytecode 0xf2da543041f40b2>)(#f(compiled-function (&rest args) #<bytecode -0x15d83c6ffa66ce2f>) #s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil))
  apply(#f(compiled-function (cl--cnm provider prompt) #<bytecode 0xf2da543041f40b2>) #f(compiled-function (&rest args) #<bytecode -0x15d83c6ffa66ce2f>) (#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil)))
  #f(compiled-function (provider prompt) "Log the input to llm-chat." #<bytecode -0x122fca2c8b2b2dd2>)(#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil))
  apply(#f(compiled-function (provider prompt) "Log the input to llm-chat." #<bytecode -0x122fca2c8b2b2dd2>) #s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil))
  llm-chat(#s(llm-ollama :default-chat-temperature nil :default-chat-max-tokens nil :default-chat-non-standard-params nil :scheme "https" :host "xxxxxx" :port 443 :chat-model "DEFAULT/llama3-8b:latest" :embedding-model "DEFAULT/llama3-8b:latest") #s(llm-chat-prompt :context nil :examples nil :interactions (#s(llm-chat-prompt-interaction :role user :content "Describe elisp in one word?" :function-call-result nil)) :functions nil :temperature nil :max-tokens nil :non-standard-params nil))
  eval((llm-chat ellama-provider (llm-make-chat-prompt "Describe elisp in one word?")) t)
  ielm-eval-input(#("(llm-chat ellama-provider (llm-make-chat-prompt \"Describe elisp in one word?\"))" 0 48 (fontified t) 48 77 (face font-lock-string-face fontified t) 77 78 (fontified t) 78 79 (rear-nonsticky t fontified t)) nil)
  ielm-send-input(nil)
  ielm-return()
  funcall-interactively(ielm-return)
  command-execute(ielm-return)
ahyatt commented 1 month ago

@r0man can you take a look at this?

r0man commented 1 month ago

Hi @theasp,

Which proxy are you using? I tried to reproduce this with tinyproxy, but my request seems to work.

Could you please run this:

(with-current-buffer (plz 'post "https://xxxxx:443/api/chat"
                       :as 'buffer
                       :body "{\"stream\":false,\"model\":\"DEFAULT/llama3-8b:latest\",\"messages\":[{\"role\":\"user\",\"content\":\"Describe elisp in one word?\"}]}"
                       :decode t
                       :headers '(("Content-Type" . "application/json")))
  (widen)
  (write-region (point-min) (point-max) "/tmp/response.txt"))

and send me /tmp/response.txt?

theasp commented 1 month ago

Hi @r0man, it's an old version of Squid. I'm not the one that maintains it sadly.

Based on the output below, I suspect that HTTP/1.1 200 came from the proxy and HTTP/2 200 came from Traefik which is in front of Ollama on the remote side.

This is response.txt when using the proxy:

HTTP/1.1 200 Connection established

HTTP/2 200
content-type: application/json; charset=utf-8
date: Thu, 06 Jun 2024 18:40:19 GMT
content-length: 318

{"model":"DEFAULT/llama3-8b:latest","created_at":"2024-06-06T18:40:19.165000711Z","message":{"role":"assistant","content":"**Esoteric**"},"done_reason":"stop","done":true,"total_duration":350939608,"load_duration":2292974,"prompt_eval_count":15,"prompt_eval_duration":133575000,"eval_count":5,"eval_duration":81834000}

This is without the proxy:

HTTP/2 200
content-type: application/json; charset=utf-8
date: Thu, 06 Jun 2024 18:44:10 GMT
content-length: 294

{"model":"DEFAULT/llama3-8b:latest","created_at":"2024-06-06T18:44:10.531876786Z","message":{"role":"assistant","content":"**Esoteric**"},"done_reason":"stop","done":true,"total_duration":295274440,"load_duration":1335630,"prompt_eval_duration":74681000,"eval_count":5,"eval_duration":87733000}
r0man commented 1 month ago

@theasp The support for proxies was missing. I added this here: https://github.com/r0man/plz-media-type/pull/8

But now I'm running into another issue:

Debugger entered--Lisp error: (wrong-type-argument number-or-marker-p nil)
  <=(200 nil 299)
  (let* ((status val)) (<= 200 status 299))
  (if (let* ((status val)) (<= 200 status 299)) (let ((status val)) (ignore status) (ignore status) (funcall (process-get process :plz-then))) (let nil (let ((err (make-plz-error :response (plz--response)))) (let* ((val (process-get process :plz-else))) (cond ((null val) (let nil (process-put process :plz-result err))) ((functionp val) (let (...) (funcall fn err))) (t (let (...) (error "No clause matching `%S'" x46))))))))
  (let* ((val (plz--http-status))) (if (let* ((status val)) (<= 200 status 299)) (let ((status val)) (ignore status) (ignore status) (funcall (process-get process :plz-then))) (let nil (let ((err (make-plz-error :response (plz--response)))) (let* ((val (process-get process :plz-else))) (cond ((null val) (let nil ...)) ((functionp val) (let ... ...)) (t (let ... ...))))))))
  (save-current-buffer (set-buffer buffer) (goto-char (point-min)) (plz--skip-proxy-headers) (while (plz--skip-redirect-headers)) (let* ((val (plz--http-status))) (if (let* ((status val)) (<= 200 status 299)) (let ((status val)) (ignore status) (ignore status) (funcall (process-get process :plz-then))) (let nil (let ((err (make-plz-error :response ...))) (let* ((val ...)) (cond (... ...) (... ...) (t ...))))))))
  (let nil (save-current-buffer (set-buffer buffer) (goto-char (point-min)) (plz--skip-proxy-headers) (while (plz--skip-redirect-headers)) (let* ((val (plz--http-status))) (if (let* ((status val)) (<= 200 status 299)) (let ((status val)) (ignore status) (ignore status) (funcall (process-get process :plz-then))) (let nil (let ((err ...)) (let* (...) (cond ... ... ...))))))))
  (cond ((eql val 0) (let nil (save-current-buffer (set-buffer buffer) (goto-char (point-min)) (plz--skip-proxy-headers) (while (plz--skip-redirect-headers)) (let* ((val (plz--http-status))) (if (let* (...) (<= 200 status 299)) (let (...) (ignore status) (ignore status) (funcall ...)) (let nil (let ... ...))))))) ((let* ((code val)) (<= 1 code 90)) (let ((code val)) (ignore code) (let* ((curl-exit-code (cond (... ...) (... code))) (curl-error-message (alist-get curl-exit-code plz-curl-errors)) (err (record 'plz-error (cons curl-exit-code curl-error-message) nil nil))) (let* ((val (process-get process :plz-else))) (cond ((null val) (let nil ...)) ((functionp val) (let ... ...)) (t (let ... ...))))))) ((let* ((code val)) (not (<= 1 code 90))) (let ((code val)) (ignore code) (let* ((message (cond (... ...) (... ...) (t ...))) (err (record 'plz-error nil nil message))) (let* ((val (process-get process :plz-else))) (cond ((null val) (let nil ...)) ((functionp val) (let ... ...)) (t (let ... ...))))))) (t (let ((code val)) (let* ((message (format "Unexpected curl process status:%S code:%S.  Please..." status code)) (err (record 'plz-error nil nil message))) (let* ((val (process-get process :plz-else))) (cond ((null val) (let nil ...)) ((functionp val) (let ... ...)) (t (let ... ...))))))))
  (let* ((val (process-exit-status process))) (cond ((eql val 0) (let nil (save-current-buffer (set-buffer buffer) (goto-char (point-min)) (plz--skip-proxy-headers) (while (plz--skip-redirect-headers)) (let* ((val ...)) (if (let* ... ...) (let ... ... ... ...) (let nil ...)))))) ((let* ((code val)) (<= 1 code 90)) (let ((code val)) (ignore code) (let* ((curl-exit-code (cond ... ...)) (curl-error-message (alist-get curl-exit-code plz-curl-errors)) (err (record ... ... nil nil))) (let* ((val ...)) (cond (... ...) (... ...) (t ...)))))) ((let* ((code val)) (not (<= 1 code 90))) (let ((code val)) (ignore code) (let* ((message (cond ... ... ...)) (err (record ... nil nil message))) (let* ((val ...)) (cond (... ...) (... ...) (t ...)))))) (t (let ((code val)) (let* ((message (format "Unexpected curl process status:%S code:%S.  Please..." status code)) (err (record ... nil nil message))) (let* ((val ...)) (cond (... ...) (... ...) (t ...))))))))
  (unwind-protect (let* ((val (process-exit-status process))) (cond ((eql val 0) (let nil (save-current-buffer (set-buffer buffer) (goto-char (point-min)) (plz--skip-proxy-headers) (while (plz--skip-redirect-headers)) (let* (...) (if ... ... ...))))) ((let* ((code val)) (<= 1 code 90)) (let ((code val)) (ignore code) (let* ((curl-exit-code ...) (curl-error-message ...) (err ...)) (let* (...) (cond ... ... ...))))) ((let* ((code val)) (not (<= 1 code 90))) (let ((code val)) (ignore code) (let* ((message ...) (err ...)) (let* (...) (cond ... ... ...))))) (t (let ((code val)) (let* ((message ...) (err ...)) (let* (...) (cond ... ... ...))))))) (let* ((finally (and t (process-get process :plz-finally)))) (if finally (funcall finally) nil)) (if (or (process-get process :plz-sync) (eq 'buffer (process-get process :plz-as))) nil (kill-buffer buffer)))
  plz--respond(#<process plz-request-curl> #<buffer  *plz-request-curl*-661119> "finished\n")
  apply(plz--respond (#<process plz-request-curl> #<buffer  *plz-request-curl*-661119> "finished\n"))
  timer-event-handler([t 26210 58253 269908 nil plz--respond (#<process plz-request-curl> #<buffer  *plz-request-curl*-661119> "finished\n") nil 635000 nil])

I will see what I can do ....

r0man commented 1 month ago

@theasp Well, I think the remaining issue is with the event source and OpenAI. But since you use Ollama, could you try evaluating this file: https://github.com/r0man/plz-media-type/blob/154a71ee4f0d5549f355714a58d8fc5ca8201e13/plz-media-type.el

And see if it is working for you?

I don't have Ollama set up behind an HTTPS proxy, but I'm now using this with Vertex and it seems to be working.

theasp commented 1 month ago

@r0man that seems to be working fine for me, thanks!

r0man commented 1 month ago

@theasp Ok, perfect. Thanks for trying! I will open a PR here once I get the event source and OpenAI working as well.