Failure to print result of html->xexp in Doom Emacs

kouvas commented 2 months ago

Package

metadata

nil

package-archives

(("melpa" . "http://melpa.org/packages/")
 ("org" . "http://orgmode.org/elpa/")
 ("gnu" . "https://elpa.gnu.org/packages/")
 ("nongnu" . "https://elpa.nongnu.org/nongnu/"))

racket--el-source-dir

"/Users/aaa/.emacs.d/.local/straight/build-29.1/racket-mode/"

racket--rkt-source-dir

"/Users/aaa/.emacs.d/.local/straight/build-29.1/racket-mode/racket/"

System values

emacs-version

"29.1"

major-mode

help-mode

system-type

darwin

display-graphic-p

Buffer values

after-change-functions

nil

before-change-functions

(beacon--vanish)

completion-at-point-functions

(tags-completion-at-point-function)

eldoc-documentation-function

eldoc-documentation-default

font-lock-defaults

nil

pre-command-hook

(evil--jump-hook t)

post-command-hook

(evil--jump-handle-buffer-crossing evil-normal-post-command yas--post-command-handler hl-line-highlight t)

post-self-insert-hook

(electric-indent-post-self-insert-function blink-paren-post-self-insert-function)

xref-backend-functions

nil

Racket Mode values

racket--cmd-open-p

racket-after-run-hook

nil

racket-back-end-configurations

((:directory "/" :racket-program nil :remote-source-dir nil :restart-watch-directories nil :windows nil))

racket-before-run-hook

(racket-ansi-color-context-reset)

racket-browse-url-function

racket-browse-url-using-temporary-file

racket-command-timeout

racket-documentation-search-location

"https://docs.racket-lang.org/search/index.html?q=%s"

racket-error-context

medium

racket-expand-hiding

standard

racket-hash-lang-mode-hook

nil

racket-hash-lang-module-language-hook

nil

racket-hash-lang-token-face-alist

((constant . font-lock-constant-face)
 (error . error)
 (other . font-lock-doc-face)
 (keyword . font-lock-keyword-face)
 (hash-colon-keyword . racket-keyword-argument-face)
 (at . font-lock-doc-face))

racket-history-filter-regexp

"\\`\\s *\\'"

racket-imagemagick-props

nil

racket-images-do-not-use-svg

nil

racket-images-inline

racket-images-keep-last

racket-images-system-viewer

"open"

racket-indent-curly-as-sequence

racket-indent-sequence-depth

racket-logger-config

((cm-accomplice . warning)
 (GC . info)
 (module-prefetch . warning)
 (optimizer . info)
 (racket/contract . error)
 (racket-mode-debugger . info)
 (sequence-specialization . info)
 (* . fatal))

racket-memory-limit

racket-mode-hook

(racket-smart-open-bracket-mode rainbow-delimiters-mode highlight-quoted-mode +lookup--init-racket-mode-handlers-h evil-cleverparens-mode)

racket-module-forms

"\\s(\\(?:module[*+]?\\|library\\)"

racket-pretty-lambda

nil

racket-pretty-print

racket-program

"racket"

racket-repl-buffer-name-function

nil

racket-repl-command-file

"/Users/aaaa/.emacs.d/.local/cache/racket-mode/repl.rkt"

racket-repl-history-directory

"~/.emacs.d/.local/cache/racket-mode/"

racket-repl-mode-hook

(+lookup--init-racket-repl-mode-handlers-h)

racket-sexp-comment-fade

0.5

racket-shell-or-terminal-function

racket-shell

racket-show-functions

(racket-show-pseudo-tooltip)

racket-smart-open-bracket-enable

nil

racket-submodules-to-run

((test)
 (main))

racket-use-repl-submit-predicate

nil

racket-xp-add-binding-faces

nil

racket-xp-after-change-refresh-delay

racket-xp-highlight-unused-regexp

"^[^_]"

racket-xp-mode-lighter

(:eval
 (racket--xp-mode-lighter))

Minor modes

enabled

((+popup-mode)
 (anzu-mode)
 (auto-composition-mode)
 (auto-compression-mode)
 (auto-encryption-mode)
 (beacon-mode)
 (better-jumper-local-mode)
 (better-jumper-mode)
 (buffer-read-only)
 (column-number-mode)
 (diff-hl-flydiff-mode)
 (doom-modeline-mode)
 (electric-indent-mode)
 (evil-escape-mode)
 (evil-goggles-mode)
 (evil-local-mode)
 (evil-mode)
 (evil-snipe-local-mode)
 (evil-snipe-mode)
 (evil-snipe-override-local-mode)
 (evil-snipe-override-mode)
 (evil-surround-mode)
 (file-name-shadow-mode)
 (font-lock-mode)
 (gcmh-mode)
 (general-override-mode)
 (global-anzu-mode)
 (global-company-mode)
 (global-diff-hl-mode)
 (global-eldoc-mode)
 (global-evil-surround-mode)
 (global-flycheck-mode)
 (global-font-lock-mode)
 (global-git-commit-mode)
 (global-hl-line-mode)
 (global-org-modern-mode)
 (global-so-long-mode)
 (hl-line-mode)
 (isearch-fold-quotes-mode)
 (line-number-mode)
 (mac-mouse-wheel-mode)
 (marginalia-mode)
 (nerd-icons-completion-mode)
 (ns-auto-titlebar-mode)
 (persp-mode)
 (projectile-mode)
 (recentf-mode)
 (save-place-mode)
 (savehist-mode)
 (semantic-minor-modes-format)
 (server-mode)
 (shell-dirtrack-mode)
 (show-paren-mode)
 (size-indication-mode)
 (smartparens-global-mode)
 (solaire-global-mode)
 (solaire-mode)
 (transient-mark-mode)
 (undo-fu-mode)
 (undo-fu-session-global-mode)
 (vertico-mode)
 (vertico-multiform-mode)
 (which-key-mode)
 (windmove-mode)
 (window-divider-mode)
 (winner-mode)
 (ws-butler-global-mode)
 (yas-global-mode)
 (yas-minor-mode))

Disabled minor modes

disabled

((+emacs-lisp--flycheck-non-package-mode)
 (+emacs-lisp--flymake-non-package-mode)
 (+emacs-lisp-ert-mode)
 (+emacs-lisp-non-package-mode)
 (+lsp-optimization-mode)
 (+org-pretty-mode)
 (+popup-buffer-mode)
 (+web-django-mode)
 (+web-jekyll-mode)
 (+web-wordpress-mode)
 (abbrev-mode)
 (apheleia-global-mode)
 (apheleia-mode)
 (archive-subfile-mode)
 (auto-fill-function)
 (auto-fill-mode)
 (auto-revert-mode)
 (auto-revert-tail-mode)
 (auto-save-mode)
 (auto-save-visited-mode)
 (avy-linum-mode)
 (blink-cursor-mode)
 (buffer-face-mode)
 (button-mode)
 (cider--debug-mode)
 (cider-auto-test-mode)
 (cider-enlighten-mode)
 (cider-mode)
 (cider-popup-buffer-mode)
 (cl-old-struct-compat-mode)
 (clj-refactor-mode)
 (comint-fontify-input-mode)
 (company-mode)
 (company-search-mode)
 (compilation-minor-mode)
 (compilation-shell-minor-mode)
 (completion-in-region-mode)
 (consult-preview-at-point-mode)
 (context-menu-mode)
 (cursor-face-highlight-mode)
 (cursor-intangible-mode)
 (cursor-sensor-mode)
 (dash-fontify-mode)
 (defining-kbd-macro)
 (delete-selection-mode)
 (diff-auto-refine-mode)
 (diff-hl-dir-mode)
 (diff-hl-mode)
 (diff-minor-mode)
 (dired-hide-details-mode)
 (display-line-numbers-mode)
 (dtrt-indent-global-mode)
 (dtrt-indent-mode)
 (eldoc-mode)
 (electric-layout-mode)
 (electric-pair-mode)
 (electric-quote-mode)
 (eshell-arg-mode)
 (eshell-command-mode)
 (eshell-proc-mode)
 (eshell-prompt-mode)
 (evil-cleverparens-mode)
 (evil-collection-magit-toggle-text-minor-mode)
 (flycheck-mode)
 (flycheck-popup-tip-mode)
 (flymake-mode)
 (general-override-local-mode)
 (git-commit-mode)
 (global-auto-revert-mode)
 (global-dash-fontify-mode)
 (global-display-line-numbers-mode)
 (global-goto-address-mode)
 (global-hide-mode-line-mode)
 (global-hl-todo-mode)
 (global-prettify-symbols-mode)
 (global-reveal-mode)
 (global-semantic-highlight-edits-mode)
 (global-semantic-highlight-func-mode)
 (global-semantic-show-parser-state-mode)
 (global-semantic-show-unmatched-syntax-mode)
 (global-semantic-stickyfunc-mode)
 (global-subword-mode)
 (global-superword-mode)
 (global-vi-tilde-fringe-mode)
 (global-visual-line-mode)
 (global-whitespace-mode)
 (global-whitespace-newline-mode)
 (goto-address-mode)
 (goto-address-prog-mode)
 (header-line-indent-mode)
 (hide-mode-line-mode)
 (highlight-numbers-mode)
 (highlight-quoted-mode)
 (hl-todo-mode)
 (horizontal-scroll-bar-mode)
 (hs-minor-mode)
 (html-autoview-mode)
 (ibuffer-auto-mode)
 (indent-tabs-mode)
 (isearch-mode)
 (jit-lock-debug-mode)
 (lock-file-mode)
 (lost-selection-mode)
 (mac-auto-ascii-mode)
 (mac-auto-operator-composition-mode)
 (mac-font-panel-mode)
 (magit-auto-revert-mode)
 (magit-blame-mode)
 (magit-blame-read-only-mode)
 (magit-blob-mode)
 (magit-todos-mode)
 (magit-wip-after-apply-mode)
 (magit-wip-after-save-local-mode)
 (magit-wip-after-save-mode)
 (magit-wip-before-change-mode)
 (magit-wip-initial-backup-mode)
 (magit-wip-mode)
 (mail-abbrevs-mode)
 (mc-hide-unmatched-lines-mode)
 (menu-bar-mode)
 (mml-mode)
 (mouse-wheel-mode)
 (multiple-cursors-mode)
 (next-error-follow-minor-mode)
 (org-capture-mode)
 (org-cdlatex-mode)
 (org-list-checkbox-radio-mode)
 (org-modern-mode)
 (org-src-mode)
 (org-table-follow-field-mode)
 (org-table-header-line-mode)
 (orgtbl-mode)
 (outline-minor-mode)
 (overwrite-mode)
 (paragraph-indent-minor-mode)
 (paredit-mode)
 (pcre-mode)
 (poetry-tracking-mode)
 (prettify-symbols-mode)
 (pyvenv-mode)
 (pyvenv-tracking-mode)
 (racket-hash-lang-repl-mode)
 (racket-smart-open-bracket-mode)
 (racket-xp-mode)
 (rainbow-delimiters-mode)
 (read-extended-command-mode)
 (rectangle-mark-mode)
 (reveal-mode)
 (rxt--read-pcre-mode)
 (rxt-global-mode)
 (rxt-mode)
 (semantic-highlight-edits-mode)
 (semantic-highlight-func-mode)
 (semantic-mode)
 (semantic-show-parser-state-mode)
 (semantic-show-unmatched-syntax-mode)
 (semantic-stickyfunc-mode)
 (sgml-electric-tag-pair-mode)
 (sh-electric-here-document-mode)
 (shell-command-with-editor-mode)
 (shell-highlight-undef-mode)
 (show-smartparens-global-mode)
 (show-smartparens-mode)
 (smartparens-global-strict-mode)
 (smartparens-mode)
 (smartparens-strict-mode)
 (smerge-mode)
 (so-long-minor-mode)
 (subword-mode)
 (superword-mode)
 (tab-bar-history-mode)
 (tab-bar-mode)
 (temp-buffer-resize-mode)
 (text-scale-mode)
 (tool-bar-mode)
 (tooltip-mode)
 (transient-resume-mode)
 (treesit-explore-mode)
 (treesit-inspect-mode)
 (undelete-frame-mode)
 (undo-fu-session-mode)
 (url-handler-mode)
 (use-hard-newlines)
 (vc-dir-git-mode)
 (vc-parent-buffer)
 (vi-tilde-fringe-mode)
 (view-mode)
 (visible-mode)
 (visual-line-mode)
 (which-function-mode)
 (whitespace-mode)
 (whitespace-newline-mode)
 (with-editor-mode)
 (ws-butler-mode)
 (xref-etags-mode))

Steps to reproduce:

The code

(require gregor
         html-parsing
         net/http-easy
         threading
         sxml)

(define  (url->xexp url)
  (~> (get url)
      response-body 
      bytes->string/utf-8
      html->xexp               ;; from html-parsing
))

(define  page (url->xexp base-url))

page

Only when sending page to the REPL does it produce this error:

————— run main.rkt —————
'(*TOP*
  "\r\n"
  "\r\n"
  (*DECL* DOCTYPE html)
  "\r\n"
  (*COMMENT* "[if IE 8]> <html lang=\"el\" class=\"ie8\"> <![endif]")
  "\r\n"
  (*COMMENT* "[if IE 9]> <html lang=\"el\" class=\"ie9\"> <![endif]")

< more xexp here> 

         (div
          (@ (class "modal-dialog") (style "overflow-y: initial !important; width: 1000px;"))
          "\r\n"
          "                    "
          (div
           (@ (class "modal-content") (style "max-width: 100%;"))
           "\r\n"
           "                        "
           (div
            (@ (class "modal-header") (style "background-color: #f2a42a !important"))
            "\r\n"
            "                   

bytes->string/utf-8: byte string is not a well-formed UTF-8 encoding
  byte string: #"         \"\n            (button (@ (type \"submit\") (style \"color: black;\") (class \"close\") (data-dismiss \"modal\")) (& times))\n            \"\\r\\n\"\n            \"                            \"\n            (h2 (@ (class \"modal-title\")) \...
Context (plain):
 /Users/aaa/.emacs.d/.local/straight/build-29.1/racket-mode/racket/print.rkt:48:0 drain-value-pipe
 /Applications/Racket v8.13/collects/racket/private/modbeg.rkt:11:4 print-values
 /Users/aaa/.emacs.d/.local/straight/build-29.1/racket-mode/racket/repl.rkt:322:0 configure/require/enter
>

greghendershott commented 2 months ago

Thanks for the report!

I wonder if this happens because of buffering (done by Racket Mode); maybe the byte sequence is valid UTF-8, but a multi-byte character is being split across a buffer boundary?

If in Emacs you (setq racket-pretty-print nil), and re-run, does the problem still occur?
I wonder if I could get a more minimal example, one that doesn't require having all those packages installed. Here's how you could do this, I think -- if it's not too inconvenient:
- Change the final line of your program from page to (require racket/pretty) (pretty-print page).
- In a terminal, run your program with command-line racket.
- Copy the output.
- Paste here. Preferably between a pair of ``` code fence blocks.
But if that's a PITA, or the resulting value is too large or too sensitive, then no worries.

Bogdanp commented 2 months ago

I've also encountered this issue recently, and I agree that it looks like some kind of boundary issue. Here's a file that reproduces the problem:

test.txt

Generated by:

(require net/http-easy)
(response-xexpr (get "https://daringfireball.net/thetalkshow/rss"))

Running

(call-with-input-file "test.txt" read)

in a brand new racket-mode repl reproduces the issue for me. Wrapping the result in (void ...) avoids the problem. The problem does seem to occur even when racket-pretty-print is nil.

greghendershott commented 2 months ago

Awesome, thanks!

I think the fix may be as simple as:

modified   racket/print.rkt
@@ -52,7 +52,7 @@
   (let loop ()
     (match (read-bytes-avail! buffer in)
       [(? exact-nonnegative-integer? len)
-       (define v (bytes->string/utf-8 (subbytes buffer 0 len)))
+       (define v (bytes->string/latin-1 buffer #f 0 len))
        (repl-output-value v)
        (loop)]
       [(? procedure? read-special)

I'm not sure what I was thinking, because bytes->string/utf-8 isn't even needed here -- Racket print has already done any UTF-8 conversion. So not only is it unnecessary, it can also cause this boundary problem.

Bogdanp commented 2 months ago

I haven't looked into how the print module works, so I might be totally wrong. But assuming the results of pretty-print or print are being written into the byte string, the contents of the byte string are going to be UTF-8 encoded, so it seems right to want to decode them; decoding them as latin-1 probably avoids the exception but decodes the wrong data at a boundary[1]. Ideally, the contents of buffer would get written to the other end directly without any decoding. But if repl-output-value has to take a string, then it might be better to check whether (bytes-ref buffer (sub1 len)) is part of a multi-byte sequence (i.e. its most-significant bit is 1) and, if so, wait for more data before performing the conversion.

[1]:

> (define buf (string->bytes/utf-8 "λ")) 
> (for ([i (in-range 2)]) ;; assuming read-bytes-avail! returns one byte at a time
    (displayln (bytes->string/latin-1 buf #f i (add1 i))))
Î
»

greghendershott commented 2 months ago

OK, there are a few levels of buffering going on here, and I need to reload my brain with some of the details. For example it's possible the right answer is to preserve these as bytes, at this stage, and attempt conversion only later. I'll give it a think...

p.s. A quick hack would be to supply a non-false error-char to bytes->string/utf-8. That would prevent the crash, but it would leave "unknown" characters in the output, gratuitously IIUC. So I need to think through the whole thing.

greghendershott / racket-mode