Open Khady opened 2 years ago
My quickfix for reference
diff --git a/index/dune b/index/dune
index e75cb53..795cf51 100644
--- a/index/dune
+++ b/index/dune
@@ -1,6 +1,7 @@
(executable
(name index)
(libraries
+ extlib
db
fpath
tyxml
diff --git a/index/index.ml b/index/index.ml
index 188a686..2113679 100644
--- a/index/index.ml
+++ b/index/index.ml
@@ -9,7 +9,7 @@ let of_filename f =
in
module_name, f
-let filenames () = List.map of_filename (Files.list odoc_directory)
+let filenames () = ExtLib.List.map of_filename (Files.list odoc_directory)
let () =
let files = filenames () in
diff --git a/index/load_doc.ml b/index/load_doc.ml
index e0c5c3f..2b687ff 100644
--- a/index/load_doc.ml
+++ b/index/load_doc.ml
@@ -82,7 +82,7 @@ let rec paths ~prefix ~sgn = function
| [] -> [ prefix ]
| _ ->
rev_concat
- @@ List.mapi
+ @@ ExtLib.List.mapi
(fun i arg ->
let prefix = Cache_name.memo (string_of_int i) :: prefix in
paths ~prefix ~sgn arg)
@@ -90,7 +90,7 @@ let rec paths ~prefix ~sgn = function
end
| Tuple args ->
rev_concat
- @@ List.mapi (fun i arg ->
+ @@ ExtLib.List.mapi (fun i arg ->
let prefix = Cache_name.memo (string_of_int i ^ "*") :: prefix in
paths ~prefix ~sgn arg)
@@ args
@@ -106,7 +106,7 @@ let rec type_paths ~prefix ~sgn = function
(type_paths ~prefix ~sgn b)
| Constr (name, args) ->
rev_concat
- @@ List.map (fun name ->
+ @@ ExtLib.List.map (fun name ->
let name = String.concat "." name in
let prefix = name :: Types.string_of_sgn sgn :: prefix in
begin
@@ -114,14 +114,14 @@ let rec type_paths ~prefix ~sgn = function
| [] -> [ prefix ]
| _ ->
rev_concat
- @@ List.mapi
+ @@ ExtLib.List.mapi
(fun i arg ->
let prefix = string_of_int i :: prefix in
type_paths ~prefix ~sgn arg)
args
end)
@@ all_type_names name
- | Tuple args -> rev_concat @@ List.map (type_paths ~prefix ~sgn) @@ args
+ | Tuple args -> rev_concat @@ ExtLib.List.map (type_paths ~prefix ~sgn) @@ args
| _ -> []
let save_item ~pkg ~path_list ~path name type_ doc =
@@ -162,10 +162,10 @@ let save_item ~pkg ~path_list ~path name type_ doc =
(Db.list_of_string (Odoc_model.Names.ValueName.to_string name))
('.' :: path_list)
in
- let my_full_name = List.map Char.lowercase_ascii my_full_name in
+ let my_full_name = ExtLib.List.map Char.lowercase_ascii my_full_name in
Db.store_name my_full_name str_type ;
let type_paths = type_paths ~prefix:[] ~sgn:Pos type_ in
- Db.store_all str_type (List.map (List.map Cache_name.memo) type_paths)
+ Db.store_all str_type (ExtLib.List.map (ExtLib.List.map Cache_name.memo) type_paths)
let rec item ~pkg ~path_list ~path =
let open Odoc_model.Lang in
@@ -233,7 +233,7 @@ let run ~odoc_directory (root_name, filename) =
| _ ->
invalid_arg (Printf.sprintf "not a valid package/version? %S" filename)
in
- Format.printf "%s %s => %s@." package version root_name ;
+ Printf.printf "%s %s => %s\n%!" package version root_name ;
let filename = Filename.concat odoc_directory filename in
let fpath = Result.get_ok @@ Fpath.of_string filename in
let t =
diff --git a/index/pretty.ml b/index/pretty.ml
index 1435357..e20772c 100644
--- a/index/pretty.ml
+++ b/index/pretty.ml
@@ -54,11 +54,11 @@ and string_of_element = function
and string_of_link_content lst =
H.span
- (List.map (fun r -> string_of_non_link r.Odoc_model.Location_.value) lst)
+ (ExtLib.List.map (fun r -> string_of_non_link r.Odoc_model.Location_.value) lst)
and string_of_paragraph lst =
H.span
- (List.map (fun elt -> string_of_element elt.Odoc_model.Location_.value) lst)
+ (ExtLib.List.map (fun elt -> string_of_element elt.Odoc_model.Location_.value) lst)
let string_of_doc = function
| `Paragraph p -> Some (H.p [ string_of_paragraph p ])
diff --git a/query/dune b/query/dune
index 0b3ee71..f2af0f7 100644
--- a/query/dune
+++ b/query/dune
@@ -1,6 +1,6 @@
(library
(name query)
- (libraries lwt re db))
+ (libraries extlib lwt re db))
(menhir
(modules parser)
diff --git a/query/query.ml b/query/query.ml
index caefcda..5a4885f 100644
--- a/query/query.ml
+++ b/query/query.ml
@@ -62,7 +62,7 @@ let find_inter ~shards names =
let db = shard.Storage.db in
let r =
sort @@ inter_list
- @@ List.map
+ @@ ExtLib.List.map
(fun (name, count) ->
collapse_count ~count
@@ collapse_trie_with_poly name
@@ -76,7 +76,7 @@ let find_inter ~shards names =
let find_names ~shards names =
let names =
- List.map
+ ExtLib.List.map
(fun n -> List.rev (Db.list_of_string (String.lowercase_ascii n)))
names
in
@@ -86,7 +86,7 @@ let find_names ~shards names =
let open Lwt.Syntax in
let+ () = Lwt.pause () in
let candidates =
- List.map
+ ExtLib.List.map
(fun name ->
let t = Tchar.find name db_names in
collapse_triechar t)
diff --git a/query/query_ast.ml b/query/query_ast.ml
index a53ca4d..9a747f1 100644
--- a/query/query_ast.ml
+++ b/query/query_ast.ml
@@ -22,7 +22,7 @@ let rec paths_arrow ~prefix ~sgn = function
| [] -> [ prefix ]
| _ ->
List.concat
- @@ List.mapi
+ @@ ExtLib.List.mapi
(fun i arg ->
let prefix = string_of_int i :: prefix in
paths_arrow ~prefix ~sgn arg)
@@ -30,7 +30,7 @@ let rec paths_arrow ~prefix ~sgn = function
end
| Tuple args ->
List.concat
- @@ List.mapi
+ @@ ExtLib.List.mapi
(fun i arg ->
let prefix = (string_of_int i ^ "*") :: prefix in
paths_arrow ~prefix ~sgn arg)
@@ -48,14 +48,14 @@ let rec paths ~prefix ~sgn = function
| [] -> [ prefix ]
| _ ->
List.concat
- @@ List.mapi
+ @@ ExtLib.List.mapi
(fun i arg ->
let prefix = string_of_int i :: prefix in
paths ~prefix ~sgn arg)
args
end
| Tuple args ->
- List.concat @@ List.map (fun arg -> paths ~prefix ~sgn arg) args
+ List.concat @@ ExtLib.List.map (fun arg -> paths ~prefix ~sgn arg) args
let rec show = function
| Arrow (a, b) -> show_parens a ^ " -> " ^ show b
diff --git a/query/sort.ml b/query/sort.ml
index 2256d81..e86d2b0 100644
--- a/query/sort.ml
+++ b/query/sort.ml
@@ -70,7 +70,7 @@ let minimize = function
let used = Array.make (List.length (List.hd arr)) false in
let arr =
Array.map (fun lst ->
- let lst = (1, None) :: List.mapi (fun i x -> x, Some i) lst in
+ let lst = (1, None) :: ExtLib.List.mapi (fun i x -> x, Some i) lst in
List.sort Stdlib.compare lst)
@@ Array.of_list arr
in
@@ -132,17 +132,17 @@ let score_type query_type paths =
| _, [] | [], _ -> 0
| _ ->
let arr =
- List.map
+ ExtLib.List.map
(fun p ->
let p = List.rev p in
- List.map (fun q -> distance (List.rev q) p) query_type)
+ ExtLib.List.map (fun q -> distance (List.rev q) p) query_type)
paths
in
minimize arr
let list query_name query_type results =
let results =
- List.map
+ ExtLib.List.map
(fun a ->
let open Elt in
let name_cost = score_name query_name a.name in
diff --git a/www/packages.ml b/www/packages.ml
index 8b2b97c..f2b071a 100644
--- a/www/packages.ml
+++ b/www/packages.ml
@@ -145,14 +145,14 @@ let html =
div
~a:[ a_class [ "categories" ] ]
(M.bindings packages
- |> List.map (fun (category, packages) ->
+ |> ExtLib.List.map (fun (category, packages) ->
div
~a:[ a_class [ "category" ] ]
[ h3 [ txt (if category = "" then "Not classified" else category) ]
; div
~a:[ a_class [ "packages" ] ]
(S.elements packages
- |> List.map (fun package ->
+ |> ExtLib.List.map (fun package ->
a
~a:
[ a_href ("https://ocaml.org/p/" ^ package.name)
diff --git a/www/ui.ml b/www/ui.ml
index 4e6cbd4..c799e55 100644
--- a/www/ui.ml
+++ b/www/ui.ml
@@ -33,7 +33,7 @@ let render ~pretty results =
~a:[ a_class [ "query" ] ]
[ txt "Results for "; code [ txt pretty ] ]
; ul ~a:[ a_class [ "found" ] ]
- @@ List.map (fun r -> li (render_result r)) results
+ @@ ExtLib.List.map (fun r -> li (render_result r)) results
]
let ajax_reload =
diff --git a/www/www.ml b/www/www.ml
index 37972a8..8e14bc5 100644
--- a/www/www.ml
+++ b/www/www.ml
@@ -37,7 +37,7 @@ let api query =
open Lwt.Syntax
-let get_query params = Option.value ~default:"" (Dream.query params "q")
+let get_query params = Stdlib.Option.value ~default:"" (Dream.query params "q")
let root ~query fn _params =
let* result = fn query in
Thanks a lot for debugging the issue and the fix! <3 I'm afraid there's a large function type in your lib that might cause slowdown/crashes on the website later, I'll try to reproduce soon :) (hopefully next week, I'm currently going through a tendinitis and must resist the urge to code)
... But I'm too happy that you tried to run it, and very curious: did you manage to start the website or are there other traps on the way? (PRs are welcome if you keep hacking! but also feature requests / complaints ;) )
I managed to start the website and the search is working nicely. There was no particular problem, the readme was enough. it is only internal code so far, not including the opam libs we depend on, 5.1G db to give you an idea of the scale. There is an instance now deployed internally, hopefully some colleagues will play with it next week.
thanks for the cool project. Will report more and try to contribute as usage increases.
Cool, good to know it worked!
Taking a look at this again, I believe the culprit was the outer map
in the following line, since the list type_paths
could grow exponentially on nested types: (while all the other lists should be reasonably small...)
Db.store_all str_type (List.map (List.map Cache_name.memo) type_paths)
The good news is that we don't have this exponential explosion anymore! But if I'm wrong, the bad news is that we delegate more work to odoc.search
now, so it might be harder to fix depending on what's causing the issue. If and when you update sherlodoc, could you let us know if you still encounter the problem? :)
(At least I could not reproduce while indexing the documentation of opam packages that can be found on ocaml.org)
The command-line has changed a bit:
# using your odig generated documentation for the current switch:
$ odig odoc
$ sherlodoc index --db=db.ancient $(find $OPAM_SWITCH_PREFIX/var/cache/odig/odoc -name '*.odocl')
# or if you would like links to ocaml.org, you'll need to prepare an `odocls.tsv` file
# with three tab-separated columns: package_name\tversion\t/path/to/odocl
$ cat odocls.tsv
ocaml 5.1.0 /home/arthur/.opam/5.1.0/var/cache/odig/odoc/ocaml/stdlib__Buffer.odocl
ocaml 5.1.0 /home/arthur/.opam/5.1.0/var/cache/odig/odoc/ocaml/stdlib__Gc.odocl
...
cmdliner 1.2.0 ../ocaml-org-sherlodoc/2024/p/cmdliner/1.2.0/epoch-6896ddd946045dabbf3db278cdd97651/linked/p/cmdliner/1.2.0/page-doc.odocl
cmdliner 1.2.0 ../ocaml-org-sherlodoc/2024/p/cmdliner/1.2.0/epoch-6896ddd946045dabbf3db278cdd97651/linked/p/cmdliner/1.2.0/1.2.0/lib/cmdliner/Cmdliner.odocl
...
$ sherlodoc index --db=db.ancient --file-list=odocls.tsv
(Since you have private libs, it would be cool if the links could point to your local odig documentation but ocaml.org is currently hard-coded hmhmm)
I was running the indexer on an internal project for which I can't share the code. There is on odocl file that is not super big, only 68k, but triggers a stack overflow in a
List.map
call. Given the appearance ofCache.Make.memo
later in the stacktrace, I suppose that it is from this snippet.https://github.com/art-w/sherlodoc/blob/8b9e648700edb3fbdc3b74bd8610dcae988d86cc/index/load_doc.ml#L83-L96
Replacing
List.map
withExtLib.List.map
(which is tailrec) fixes the issue.