Closed nholland94 closed 1 year ago
In berkeley
, Hashtbl
functions:
$ git grep Hashtbl.iter
src/lib/crypto/kimchi_backend/common/plonk_constraint_system.ml: Hashtbl.iteri sys.equivalence_classes ~f:(fun ~key ~data ->
// (1) result placed in a set
src/lib/crypto/kimchi_backend/common/plonk_constraint_system.ml: Hashtbl.iter equivalence_classes ~f:(fun ps ->
// ***
src/lib/downloader/downloader.ml: Hashtbl.iter temporary_ignores ~f:(fun e ->
// (7) order doesn't matter here; this data structure only used internally; here it's only doing cleaning-ups
src/lib/downloader/downloader.ml: Hashtbl.iter knowledge ~f:Knowledge.clear ;
// (7) order doesn't matter here; this data structure only used internally; here it's only doing cleaning-ups
src/lib/downloader/downloader.ml: Hashtbl.iter t.knowledge ~f:(fun s ->
// (7) order doesn't matter here; this data structure is only used internally; here it's only doing cleaning-ups
src/lib/downloader/downloader.ml: Hashtbl.iter t.knowledge ~f:(fun s ->
// (7) order doesn't matter here; this data structure is only used internally; here it's only doing cleaning-ups
src/lib/downloader/downloader.ml: Hashtbl.iter downloading ~f:(fun (_, j, _) -> kill_job t j) ;
// (7) order doesn't matter here; this data structure is only used internally; here it's only doing cleaning-ups
src/lib/downloader/downloader.ml: Hashtbl.iter jobs ~f:(fun x ->
// (7) the order of things that we enqueue doesn't matter; this data structure is only used internally; here it's used for cleaning up
--
src/lib/ledger_catchup/super_catchup.ml: let iter_vertex (f : V.t -> unit) (t : t) = Hashtbl.iter t.nodes ~f
// (3) used in argument to Graphviz functor to enumerate vertice, edges, not part of protocol
src/lib/ledger_catchup/super_catchup.ml: Hashtbl.iter t.nodes ~f:(fun child ->
// (3) used in argument to Graphviz functor to enumerate vertice, edges, not part of protocol
--
src/lib/ledger_catchup/super_catchup.ml: Hashtbl.iter t.nodes ~f:(fun node ->
// (3) used in debug logs, not part of protocol
src/lib/ledger_catchup/super_catchup.ml: Hashtbl.iter t.nodes ~f:(fun node ->
// (3) used in debug logs, not part of protocol
--
src/lib/mina_net2/libp2p_helper.ml: Hashtbl.iter t.outstanding_requests ~f:(fun iv ->
// (7) fills Ivar for all outstanding requests, order doesn't seem important
--
src/lib/network_pool/snark_pool.ml: Hashtbl.iteri t.rebroadcastable ~f:(fun ~key ~data:(x, r) ->
// (1) used to construct another hash table
--
src/lib/o1trace/plugins.ml:let dispatch f = Hashtbl.iter plugins ~f
// (3) plugins for tracing, not part of protocol
--
src/lib/o1trace/thread.ml:let iter_threads ~f = Hashtbl.iter threads ~f
// (3) for tracing, not part of protocol
--
src/lib/rocksdb/database.ml: Hashtbl.iteri db_hashtbl ~f:(fun ~key ~data ->
// (3) in unit test to set values in db, not part of protocol
--
src/lib/transition_frontier/full_catchup_tree.ml: Hashtbl.iter nodes ~f:(fun x ->
// (7) in `tear_down`, fills `Ivar` for some nodes, order doesn't seem important
--
src/lib/transition_frontier/full_frontier/full_frontier.ml:let iter t ~f = Hashtbl.iter t.table ~f:(fun n -> f n.breadcrumb)
// (8) Full_frontier.iter required by interface, not called anywhere
src/lib/transition_frontier/full_frontier/full_frontier.ml: Hashtbl.iter t.table ~f:(fun node ->
// (1, 8) used to build another hash table used only for metrics
--
src/lib/transition_handler/catchup_scheduler.ml: Hashtbl.iter collected_transitions
// (7) internal cleanup, order doesn't seem important
src/lib/transition_handler/catchup_scheduler.ml: Hashtbl.iter parent_root_timeouts ~f:(fun timeout ->
// (7) internal cleanup, order doesn't seem important
--
$ git grep Hashtbl.to_alist
--
src/lib/consensus/proof_of_stake.ml: ( Hashtbl.to_alist delegatee_table
// (4) JSON output, not part of protocol
src/lib/consensus/proof_of_stake.ml: ( Hashtbl.to_alist delegators
// (4) JSON output, not part of protocol
src/lib/consensus/proof_of_stake.ml: |> Option.value_map ~f:Hashtbl.to_alist ~default:[]
// @nholland94 says OK
--
src/lib/downloader/downloader.ml: (List.map (Hashtbl.to_alist knowledge) ~f:(fun (p, s) ->
// (4) used for logging
src/lib/downloader/downloader.ml: (List.map (Hashtbl.to_alist t.downloading)
// (4) used for logging
src/lib/ledger_catchup/super_catchup.ml: (List.map (Hashtbl.to_alist s) ~f:(fun (k, v) ->
// (4) used for logging
src/lib/ledger_catchup/super_catchup.ml: (List.map (Hashtbl.to_alist s) ~f:(fun (k, v) ->
// (4) used for logging
--
src/lib/mina_commands/mina_commands.ml: (List.map (Hashtbl.to_alist full.states) ~f:(fun (state, hashes) ->
// (3) catchup state in daemon status, not part of protocol
--
src/lib/network_pool/snark_pool.ml: let p t = (Hashtbl.to_alist t.all, Hashtbl.to_alist t.rebroadcastable) in
// fixed in #13511
src/lib/network_pool/snark_pool.ml: Hashtbl.to_alist t.snark_tables.rebroadcastable
// code no longer there
--
src/lib/network_pool/transaction_pool.ml: Hashtbl.to_alist t.locally_generated_uncommitted
// (2) result is sorted
--
src/lib/rocksdb/database.ml: (Hashtbl.to_alist db_hashtbl)
// (2) in unit test, values are sorted anyway, not part of protocol
src/lib/rocksdb/database.ml: (Hashtbl.to_alist cp_hashtbl)
// (2) in unit test, values are sorted anyway, not part of protocol
---
src/lib/staged_ledger_diff/bitswap_block.ml: ( Blake2.Map.of_alist_exn (Hashtbl.to_alist blocks)
// (1) used to build a map, order unimportant
--
src/lib/transaction_snark_scan_state/transaction_snark_scan_state.ml: (List.map (Hashtbl.to_alist t) ~f:(fun (k, info) ->
// (4) logging
--
src/lib/transition_frontier/full_catchup_tree.ml: @@ List.map (Hashtbl.to_alist t.states) ~f:(fun (state, hashes) ->
// (4) for JSON
--
$ git grep Hashtbl.fold
src/lib/downloader/downloader.ml: @ Hashtbl.fold t.knowledge ~init:[] ~f:(fun ~key:p ~data:k acc ->
// (6) There's no preferred order for this collection; the result is only used inside this module
--
src/lib/gossip_net/fake.ml: Hashtbl.fold t.nodes ~init:Deferred.unit
// (8) fake network, not part of protocol
--
src/lib/merkle_mask/masking_merkle_tree.ml: Hashtbl.fold t.token_owners ~init:Account_id.Set.empty
// (1) used to create a set, order irrelevant
--
src/lib/transition_frontier/catchup_hash_tree.ml: Hashtbl.fold t.nodes ~init:[] ~f:(fun ~key ~data acc ->
// (7) used to build a list which is iterated over
---
src/lib/transition_frontier/full_catchup_tree.ml: Hashtbl.fold t.states ~init ~f:(fun ~key ~data acc ->
// (3, 8) for node status report
src/lib/transition_frontier/full_catchup_tree.ml: Hashtbl.fold t.nodes ~init:0 ~f:(fun ~key:_ ~data acc ->
// (7) finding a max element
src/lib/transition_frontier/full_catchup_tree.ml: Hashtbl.fold t.nodes ~init:[] ~f:(fun ~key:_ ~data acc ->
// (7) builds a list for node for removal, order not important
--
src/lib/transition_frontier/full_frontier/full_frontier.ml: let fold t ~f = Hashtbl.fold t.table ~f:(fun ~key:_ ~data -> f data)
// (8) for visualization
--
$ git grep Hashtbl.keys
src/app/best_tip_merger/best_tip_merger.ml: List.iter (Hashtbl.keys acc'.init_states) ~f:(fun root ->
// (8) an app, not part of protocol
src/app/cli/src/init/mina_run.ml: Hashtbl.keys child_pids
// (8) kills child processes on crash
src/lib/cache_lib/impl.ml: let to_list t = Hashtbl.keys t.set
// (8) Cache_lib.to_list needed for interface, apparently not used
src/lib/downloader/downloader.ml: , list (List.map ~f:Peer.to_yojson (Hashtbl.keys temporary_ignores))
// (4) used for logs
src/lib/mina_lib/mina_subscriptions.ml: @@ List.filter_opt (Hashtbl.keys subscribed_block_users)
// (1) creates set from list
--
src/lib/transition_frontier/catchup_hash_tree.ml: List.iter (Hashtbl.keys t.nodes) ~f:(fun h ->
// (7) used for pruning, order doesn't seem relevant
--
$ git grep Hashtbl.data
src/app/best_tip_merger/best_tip_merger.ml: List.fold (Hashtbl.data input.init_states) ~init:State_hash.Map.empty
// (8) app, not protocol
src/lib/consensus/proof_of_stake_fuzzer.ml: Option.map (Hashtbl.find dist slot) ~f:Hashtbl.data
// (8), in fuzzer, code is commented out
src/lib/downloader/downloader.ml: @ List.map (Hashtbl.data t.downloading) ~f:(fun (_, j, _) -> j)
// (6) there's no preferred order for this collection, it's only used as an aggregation of things; the client code also didn't depend on order
--
src/lib/gossip_net/fake.ml: Hashtbl.data nodes |> List.concat
// (8) not part of protocol
src/lib/gossip_net/fake.ml: let peers { peer_table; _ } = Hashtbl.data peer_table |> Deferred.return
// (8) not part of protocol
src/lib/gossip_net/fake.ml: Hashtbl.data t.peer_table
// (8) not part of protocol
--
src/lib/mina_net2/mina_net2.ml: Hashtbl.data t.subscriptions
// (7) argument to `List.exists`, so order not important
--
src/lib/transition_frontier/full_frontier/full_frontier.ml: List.map (Hashtbl.data t.table) ~f:(fun { breadcrumb; _ } -> breadcrumb)
// (6) `all_breadcrumbs` clients don't depend on order
Table
functions:
$ git grep Table.iter
--
src/lib/perf_histograms/perf_histograms0.ml: String.Table.iter t ~f:(fun tbl -> Histogram.Exp_time_spans.clear tbl)
// (7) called in `wipe` to clear all spans, order not important
--
src/lib/pipe_lib/broadcast_pipe.ml: Int.Table.iter t.pipes ~f:(fun w -> Pipe.close w) ;
// (7) closing write pipes in some order, shouldn't be consequential
--
$ git grep Table.to_alist
--
src/lib/allocation_functor/table.ml: String.Table.to_alist table
// (3) allocation data, order not part of protocol
--
src/lib/consensus/proof_of_stake.ml: ( Public_key.Compressed.Table.to_alist
// (1) in unused _seen_slot function; use to build a set, so order unimportant
src/lib/consensus/proof_of_stake.ml: Table.to_alist !t.last_checked_slot_and_epoch
// (4) used to construct JSON, not part of protocol
--
src/lib/distributed_dsl/node.ml: let checks = Condition_label.Table.to_alist t.conditions in
// (8) library not used
src/lib/distributed_dsl/node.ml: let checks = Message_label.Table.to_alist t.message_handlers in
// (8) library not used
--
src/lib/key_value_database/key_value_database.ml: Key.Table.to_alist t
// (3) only used in `Make_mock` functor, used in trust system unit test
src/lib/key_value_database/key_value_database.ml: let to_alist = Key.Table.to_alist
// (3) only used in `Make_mock` functor, used in trust system unit test
// unlike `Ledger.to_alist`, which is in ledger-leaf order
--
src/lib/merkle_ledger_tests/test_stubs.ml: let unsorted = Bigstring_frozen.Table.to_alist t.table in
// (2) result is sorted
src/lib/merkle_ledger_tests/test_stubs.ml: @@ Bigstring_frozen.Table.to_alist t.table
// (1) result is used to build a new table (probably this should just be `Table.copy` -- PR #13511)
--
src/lib/merkle_mask/masking_merkle_tree.ml: let account_data = Location_binable.Table.to_alist t.account_tbl in
// (7) sets accounts in mask parent at specified locations, order should not matter
src/lib/merkle_mask/masking_merkle_tree.ml: Location_binable.Table.to_alist t.account_tbl
// (1) used to get accounts, which is turned into a set, so order irrelevant
--
src/lib/snark_profiler_lib/snark_profiler_lib.ml: ( Transaction_key.Table.to_alist transaction_combinations
// (3) snark profiler, not protocol, result is sorted in any case
--
src/lib/transition_frontier/catchup_hash_tree.ml: (List.map (State_hash.Table.to_alist t) ~f:(fun (h, x) ->
// (4) for JSON
--
src/lib/vrf_evaluator/vrf_evaluator.ml: Table.to_alist last_checked_slot_and_epoch
// (1, 7) List is filtered, then used to build a set
--
$ git grep Table.fold
src/lib/consensus/proof_of_stake.ml: Public_key.Compressed.Table.fold outer_table ~init:0
// (7) adding numbers, order not important
--
src/lib/merkle_mask/maskable_merkle_tree.ml: Uuid.Table.fold uuid_to_masks_table ~init:empty
// (3) builds Graphviz graph, not part of protocol
--
$ git grep Table.keys
src/app/cli/src/init/client.ml: Token_id.Table.keys currency_tbl
// (2) tokens are dedup'ed and sorted
src/app/genesis_ledger_from_tsv/genesis_ledger_from_tsv.ml: let delegates = String.Table.keys delegates_tbl in
// (8) not part of protocol, app no longer used
--
src/app/replayer/replayer.ml: let slots = Int64.Table.keys global_slot_hashes_tbl in
// (7) to find least element, order not important
--
src/lib/consensus/proof_of_stake.ml: Public_key.Compressed.Table.keys !t.Data.last_checked_slot_and_epoch
// (1) converted to a set
--
src/lib/distributed_dsl/distributed_dsl.ml: (Char.Table.keys table) () ;
// (8) library not used
--
src/lib/key_value_database/key_value_database.ml: let keys = Key.Table.keys t in
// (3) in `random_element` in `Make_mock`, not part of protocol
--
src/lib/merkle_mask/masking_merkle_tree.ml: Account_id.Table.keys t.location_tbl
// (1) used to create a set, ordering unimportant
--
src/lib/mina_generators/zkapp_command_generators.ml: List.iter (Account_id.Table.keys account_state_tbl) ~f:(fun id ->
// (3) in Quickcheck generator, not in protocol
src/lib/mina_generators/zkapp_command_generators.ml: |> Account_id.Table.keys
// (3) in Quickcheck generator, not in protocol
--
src/lib/secrets/wallets.ml:let pks ({ cache; _ } : t) = Public_key.Compressed.Table.keys cache
// (1, 3, 5) in unit tests, always converted to set; also appears in `Mina_subscriptions`, where it's used to populate a table, so order unimportant; and in `Mina_graphql`, where ordering used for "tracked accounts", but that's not relevant to protocol
--
$ git grep Table.data
--
src/lib/bootstrap_controller/transition_cache.ml: let collected_transitions = State_hash.Table.data t |> List.concat in
// (1, 2 ) in bootstrap controller, result is filtered and sorted; a test builds a set from the result
--
src/lib/distributed_dsl/distributed_dsl.ml: (List.map (Identifier.Table.data t.nodes) ~f:(fun n ->
// (8) library not used
src/lib/distributed_dsl/distributed_dsl.ml: List.fold (Identifier.Table.data t.nodes) ~init:None
// (8) library not used
src/lib/distributed_dsl/distributed_dsl.ml: List.find (Identifier.Table.data t.nodes) ~f:MyNode.is_ready
// (8) library not used
--
src/lib/ledger_catchup/super_catchup.ml: State_hash.Table.data catchup_tree.nodes
// (3) used in unit test
src/lib/ledger_catchup/super_catchup.ml: State_hash.Table.data catchup_tree.nodes
// (3) was part of a unit test; but now it is is commented
src/lib/ledger_catchup/super_catchup.ml: State_hash.Table.data catchup_tree.nodes
// (3) was part of a unit test; but now it is commented
src/lib/ledger_catchup/super_catchup.ml: State_hash.Table.data catchup_tree.nodes
// (3) was part of a unit test; but now it is commented
--
src/lib/merkle_mask/maskable_merkle_tree.ml: let masks = List.concat @@ Uuid.Table.data registered_masks in
// (1) used to build new table
src/lib/merkle_mask/maskable_merkle_tree.ml: List.iter (Uuid.Table.data registered_masks) ~f:(fun ms ->
// (7) iterating over all registered masks to see if mask already registered, order not important
---
src/lib/merkle_mask/masking_merkle_tree.ml: Location_binable.Table.data t.account_tbl
// (8) self_find_all_accounts, appears to be dead code -- PR #13511
--
src/lib/mina_generators/zkapp_command_generators.ml: |> Account_id.Table.data
// (3) used to create a QC generator for account ids from list, not in protocol
src/lib/mina_generators/zkapp_command_generators.ml: Account_id.Table.data account_state_tbl
// (3) used to create a QC generator for public keys from list, not in protocol
--
src/lib/mina_graphql/mina_graphql.ml: Account_id.Table.data account_state_tbl
// (8) for ITN scheduler, used to get a length
--
src/lib/mina_lib/mina_lib.ml: ~default:[] ~f:Mina_base.Account.Index.Table.data
// (5) in `find_delegators`, used for GraphQL results `current_epoch_delegators` and `last_epoch_delegators`, not part of protocol
--
src/lib/pipe_lib/broadcast_pipe.ml: let inner_pipes = Int.Table.data t.pipes in
// (7) list of pipes to send broadcast over in non-deterministic order, so list order not important
--
Hash_set
functions:
$ git grep Hash_set.iter
src/lib/downloader/downloader.ml: Hash_set.iter peers ~f:(fun p ->
// (1) we iter through the collection and then add the selected result to another hashtbl
--
src/lib/integration_test_lib/gossip_state.ml: List.iter xs ~f:(fun s -> Hash_set.iter s ~f:(Hash_set.add res)) ;
// (1) adds all elements to hash set, order not important
--
$ git grep Hash_set.to_list
src/lib/crypto/kimchi_backend/common/plonk_constraint_system.ml: let ps = Hash_set.to_list ps in
// any possible issue fixed in #13592
--
src/lib/downloader/downloader.ml: let to_yojson t = `List (List.map (Hash_set.to_list t) ~f:Key.to_yojson)
// (4) used for logging
src/lib/downloader/downloader.ml: (List.map ~f:Peer.to_yojson (Hash_set.to_list downloading_peers))
// (4) used for logging
src/lib/downloader/downloader.ml: (Hash_set.to_list knowledge_requesting_peers) ) )
// (4) used for logging
src/lib/downloader/downloader.ml: (succ, Hash_set.to_list all)
// (1) the list is only consumed in 1 place where it's added a hash_set
--
src/lib/gossip_net/libp2p.ml: (Hash_set.to_list added_seeds)
// (2) list is part of another list that's dedup'ed and sorted
--
src/lib/transition_frontier/catchup_hash_tree.ml: (List.map (Hash_set.to_list t) ~f:(fun x ->
// (4) for JSON
src/lib/transition_frontier/catchup_hash_tree.ml: `List (List.map (Hash_set.to_list t) ~f:State_hash.to_yojson)
// (4) for JSON
--
$ git grep Hash_set.fold
--
src/lib/transition_frontier/catchup_hash_tree.ml: Hash_set.fold t.tips ~init:0 ~f:(fun acc tip ->
// (7) computes a max integer over tips in set, order not important
--
Hash_queue
functions:
$ git grep Hash_queue.fold
--
src/lib/network_pool/rate_limiter.ml: Ip.Hash_queue.foldi by_ip.table ~init:[] ~f:(fun acc ~key ~data ->
// (4) used to generate JSON summary, not part of protocol
src/lib/network_pool/rate_limiter.ml: Peer_id.Hash_queue.foldi by_peer_id.table ~init:[]
// (4) used to generate JSON summary, not part of protocol
--
DONE
Categories of acceptable uses:
Viable Systems recently raised an issue regarding OCaml hashtables having non-deterministic ordering of elements (https://github.com/MinaProtocol/mina/issues/13433). Hash tables in OCaml will iterate over elements in different orders dependent on the initial allocation size of the hash table and insertion order of the elements.
We need to audit our usage of hash tables in the code and make sure that we are not relying on the iteration order of elements in the table for any code which generates values that are stored in types which can be hashed or serialized.