This is a generic function for joining two datasets to each other. It's equivalent to an equijoin, for performance it utitlizes the Sort-merge join algorithm.
This gives it a O(|a| log(|a|) + |b| log(|b|)) complexity profile.
I've benchmarked it compared with the "obvious" implementation, which is
joinOn selectFn aKeyFn bKeyFn aList bList =
List.concatMap (\a ->
List.filterMap(\b ->
if aKeyFn a == bKeyFn b then
Just (selectFn a b)
else
Nothing
) bList
) aList
This implementation has the complexity of O(|a| * |b|).
This can be plainly seen in the benchmark results for the comparatively small case of 2 lists with 100 elements each:
Benchmarking code here
```elm
module Benchmarking exposing (main)
import Benchmark exposing (Benchmark)
import Benchmark.Runner exposing (BenchmarkProgram, program)
import List.Extra exposing (joinOn)
main : BenchmarkProgram
main =
program suite
n : Int
n =
100
suite : Benchmark
suite =
Benchmark.compare "simple vs optimized" "Simple" (\_ -> join select .d .d aList bList) "Optimized" (\_ -> joinOn select .d .d aList bList)
join selectFn aKeyFn bKeyFn aLst bLst =
List.concatMap
(\a ->
List.filterMap
(\b ->
if aKeyFn a == bKeyFn b then
Just (selectFn a b)
else
Nothing
)
bLst
)
aLst
aList =
List.range 1 n
|> List.map
(\i ->
{ a = modBy 5 i
, b = modBy 2 i
, c = modBy 3 i
, d = modBy 7 i
, e = "Foo-" ++ String.fromInt i
}
)
bList =
List.range 1 n
|> List.map
(\i ->
{ a = modBy 5 i
, b = modBy 2 i
, c = modBy 3 i
, d = modBy 13 i
, e = "Foo-" ++ String.fromInt i
}
)
select a b =
{ aa = a.a
, ab = a.b
, ac = a.c
, ad = a.d
, ae = a.e
, ba = b.a
, bb = b.b
, bc = b.c
, bd = b.d
, be = b.e
}
```
This is a generic function for joining two datasets to each other. It's equivalent to an equijoin, for performance it utitlizes the Sort-merge join algorithm.
This gives it a O(|a| log(|a|) + |b| log(|b|)) complexity profile.
I've benchmarked it compared with the "obvious" implementation, which is
This implementation has the complexity of O(|a| * |b|).
This can be plainly seen in the benchmark results for the comparatively small case of 2 lists with 100 elements each:
Benchmarking code here
```elm module Benchmarking exposing (main) import Benchmark exposing (Benchmark) import Benchmark.Runner exposing (BenchmarkProgram, program) import List.Extra exposing (joinOn) main : BenchmarkProgram main = program suite n : Int n = 100 suite : Benchmark suite = Benchmark.compare "simple vs optimized" "Simple" (\_ -> join select .d .d aList bList) "Optimized" (\_ -> joinOn select .d .d aList bList) join selectFn aKeyFn bKeyFn aLst bLst = List.concatMap (\a -> List.filterMap (\b -> if aKeyFn a == bKeyFn b then Just (selectFn a b) else Nothing ) bLst ) aLst aList = List.range 1 n |> List.map (\i -> { a = modBy 5 i , b = modBy 2 i , c = modBy 3 i , d = modBy 7 i , e = "Foo-" ++ String.fromInt i } ) bList = List.range 1 n |> List.map (\i -> { a = modBy 5 i , b = modBy 2 i , c = modBy 3 i , d = modBy 13 i , e = "Foo-" ++ String.fromInt i } ) select a b = { aa = a.a , ab = a.b , ac = a.c , ad = a.d , ae = a.e , ba = b.a , bb = b.b , bc = b.c , bd = b.d , be = b.e } ```