Open ingted opened 1 year ago
The result:
val data: string = "A;B
a;1
b;2
b;2
c;3"
val bytes: byte array =
[|65uy; 59uy; 66uy; 10uy; 97uy; 59uy; 49uy; 10uy; 98uy; 59uy; 50uy; 10uy;
98uy; 59uy; 50uy; 10uy; 99uy; 59uy; 51uy|]
val stream: IO.MemoryStream
val df: Frame<int,string> =
A B
0 -> a 1
1 -> b 2
2 -> b 2
3 -> c 3
4 rows x 2 columns
0 missing values
val it: Frame<int,string> =
A B
0 -> a 1
1 -> b 2
3 -> c 3
3 rows x 2 columns
0 missing values
module Frame =

    /// Returns the rows of `frame` that are distinct with respect to the columns
    /// listed in `keys`. For every distinct combination of key-column values the
    /// first row carrying that combination (in the frame's row order) is kept.
    ///
    /// `keys`      - column keys whose values define row identity.
    /// `distColId` - name of a temporary helper column used for the inner join;
    ///               it is removed from the result again.
    ///               NOTE(review): presumably must not clash with an existing
    ///               column key of `frame` - confirm against Deedle's Join rules.
    /// `frame`     - the frame to de-duplicate.
    let inline distinctFrame (keys: 'C seq) (distColId: 'C) (frame: Frame<'R, 'C>) =
        // Pair every row key with the (optional) values of the key columns.
        // TryGet is used, so the lookup yields an optional value per key column.
        let keyedRows =
            frame
            |> Frame.mapRows (fun (rowKey: 'R) row ->
                rowKey, keys |> Seq.map row.TryGet |> Seq.toArray)
        // For each distinct value combination, pick the row key of its first member.
        let firstRowKeys =
            keyedRows.Values
            |> Seq.groupBy snd
            |> Seq.map (fun (_, members) -> members |> Seq.item 0 |> fst)
            |> Seq.distinct
        // Single-column frame indexed by the surviving row keys.
        let marker = Frame([distColId], [Series(firstRowKeys, firstRowKeys)]) //"____distinctIdx____"
        // The inner join keeps only the rows whose key appears in the marker frame.
        let joined = frame.Join(marker, kind=JoinKind.Inner)
        let keptColumns = joined.ColumnKeys |> Seq.filter (fun col -> col <> distColId)
        joined.Columns.[keptColumns]

    /// Sorts the rows of `frame` by an integer rank computed from each row.
    ///
    /// `mappingOrderKeys` - maps a row to its integer sort rank.
    /// `distColId`        - name of the column that receives the rank; the column
    ///                      is joined onto the frame and stays in the result.
    /// `frame`            - the frame to sort.
    let inline orderBy (mappingOrderKeys: ObjectSeries<'C> -> int) (distColId: 'C) (frame: Frame<'R, 'C>) =
        // One rank per row, indexed by the original row keys.
        let ranks = frame |> Frame.mapRows (fun (_: 'R) row -> mappingOrderKeys row)
        let rankFrame = Frame([distColId], [ranks]) //"____distinctIdx____"
        frame.Join(rankFrame, kind=JoinKind.Inner)
        |> Frame.sortRows distColId

    /// Sorts the rows of `frame` by the values of the columns listed in `keys`.
    /// Each row's key-column values are cast to `IComparable`, the distinct value
    /// arrays are sorted, and every row receives the position of its array in that
    /// sorted sequence as an integer rank.
    ///
    /// `keys`      - column keys to order by; every value is cast to `IComparable`.
    /// `distColId` - name of the column that receives the rank; the column is
    ///               joined onto the frame and stays in the result.
    /// `frame`     - the frame to sort.
    let inline orderByCols (keys: 'C seq) (distColId: 'C) (frame: Frame<'R, 'C>) =
        // Per row: look up all key-column values first, then cast them.
        let sortKeys =
            frame
            |> Frame.mapRowValues (fun row ->
                [| for k in keys -> row.[k] |]
                |> Array.map (fun v -> v :?> IComparable))
        // Lookup table from a distinct value array to its position in sorted order.
        let rankOf =
            sortKeys.Values
            |> Seq.distinct
            |> Seq.sort
            |> Seq.mapi (fun rank key -> key, rank)
            |> Map.ofSeq
        let ranks = sortKeys |> Series.map (fun _ key -> rankOf.[key])
        let rankFrame = Frame.ofColumns [distColId, ranks]
        frame.Join(rankFrame, kind=JoinKind.Inner)
        |> Frame.sortRows distColId
orderBy & orderByCols functionality
https://stackoverflow.com/questions/70985428/deedle-distinct-by-column/75897557#75897557
In an RDBMS we often write "select distinct col1, col2, col3 from xxx", but Deedle does not seem to offer an easy-to-use API for this.
How about this?