cardillo / joinery

Data frames for Java
https://joinery.sh
GNU General Public License v3.0
700 stars 166 forks source link

How does "Join" work? Could you give me a good example? #71

Open SergeyRasin opened 6 years ago

SergeyRasin commented 6 years ago

Hello!

I'm trying to use "join", but I get an error ("generated key is not unique"). It looks like I'm doing something wrong. Is it possible to join two "DataFrame" objects if their keys contain different sets of fields?

Here is my example:

import joinery.*; import java.text.*; import java.util.*;

/**
 * Small reproduction program for a question about joining two joinery data frames.
 *
 * <p>It builds a frame of (name, date, value) rows, derives a per-name summary
 * frame from it, prints both, and finally prints the result of joining the
 * original frame with the summary on the {@code "name"} column.
 */
public class Main {

    /**
     * Builds the two frames, prints them, and prints their join on {@code "name"}.
     *
     * @param args unused
     * @throws Exception if the cut-off date literal cannot be parsed, or if any
     *                   data frame operation fails
     */
    public static void main(String[] args) throws Exception {

        // Cut-off date: the filter below keeps only rows dated strictly before it.
        final Date dtDate = new SimpleDateFormat("yyyy-MM-dd").parse("2018-01-03");

        // Source frame: every name appears twice (two dates) except CHELENERG, which appears once.
        final DataFrame<Object> df = new DataFrame<Object>(
                Collections.emptyList(),
                Arrays.asList("name", "date", "value"),
                Arrays.asList(
                        Arrays.asList("MOSENERG", "MOSENERG", "LENENERG", "LENENERG", "TOMSKENER", "TOMSKENER", "ORELENERG", "ORELENERG", "NOVOSIBEN", "NOVOSIBEN", "CHELENERG"),
                        Arrays.asList("2018-01-01", "2018-01-02", "2018-01-01", "2018-01-02", "2018-01-01", "2018-01-02", "2018-01-01", "2018-01-02", "2018-01-01", "2018-01-02", "2018-01-03"),
                        Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 1)
                )
        )
                // One target type per column; presumably null leaves "name" unconverted -- confirm in joinery docs.
                .convert(null, Date.class, Number.class)
                // Index by (name, date); the 'false' flag is presumably "do not drop the columns",
                // which the positional lookups below rely on -- confirm.
                .reindex(new Object[] {"name", "date"}, false);

        System.out.println(df);

        // Per-name summary. Positional access: 0 = name, 1 = date, 2 = value.
        // NOTE(review): the (double) casts assume the converted "value" cells are Double at runtime -- confirm.
        final DataFrame<Object> df1 = df
                .select((values) ->
                        ((double) values.get(2) <= 5)
                                && !"CHELENERG".equals(values.get(0))
                                && ((Date) values.get(1)).compareTo(dtDate) < 0)
                .add("price", (values) -> Math.random())
                // Index 3 is presumably the "price" column added just above, i.e. amount = value * price.
                .add("amount", (values) -> (double) values.get(2) * (double) values.get(3))
                .rename("value", "volume")
                .groupBy("name")
                .sum()
                .sortBy("name");

        System.out.println(df1);

        // NOTE(review): df holds two rows for most names, so a key built from "name" alone is
        // not unique on the left side. This is presumably what triggers the reported
        // "generated key is not unique" error -- confirm against joinery's join semantics.
        System.out.println(df.joinOn(df1, "name"));
    }
}