df.groupBy { city }.max()
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 4
city | age | weight | isHappy |
--- | --- | --- | --- |
London | 15 | 54 | true |
Dubai | 45 | 87 | true |
Moscow | 30 | 90 | true |
Milan | 40 | null | true |
Tokyo | 30 | 68 | true |
null | 20 | 55 | false |
df.groupBy { city }.mean()
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 3
city | age | weight |
--- | --- | --- |
London | 15.0 | 54.0 |
Dubai | 45.0 | 87.0 |
Moscow | 25.0 | 90.0 |
Milan | 40.0 | NaN |
Tokyo | 30.0 | 68.0 |
null | 20.0 | 55.0 |
df.groupBy { city }.max { age }
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 2
city | age |
--- | --- |
London | 15 |
Dubai | 45 |
Moscow | 30 |
Milan | 40 |
Tokyo | 30 |
null | 20 |
df.groupBy { city }.sum("total weight") { weight }
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 2
city | total weight |
--- | --- |
London | 54 |
Dubai | 87 |
Moscow | 90 |
Milan | 0 |
Tokyo | 68 |
null | 55 |
df.groupBy { city }.count()
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 2
city | count |
--- | --- |
London | 1 |
Dubai | 1 |
Moscow | 2 |
Milan | 1 |
Tokyo | 1 |
null | 1 |
df.groupBy { city }.max { name.firstName.length() and name.lastName.length() }
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 2
city | max |
--- | --- |
London | 6 |
Dubai | 5 |
Moscow | 7 |
Milan | 7 |
Tokyo | 6 |
null | 5 |
df.groupBy { city }.medianFor { age and weight }
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 3
city | age | weight |
--- | --- | --- |
London | 15.0 | 54.0 |
Dubai | 45.0 | 87.0 |
Moscow | 25.0 | 90.0 |
Milan | 40.0 | null |
Tokyo | 30.0 | 68.0 |
null | 20.0 | 55.0 |
df.groupBy { city }.minFor { (age into "min age") and (weight into "min weight") }
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 3
city | min age | min weight |
--- | --- | --- |
London | 15 | 54 |
Dubai | 45 | 87 |
Moscow | 20 | 90 |
Milan | 40 | null |
Tokyo | 30 | 68 |
null | 20 | 55 |
df.groupBy { city }.meanOf("mean ratio") { weight?.div(age) }
Input DataFrame: rowsCount = 7, columnsCount = 5
name.firstName | name.lastName | age | city | weight | isHappy |
--- | --- | --- | --- | --- | --- |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 2
city | mean ratio |
--- | --- |
London | 3.0 |
Dubai | 1.0 |
Moscow | 3.0 |
Milan | NaN |
Tokyo | 2.0 |
null | 2.0 |