df.groupBy { city }.meanFor { age and weight }
Input DataFrame: rowsCount = 7, columnsCount = 5
name | | age | city | weight | isHappy |
---|---|---|---|---|---|
firstName | lastName | | | | |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 3
city | age | weight |
---|---|---|
London | 15.0 | 54.0 |
Dubai | 45.0 | 87.0 |
Moscow | 25.0 | 90.0 |
Milan | 40.0 | NaN |
Tokyo | 30.0 | 68.0 |
null | 20.0 | 55.0 |
df.groupBy { city }.mean()
Input DataFrame: rowsCount = 7, columnsCount = 5
name | | age | city | weight | isHappy |
---|---|---|---|---|---|
firstName | lastName | | | | |
Alice | Cooper | 15 | London | 54 | true |
Bob | Dylan | 45 | Dubai | 87 | true |
Charlie | Daniels | 20 | Moscow | null | false |
Charlie | Chaplin | 40 | Milan | null | true |
Bob | Marley | 30 | Tokyo | 68 | true |
Alice | Wolf | 20 | null | 55 | false |
Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: GroupBy
Output DataFrame: rowsCount = 6, columnsCount = 3
city | age | weight |
---|---|---|
London | 15.0 | 54.0 |
Dubai | 45.0 | 87.0 |
Moscow | 25.0 | 90.0 |
Milan | 40.0 | NaN |
Tokyo | 30.0 | 68.0 |
null | 20.0 | 55.0 |