df.distinct { age and name }
Input DataFrame: rowsCount = 7, columnsCount = 5
| name | | age | city | weight | isHappy |
|---|---|---|---|---|---|
| firstName | lastName | | | | |
| Alice | Cooper | 15 | London | 54 | true |
| Bob | Dylan | 45 | Dubai | 87 | true |
| Charlie | Daniels | 20 | Moscow | null | false |
| Charlie | Chaplin | 40 | Milan | null | true |
| Bob | Marley | 30 | Tokyo | 68 | true |
| Alice | Wolf | 20 | null | 55 | false |
| Charlie | Byrd | 30 | Moscow | 90 | true |
Output DataFrame: rowsCount = 7, columnsCount = 2
| age | name | |
|---|---|---|
| | firstName | lastName |
| 15 | Alice | Cooper |
| 45 | Bob | Dylan |
| 20 | Charlie | Daniels |
| 40 | Charlie | Chaplin |
| 30 | Bob | Marley |
| 20 | Alice | Wolf |
| 30 | Charlie | Byrd |
df.select { age and name }.distinct()
Input DataFrame: rowsCount = 7, columnsCount = 5
| name | | age | city | weight | isHappy |
|---|---|---|---|---|---|
| firstName | lastName | | | | |
| Alice | Cooper | 15 | London | 54 | true |
| Bob | Dylan | 45 | Dubai | 87 | true |
| Charlie | Daniels | 20 | Moscow | null | false |
| Charlie | Chaplin | 40 | Milan | null | true |
| Bob | Marley | 30 | Tokyo | 68 | true |
| Alice | Wolf | 20 | null | 55 | false |
| Charlie | Byrd | 30 | Moscow | 90 | true |
Step 1: DataFrame: rowsCount = 7, columnsCount = 2
| age | name | |
|---|---|---|
| | firstName | lastName |
| 15 | Alice | Cooper |
| 45 | Bob | Dylan |
| 20 | Charlie | Daniels |
| 40 | Charlie | Chaplin |
| 30 | Bob | Marley |
| 20 | Alice | Wolf |
| 30 | Charlie | Byrd |
Output DataFrame: rowsCount = 7, columnsCount = 2
| age | name | |
|---|---|---|
| | firstName | lastName |
| 15 | Alice | Cooper |
| 45 | Bob | Dylan |
| 20 | Charlie | Daniels |
| 40 | Charlie | Chaplin |
| 30 | Bob | Marley |
| 20 | Alice | Wolf |
| 30 | Charlie | Byrd |