{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Working with groups of rows of a data frame\n",
"\n",
"### Bogumił Kamiński"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"using DataFrames"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"using CSV"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"using Statistics"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"using FreqTables"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"using Pipe"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This part of the tutorial assumes that you have run *3. Working with text files* part so that you have auto2.csv file in your working directory."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(15, 200)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ENV[\"LINES\"], ENV[\"COLUMNS\"] = 15, 200"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
| mpg | cylinders | displacement | horsepower | weight | acceleration | year | origin | name | brand |
---|
| Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | String | String |
---|
392 rows × 10 columns
1 | 18.0 | 8.0 | 307.0 | 130.0 | 3504.0 | 12.0 | 70.0 | 1.0 | chevrolet chevelle malibu | chevrolet |
---|
2 | 15.0 | 8.0 | 350.0 | 165.0 | 3693.0 | 11.5 | 70.0 | 1.0 | buick skylark 320 | buick |
---|
3 | 18.0 | 8.0 | 318.0 | 150.0 | 3436.0 | 11.0 | 70.0 | 1.0 | plymouth satellite | plymouth |
---|
4 | 16.0 | 8.0 | 304.0 | 150.0 | 3433.0 | 12.0 | 70.0 | 1.0 | amc rebel sst | amc |
---|
5 | 17.0 | 8.0 | 302.0 | 140.0 | 3449.0 | 10.5 | 70.0 | 1.0 | ford torino | ford |
---|
6 | 15.0 | 8.0 | 429.0 | 198.0 | 4341.0 | 10.0 | 70.0 | 1.0 | ford galaxie 500 | ford |
---|
7 | 14.0 | 8.0 | 454.0 | 220.0 | 4354.0 | 9.0 | 70.0 | 1.0 | chevrolet impala | chevrolet |
---|
8 | 14.0 | 8.0 | 440.0 | 215.0 | 4312.0 | 8.5 | 70.0 | 1.0 | plymouth fury iii | plymouth |
---|
9 | 14.0 | 8.0 | 455.0 | 225.0 | 4425.0 | 10.0 | 70.0 | 1.0 | pontiac catalina | pontiac |
---|
10 | 15.0 | 8.0 | 390.0 | 190.0 | 3850.0 | 8.5 | 70.0 | 1.0 | amc ambassador dpl | amc |
---|
11 | 15.0 | 8.0 | 383.0 | 170.0 | 3563.0 | 10.0 | 70.0 | 1.0 | dodge challenger se | dodge |
---|
12 | 14.0 | 8.0 | 340.0 | 160.0 | 3609.0 | 8.0 | 70.0 | 1.0 | plymouth 'cuda 340 | plymouth |
---|
13 | 15.0 | 8.0 | 400.0 | 150.0 | 3761.0 | 9.5 | 70.0 | 1.0 | chevrolet monte carlo | chevrolet |
---|
14 | 14.0 | 8.0 | 455.0 | 225.0 | 3086.0 | 10.0 | 70.0 | 1.0 | buick estate wagon (sw) | buick |
---|
15 | 24.0 | 4.0 | 113.0 | 95.0 | 2372.0 | 15.0 | 70.0 | 3.0 | toyota corona mark ii | toyota |
---|
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cccccccccc}\n",
"\t& mpg & cylinders & displacement & horsepower & weight & acceleration & year & origin & name & brand\\\\\n",
"\t\\hline\n",
"\t& Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & String & String\\\\\n",
"\t\\hline\n",
"\t1 & 18.0 & 8.0 & 307.0 & 130.0 & 3504.0 & 12.0 & 70.0 & 1.0 & chevrolet chevelle malibu & chevrolet \\\\\n",
"\t2 & 15.0 & 8.0 & 350.0 & 165.0 & 3693.0 & 11.5 & 70.0 & 1.0 & buick skylark 320 & buick \\\\\n",
"\t3 & 18.0 & 8.0 & 318.0 & 150.0 & 3436.0 & 11.0 & 70.0 & 1.0 & plymouth satellite & plymouth \\\\\n",
"\t4 & 16.0 & 8.0 & 304.0 & 150.0 & 3433.0 & 12.0 & 70.0 & 1.0 & amc rebel sst & amc \\\\\n",
"\t5 & 17.0 & 8.0 & 302.0 & 140.0 & 3449.0 & 10.5 & 70.0 & 1.0 & ford torino & ford \\\\\n",
"\t6 & 15.0 & 8.0 & 429.0 & 198.0 & 4341.0 & 10.0 & 70.0 & 1.0 & ford galaxie 500 & ford \\\\\n",
"\t7 & 14.0 & 8.0 & 454.0 & 220.0 & 4354.0 & 9.0 & 70.0 & 1.0 & chevrolet impala & chevrolet \\\\\n",
"\t8 & 14.0 & 8.0 & 440.0 & 215.0 & 4312.0 & 8.5 & 70.0 & 1.0 & plymouth fury iii & plymouth \\\\\n",
"\t9 & 14.0 & 8.0 & 455.0 & 225.0 & 4425.0 & 10.0 & 70.0 & 1.0 & pontiac catalina & pontiac \\\\\n",
"\t10 & 15.0 & 8.0 & 390.0 & 190.0 & 3850.0 & 8.5 & 70.0 & 1.0 & amc ambassador dpl & amc \\\\\n",
"\t11 & 15.0 & 8.0 & 383.0 & 170.0 & 3563.0 & 10.0 & 70.0 & 1.0 & dodge challenger se & dodge \\\\\n",
"\t12 & 14.0 & 8.0 & 340.0 & 160.0 & 3609.0 & 8.0 & 70.0 & 1.0 & plymouth 'cuda 340 & plymouth \\\\\n",
"\t13 & 15.0 & 8.0 & 400.0 & 150.0 & 3761.0 & 9.5 & 70.0 & 1.0 & chevrolet monte carlo & chevrolet \\\\\n",
"\t14 & 14.0 & 8.0 & 455.0 & 225.0 & 3086.0 & 10.0 & 70.0 & 1.0 & buick estate wagon (sw) & buick \\\\\n",
"\t15 & 24.0 & 4.0 & 113.0 & 95.0 & 2372.0 & 15.0 & 70.0 & 3.0 & toyota corona mark ii & toyota \\\\\n",
"\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"392×10 DataFrame\n",
"│ Row │ mpg │ cylinders │ displacement │ horsepower │ weight │ acceleration │ year │ origin │ name │ brand │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼─────────┼───────────┼──────────────┼────────────┼─────────┼──────────────┼─────────┼─────────┼───────────────────────────┼───────────┤\n",
"│ 1 │ 18.0 │ 8.0 │ 307.0 │ 130.0 │ 3504.0 │ 12.0 │ 70.0 │ 1.0 │ chevrolet chevelle malibu │ chevrolet │\n",
"│ 2 │ 15.0 │ 8.0 │ 350.0 │ 165.0 │ 3693.0 │ 11.5 │ 70.0 │ 1.0 │ buick skylark 320 │ buick │\n",
"│ 3 │ 18.0 │ 8.0 │ 318.0 │ 150.0 │ 3436.0 │ 11.0 │ 70.0 │ 1.0 │ plymouth satellite │ plymouth │\n",
"⋮\n",
"│ 389 │ 44.0 │ 4.0 │ 97.0 │ 52.0 │ 2130.0 │ 24.6 │ 82.0 │ 2.0 │ vw pickup │ vw │\n",
"│ 390 │ 32.0 │ 4.0 │ 135.0 │ 84.0 │ 2295.0 │ 11.6 │ 82.0 │ 1.0 │ dodge rampage │ dodge │\n",
"│ 391 │ 28.0 │ 4.0 │ 120.0 │ 79.0 │ 2625.0 │ 18.6 │ 82.0 │ 1.0 │ ford ranger │ ford │\n",
"│ 392 │ 31.0 │ 4.0 │ 119.0 │ 82.0 │ 2720.0 │ 19.4 │ 82.0 │ 1.0 │ chevy s-10 │ chevy │"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = CSV.File(\"auto2.csv\") |> DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We want to group our data frame by `:brand` column:"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"GroupedDataFrame with 37 groups based on key: brand
First Group (43 rows): brand = \"chevrolet\"
| mpg | cylinders | displacement | horsepower | weight | acceleration | year | origin | name | brand |
---|
| Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | String | String |
---|
1 | 18.0 | 8.0 | 307.0 | 130.0 | 3504.0 | 12.0 | 70.0 | 1.0 | chevrolet chevelle malibu | chevrolet |
---|
2 | 14.0 | 8.0 | 454.0 | 220.0 | 4354.0 | 9.0 | 70.0 | 1.0 | chevrolet impala | chevrolet |
---|
3 | 15.0 | 8.0 | 400.0 | 150.0 | 3761.0 | 9.5 | 70.0 | 1.0 | chevrolet monte carlo | chevrolet |
---|
4 | 28.0 | 4.0 | 140.0 | 90.0 | 2264.0 | 15.5 | 71.0 | 1.0 | chevrolet vega 2300 | chevrolet |
---|
5 | 17.0 | 6.0 | 250.0 | 100.0 | 3329.0 | 15.5 | 71.0 | 1.0 | chevrolet chevelle malibu | chevrolet |
---|
6 | 14.0 | 8.0 | 350.0 | 165.0 | 4209.0 | 12.0 | 71.0 | 1.0 | chevrolet impala | chevrolet |
---|
7 | 22.0 | 4.0 | 140.0 | 72.0 | 2408.0 | 19.0 | 71.0 | 1.0 | chevrolet vega (sw) | chevrolet |
---|
8 | 20.0 | 4.0 | 140.0 | 90.0 | 2408.0 | 19.5 | 72.0 | 1.0 | chevrolet vega | chevrolet |
---|
9 | 13.0 | 8.0 | 350.0 | 165.0 | 4274.0 | 12.0 | 72.0 | 1.0 | chevrolet impala | chevrolet |
---|
10 | 13.0 | 8.0 | 307.0 | 130.0 | 4098.0 | 14.0 | 72.0 | 1.0 | chevrolet chevelle concours (sw) | chevrolet |
---|
11 | 13.0 | 8.0 | 350.0 | 145.0 | 3988.0 | 13.0 | 73.0 | 1.0 | chevrolet malibu | chevrolet |
---|
12 | 13.0 | 8.0 | 400.0 | 150.0 | 4464.0 | 12.0 | 73.0 | 1.0 | chevrolet caprice classic | chevrolet |
---|
13 | 16.0 | 6.0 | 250.0 | 100.0 | 3278.0 | 18.0 | 73.0 | 1.0 | chevrolet nova custom | chevrolet |
---|
14 | 11.0 | 8.0 | 400.0 | 150.0 | 4997.0 | 14.0 | 73.0 | 1.0 | chevrolet impala | chevrolet |
---|
15 | 21.0 | 4.0 | 140.0 | 72.0 | 2401.0 | 19.5 | 73.0 | 1.0 | chevrolet vega | chevrolet |
---|
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
---|
⋮
Last Group (1 row): brand = \"nissan\"
| mpg | cylinders | displacement | horsepower | weight | acceleration | year | origin | name | brand |
---|
| Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | String | String |
---|
1 | 36.0 | 4.0 | 120.0 | 88.0 | 2160.0 | 14.5 | 82.0 | 3.0 | nissan stanza xe | nissan |
---|
"
],
"text/latex": [
"GroupedDataFrame with 37 groups based on key: brand\n",
"\n",
"First Group (43 rows): brand = \"chevrolet\"\n",
"\n",
"\\begin{tabular}{r|cccccccccc}\n",
"\t& mpg & cylinders & displacement & horsepower & weight & acceleration & year & origin & name & brand\\\\\n",
"\t\\hline\n",
"\t& Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & String & String\\\\\n",
"\t\\hline\n",
"\t1 & 18.0 & 8.0 & 307.0 & 130.0 & 3504.0 & 12.0 & 70.0 & 1.0 & chevrolet chevelle malibu & chevrolet \\\\\n",
"\t2 & 14.0 & 8.0 & 454.0 & 220.0 & 4354.0 & 9.0 & 70.0 & 1.0 & chevrolet impala & chevrolet \\\\\n",
"\t3 & 15.0 & 8.0 & 400.0 & 150.0 & 3761.0 & 9.5 & 70.0 & 1.0 & chevrolet monte carlo & chevrolet \\\\\n",
"\t4 & 28.0 & 4.0 & 140.0 & 90.0 & 2264.0 & 15.5 & 71.0 & 1.0 & chevrolet vega 2300 & chevrolet \\\\\n",
"\t5 & 17.0 & 6.0 & 250.0 & 100.0 & 3329.0 & 15.5 & 71.0 & 1.0 & chevrolet chevelle malibu & chevrolet \\\\\n",
"\t6 & 14.0 & 8.0 & 350.0 & 165.0 & 4209.0 & 12.0 & 71.0 & 1.0 & chevrolet impala & chevrolet \\\\\n",
"\t7 & 22.0 & 4.0 & 140.0 & 72.0 & 2408.0 & 19.0 & 71.0 & 1.0 & chevrolet vega (sw) & chevrolet \\\\\n",
"\t8 & 20.0 & 4.0 & 140.0 & 90.0 & 2408.0 & 19.5 & 72.0 & 1.0 & chevrolet vega & chevrolet \\\\\n",
"\t9 & 13.0 & 8.0 & 350.0 & 165.0 & 4274.0 & 12.0 & 72.0 & 1.0 & chevrolet impala & chevrolet \\\\\n",
"\t10 & 13.0 & 8.0 & 307.0 & 130.0 & 4098.0 & 14.0 & 72.0 & 1.0 & chevrolet chevelle concours (sw) & chevrolet \\\\\n",
"\t11 & 13.0 & 8.0 & 350.0 & 145.0 & 3988.0 & 13.0 & 73.0 & 1.0 & chevrolet malibu & chevrolet \\\\\n",
"\t12 & 13.0 & 8.0 & 400.0 & 150.0 & 4464.0 & 12.0 & 73.0 & 1.0 & chevrolet caprice classic & chevrolet \\\\\n",
"\t13 & 16.0 & 6.0 & 250.0 & 100.0 & 3278.0 & 18.0 & 73.0 & 1.0 & chevrolet nova custom & chevrolet \\\\\n",
"\t14 & 11.0 & 8.0 & 400.0 & 150.0 & 4997.0 & 14.0 & 73.0 & 1.0 & chevrolet impala & chevrolet \\\\\n",
"\t15 & 21.0 & 4.0 & 140.0 & 72.0 & 2401.0 & 19.5 & 73.0 & 1.0 & chevrolet vega & chevrolet \\\\\n",
"\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
"\\end{tabular}\n",
"\n",
"$\\dots$\n",
"\n",
"Last Group (1 row): brand = \"nissan\"\n",
"\n",
"\\begin{tabular}{r|cccccccccc}\n",
"\t& mpg & cylinders & displacement & horsepower & weight & acceleration & year & origin & name & brand\\\\\n",
"\t\\hline\n",
"\t& Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & String & String\\\\\n",
"\t\\hline\n",
"\t1 & 36.0 & 4.0 & 120.0 & 88.0 & 2160.0 & 14.5 & 82.0 & 3.0 & nissan stanza xe & nissan \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"GroupedDataFrame with 37 groups based on key: brand\n",
"First Group (43 rows): brand = \"chevrolet\"\n",
"│ Row │ mpg │ cylinders │ displacement │ horsepower │ weight │ acceleration │ year │ origin │ name │ brand │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼─────────┼───────────┼──────────────┼────────────┼─────────┼──────────────┼─────────┼─────────┼───────────────────────────┼───────────┤\n",
"│ 1 │ 18.0 │ 8.0 │ 307.0 │ 130.0 │ 3504.0 │ 12.0 │ 70.0 │ 1.0 │ chevrolet chevelle malibu │ chevrolet │\n",
"│ 2 │ 14.0 │ 8.0 │ 454.0 │ 220.0 │ 4354.0 │ 9.0 │ 70.0 │ 1.0 │ chevrolet impala │ chevrolet │\n",
"│ 3 │ 15.0 │ 8.0 │ 400.0 │ 150.0 │ 3761.0 │ 9.5 │ 70.0 │ 1.0 │ chevrolet monte carlo │ chevrolet │\n",
"⋮\n",
"│ 40 │ 28.0 │ 4.0 │ 112.0 │ 88.0 │ 2605.0 │ 19.6 │ 82.0 │ 1.0 │ chevrolet cavalier │ chevrolet │\n",
"│ 41 │ 27.0 │ 4.0 │ 112.0 │ 88.0 │ 2640.0 │ 18.6 │ 82.0 │ 1.0 │ chevrolet cavalier wagon │ chevrolet │\n",
"│ 42 │ 34.0 │ 4.0 │ 112.0 │ 88.0 │ 2395.0 │ 18.0 │ 82.0 │ 1.0 │ chevrolet cavalier 2-door │ chevrolet │\n",
"│ 43 │ 27.0 │ 4.0 │ 151.0 │ 90.0 │ 2950.0 │ 17.3 │ 82.0 │ 1.0 │ chevrolet camaro │ chevrolet │\n",
"⋮\n",
"Last Group (1 row): brand = \"nissan\"\n",
"│ Row │ mpg │ cylinders │ displacement │ horsepower │ weight │ acceleration │ year │ origin │ name │ brand │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼─────────┼───────────┼──────────────┼────────────┼─────────┼──────────────┼─────────┼─────────┼──────────────────┼────────┤\n",
"│ 1 │ 36.0 │ 4.0 │ 120.0 │ 88.0 │ 2160.0 │ 14.5 │ 82.0 │ 3.0 │ nissan stanza xe │ nissan │"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdf = groupby(df, :brand)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that it is easy (and fast) to find an appropriate group by passing a `Tuple` of values of grouping columns (in our case it is just one column)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | mpg | cylinders | displacement | horsepower | weight | acceleration | year | origin | name | brand |
---|
| Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | String | String |
---|
48 rows × 10 columns
1 | 17.0 | 8.0 | 302.0 | 140.0 | 3449.0 | 10.5 | 70.0 | 1.0 | ford torino | ford |
---|
2 | 15.0 | 8.0 | 429.0 | 198.0 | 4341.0 | 10.0 | 70.0 | 1.0 | ford galaxie 500 | ford |
---|
3 | 21.0 | 6.0 | 200.0 | 85.0 | 2587.0 | 16.0 | 70.0 | 1.0 | ford maverick | ford |
---|
4 | 10.0 | 8.0 | 360.0 | 215.0 | 4615.0 | 14.0 | 70.0 | 1.0 | ford f250 | ford |
---|
5 | 19.0 | 6.0 | 250.0 | 88.0 | 3302.0 | 15.5 | 71.0 | 1.0 | ford torino 500 | ford |
---|
6 | 14.0 | 8.0 | 351.0 | 153.0 | 4154.0 | 13.5 | 71.0 | 1.0 | ford galaxie 500 | ford |
---|
7 | 13.0 | 8.0 | 400.0 | 170.0 | 4746.0 | 12.0 | 71.0 | 1.0 | ford country squire (sw) | ford |
---|
8 | 18.0 | 6.0 | 250.0 | 88.0 | 3139.0 | 14.5 | 71.0 | 1.0 | ford mustang | ford |
---|
9 | 21.0 | 4.0 | 122.0 | 86.0 | 2226.0 | 16.5 | 72.0 | 1.0 | ford pinto runabout | ford |
---|
10 | 14.0 | 8.0 | 351.0 | 153.0 | 4129.0 | 13.0 | 72.0 | 1.0 | ford galaxie 500 | ford |
---|
11 | 13.0 | 8.0 | 302.0 | 140.0 | 4294.0 | 16.0 | 72.0 | 1.0 | ford gran torino (sw) | ford |
---|
12 | 22.0 | 4.0 | 122.0 | 86.0 | 2395.0 | 16.0 | 72.0 | 1.0 | ford pinto (sw) | ford |
---|
13 | 14.0 | 8.0 | 302.0 | 137.0 | 4042.0 | 14.5 | 73.0 | 1.0 | ford gran torino | ford |
---|
14 | 13.0 | 8.0 | 351.0 | 158.0 | 4363.0 | 13.0 | 73.0 | 1.0 | ford ltd | ford |
---|
15 | 18.0 | 6.0 | 250.0 | 88.0 | 3021.0 | 16.5 | 73.0 | 1.0 | ford maverick | ford |
---|
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cccccccccc}\n",
"\t& mpg & cylinders & displacement & horsepower & weight & acceleration & year & origin & name & brand\\\\\n",
"\t\\hline\n",
"\t& Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & String & String\\\\\n",
"\t\\hline\n",
"\t1 & 17.0 & 8.0 & 302.0 & 140.0 & 3449.0 & 10.5 & 70.0 & 1.0 & ford torino & ford \\\\\n",
"\t2 & 15.0 & 8.0 & 429.0 & 198.0 & 4341.0 & 10.0 & 70.0 & 1.0 & ford galaxie 500 & ford \\\\\n",
"\t3 & 21.0 & 6.0 & 200.0 & 85.0 & 2587.0 & 16.0 & 70.0 & 1.0 & ford maverick & ford \\\\\n",
"\t4 & 10.0 & 8.0 & 360.0 & 215.0 & 4615.0 & 14.0 & 70.0 & 1.0 & ford f250 & ford \\\\\n",
"\t5 & 19.0 & 6.0 & 250.0 & 88.0 & 3302.0 & 15.5 & 71.0 & 1.0 & ford torino 500 & ford \\\\\n",
"\t6 & 14.0 & 8.0 & 351.0 & 153.0 & 4154.0 & 13.5 & 71.0 & 1.0 & ford galaxie 500 & ford \\\\\n",
"\t7 & 13.0 & 8.0 & 400.0 & 170.0 & 4746.0 & 12.0 & 71.0 & 1.0 & ford country squire (sw) & ford \\\\\n",
"\t8 & 18.0 & 6.0 & 250.0 & 88.0 & 3139.0 & 14.5 & 71.0 & 1.0 & ford mustang & ford \\\\\n",
"\t9 & 21.0 & 4.0 & 122.0 & 86.0 & 2226.0 & 16.5 & 72.0 & 1.0 & ford pinto runabout & ford \\\\\n",
"\t10 & 14.0 & 8.0 & 351.0 & 153.0 & 4129.0 & 13.0 & 72.0 & 1.0 & ford galaxie 500 & ford \\\\\n",
"\t11 & 13.0 & 8.0 & 302.0 & 140.0 & 4294.0 & 16.0 & 72.0 & 1.0 & ford gran torino (sw) & ford \\\\\n",
"\t12 & 22.0 & 4.0 & 122.0 & 86.0 & 2395.0 & 16.0 & 72.0 & 1.0 & ford pinto (sw) & ford \\\\\n",
"\t13 & 14.0 & 8.0 & 302.0 & 137.0 & 4042.0 & 14.5 & 73.0 & 1.0 & ford gran torino & ford \\\\\n",
"\t14 & 13.0 & 8.0 & 351.0 & 158.0 & 4363.0 & 13.0 & 73.0 & 1.0 & ford ltd & ford \\\\\n",
"\t15 & 18.0 & 6.0 & 250.0 & 88.0 & 3021.0 & 16.5 & 73.0 & 1.0 & ford maverick & ford \\\\\n",
"\t$\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"48×10 SubDataFrame\n",
"│ Row │ mpg │ cylinders │ displacement │ horsepower │ weight │ acceleration │ year │ origin │ name │ brand │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼─────────┼───────────┼──────────────┼────────────┼─────────┼──────────────┼─────────┼─────────┼──────────────────────┼────────┤\n",
"│ 1 │ 17.0 │ 8.0 │ 302.0 │ 140.0 │ 3449.0 │ 10.5 │ 70.0 │ 1.0 │ ford torino │ ford │\n",
"│ 2 │ 15.0 │ 8.0 │ 429.0 │ 198.0 │ 4341.0 │ 10.0 │ 70.0 │ 1.0 │ ford galaxie 500 │ ford │\n",
"│ 3 │ 21.0 │ 6.0 │ 200.0 │ 85.0 │ 2587.0 │ 16.0 │ 70.0 │ 1.0 │ ford maverick │ ford │\n",
"⋮\n",
"│ 45 │ 24.0 │ 4.0 │ 140.0 │ 92.0 │ 2865.0 │ 16.4 │ 82.0 │ 1.0 │ ford fairmont futura │ ford │\n",
"│ 46 │ 22.0 │ 6.0 │ 232.0 │ 112.0 │ 2835.0 │ 14.7 │ 82.0 │ 1.0 │ ford granada l │ ford │\n",
"│ 47 │ 27.0 │ 4.0 │ 140.0 │ 86.0 │ 2790.0 │ 15.6 │ 82.0 │ 1.0 │ ford mustang gl │ ford │\n",
"│ 48 │ 28.0 │ 4.0 │ 120.0 │ 79.0 │ 2625.0 │ 18.6 │ 82.0 │ 1.0 │ ford ranger │ ford │"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gdf[(\"ford\",)]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using the `combine` function we can easily calculate some aggregates by group:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | brand | mpg_mean |
---|
| String | Float64 |
---|
37 rows × 2 columns
1 | chevrolet | 20.4721 |
---|
2 | buick | 19.1824 |
---|
3 | plymouth | 21.7032 |
---|
4 | amc | 18.0704 |
---|
5 | ford | 19.475 |
---|
6 | pontiac | 20.0125 |
---|
7 | dodge | 22.0607 |
---|
8 | toyota | 28.372 |
---|
9 | datsun | 31.113 |
---|
10 | volkswagen | 29.1067 |
---|
11 | peugeot | 23.6875 |
---|
12 | audi | 26.7143 |
---|
13 | saab | 23.9 |
---|
14 | bmw | 23.75 |
---|
15 | chevy | 18.0 |
---|
⋮ | ⋮ | ⋮ |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& brand & mpg\\_mean\\\\\n",
"\t\\hline\n",
"\t& String & Float64\\\\\n",
"\t\\hline\n",
"\t1 & chevrolet & 20.4721 \\\\\n",
"\t2 & buick & 19.1824 \\\\\n",
"\t3 & plymouth & 21.7032 \\\\\n",
"\t4 & amc & 18.0704 \\\\\n",
"\t5 & ford & 19.475 \\\\\n",
"\t6 & pontiac & 20.0125 \\\\\n",
"\t7 & dodge & 22.0607 \\\\\n",
"\t8 & toyota & 28.372 \\\\\n",
"\t9 & datsun & 31.113 \\\\\n",
"\t10 & volkswagen & 29.1067 \\\\\n",
"\t11 & peugeot & 23.6875 \\\\\n",
"\t12 & audi & 26.7143 \\\\\n",
"\t13 & saab & 23.9 \\\\\n",
"\t14 & bmw & 23.75 \\\\\n",
"\t15 & chevy & 18.0 \\\\\n",
"\t$\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×2 DataFrame\n",
"│ Row │ brand │ mpg_mean │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
"├─────┼───────────┼──────────┤\n",
"│ 1 │ chevrolet │ 20.4721 │\n",
"│ 2 │ buick │ 19.1824 │\n",
"│ 3 │ plymouth │ 21.7032 │\n",
"⋮\n",
"│ 34 │ mercedes │ 25.4 │\n",
"│ 35 │ vokswagen │ 29.8 │\n",
"│ 36 │ triumph │ 35.0 │\n",
"│ 37 │ nissan │ 36.0 │"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"brand_mpg = combine(gdf, :mpg => mean)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `:mpg => mean` syntax means that we pass a column `:mpg` to be processed by the `mean` function for each group.\n",
"\n",
"We could have added a target column name to the generated column in the following way:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | brand | mean_mpg |
---|
| String | Float64 |
---|
37 rows × 2 columns
1 | chevrolet | 20.4721 |
---|
2 | buick | 19.1824 |
---|
3 | plymouth | 21.7032 |
---|
4 | amc | 18.0704 |
---|
5 | ford | 19.475 |
---|
6 | pontiac | 20.0125 |
---|
7 | dodge | 22.0607 |
---|
8 | toyota | 28.372 |
---|
9 | datsun | 31.113 |
---|
10 | volkswagen | 29.1067 |
---|
11 | peugeot | 23.6875 |
---|
12 | audi | 26.7143 |
---|
13 | saab | 23.9 |
---|
14 | bmw | 23.75 |
---|
15 | chevy | 18.0 |
---|
⋮ | ⋮ | ⋮ |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& brand & mean\\_mpg\\\\\n",
"\t\\hline\n",
"\t& String & Float64\\\\\n",
"\t\\hline\n",
"\t1 & chevrolet & 20.4721 \\\\\n",
"\t2 & buick & 19.1824 \\\\\n",
"\t3 & plymouth & 21.7032 \\\\\n",
"\t4 & amc & 18.0704 \\\\\n",
"\t5 & ford & 19.475 \\\\\n",
"\t6 & pontiac & 20.0125 \\\\\n",
"\t7 & dodge & 22.0607 \\\\\n",
"\t8 & toyota & 28.372 \\\\\n",
"\t9 & datsun & 31.113 \\\\\n",
"\t10 & volkswagen & 29.1067 \\\\\n",
"\t11 & peugeot & 23.6875 \\\\\n",
"\t12 & audi & 26.7143 \\\\\n",
"\t13 & saab & 23.9 \\\\\n",
"\t14 & bmw & 23.75 \\\\\n",
"\t15 & chevy & 18.0 \\\\\n",
"\t$\\dots$ & $\\dots$ & $\\dots$ \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×2 DataFrame\n",
"│ Row │ brand │ mean_mpg │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
"├─────┼───────────┼──────────┤\n",
"│ 1 │ chevrolet │ 20.4721 │\n",
"│ 2 │ buick │ 19.1824 │\n",
"│ 3 │ plymouth │ 21.7032 │\n",
"⋮\n",
"│ 34 │ mercedes │ 25.4 │\n",
"│ 35 │ vokswagen │ 29.8 │\n",
"│ 36 │ triumph │ 35.0 │\n",
"│ 37 │ nissan │ 36.0 │"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"combine(gdf, :mpg => mean => :mean_mpg)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let us see how one can sort a data frame. In order to see alll rows of a data frame we temporarily change the number of rows printed."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"50"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ENV[\"LINES\"] = 50"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
" | brand | mpg_mean |
---|
| String | Float64 |
---|
37 rows × 2 columns
1 | vw | 39.0167 |
---|
2 | nissan | 36.0 |
---|
3 | triumph | 35.0 |
---|
4 | honda | 33.7615 |
---|
5 | datsun | 31.113 |
---|
6 | mazda | 30.86 |
---|
7 | subaru | 30.525 |
---|
8 | vokswagen | 29.8 |
---|
9 | renault | 29.6667 |
---|
10 | volkswagen | 29.1067 |
---|
11 | fiat | 28.9125 |
---|
12 | toyota | 28.372 |
---|
13 | audi | 26.7143 |
---|
14 | maxda | 26.05 |
---|
15 | opel | 25.75 |
---|
16 | mercedes | 25.4 |
---|
17 | capri | 25.0 |
---|
18 | saab | 23.9 |
---|
19 | bmw | 23.75 |
---|
20 | peugeot | 23.6875 |
---|
21 | mercedes-benz | 23.25 |
---|
22 | toyouta | 23.0 |
---|
23 | dodge | 22.0607 |
---|
24 | plymouth | 21.7032 |
---|
25 | volvo | 21.1167 |
---|
26 | oldsmobile | 21.1 |
---|
27 | chevrolet | 20.4721 |
---|
28 | pontiac | 20.0125 |
---|
29 | cadillac | 19.75 |
---|
30 | ford | 19.475 |
---|
31 | buick | 19.1824 |
---|
32 | mercury | 19.1182 |
---|
33 | amc | 18.0704 |
---|
34 | chevy | 18.0 |
---|
35 | chrysler | 17.2667 |
---|
36 | chevroelt | 16.0 |
---|
37 | hi | 9.0 |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& brand & mpg\\_mean\\\\\n",
"\t\\hline\n",
"\t& String & Float64\\\\\n",
"\t\\hline\n",
"\t1 & vw & 39.0167 \\\\\n",
"\t2 & nissan & 36.0 \\\\\n",
"\t3 & triumph & 35.0 \\\\\n",
"\t4 & honda & 33.7615 \\\\\n",
"\t5 & datsun & 31.113 \\\\\n",
"\t6 & mazda & 30.86 \\\\\n",
"\t7 & subaru & 30.525 \\\\\n",
"\t8 & vokswagen & 29.8 \\\\\n",
"\t9 & renault & 29.6667 \\\\\n",
"\t10 & volkswagen & 29.1067 \\\\\n",
"\t11 & fiat & 28.9125 \\\\\n",
"\t12 & toyota & 28.372 \\\\\n",
"\t13 & audi & 26.7143 \\\\\n",
"\t14 & maxda & 26.05 \\\\\n",
"\t15 & opel & 25.75 \\\\\n",
"\t16 & mercedes & 25.4 \\\\\n",
"\t17 & capri & 25.0 \\\\\n",
"\t18 & saab & 23.9 \\\\\n",
"\t19 & bmw & 23.75 \\\\\n",
"\t20 & peugeot & 23.6875 \\\\\n",
"\t21 & mercedes-benz & 23.25 \\\\\n",
"\t22 & toyouta & 23.0 \\\\\n",
"\t23 & dodge & 22.0607 \\\\\n",
"\t24 & plymouth & 21.7032 \\\\\n",
"\t25 & volvo & 21.1167 \\\\\n",
"\t26 & oldsmobile & 21.1 \\\\\n",
"\t27 & chevrolet & 20.4721 \\\\\n",
"\t28 & pontiac & 20.0125 \\\\\n",
"\t29 & cadillac & 19.75 \\\\\n",
"\t30 & ford & 19.475 \\\\\n",
"\t31 & buick & 19.1824 \\\\\n",
"\t32 & mercury & 19.1182 \\\\\n",
"\t33 & amc & 18.0704 \\\\\n",
"\t34 & chevy & 18.0 \\\\\n",
"\t35 & chrysler & 17.2667 \\\\\n",
"\t36 & chevroelt & 16.0 \\\\\n",
"\t37 & hi & 9.0 \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×2 DataFrame\n",
"│ Row │ brand │ mpg_mean │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
"├─────┼───────────────┼──────────┤\n",
"│ 1 │ vw │ 39.0167 │\n",
"│ 2 │ nissan │ 36.0 │\n",
"│ 3 │ triumph │ 35.0 │\n",
"│ 4 │ honda │ 33.7615 │\n",
"│ 5 │ datsun │ 31.113 │\n",
"│ 6 │ mazda │ 30.86 │\n",
"│ 7 │ subaru │ 30.525 │\n",
"│ 8 │ vokswagen │ 29.8 │\n",
"│ 9 │ renault │ 29.6667 │\n",
"│ 10 │ volkswagen │ 29.1067 │\n",
"│ 11 │ fiat │ 28.9125 │\n",
"│ 12 │ toyota │ 28.372 │\n",
"│ 13 │ audi │ 26.7143 │\n",
"│ 14 │ maxda │ 26.05 │\n",
"│ 15 │ opel │ 25.75 │\n",
"│ 16 │ mercedes │ 25.4 │\n",
"│ 17 │ capri │ 25.0 │\n",
"│ 18 │ saab │ 23.9 │\n",
"│ 19 │ bmw │ 23.75 │\n",
"│ 20 │ peugeot │ 23.6875 │\n",
"│ 21 │ mercedes-benz │ 23.25 │\n",
"│ 22 │ toyouta │ 23.0 │\n",
"│ 23 │ dodge │ 22.0607 │\n",
"│ 24 │ plymouth │ 21.7032 │\n",
"│ 25 │ volvo │ 21.1167 │\n",
"│ 26 │ oldsmobile │ 21.1 │\n",
"│ 27 │ chevrolet │ 20.4721 │\n",
"│ 28 │ pontiac │ 20.0125 │\n",
"│ 29 │ cadillac │ 19.75 │\n",
"│ 30 │ ford │ 19.475 │\n",
"│ 31 │ buick │ 19.1824 │\n",
"│ 32 │ mercury │ 19.1182 │\n",
"│ 33 │ amc │ 18.0704 │\n",
"│ 34 │ chevy │ 18.0 │\n",
"│ 35 │ chrysler │ 17.2667 │\n",
"│ 36 │ chevroelt │ 16.0 │\n",
"│ 37 │ hi │ 9.0 │"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sort!(brand_mpg, :mpg_mean, rev=true) # sort! performs an in-place operation"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"A typical data cleaning task is to check its consistency. In this case it would be making sure that each brand has a unique origin. We will do it in several ways."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"First we use FreqTables.jl:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"37×3 Named Array{Int64,2}\n",
"brand ╲ origin │ 1.0 2.0 3.0\n",
"───────────────┼──────────────\n",
"amc │ 27 0 0\n",
"audi │ 0 7 0\n",
"bmw │ 0 2 0\n",
"buick │ 17 0 0\n",
"cadillac │ 2 0 0\n",
"capri │ 1 0 0\n",
"chevroelt │ 1 0 0\n",
"chevrolet │ 43 0 0\n",
"chevy │ 3 0 0\n",
"chrysler │ 6 0 0\n",
"datsun │ 0 0 23\n",
"dodge │ 28 0 0\n",
"fiat │ 0 8 0\n",
"ford │ 48 0 0\n",
"hi │ 1 0 0\n",
"honda │ 0 0 13\n",
"maxda │ 0 0 2\n",
"mazda │ 0 0 10\n",
"mercedes │ 0 1 0\n",
"mercedes-benz │ 0 2 0\n",
"mercury │ 11 0 0\n",
"nissan │ 0 0 1\n",
"oldsmobile │ 10 0 0\n",
"opel │ 0 4 0\n",
"peugeot │ 0 8 0\n",
"plymouth │ 31 0 0\n",
"pontiac │ 16 0 0\n",
"renault │ 0 3 0\n",
"saab │ 0 4 0\n",
"subaru │ 0 0 4\n",
"toyota │ 0 0 25\n",
"toyouta │ 0 0 1\n",
"triumph │ 0 1 0\n",
"vokswagen │ 0 1 0\n",
"volkswagen │ 0 15 0\n",
"volvo │ 0 6 0\n",
"vw │ 0 6 0"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"freqtable(df, :brand, :origin)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now let us create a chain of operations using Pipe.jl. Here `_` denotes the position of the argument passed from the previous step in the chain."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | brand | origin_function |
---|
| String | Int64 |
---|
37 rows × 2 columns
1 | chevrolet | 1 |
---|
2 | buick | 1 |
---|
3 | plymouth | 1 |
---|
4 | amc | 1 |
---|
5 | ford | 1 |
---|
6 | pontiac | 1 |
---|
7 | dodge | 1 |
---|
8 | toyota | 1 |
---|
9 | datsun | 1 |
---|
10 | volkswagen | 1 |
---|
11 | peugeot | 1 |
---|
12 | audi | 1 |
---|
13 | saab | 1 |
---|
14 | bmw | 1 |
---|
15 | chevy | 1 |
---|
16 | hi | 1 |
---|
17 | mercury | 1 |
---|
18 | opel | 1 |
---|
19 | fiat | 1 |
---|
20 | oldsmobile | 1 |
---|
21 | chrysler | 1 |
---|
22 | mazda | 1 |
---|
23 | volvo | 1 |
---|
24 | renault | 1 |
---|
25 | toyouta | 1 |
---|
26 | maxda | 1 |
---|
27 | honda | 1 |
---|
28 | subaru | 1 |
---|
29 | chevroelt | 1 |
---|
30 | capri | 1 |
---|
31 | vw | 1 |
---|
32 | mercedes-benz | 1 |
---|
33 | cadillac | 1 |
---|
34 | mercedes | 1 |
---|
35 | vokswagen | 1 |
---|
36 | triumph | 1 |
---|
37 | nissan | 1 |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& brand & origin\\_function\\\\\n",
"\t\\hline\n",
"\t& String & Int64\\\\\n",
"\t\\hline\n",
"\t1 & chevrolet & 1 \\\\\n",
"\t2 & buick & 1 \\\\\n",
"\t3 & plymouth & 1 \\\\\n",
"\t4 & amc & 1 \\\\\n",
"\t5 & ford & 1 \\\\\n",
"\t6 & pontiac & 1 \\\\\n",
"\t7 & dodge & 1 \\\\\n",
"\t8 & toyota & 1 \\\\\n",
"\t9 & datsun & 1 \\\\\n",
"\t10 & volkswagen & 1 \\\\\n",
"\t11 & peugeot & 1 \\\\\n",
"\t12 & audi & 1 \\\\\n",
"\t13 & saab & 1 \\\\\n",
"\t14 & bmw & 1 \\\\\n",
"\t15 & chevy & 1 \\\\\n",
"\t16 & hi & 1 \\\\\n",
"\t17 & mercury & 1 \\\\\n",
"\t18 & opel & 1 \\\\\n",
"\t19 & fiat & 1 \\\\\n",
"\t20 & oldsmobile & 1 \\\\\n",
"\t21 & chrysler & 1 \\\\\n",
"\t22 & mazda & 1 \\\\\n",
"\t23 & volvo & 1 \\\\\n",
"\t24 & renault & 1 \\\\\n",
"\t25 & toyouta & 1 \\\\\n",
"\t26 & maxda & 1 \\\\\n",
"\t27 & honda & 1 \\\\\n",
"\t28 & subaru & 1 \\\\\n",
"\t29 & chevroelt & 1 \\\\\n",
"\t30 & capri & 1 \\\\\n",
"\t31 & vw & 1 \\\\\n",
"\t32 & mercedes-benz & 1 \\\\\n",
"\t33 & cadillac & 1 \\\\\n",
"\t34 & mercedes & 1 \\\\\n",
"\t35 & vokswagen & 1 \\\\\n",
"\t36 & triumph & 1 \\\\\n",
"\t37 & nissan & 1 \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×2 DataFrame\n",
"│ Row │ brand │ origin_function │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
"├─────┼───────────────┼─────────────────┤\n",
"│ 1 │ chevrolet │ 1 │\n",
"│ 2 │ buick │ 1 │\n",
"│ 3 │ plymouth │ 1 │\n",
"│ 4 │ amc │ 1 │\n",
"│ 5 │ ford │ 1 │\n",
"│ 6 │ pontiac │ 1 │\n",
"│ 7 │ dodge │ 1 │\n",
"│ 8 │ toyota │ 1 │\n",
"│ 9 │ datsun │ 1 │\n",
"│ 10 │ volkswagen │ 1 │\n",
"│ 11 │ peugeot │ 1 │\n",
"│ 12 │ audi │ 1 │\n",
"│ 13 │ saab │ 1 │\n",
"│ 14 │ bmw │ 1 │\n",
"│ 15 │ chevy │ 1 │\n",
"│ 16 │ hi │ 1 │\n",
"│ 17 │ mercury │ 1 │\n",
"│ 18 │ opel │ 1 │\n",
"│ 19 │ fiat │ 1 │\n",
"│ 20 │ oldsmobile │ 1 │\n",
"│ 21 │ chrysler │ 1 │\n",
"│ 22 │ mazda │ 1 │\n",
"│ 23 │ volvo │ 1 │\n",
"│ 24 │ renault │ 1 │\n",
"│ 25 │ toyouta │ 1 │\n",
"│ 26 │ maxda │ 1 │\n",
"│ 27 │ honda │ 1 │\n",
"│ 28 │ subaru │ 1 │\n",
"│ 29 │ chevroelt │ 1 │\n",
"│ 30 │ capri │ 1 │\n",
"│ 31 │ vw │ 1 │\n",
"│ 32 │ mercedes-benz │ 1 │\n",
"│ 33 │ cadillac │ 1 │\n",
"│ 34 │ mercedes │ 1 │\n",
"│ 35 │ vokswagen │ 1 │\n",
"│ 36 │ triumph │ 1 │\n",
"│ 37 │ nissan │ 1 │"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"orig_brand = @pipe df |>\n",
" groupby(_, :brand) |>\n",
" combine(_, :origin => x -> length(unique(x)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let us make sure that we always get exactly one origin:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 1)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extrema(orig_brand.origin_function)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We also could have grouped our data frame by two columns:"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | origin | brand | nrow |
---|
| Float64 | String | Int64 |
---|
37 rows × 3 columns
1 | 1.0 | chevrolet | 43 |
---|
2 | 1.0 | buick | 17 |
---|
3 | 1.0 | plymouth | 31 |
---|
4 | 1.0 | amc | 27 |
---|
5 | 1.0 | ford | 48 |
---|
6 | 1.0 | pontiac | 16 |
---|
7 | 1.0 | dodge | 28 |
---|
8 | 3.0 | toyota | 25 |
---|
9 | 3.0 | datsun | 23 |
---|
10 | 2.0 | volkswagen | 15 |
---|
11 | 2.0 | peugeot | 8 |
---|
12 | 2.0 | audi | 7 |
---|
13 | 2.0 | saab | 4 |
---|
14 | 2.0 | bmw | 2 |
---|
15 | 1.0 | chevy | 3 |
---|
16 | 1.0 | hi | 1 |
---|
17 | 1.0 | mercury | 11 |
---|
18 | 2.0 | opel | 4 |
---|
19 | 2.0 | fiat | 8 |
---|
20 | 1.0 | oldsmobile | 10 |
---|
21 | 1.0 | chrysler | 6 |
---|
22 | 3.0 | mazda | 10 |
---|
23 | 2.0 | volvo | 6 |
---|
24 | 2.0 | renault | 3 |
---|
25 | 3.0 | toyouta | 1 |
---|
26 | 3.0 | maxda | 2 |
---|
27 | 3.0 | honda | 13 |
---|
28 | 3.0 | subaru | 4 |
---|
29 | 1.0 | chevroelt | 1 |
---|
30 | 1.0 | capri | 1 |
---|
31 | 2.0 | vw | 6 |
---|
32 | 2.0 | mercedes-benz | 2 |
---|
33 | 1.0 | cadillac | 2 |
---|
34 | 2.0 | mercedes | 1 |
---|
35 | 2.0 | vokswagen | 1 |
---|
36 | 2.0 | triumph | 1 |
---|
37 | 3.0 | nissan | 1 |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|ccc}\n",
"\t& origin & brand & nrow\\\\\n",
"\t\\hline\n",
"\t& Float64 & String & Int64\\\\\n",
"\t\\hline\n",
"\t1 & 1.0 & chevrolet & 43 \\\\\n",
"\t2 & 1.0 & buick & 17 \\\\\n",
"\t3 & 1.0 & plymouth & 31 \\\\\n",
"\t4 & 1.0 & amc & 27 \\\\\n",
"\t5 & 1.0 & ford & 48 \\\\\n",
"\t6 & 1.0 & pontiac & 16 \\\\\n",
"\t7 & 1.0 & dodge & 28 \\\\\n",
"\t8 & 3.0 & toyota & 25 \\\\\n",
"\t9 & 3.0 & datsun & 23 \\\\\n",
"\t10 & 2.0 & volkswagen & 15 \\\\\n",
"\t11 & 2.0 & peugeot & 8 \\\\\n",
"\t12 & 2.0 & audi & 7 \\\\\n",
"\t13 & 2.0 & saab & 4 \\\\\n",
"\t14 & 2.0 & bmw & 2 \\\\\n",
"\t15 & 1.0 & chevy & 3 \\\\\n",
"\t16 & 1.0 & hi & 1 \\\\\n",
"\t17 & 1.0 & mercury & 11 \\\\\n",
"\t18 & 2.0 & opel & 4 \\\\\n",
"\t19 & 2.0 & fiat & 8 \\\\\n",
"\t20 & 1.0 & oldsmobile & 10 \\\\\n",
"\t21 & 1.0 & chrysler & 6 \\\\\n",
"\t22 & 3.0 & mazda & 10 \\\\\n",
"\t23 & 2.0 & volvo & 6 \\\\\n",
"\t24 & 2.0 & renault & 3 \\\\\n",
"\t25 & 3.0 & toyouta & 1 \\\\\n",
"\t26 & 3.0 & maxda & 2 \\\\\n",
"\t27 & 3.0 & honda & 13 \\\\\n",
"\t28 & 3.0 & subaru & 4 \\\\\n",
"\t29 & 1.0 & chevroelt & 1 \\\\\n",
"\t30 & 1.0 & capri & 1 \\\\\n",
"\t31 & 2.0 & vw & 6 \\\\\n",
"\t32 & 2.0 & mercedes-benz & 2 \\\\\n",
"\t33 & 1.0 & cadillac & 2 \\\\\n",
"\t34 & 2.0 & mercedes & 1 \\\\\n",
"\t35 & 2.0 & vokswagen & 1 \\\\\n",
"\t36 & 2.0 & triumph & 1 \\\\\n",
"\t37 & 3.0 & nissan & 1 \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×3 DataFrame\n",
"│ Row │ origin │ brand │ nrow │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
"├─────┼─────────┼───────────────┼───────┤\n",
"│ 1 │ 1.0 │ chevrolet │ 43 │\n",
"│ 2 │ 1.0 │ buick │ 17 │\n",
"│ 3 │ 1.0 │ plymouth │ 31 │\n",
"│ 4 │ 1.0 │ amc │ 27 │\n",
"│ 5 │ 1.0 │ ford │ 48 │\n",
"│ 6 │ 1.0 │ pontiac │ 16 │\n",
"│ 7 │ 1.0 │ dodge │ 28 │\n",
"│ 8 │ 3.0 │ toyota │ 25 │\n",
"│ 9 │ 3.0 │ datsun │ 23 │\n",
"│ 10 │ 2.0 │ volkswagen │ 15 │\n",
"│ 11 │ 2.0 │ peugeot │ 8 │\n",
"│ 12 │ 2.0 │ audi │ 7 │\n",
"│ 13 │ 2.0 │ saab │ 4 │\n",
"│ 14 │ 2.0 │ bmw │ 2 │\n",
"│ 15 │ 1.0 │ chevy │ 3 │\n",
"│ 16 │ 1.0 │ hi │ 1 │\n",
"│ 17 │ 1.0 │ mercury │ 11 │\n",
"│ 18 │ 2.0 │ opel │ 4 │\n",
"│ 19 │ 2.0 │ fiat │ 8 │\n",
"│ 20 │ 1.0 │ oldsmobile │ 10 │\n",
"│ 21 │ 1.0 │ chrysler │ 6 │\n",
"│ 22 │ 3.0 │ mazda │ 10 │\n",
"│ 23 │ 2.0 │ volvo │ 6 │\n",
"│ 24 │ 2.0 │ renault │ 3 │\n",
"│ 25 │ 3.0 │ toyouta │ 1 │\n",
"│ 26 │ 3.0 │ maxda │ 2 │\n",
"│ 27 │ 3.0 │ honda │ 13 │\n",
"│ 28 │ 3.0 │ subaru │ 4 │\n",
"│ 29 │ 1.0 │ chevroelt │ 1 │\n",
"│ 30 │ 1.0 │ capri │ 1 │\n",
"│ 31 │ 2.0 │ vw │ 6 │\n",
"│ 32 │ 2.0 │ mercedes-benz │ 2 │\n",
"│ 33 │ 1.0 │ cadillac │ 2 │\n",
"│ 34 │ 2.0 │ mercedes │ 1 │\n",
"│ 35 │ 2.0 │ vokswagen │ 1 │\n",
"│ 36 │ 2.0 │ triumph │ 1 │\n",
"│ 37 │ 3.0 │ nissan │ 1 │"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"origin_brand2 = @pipe df |>\n",
" groupby(_, [:origin, :brand]) |>\n",
" combine(_, nrow)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that passing `nrow` as a combining operation gives us the number of rows per group."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can do the `unstack` operation on the result to get a similar table that we got with `freqtable`:"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | brand | 1.0 | 2.0 | 3.0 |
---|
| String | Int64? | Int64? | Int64? |
---|
37 rows × 4 columns
1 | amc | 27 | missing | missing |
---|
2 | audi | missing | 7 | missing |
---|
3 | bmw | missing | 2 | missing |
---|
4 | buick | 17 | missing | missing |
---|
5 | cadillac | 2 | missing | missing |
---|
6 | capri | 1 | missing | missing |
---|
7 | chevroelt | 1 | missing | missing |
---|
8 | chevrolet | 43 | missing | missing |
---|
9 | chevy | 3 | missing | missing |
---|
10 | chrysler | 6 | missing | missing |
---|
11 | datsun | missing | missing | 23 |
---|
12 | dodge | 28 | missing | missing |
---|
13 | fiat | missing | 8 | missing |
---|
14 | ford | 48 | missing | missing |
---|
15 | hi | 1 | missing | missing |
---|
16 | honda | missing | missing | 13 |
---|
17 | maxda | missing | missing | 2 |
---|
18 | mazda | missing | missing | 10 |
---|
19 | mercedes | missing | 1 | missing |
---|
20 | mercedes-benz | missing | 2 | missing |
---|
21 | mercury | 11 | missing | missing |
---|
22 | nissan | missing | missing | 1 |
---|
23 | oldsmobile | 10 | missing | missing |
---|
24 | opel | missing | 4 | missing |
---|
25 | peugeot | missing | 8 | missing |
---|
26 | plymouth | 31 | missing | missing |
---|
27 | pontiac | 16 | missing | missing |
---|
28 | renault | missing | 3 | missing |
---|
29 | saab | missing | 4 | missing |
---|
30 | subaru | missing | missing | 4 |
---|
31 | toyota | missing | missing | 25 |
---|
32 | toyouta | missing | missing | 1 |
---|
33 | triumph | missing | 1 | missing |
---|
34 | vokswagen | missing | 1 | missing |
---|
35 | volkswagen | missing | 15 | missing |
---|
36 | volvo | missing | 6 | missing |
---|
37 | vw | missing | 6 | missing |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cccc}\n",
"\t& brand & 1.0 & 2.0 & 3.0\\\\\n",
"\t\\hline\n",
"\t& String & Int64? & Int64? & Int64?\\\\\n",
"\t\\hline\n",
"\t1 & amc & 27 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t2 & audi & \\emph{missing} & 7 & \\emph{missing} \\\\\n",
"\t3 & bmw & \\emph{missing} & 2 & \\emph{missing} \\\\\n",
"\t4 & buick & 17 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t5 & cadillac & 2 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t6 & capri & 1 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t7 & chevroelt & 1 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t8 & chevrolet & 43 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t9 & chevy & 3 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t10 & chrysler & 6 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t11 & datsun & \\emph{missing} & \\emph{missing} & 23 \\\\\n",
"\t12 & dodge & 28 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t13 & fiat & \\emph{missing} & 8 & \\emph{missing} \\\\\n",
"\t14 & ford & 48 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t15 & hi & 1 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t16 & honda & \\emph{missing} & \\emph{missing} & 13 \\\\\n",
"\t17 & maxda & \\emph{missing} & \\emph{missing} & 2 \\\\\n",
"\t18 & mazda & \\emph{missing} & \\emph{missing} & 10 \\\\\n",
"\t19 & mercedes & \\emph{missing} & 1 & \\emph{missing} \\\\\n",
"\t20 & mercedes-benz & \\emph{missing} & 2 & \\emph{missing} \\\\\n",
"\t21 & mercury & 11 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t22 & nissan & \\emph{missing} & \\emph{missing} & 1 \\\\\n",
"\t23 & oldsmobile & 10 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t24 & opel & \\emph{missing} & 4 & \\emph{missing} \\\\\n",
"\t25 & peugeot & \\emph{missing} & 8 & \\emph{missing} \\\\\n",
"\t26 & plymouth & 31 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t27 & pontiac & 16 & \\emph{missing} & \\emph{missing} \\\\\n",
"\t28 & renault & \\emph{missing} & 3 & \\emph{missing} \\\\\n",
"\t29 & saab & \\emph{missing} & 4 & \\emph{missing} \\\\\n",
"\t30 & subaru & \\emph{missing} & \\emph{missing} & 4 \\\\\n",
"\t31 & toyota & \\emph{missing} & \\emph{missing} & 25 \\\\\n",
"\t32 & toyouta & \\emph{missing} & \\emph{missing} & 1 \\\\\n",
"\t33 & triumph & \\emph{missing} & 1 & \\emph{missing} \\\\\n",
"\t34 & vokswagen & \\emph{missing} & 1 & \\emph{missing} \\\\\n",
"\t35 & volkswagen & \\emph{missing} & 15 & \\emph{missing} \\\\\n",
"\t36 & volvo & \\emph{missing} & 6 & \\emph{missing} \\\\\n",
"\t37 & vw & \\emph{missing} & 6 & \\emph{missing} \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×4 DataFrame\n",
"│ Row │ brand │ 1.0 │ 2.0 │ 3.0 │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64?\u001b[39m │ \u001b[90mInt64?\u001b[39m │ \u001b[90mInt64?\u001b[39m │\n",
"├─────┼───────────────┼─────────┼─────────┼─────────┤\n",
"│ 1 │ amc │ 27 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 2 │ audi │ \u001b[90mmissing\u001b[39m │ 7 │ \u001b[90mmissing\u001b[39m │\n",
"│ 3 │ bmw │ \u001b[90mmissing\u001b[39m │ 2 │ \u001b[90mmissing\u001b[39m │\n",
"│ 4 │ buick │ 17 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 5 │ cadillac │ 2 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 6 │ capri │ 1 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 7 │ chevroelt │ 1 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 8 │ chevrolet │ 43 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 9 │ chevy │ 3 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 10 │ chrysler │ 6 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 11 │ datsun │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 23 │\n",
"│ 12 │ dodge │ 28 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 13 │ fiat │ \u001b[90mmissing\u001b[39m │ 8 │ \u001b[90mmissing\u001b[39m │\n",
"│ 14 │ ford │ 48 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 15 │ hi │ 1 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 16 │ honda │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 13 │\n",
"│ 17 │ maxda │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 2 │\n",
"│ 18 │ mazda │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 10 │\n",
"│ 19 │ mercedes │ \u001b[90mmissing\u001b[39m │ 1 │ \u001b[90mmissing\u001b[39m │\n",
"│ 20 │ mercedes-benz │ \u001b[90mmissing\u001b[39m │ 2 │ \u001b[90mmissing\u001b[39m │\n",
"│ 21 │ mercury │ 11 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 22 │ nissan │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 1 │\n",
"│ 23 │ oldsmobile │ 10 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 24 │ opel │ \u001b[90mmissing\u001b[39m │ 4 │ \u001b[90mmissing\u001b[39m │\n",
"│ 25 │ peugeot │ \u001b[90mmissing\u001b[39m │ 8 │ \u001b[90mmissing\u001b[39m │\n",
"│ 26 │ plymouth │ 31 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 27 │ pontiac │ 16 │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │\n",
"│ 28 │ renault │ \u001b[90mmissing\u001b[39m │ 3 │ \u001b[90mmissing\u001b[39m │\n",
"│ 29 │ saab │ \u001b[90mmissing\u001b[39m │ 4 │ \u001b[90mmissing\u001b[39m │\n",
"│ 30 │ subaru │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 4 │\n",
"│ 31 │ toyota │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 25 │\n",
"│ 32 │ toyouta │ \u001b[90mmissing\u001b[39m │ \u001b[90mmissing\u001b[39m │ 1 │\n",
"│ 33 │ triumph │ \u001b[90mmissing\u001b[39m │ 1 │ \u001b[90mmissing\u001b[39m │\n",
"│ 34 │ vokswagen │ \u001b[90mmissing\u001b[39m │ 1 │ \u001b[90mmissing\u001b[39m │\n",
"│ 35 │ volkswagen │ \u001b[90mmissing\u001b[39m │ 15 │ \u001b[90mmissing\u001b[39m │\n",
"│ 36 │ volvo │ \u001b[90mmissing\u001b[39m │ 6 │ \u001b[90mmissing\u001b[39m │\n",
"│ 37 │ vw │ \u001b[90mmissing\u001b[39m │ 6 │ \u001b[90mmissing\u001b[39m │"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"origin_vs_brand = unstack(origin_brand2, :brand, :origin, :nrow)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The only difference is that missing combinations are given as `missing` and `freqtable` produced `0` there. It is easy to fix using broadcasting:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | brand | 1.0 | 2.0 | 3.0 |
---|
| String | Int64 | Int64 | Int64 |
---|
37 rows × 4 columns
1 | amc | 27 | 0 | 0 |
---|
2 | audi | 0 | 7 | 0 |
---|
3 | bmw | 0 | 2 | 0 |
---|
4 | buick | 17 | 0 | 0 |
---|
5 | cadillac | 2 | 0 | 0 |
---|
6 | capri | 1 | 0 | 0 |
---|
7 | chevroelt | 1 | 0 | 0 |
---|
8 | chevrolet | 43 | 0 | 0 |
---|
9 | chevy | 3 | 0 | 0 |
---|
10 | chrysler | 6 | 0 | 0 |
---|
11 | datsun | 0 | 0 | 23 |
---|
12 | dodge | 28 | 0 | 0 |
---|
13 | fiat | 0 | 8 | 0 |
---|
14 | ford | 48 | 0 | 0 |
---|
15 | hi | 1 | 0 | 0 |
---|
16 | honda | 0 | 0 | 13 |
---|
17 | maxda | 0 | 0 | 2 |
---|
18 | mazda | 0 | 0 | 10 |
---|
19 | mercedes | 0 | 1 | 0 |
---|
20 | mercedes-benz | 0 | 2 | 0 |
---|
21 | mercury | 11 | 0 | 0 |
---|
22 | nissan | 0 | 0 | 1 |
---|
23 | oldsmobile | 10 | 0 | 0 |
---|
24 | opel | 0 | 4 | 0 |
---|
25 | peugeot | 0 | 8 | 0 |
---|
26 | plymouth | 31 | 0 | 0 |
---|
27 | pontiac | 16 | 0 | 0 |
---|
28 | renault | 0 | 3 | 0 |
---|
29 | saab | 0 | 4 | 0 |
---|
30 | subaru | 0 | 0 | 4 |
---|
31 | toyota | 0 | 0 | 25 |
---|
32 | toyouta | 0 | 0 | 1 |
---|
33 | triumph | 0 | 1 | 0 |
---|
34 | vokswagen | 0 | 1 | 0 |
---|
35 | volkswagen | 0 | 15 | 0 |
---|
36 | volvo | 0 | 6 | 0 |
---|
37 | vw | 0 | 6 | 0 |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cccc}\n",
"\t& brand & 1.0 & 2.0 & 3.0\\\\\n",
"\t\\hline\n",
"\t& String & Int64 & Int64 & Int64\\\\\n",
"\t\\hline\n",
"\t1 & amc & 27 & 0 & 0 \\\\\n",
"\t2 & audi & 0 & 7 & 0 \\\\\n",
"\t3 & bmw & 0 & 2 & 0 \\\\\n",
"\t4 & buick & 17 & 0 & 0 \\\\\n",
"\t5 & cadillac & 2 & 0 & 0 \\\\\n",
"\t6 & capri & 1 & 0 & 0 \\\\\n",
"\t7 & chevroelt & 1 & 0 & 0 \\\\\n",
"\t8 & chevrolet & 43 & 0 & 0 \\\\\n",
"\t9 & chevy & 3 & 0 & 0 \\\\\n",
"\t10 & chrysler & 6 & 0 & 0 \\\\\n",
"\t11 & datsun & 0 & 0 & 23 \\\\\n",
"\t12 & dodge & 28 & 0 & 0 \\\\\n",
"\t13 & fiat & 0 & 8 & 0 \\\\\n",
"\t14 & ford & 48 & 0 & 0 \\\\\n",
"\t15 & hi & 1 & 0 & 0 \\\\\n",
"\t16 & honda & 0 & 0 & 13 \\\\\n",
"\t17 & maxda & 0 & 0 & 2 \\\\\n",
"\t18 & mazda & 0 & 0 & 10 \\\\\n",
"\t19 & mercedes & 0 & 1 & 0 \\\\\n",
"\t20 & mercedes-benz & 0 & 2 & 0 \\\\\n",
"\t21 & mercury & 11 & 0 & 0 \\\\\n",
"\t22 & nissan & 0 & 0 & 1 \\\\\n",
"\t23 & oldsmobile & 10 & 0 & 0 \\\\\n",
"\t24 & opel & 0 & 4 & 0 \\\\\n",
"\t25 & peugeot & 0 & 8 & 0 \\\\\n",
"\t26 & plymouth & 31 & 0 & 0 \\\\\n",
"\t27 & pontiac & 16 & 0 & 0 \\\\\n",
"\t28 & renault & 0 & 3 & 0 \\\\\n",
"\t29 & saab & 0 & 4 & 0 \\\\\n",
"\t30 & subaru & 0 & 0 & 4 \\\\\n",
"\t31 & toyota & 0 & 0 & 25 \\\\\n",
"\t32 & toyouta & 0 & 0 & 1 \\\\\n",
"\t33 & triumph & 0 & 1 & 0 \\\\\n",
"\t34 & vokswagen & 0 & 1 & 0 \\\\\n",
"\t35 & volkswagen & 0 & 15 & 0 \\\\\n",
"\t36 & volvo & 0 & 6 & 0 \\\\\n",
"\t37 & vw & 0 & 6 & 0 \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×4 DataFrame\n",
"│ Row │ brand │ 1.0 │ 2.0 │ 3.0 │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mInt64\u001b[39m │\n",
"├─────┼───────────────┼───────┼───────┼───────┤\n",
"│ 1 │ amc │ 27 │ 0 │ 0 │\n",
"│ 2 │ audi │ 0 │ 7 │ 0 │\n",
"│ 3 │ bmw │ 0 │ 2 │ 0 │\n",
"│ 4 │ buick │ 17 │ 0 │ 0 │\n",
"│ 5 │ cadillac │ 2 │ 0 │ 0 │\n",
"│ 6 │ capri │ 1 │ 0 │ 0 │\n",
"│ 7 │ chevroelt │ 1 │ 0 │ 0 │\n",
"│ 8 │ chevrolet │ 43 │ 0 │ 0 │\n",
"│ 9 │ chevy │ 3 │ 0 │ 0 │\n",
"│ 10 │ chrysler │ 6 │ 0 │ 0 │\n",
"│ 11 │ datsun │ 0 │ 0 │ 23 │\n",
"│ 12 │ dodge │ 28 │ 0 │ 0 │\n",
"│ 13 │ fiat │ 0 │ 8 │ 0 │\n",
"│ 14 │ ford │ 48 │ 0 │ 0 │\n",
"│ 15 │ hi │ 1 │ 0 │ 0 │\n",
"│ 16 │ honda │ 0 │ 0 │ 13 │\n",
"│ 17 │ maxda │ 0 │ 0 │ 2 │\n",
"│ 18 │ mazda │ 0 │ 0 │ 10 │\n",
"│ 19 │ mercedes │ 0 │ 1 │ 0 │\n",
"│ 20 │ mercedes-benz │ 0 │ 2 │ 0 │\n",
"│ 21 │ mercury │ 11 │ 0 │ 0 │\n",
"│ 22 │ nissan │ 0 │ 0 │ 1 │\n",
"│ 23 │ oldsmobile │ 10 │ 0 │ 0 │\n",
"│ 24 │ opel │ 0 │ 4 │ 0 │\n",
"│ 25 │ peugeot │ 0 │ 8 │ 0 │\n",
"│ 26 │ plymouth │ 31 │ 0 │ 0 │\n",
"│ 27 │ pontiac │ 16 │ 0 │ 0 │\n",
"│ 28 │ renault │ 0 │ 3 │ 0 │\n",
"│ 29 │ saab │ 0 │ 4 │ 0 │\n",
"│ 30 │ subaru │ 0 │ 0 │ 4 │\n",
"│ 31 │ toyota │ 0 │ 0 │ 25 │\n",
"│ 32 │ toyouta │ 0 │ 0 │ 1 │\n",
"│ 33 │ triumph │ 0 │ 1 │ 0 │\n",
"│ 34 │ vokswagen │ 0 │ 1 │ 0 │\n",
"│ 35 │ volkswagen │ 0 │ 15 │ 0 │\n",
"│ 36 │ volvo │ 0 │ 6 │ 0 │\n",
"│ 37 │ vw │ 0 │ 6 │ 0 │"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coalesce.(origin_vs_brand, 0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that column names of a data frame can be any sequence of characters as in this case:"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Array{String,1}:\n",
" \"brand\"\n",
" \"1.0\"\n",
" \"2.0\"\n",
" \"3.0\""
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"names(origin_vs_brand)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4-element Array{Symbol,1}:\n",
" :brand\n",
" Symbol(\"1.0\")\n",
" Symbol(\"2.0\")\n",
" Symbol(\"3.0\")"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"propertynames(origin_vs_brand)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In such situations it is easiest to refer to such columns using strings, as `Symbol`s are not super easy to write when they have to represent sequences that are not allowed as variable names as you can see above."
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"37-element Array{Union{Missing, Int64},1}:\n",
" 27\n",
" missing\n",
" missing\n",
" 17\n",
" 2\n",
" 1\n",
" 1\n",
" 43\n",
" 3\n",
" 6\n",
" missing\n",
" 28\n",
" missing\n",
" 48\n",
" 1\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" 11\n",
" missing\n",
" 10\n",
" missing\n",
" missing\n",
" 31\n",
" 16\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" missing\n",
" missing"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"origin_vs_brand.\"1.0\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Finally let us show that columns of a data frame can hold any data type."
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | origin | brand_function |
---|
| Float64 | Array… |
---|
3 rows × 2 columns
1 | 1.0 | [\"chevrolet\", \"buick\", \"plymouth\", \"amc\", \"ford\", \"pontiac\", \"dodge\", \"chevy\", \"hi\", \"mercury\", \"oldsmobile\", \"chrysler\", \"chevroelt\", \"capri\", \"cadillac\"] |
---|
2 | 3.0 | [\"toyota\", \"datsun\", \"mazda\", \"toyouta\", \"maxda\", \"honda\", \"subaru\", \"nissan\"] |
---|
3 | 2.0 | [\"volkswagen\", \"peugeot\", \"audi\", \"saab\", \"bmw\", \"opel\", \"fiat\", \"volvo\", \"renault\", \"vw\", \"mercedes-benz\", \"mercedes\", \"vokswagen\", \"triumph\"] |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& origin & brand\\_function\\\\\n",
"\t\\hline\n",
"\t& Float64 & Array…\\\\\n",
"\t\\hline\n",
"\t1 & 1.0 & [\"chevrolet\", \"buick\", \"plymouth\", \"amc\", \"ford\", \"pontiac\", \"dodge\", \"chevy\", \"hi\", \"mercury\", \"oldsmobile\", \"chrysler\", \"chevroelt\", \"capri\", \"cadillac\"] \\\\\n",
"\t2 & 3.0 & [\"toyota\", \"datsun\", \"mazda\", \"toyouta\", \"maxda\", \"honda\", \"subaru\", \"nissan\"] \\\\\n",
"\t3 & 2.0 & [\"volkswagen\", \"peugeot\", \"audi\", \"saab\", \"bmw\", \"opel\", \"fiat\", \"volvo\", \"renault\", \"vw\", \"mercedes-benz\", \"mercedes\", \"vokswagen\", \"triumph\"] \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"3×2 DataFrame\n",
"│ Row │ origin │ brand_function │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mArray{String,1}\u001b[39m │\n",
"├─────┼─────────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤\n",
"│ 1 │ 1.0 │ [\"chevrolet\", \"buick\", \"plymouth\", \"amc\", \"ford\", \"pontiac\", \"dodge\", \"chevy\", \"hi\", \"mercury\", \"oldsmobile\", \"chrysler\", \"chevroelt\", \"capri\", \"cadillac\"] │\n",
"│ 2 │ 3.0 │ [\"toyota\", \"datsun\", \"mazda\", \"toyouta\", \"maxda\", \"honda\", \"subaru\", \"nissan\"] │\n",
"│ 3 │ 2.0 │ [\"volkswagen\", \"peugeot\", \"audi\", \"saab\", \"bmw\", \"opel\", \"fiat\", \"volvo\", \"renault\", \"vw\", \"mercedes-benz\", \"mercedes\", \"vokswagen\", \"triumph\"] │"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"origin_brand3 = @pipe df |>\n",
" groupby(_, :origin) |>\n",
" combine(_, :brand => x -> Ref(unique(x)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that we used `Ref` to protect the return value of our function to be broadcasted (this is the same trick as in standard broadcasting in Julia). Otherwise we would get:"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | origin | brand_unique |
---|
| Float64 | String |
---|
37 rows × 2 columns
1 | 1.0 | chevrolet |
---|
2 | 1.0 | buick |
---|
3 | 1.0 | plymouth |
---|
4 | 1.0 | amc |
---|
5 | 1.0 | ford |
---|
6 | 1.0 | pontiac |
---|
7 | 1.0 | dodge |
---|
8 | 1.0 | chevy |
---|
9 | 1.0 | hi |
---|
10 | 1.0 | mercury |
---|
11 | 1.0 | oldsmobile |
---|
12 | 1.0 | chrysler |
---|
13 | 1.0 | chevroelt |
---|
14 | 1.0 | capri |
---|
15 | 1.0 | cadillac |
---|
16 | 3.0 | toyota |
---|
17 | 3.0 | datsun |
---|
18 | 3.0 | mazda |
---|
19 | 3.0 | toyouta |
---|
20 | 3.0 | maxda |
---|
21 | 3.0 | honda |
---|
22 | 3.0 | subaru |
---|
23 | 3.0 | nissan |
---|
24 | 2.0 | volkswagen |
---|
25 | 2.0 | peugeot |
---|
26 | 2.0 | audi |
---|
27 | 2.0 | saab |
---|
28 | 2.0 | bmw |
---|
29 | 2.0 | opel |
---|
30 | 2.0 | fiat |
---|
31 | 2.0 | volvo |
---|
32 | 2.0 | renault |
---|
33 | 2.0 | vw |
---|
34 | 2.0 | mercedes-benz |
---|
35 | 2.0 | mercedes |
---|
36 | 2.0 | vokswagen |
---|
37 | 2.0 | triumph |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& origin & brand\\_unique\\\\\n",
"\t\\hline\n",
"\t& Float64 & String\\\\\n",
"\t\\hline\n",
"\t1 & 1.0 & chevrolet \\\\\n",
"\t2 & 1.0 & buick \\\\\n",
"\t3 & 1.0 & plymouth \\\\\n",
"\t4 & 1.0 & amc \\\\\n",
"\t5 & 1.0 & ford \\\\\n",
"\t6 & 1.0 & pontiac \\\\\n",
"\t7 & 1.0 & dodge \\\\\n",
"\t8 & 1.0 & chevy \\\\\n",
"\t9 & 1.0 & hi \\\\\n",
"\t10 & 1.0 & mercury \\\\\n",
"\t11 & 1.0 & oldsmobile \\\\\n",
"\t12 & 1.0 & chrysler \\\\\n",
"\t13 & 1.0 & chevroelt \\\\\n",
"\t14 & 1.0 & capri \\\\\n",
"\t15 & 1.0 & cadillac \\\\\n",
"\t16 & 3.0 & toyota \\\\\n",
"\t17 & 3.0 & datsun \\\\\n",
"\t18 & 3.0 & mazda \\\\\n",
"\t19 & 3.0 & toyouta \\\\\n",
"\t20 & 3.0 & maxda \\\\\n",
"\t21 & 3.0 & honda \\\\\n",
"\t22 & 3.0 & subaru \\\\\n",
"\t23 & 3.0 & nissan \\\\\n",
"\t24 & 2.0 & volkswagen \\\\\n",
"\t25 & 2.0 & peugeot \\\\\n",
"\t26 & 2.0 & audi \\\\\n",
"\t27 & 2.0 & saab \\\\\n",
"\t28 & 2.0 & bmw \\\\\n",
"\t29 & 2.0 & opel \\\\\n",
"\t30 & 2.0 & fiat \\\\\n",
"\t31 & 2.0 & volvo \\\\\n",
"\t32 & 2.0 & renault \\\\\n",
"\t33 & 2.0 & vw \\\\\n",
"\t34 & 2.0 & mercedes-benz \\\\\n",
"\t35 & 2.0 & mercedes \\\\\n",
"\t36 & 2.0 & vokswagen \\\\\n",
"\t37 & 2.0 & triumph \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×2 DataFrame\n",
"│ Row │ origin │ brand_unique │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼─────────┼───────────────┤\n",
"│ 1 │ 1.0 │ chevrolet │\n",
"│ 2 │ 1.0 │ buick │\n",
"│ 3 │ 1.0 │ plymouth │\n",
"│ 4 │ 1.0 │ amc │\n",
"│ 5 │ 1.0 │ ford │\n",
"│ 6 │ 1.0 │ pontiac │\n",
"│ 7 │ 1.0 │ dodge │\n",
"│ 8 │ 1.0 │ chevy │\n",
"│ 9 │ 1.0 │ hi │\n",
"│ 10 │ 1.0 │ mercury │\n",
"│ 11 │ 1.0 │ oldsmobile │\n",
"│ 12 │ 1.0 │ chrysler │\n",
"│ 13 │ 1.0 │ chevroelt │\n",
"│ 14 │ 1.0 │ capri │\n",
"│ 15 │ 1.0 │ cadillac │\n",
"│ 16 │ 3.0 │ toyota │\n",
"│ 17 │ 3.0 │ datsun │\n",
"│ 18 │ 3.0 │ mazda │\n",
"│ 19 │ 3.0 │ toyouta │\n",
"│ 20 │ 3.0 │ maxda │\n",
"│ 21 │ 3.0 │ honda │\n",
"│ 22 │ 3.0 │ subaru │\n",
"│ 23 │ 3.0 │ nissan │\n",
"│ 24 │ 2.0 │ volkswagen │\n",
"│ 25 │ 2.0 │ peugeot │\n",
"│ 26 │ 2.0 │ audi │\n",
"│ 27 │ 2.0 │ saab │\n",
"│ 28 │ 2.0 │ bmw │\n",
"│ 29 │ 2.0 │ opel │\n",
"│ 30 │ 2.0 │ fiat │\n",
"│ 31 │ 2.0 │ volvo │\n",
"│ 32 │ 2.0 │ renault │\n",
"│ 33 │ 2.0 │ vw │\n",
"│ 34 │ 2.0 │ mercedes-benz │\n",
"│ 35 │ 2.0 │ mercedes │\n",
"│ 36 │ 2.0 │ vokswagen │\n",
"│ 37 │ 2.0 │ triumph │"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"@pipe df |> groupby(_, :origin) |> combine(_, :brand => unique)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we check if the groups are pairwise disjoint (this is a bit tricky, but it is much fun to be able to write such code in Julia):"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 vs 2 => String[]\n",
"1 vs 3 => String[]\n",
"2 vs 3 => String[]\n"
]
}
],
"source": [
"for i in axes(origin_brand3, 1)\n",
" for j in i+1:nrow(origin_brand3)\n",
" println(\"$i vs $j => \", intersect(origin_brand3.brand_function[[i,j]]...))\n",
" end\n",
"end"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also easily flatten the `origin_brand3` data frame so that `brand_function` column is expanded to span multiple rows (the case we have seen above when we omitted writing `Ref` in `combine`)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | origin | brand_function |
---|
| Float64 | String |
---|
37 rows × 2 columns
1 | 1.0 | chevrolet |
---|
2 | 1.0 | buick |
---|
3 | 1.0 | plymouth |
---|
4 | 1.0 | amc |
---|
5 | 1.0 | ford |
---|
6 | 1.0 | pontiac |
---|
7 | 1.0 | dodge |
---|
8 | 1.0 | chevy |
---|
9 | 1.0 | hi |
---|
10 | 1.0 | mercury |
---|
11 | 1.0 | oldsmobile |
---|
12 | 1.0 | chrysler |
---|
13 | 1.0 | chevroelt |
---|
14 | 1.0 | capri |
---|
15 | 1.0 | cadillac |
---|
16 | 3.0 | toyota |
---|
17 | 3.0 | datsun |
---|
18 | 3.0 | mazda |
---|
19 | 3.0 | toyouta |
---|
20 | 3.0 | maxda |
---|
21 | 3.0 | honda |
---|
22 | 3.0 | subaru |
---|
23 | 3.0 | nissan |
---|
24 | 2.0 | volkswagen |
---|
25 | 2.0 | peugeot |
---|
26 | 2.0 | audi |
---|
27 | 2.0 | saab |
---|
28 | 2.0 | bmw |
---|
29 | 2.0 | opel |
---|
30 | 2.0 | fiat |
---|
31 | 2.0 | volvo |
---|
32 | 2.0 | renault |
---|
33 | 2.0 | vw |
---|
34 | 2.0 | mercedes-benz |
---|
35 | 2.0 | mercedes |
---|
36 | 2.0 | vokswagen |
---|
37 | 2.0 | triumph |
---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& origin & brand\\_function\\\\\n",
"\t\\hline\n",
"\t& Float64 & String\\\\\n",
"\t\\hline\n",
"\t1 & 1.0 & chevrolet \\\\\n",
"\t2 & 1.0 & buick \\\\\n",
"\t3 & 1.0 & plymouth \\\\\n",
"\t4 & 1.0 & amc \\\\\n",
"\t5 & 1.0 & ford \\\\\n",
"\t6 & 1.0 & pontiac \\\\\n",
"\t7 & 1.0 & dodge \\\\\n",
"\t8 & 1.0 & chevy \\\\\n",
"\t9 & 1.0 & hi \\\\\n",
"\t10 & 1.0 & mercury \\\\\n",
"\t11 & 1.0 & oldsmobile \\\\\n",
"\t12 & 1.0 & chrysler \\\\\n",
"\t13 & 1.0 & chevroelt \\\\\n",
"\t14 & 1.0 & capri \\\\\n",
"\t15 & 1.0 & cadillac \\\\\n",
"\t16 & 3.0 & toyota \\\\\n",
"\t17 & 3.0 & datsun \\\\\n",
"\t18 & 3.0 & mazda \\\\\n",
"\t19 & 3.0 & toyouta \\\\\n",
"\t20 & 3.0 & maxda \\\\\n",
"\t21 & 3.0 & honda \\\\\n",
"\t22 & 3.0 & subaru \\\\\n",
"\t23 & 3.0 & nissan \\\\\n",
"\t24 & 2.0 & volkswagen \\\\\n",
"\t25 & 2.0 & peugeot \\\\\n",
"\t26 & 2.0 & audi \\\\\n",
"\t27 & 2.0 & saab \\\\\n",
"\t28 & 2.0 & bmw \\\\\n",
"\t29 & 2.0 & opel \\\\\n",
"\t30 & 2.0 & fiat \\\\\n",
"\t31 & 2.0 & volvo \\\\\n",
"\t32 & 2.0 & renault \\\\\n",
"\t33 & 2.0 & vw \\\\\n",
"\t34 & 2.0 & mercedes-benz \\\\\n",
"\t35 & 2.0 & mercedes \\\\\n",
"\t36 & 2.0 & vokswagen \\\\\n",
"\t37 & 2.0 & triumph \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"37×2 DataFrame\n",
"│ Row │ origin │ brand_function │\n",
"│ │ \u001b[90mFloat64\u001b[39m │ \u001b[90mString\u001b[39m │\n",
"├─────┼─────────┼────────────────┤\n",
"│ 1 │ 1.0 │ chevrolet │\n",
"│ 2 │ 1.0 │ buick │\n",
"│ 3 │ 1.0 │ plymouth │\n",
"│ 4 │ 1.0 │ amc │\n",
"│ 5 │ 1.0 │ ford │\n",
"│ 6 │ 1.0 │ pontiac │\n",
"│ 7 │ 1.0 │ dodge │\n",
"│ 8 │ 1.0 │ chevy │\n",
"│ 9 │ 1.0 │ hi │\n",
"│ 10 │ 1.0 │ mercury │\n",
"│ 11 │ 1.0 │ oldsmobile │\n",
"│ 12 │ 1.0 │ chrysler │\n",
"│ 13 │ 1.0 │ chevroelt │\n",
"│ 14 │ 1.0 │ capri │\n",
"│ 15 │ 1.0 │ cadillac │\n",
"│ 16 │ 3.0 │ toyota │\n",
"│ 17 │ 3.0 │ datsun │\n",
"│ 18 │ 3.0 │ mazda │\n",
"│ 19 │ 3.0 │ toyouta │\n",
"│ 20 │ 3.0 │ maxda │\n",
"│ 21 │ 3.0 │ honda │\n",
"│ 22 │ 3.0 │ subaru │\n",
"│ 23 │ 3.0 │ nissan │\n",
"│ 24 │ 2.0 │ volkswagen │\n",
"│ 25 │ 2.0 │ peugeot │\n",
"│ 26 │ 2.0 │ audi │\n",
"│ 27 │ 2.0 │ saab │\n",
"│ 28 │ 2.0 │ bmw │\n",
"│ 29 │ 2.0 │ opel │\n",
"│ 30 │ 2.0 │ fiat │\n",
"│ 31 │ 2.0 │ volvo │\n",
"│ 32 │ 2.0 │ renault │\n",
"│ 33 │ 2.0 │ vw │\n",
"│ 34 │ 2.0 │ mercedes-benz │\n",
"│ 35 │ 2.0 │ mercedes │\n",
"│ 36 │ 2.0 │ vokswagen │\n",
"│ 37 │ 2.0 │ triumph │"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"flatten(origin_brand3, :brand_function)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Julia 1.4.1",
"language": "julia",
"name": "julia-1.4"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.4.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}