import pandas as pd
# Common URL prefix of the three gzip-compressed CSV files loaded below.
_DATA_URL = 'https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-09-06/'

# Read each table straight from its URL into its own DataFrame.
inventories = pd.read_csv(_DATA_URL + 'inventories.csv.gz')
inventory_sets = pd.read_csv(_DATA_URL + 'inventory_sets.csv.gz')
sets = pd.read_csv(_DATA_URL + 'sets.csv.gz')
Legos
Introduction
My attempt to recreate David Robinson’s analysis of the LEGO dataset (TidyTuesday, 2022-09-06).
Load datasets
First, let's read the data!
import os, re
# Directory holding the local copies of the dataset files.
data_dir = '../../data-screencasts/lego-data/'

# get all files
files = os.listdir(data_dir)

# concatenate file names and directory path
files_paths = [os.path.join(data_dir, file) for file in files]

# create names for data frames: strip the '.csv.gz' extension.
# Both dots are escaped and the pattern is anchored at the end so that only
# a literal trailing '.csv.gz' is removed.
names = [re.sub(r'\.csv\.gz$', '', name) for name in files]

# load dataframes into a dictionary keyed by the extension-less file name
lego_datasets = {key: pd.read_csv(file) for (key, file) in zip(names, files_paths)}
# Count how many rows of `inventories` share each set_num value.
inventories.value_counts('set_num')
set_num
657-2 16
659-2 12
666-1 11
266-2 11
264-2 11
..
6397-1 1
6396384-1 1
6396-1 1
6395-1 1
vwkit-1 1
Name: count, Length: 32348, dtype: int64
from IPython.display import Markdown
from tabulate import tabulate
# Render the first rows of inventory_sets as a Markdown table,
# using the DataFrame's column names as the header row.
table = inventory_sets.head()
Markdown(tabulate(table, headers="keys"))
|   | inventory_id | set_num | quantity |
|---|---|---|---|
| 0 | 35 | 75911-1 | 1 |
| 1 | 35 | 75912-1 | 1 |
| 2 | 39 | 75048-1 | 1 |
| 3 | 39 | 75053-1 | 1 |
| 4 | 50 | 4515-1 | 1 |