import pandas as pd
inventories = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-09-06/inventories.csv.gz')
inventory_sets = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-09-06/inventory_sets.csv.gz')
sets = pd.read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-09-06/sets.csv.gz')
Legos
Introduction
My attempt to recreate David Robinson’s analysis of the LEGO dataset (TidyTuesday, 2022-09-06).
Load datasets
First, let's read the data!
import os
import re

# Directory (relative path) that holds the LEGO data files.
data_dir = '../../data-screencasts/lego-data/'
# List every entry in the data directory; sorted because os.listdir
# returns entries in arbitrary order.
files = sorted(os.listdir(data_dir))
# Build the full path of each file.
files_paths = [os.path.join(data_dir, file) for file in files]
# Derive data frame names by stripping the trailing ".csv.gz" extension.
# Both dots are escaped and the pattern is anchored with "$" so that only
# a genuine ".csv.gz" suffix is removed.
names = [re.sub(r'\.csv\.gz$', '', name) for name in files]
# load dataframes into dictionary
lego_datasets = {key: pd.read_csv(file) for (key, file) in zip(names, files_paths)}
inventories.value_counts('set_num')
set_num
657-2 16
659-2 12
666-1 11
266-2 11
264-2 11
..
6397-1 1
6396384-1 1
6396-1 1
6395-1 1
vwkit-1 1
Name: count, Length: 32348, dtype: int64
from IPython.display import Markdown
from tabulate import tabulate
table = inventory_sets.head()
Markdown(tabulate(
table,
headers="keys"
))
| | inventory_id | set_num | quantity |
|---|---|---|---|
| 0 | 35 | 75911-1 | 1 |
| 1 | 35 | 75912-1 | 1 |
| 2 | 39 | 75048-1 | 1 |
| 3 | 39 | 75053-1 | 1 |
| 4 | 50 | 4515-1 | 1 |