7.9. Case Study HTML Astronauts

7.9.1. Case Study - 0x01

"""
>>> result
{'years': 10, 'months': 4, 'days': 7, 'hours': 17, 'minutes': 47, 'seconds': 0}
"""

import pandas as pd

# Loosen pandas' console limits so wide/long tables print without truncation.
pd.options.display.width = 1000
pd.options.display.max_rows = 500
pd.options.display.max_columns = 50

# Time units expressed in seconds; each one builds on the previous.
SECOND = 1
MINUTE = 60 * SECOND
HOUR = 60 * MINUTE
DAY = 24 * HOUR
MONTH = 30.4375 * DAY  # one twelfth of YEAR (365.25 / 12 days)
YEAR = 365.25 * DAY

# DATA = 'https://en.wikipedia.org/wiki/European_Astronaut_Corps'
DATA = 'https://python3.info/_static/european-astronaut-corps.html'


def duration(between):
    """Split a time span into years, months, days, hours, minutes, seconds.

    The span is broken down largest unit first, using the module constants
    ``YEAR`` (365.25 days) and ``MONTH`` (30.4375 days); every smaller
    component is computed from what is left over after the larger ones.

    Args:
        between: A ``pandas.Timedelta`` (anything exposing
            ``total_seconds()``), or ``pd.NaT``.

    Returns:
        ``pd.NaT`` unchanged when ``between`` is ``pd.NaT``; otherwise a
        dict with integer values under the keys ``'years'``, ``'months'``,
        ``'days'``, ``'hours'``, ``'minutes'`` and ``'seconds'``.
        Fractional seconds are truncated.
    """
    if between is pd.NaT:
        return between
    years, seconds = divmod(between.total_seconds(), YEAR)
    months, seconds = divmod(seconds, MONTH)
    days, seconds = divmod(seconds, DAY)
    # Keep dividing the running remainder.  ``between.seconds`` is only the
    # time-of-day part of the Timedelta and does not line up with the
    # remainder here, because YEAR and MONTH are not whole numbers of days.
    hours, seconds = divmod(seconds, HOUR)
    minutes, seconds = divmod(seconds, MINUTE)
    return {
        'years': int(years),
        'months': int(months),
        'days': int(days),
        'hours': int(hours),
        'minutes': int(minutes),
        'seconds': int(seconds),
    }


# Fetch the page and parse every HTML table it contains into a DataFrame.
tables = pd.read_html(DATA)
# NOTE(review): presumably table 0 lists current and table 2 former
# astronauts -- confirm against the page layout.
current, former = tables[0], tables[2]

# Entries that cannot be parsed as a timedelta are coerced to NaT.
c = pd.to_timedelta(current['Time in space'], errors='coerce').sum()
f = pd.to_timedelta(former['Time in space'], errors='coerce').sum()

# Combined time in space of both groups, broken down into calendar-like units.
result = duration(c + f)

7.9.2. Case Study - 0x02

import pandas as pd


# Console display limits for printing DataFrames in this case study.
pd.options.display.width = 200
pd.options.display.max_columns = 15
pd.options.display.max_rows = 100

# %%
# YEAR is expressed in days, matching the ``.days`` attribute used below.
YEAR = 365.25
DATA = 'https://en.wikipedia.org/wiki/European_Astronaut_Corps'

# %%
# Parse every HTML table on the page into a list of DataFrames.
dfs = pd.read_html(DATA)
# NOTE(review): presumably table 0 holds active and table 2 former
# astronauts -- confirm against the page layout.
active, former = dfs[0], dfs[2]

# %%
# Convert each cell to a timedelta one by one, then total the column.
a = active['Time in space'].map(pd.to_timedelta).sum()
f = former['Time in space'].map(pd.to_timedelta).sum()

# Only the whole-day part of the combined total is used for the year count.
years = (a + f).days / YEAR
# 9.806981519507186