7.10. Case Study HTML Dahab

7.10.1. Case Study 1

import pandas as pd
from matplotlib import pyplot as plt

# Console display settings for pandas, set through the attribute-style
# options API: a wider line, more columns, and up to 100 rows / sequence
# items before output is truncated.
pd.options.display.width = 300
pd.options.display.max_columns = 15
pd.options.display.max_rows = 100
pd.options.display.min_rows = 100
pd.options.display.max_seq_items = 100


# Original page (kept for reference); the URL actually read is below.
# DATA = 'https://en.wikipedia.org/wiki/Dahab'
DATA = 'https://python3.info/_static/html-dahab-2.html'

# Every <table> found in the page, in document order.
tables = pd.read_html(DATA)
sea = tables[2]

# Tidy the second table: one row per measurement, one column per month.
air = (
    tables[1]
    # The header has two levels; keep only the lower one.
    .droplevel(level=0, axis='columns')
    # Discard the last two rows (presumably footer/source rows - confirm).
    .iloc[:-2]
    .drop(columns='Year')
    .set_index('Month')
    # Shorten the verbose row labels.
    .rename(index={
        'Average high °C (°F)': 'temperature high',
        'Daily mean °C (°F)': 'temperature mean',
        'Average low °C (°F)': 'temperature low',
        'Average precipitation mm (inches)': 'precipitation average',
        'Average rainy days': 'precipitation days',
        'Mean daily sunshine hours': 'sunshine hours',
    })
    # Remove the trailing " (...)" part of each cell, keeping the first number.
    .replace(to_replace=r' \(.+\)', value='', regex=True)
    # NOTE(review): float16 is lossy; the final round(1) appears to hide the
    # error for small values - consider a wider float type.
    .astype('float16')
    .convert_dtypes()
    .round(decimals=1)
)


# Daily mean temperature
# Select the 'temperature mean' row: one value per month column.
mean_per_month = air.loc['temperature mean', :]
# Built-in round() works whether mean() yields a NumPy scalar or a plain float.
mean = round(mean_per_month.mean(), 1)  # 23.5
plot = (
    mean_per_month
    .plot(
        kind='line',
        title='Dahab, South Sinai, Egypt\nMean temperature',
        xlabel='Month',
        ylabel='Temperature [°C]',
        grid=True,
        figsize=(16, 10),
        label='temperature'))
# The monthly points are drawn at x positions 0 .. size - 1, so the
# reference line must end at size - 1 to stop at the last month instead of
# extending one step past it.
plt.hlines(
    y=mean,
    xmin=0,
    xmax=mean_per_month.size - 1,
    color='red',
    label='all time mean')
plt.legend()
# plt.show()
../../_images/html-dahab-1-mean-temperature.png

7.10.2. Case Study 2

# %%

import pandas as pd
from matplotlib import pyplot as plt

# %%

# Original page (kept for reference); the URL actually read is below.
# DATA = 'https://en.wikipedia.org/wiki/Dahab'
DATA = 'https://python3.info/_static/html-dahab-2.html'

# Every <table> found in the page, in document order.
dfs = pd.read_html(DATA)

# %%

# Air temperature: second row of the second table, one value per month.
air = (
    dfs[1]
    .iloc[1]
    # The header has two levels; keep only the lower one as the index.
    .droplevel(level=0)
    # 'Year' and 'Month' are not monthly readings.
    .drop(labels=['Year', 'Month'])
    .astype('string')
    # Capture the first number written with exactly one decimal digit.
    .str.extract(pat=r'(\d{1,2}\.\d{1})', expand=True)
    .astype('float64')
    .rename(columns={0: 'air'})
    .convert_dtypes()
)

# %%

# Sea temperature: the row labelled 0 of the third table, indexed by that
# table's column labels.
sea = (
    dfs[2]
    .loc[0]
    .astype('string')
    # Capture the first one- or two-digit number in each cell.
    .str.extract(pat=r'(\d{1,2})', expand=True)
    .astype('float64')
    .rename(columns={0: 'sea'})
    .convert_dtypes()
)

# %%

# Put the air and sea readings side by side, aligned on their shared index.
temperature = pd.concat(objs=[air, sea], axis=1)

# %%

# Air and sea temperature per month, plus a flat 'baseline' series at the
# mean air temperature.
plot_average_temperature = (
    temperature
    .assign(baseline=temperature['air'].mean())
    .plot.line(
        title='Dahab, Egypt - Average Temperature',
        xlabel='Month',
        ylabel='Temperature [°C]',
        figsize=(10, 5),
    )
)

plt.show()

# %%

# Air-minus-sea temperature per month, drawn against a zero 'baseline' so
# the sign of the difference is easy to read.
plot_difference = (
    temperature
    .assign(
        diff=lambda frame: frame['air'].sub(frame['sea']),
        baseline=0,
    )
    [['diff', 'baseline']]
    .plot.line(
        title='Dahab, Egypt - Temperature Difference',
        xlabel='Month',
        ylabel='Temperature [°C]',
        figsize=(10, 5),
    )
)

plt.show()
../../_images/html-dahab-2-mean-temperature.png
../../_images/html-dahab-2-diff-temperature.png