7.2. CSV COVID19

7.2.1. Case Study - 0x01

#%% Imports
from datetime import date
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday
from pandas.tseries.holiday import EasterMonday, Easter
from pandas.tseries.offsets import Day
import matplotlib.pyplot as plt
import matplotlib.axes


#%% Settings
# Console display limits for pandas output: total width in characters, and the
# maximum number of columns and rows printed before pandas truncates.
pd.set_option('display.width', 200)
pd.set_option('display.max_columns', 15)
pd.set_option('display.max_rows', 100)


#%% Data Sources
#CONFIRMED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
#DEATHS = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'
#RECOVERED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'

# NOTE(review): presumably mirrors of the CSSE files named in the commented-out
# URLs above -- confirm that the contents match.
CONFIRMED = 'https://python3.info/_static/covid19-confirmed.csv'
DEATHS = 'https://python3.info/_static/covid19-deaths.csv'
RECOVERED = 'https://python3.info/_static/covid19-recovered.csv'


#%% Data Frames
# Each read_csv call fetches its URL when this line executes; the three frames
# are module-level and are read by covid19() below.
confirmed = pd.read_csv(CONFIRMED)
deaths = pd.read_csv(DEATHS)
recovered = pd.read_csv(RECOVERED)


#%% Get Country from DataFrame
def covid19(country: str = None) -> pd.DataFrame:
    """
    Build a date-indexed table of Confirmed, Deaths and Recovered totals.

    The module-level ``confirmed``, ``deaths`` and ``recovered`` frames are
    each restricted to the rows whose ``Country/Region`` equals ``country``
    (all rows are kept when ``country`` is None), summed per date and
    returned as three int64 columns.

    >>> covid19('Poland').loc['2022-01-01']
    Confirmed    4120248
    Deaths         97559
    Recovered          0
    Name: 2022-01-01 00:00:00, dtype: int64

    >>> covid19('France').loc['2022-01-01']
    Confirmed    10296909
    Deaths         124839
    Recovered           0
    Name: 2022-01-01 00:00:00, dtype: int64

    >>> covid19().loc['2022-01-01']
    Confirmed    289931319
    Deaths         5473487
    Recovered            0
    Name: 2022-01-01 00:00:00, dtype: int64
    """
    def _totals(frame: pd.DataFrame) -> pd.Series:
        # None means "every row"; otherwise keep the matching country only.
        if country is None:
            selected = frame
        else:
            selected = frame[frame['Country/Region'] == country]
        # After transposing, the first four rows are skipped; presumably they
        # are the non-date columns (province, country, lat, long) -- confirm.
        per_date = selected.transpose().iloc[4:]
        summed = per_date.sum(axis='columns').astype('int64')
        # Row labels are parsed one by one into timestamps.
        return summed.rename(pd.to_datetime, axis='index')

    sources = (
        ('Confirmed', confirmed),
        ('Deaths', deaths),
        ('Recovered', recovered),
    )
    return pd.DataFrame({label: _totals(frame) for label, frame in sources})


#%% Calendars
class PLHolidayCalendar(AbstractHolidayCalendar):
    """
    Public holidays in Poland expressed as pandas holiday rules.

    Fixed-date holidays are declared by month and day.  Movable feasts are
    anchored on 1 January and moved with the ``Easter`` offset, followed by
    an extra ``Day`` offset where the feast falls after Easter Sunday.

    Source: https://en.wikipedia.org/wiki/Public_holidays_in_Poland
    """
    rules = [
        Holiday(name='New Years Day', month=1, day=1),
        Holiday(name='Epiphany', month=1, day=6),
        Holiday(name='Easter', month=1, day=1, offset=[Easter()]),
        EasterMonday,
        Holiday(name='May Day', month=5, day=1),
        Holiday(name='Constitution Day', month=5, day=3),
        Holiday(name='Pentecost Sunday', month=1, day=1,
                offset=[Easter(), Day(49)]),
        Holiday(name='Corpus Christi', month=1, day=1,
                offset=[Easter(), Day(60)]),
        Holiday(name='Assumption of the Blessed Virgin Mary', month=8, day=15),
        Holiday(name='All Saints Day', month=11, day=1),
        Holiday(name='Independence Day', month=11, day=11),
        Holiday(name='Christmas Day', month=12, day=25),
        Holiday(name='Second Day of Christmastide', month=12, day=26),
    ]


#%% Show trendline
def plot_trendline(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """
    Draw the Confirmed and Deaths columns as two stacked line charts.

    NOTE(review): with ``subplots=True`` pandas documents the return value as
    an array of Axes rather than a single Axes -- confirm the annotation.
    """
    selected = data.loc[:, ['Confirmed','Deaths']]
    options = dict(
        kind='line',
        subplots=True,
        layout=(2, 1),
        figsize=(10, 10),
    )
    return selected.plot(**options)


#%% Show fatalities
def plot_fatalities(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """
    Plot Deaths as a percentage of Confirmed, rounded to two decimals.

    The y axis is fixed to the range 0-6 percent.
    """
    ratio = data['Deaths'] / data['Confirmed']
    percent = ratio.mul(100).round(2)  # fraction -> percent
    options = dict(
        kind='line',
        title='Percent of deaths vs new cases in last two weeks',
        xlabel='Day',
        ylabel='Percent',
        ylim=(0.0, 6.0),
        figsize=(10, 10),
        grid=True,
    )
    return percent.plot(**options)


#%% Confirmed cases day-by-day difference
def plot_confirmed_daily(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """Plot the row-to-row difference of the Confirmed column."""
    daily_change = data['Confirmed'].diff()
    return daily_change.plot()


#%% Covid infection waves
def plot_confirmed_waves(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """Plot the weekly median of the row-to-row difference of Confirmed."""
    daily_change = data['Confirmed'].diff()
    weekly_median = daily_change.resample('W').median()
    return weekly_median.plot()


#%% Confirmed cases in last two weeks
def plot_confirmed_last(data: pd.DataFrame, freq='2W') -> matplotlib.axes.Axes:
    """
    Plot the row-to-row difference of Confirmed over the trailing period.

    ``Series.last`` is deprecated since pandas 2.1, so the trailing window is
    selected explicitly with the rule that method applied: keep the rows
    located after ``index[-1] - offset`` in the (sorted) index.

    :param data: frame with a sorted DatetimeIndex and a 'Confirmed' column
    :param freq: offset alias or offset object giving the window length
    :return: the result of ``Series.plot()`` for the selected rows
    """
    # Local import keeps this block independent of the file's import section.
    from pandas.tseries.frequencies import to_offset

    confirmed = data['Confirmed']
    # An empty series has no last timestamp to anchor on; it is passed on as-is.
    if len(confirmed.index) > 0:
        cutoff = confirmed.index[-1] - to_offset(freq)
        first_kept = confirmed.index.searchsorted(cutoff, side='right')
        confirmed = confirmed.iloc[first_kept:]
    return confirmed.diff().plot()


#%% Confirmed cases every month
def plot_confirmed_monthly(data: pd.DataFrame) -> matplotlib.axes.Axes:
    """
    Plot the number of newly confirmed cases in each calendar month.

    'Confirmed' holds running totals (the other plots in this file take
    ``.diff()`` to obtain daily figures), so summing it directly would add
    the same cases again for every day of the month.  The daily increments
    are summed instead.

    :param data: frame with a DatetimeIndex and a 'Confirmed' column
    :return: the result of ``Series.plot()`` for the monthly sums

    The first row has no predecessor, so its increment is NaN and is ignored
    by the sum.
    """
    new_cases = data['Confirmed'].diff()
    # An offset object is used because the 'M' alias is deprecated since
    # pandas 2.2 in favour of 'ME', which older versions do not accept.
    monthly = new_cases.resample(pd.offsets.MonthEnd()).sum()
    return monthly.plot()


#%%
def plot_confirmed_after_holidays(
        data: pd.DataFrame,
        since: date | str | None = '2021-01-01',
        until: date | str | None = '2022-02-07',
        days: int = 14,
        calendar: AbstractHolidayCalendar = PLHolidayCalendar(),
    ) -> matplotlib.axes.Axes:
    """
    Plot the change in Confirmed for the ``days`` rows starting at each holiday.

    One subplot is drawn per holiday returned by ``calendar.holidays(since,
    until)``.  The x axis is the row position counted from the holiday, so
    the panels share an axis and can be compared.

    :param data: frame with a DatetimeIndex and a 'Confirmed' column
    :param since: start of the period searched for holidays
    :param until: end of the period searched for holidays
    :param days: number of rows taken from each holiday onwards
    :param calendar: holiday calendar supplying the dates
    :return: the result of ``DataFrame.plot()``

    NOTE(review): with ``subplots=True`` pandas documents the return value as
    an array of Axes rather than a single Axes -- confirm the annotation.
    """
    confirmed = data['Confirmed']

    def _window(start):
        # Rows from the holiday onwards, re-indexed 0..days-1 so that the
        # columns of different holidays line up.
        return confirmed.loc[start:].iloc[:days].reset_index(drop=True)

    windows = {holiday.strftime('%Y-%m-%d'): _window(holiday)
               for holiday in calendar.holidays(since, until)}

    # The grid height follows the number of holidays found; a fixed 15 rows
    # only fits the default since/until range.
    return pd.DataFrame(windows).diff().plot(
        kind='line',
        subplots=True,
        layout=(len(windows), 1),
        sharex=True,
        figsize=(5, 15),
        grid=True)


#%% Run
# Script entry point: build the per-country frames, then call every plot
# function on the Poland data.
# NOTE(review): every plt.show() below is commented out, so running this file
# as a script draws the figures without displaying them -- confirm intent.
if __name__ == '__main__':
    poland = covid19('Poland')
    usa = covid19('US')
    france = covid19('France')
    china = covid19('China')
    world = covid19()


    # Only the Poland frame is plotted; usa, france, china and world are not
    # used again inside this block.
    data = poland.loc['2020-01-01':'2022-02-01']

    plot_trendline(data)
    # plt.show()

    plot_fatalities(data)
    # plt.show()

    plot_confirmed_daily(data)
    # plt.show()

    plot_confirmed_waves(data)
    # plt.show()

    plot_confirmed_last(data)
    # plt.show()

    plot_confirmed_monthly(data)
    # plt.show()

    plot_confirmed_after_holidays(data)
    # plt.show()



# Scratchpad of unfinished plotting ideas, kept as a bare string literal so that
# none of it executes.  Its inner Polish notes mean "with a breakdown into
# quarters (Q)" and "macro trends".
"""TODO:

# Resample
poland['Confirmed'].shift(periods=1, freq='D').plot(kind='line')


# Z rozróżnianiem na kwartały (Q)
plot = poland['Confirmed'].resample('Q').plot(kind='line', legend=True)
plot[0].name = '2020-Q1'
plot[1].name = '2020-Q2'
plot[2].name = '2020-Q3'
plot[3].name = '2020-Q4'
plot[4].name = '2021-Q1'
plot[5].name = '2021-Q2'
plot[6].name = '2021-Q3'
plot[7].name = '2021-Q4'
plot[8].name = '2022-Q1'
plt.show()


# Makrotrendy
poland['Confirmed'].diff().rolling(window=14).median().plot()
plt.show()

poland['Confirmed'].diff().rolling(window=14).median().plot()
plt.show()

poland['Confirmed'].diff().resample('W').median().plot()
plt.show()

poland['Confirmed'].diff().resample('M').median().plot()
plt.show()

poland['Confirmed'].diff().resample('Q').median().plot()
plt.show()
"""
../../_images/covid19-poland-confirmed-daily.png

Figure 7.2. Confirmed daily plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-holidays.png

Figure 7.3. Confirmed holidays plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-last.png

Figure 7.4. Confirmed last plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-monthly.png

Figure 7.5. Confirmed monthly plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-confirmed-waves.png

Figure 7.6. Confirmed waves plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-fatalities.png

Figure 7.7. Fatalities plot for the COVID-19 pandemic in Poland.

../../_images/covid19-poland-trendline.png

Figure 7.8. Trendline plot for the COVID-19 pandemic in Poland.

7.2.2. Case Study - 0x02

from datetime import date

import pandas as pd
from doctest import testmod as run_tests
from matplotlib import pyplot as plt
import pandas as pd
from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, EasterMonday, Easter
from pandas.tseries.offsets import Day

# Multiplier used by procent_smiertelnosci(), where the ratio is scaled by 100*PROCENT.
PROCENT = 1


#%%

# CONFIRMED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
# RECOVERED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
# DEATHS = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'

# NOTE(review): presumably mirrors of the CSSE files named in the commented-out
# URLs above -- confirm that the contents match.
CONFIRMED = 'https://python3.info/_static/covid19-confirmed.csv'
RECOVERED = 'https://python3.info/_static/covid19-recovered.csv'
DEATHS = 'https://python3.info/_static/covid19-deaths.csv'


# Each frame is fetched from its URL when the line executes; convert_dtypes()
# then lets pandas pick dtypes for the columns instead of the read_csv defaults.
confirmed = pd.read_csv(CONFIRMED).convert_dtypes()
recovered = pd.read_csv(RECOVERED).convert_dtypes()
deaths = pd.read_csv(DEATHS).convert_dtypes()


class PLHolidayCalendar(AbstractHolidayCalendar):
    """
    Public holidays in Poland expressed as pandas holiday rules.

    Fixed-date holidays are declared by month and day.  Movable feasts are
    anchored on 1 January and moved with the ``Easter`` offset, followed by
    an extra ``Day`` offset where the feast falls after Easter Sunday.

    Source: https://en.wikipedia.org/wiki/Public_holidays_in_Poland

    >>> PLHolidayCalendar().holidays(start='2000-01-01', end='2000-12-31')
    DatetimeIndex(['2000-01-01', '2000-01-06', '2000-04-23', '2000-04-24',
                   '2000-05-01', '2000-05-03', '2000-06-11', '2000-06-22',
                   '2000-08-15', '2000-11-01', '2000-11-11', '2000-12-25',
                   '2000-12-26'],
                  dtype='datetime64[ns]', freq=None)
    """
    rules = [
        # January
        Holiday(name='New Years Day', month=1, day=1),
        Holiday(name='Epiphany', month=1, day=6),
        # Easter Sunday and the day after it
        Holiday(name='Easter', month=1, day=1, offset=[Easter()]),
        EasterMonday,
        # May
        Holiday(name='May Day', month=5, day=1),
        Holiday(name='Constitution Day', month=5, day=3),
        # 49 and 60 days after Easter Sunday
        Holiday(name='Pentecost Sunday', month=1, day=1,
                offset=[Easter(), Day(49)]),
        Holiday(name='Corpus Christi', month=1, day=1,
                offset=[Easter(), Day(60)]),
        # Second half of the year
        Holiday(name='Assumption of the Blessed Virgin Mary', month=8, day=15),
        Holiday(name='All Saints Day', month=11, day=1),
        Holiday(name='Independence Day', month=11, day=11),
        Holiday(name='Christmas Day', month=12, day=25),
        Holiday(name='Second Day of Christmastide', month=12, day=26),
    ]


#%%

def _parse(data, country, name):
    """
    >>> _parse(confirmed, 'Poland', name='confirmed').loc['2021-08-04']
    confirmed    2883448
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> _parse(confirmed, 'Poland', name='confirmed').loc['2021-08-05']
    confirmed    2883624
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> _parse(recovered, 'Poland', name='recovered').loc['2021-08-04']
    recovered    2653981
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> _parse(recovered, 'Poland', name='recovered').loc['2021-08-05']
    recovered    0
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> _parse(deaths, 'Poland', name='deaths').loc['2021-08-04']
    deaths    75269
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> _parse(deaths, 'Poland', name='deaths').loc['2021-08-05']
    deaths    75275
    Name: 2021-08-05 00:00:00, dtype: int64
    """
    if country is not None:
        query = data['Country/Region'] == country
        data = data.loc[query]

    return (
        data
        .transpose()
        .iloc[4:]
        .sum(axis='columns')
        .astype('int')
        .to_frame()
        .rename(lambda x: name, axis='columns')
        .rename(pd.to_datetime, axis='index'))


#%%
def get(country=None):
    """
    Combine confirmed, recovered and deaths totals into one date-indexed frame.

    Each module-level source frame is reduced with ``_parse`` for the given
    ``country`` (None keeps every row) and the three results are joined
    side by side.

    >>> get('Poland').loc['2021-08-04']
    confirmed    2883448
    recovered    2653981
    deaths         75269
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> get('Poland').loc['2021-08-05']
    confirmed    2883624
    recovered          0
    deaths         75275
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> get('United Kingdom').loc['2021-08-04']
    confirmed    5980830
    recovered      24693
    deaths        157209
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> get('United Kingdom').loc['2021-08-05']
    confirmed    6010860
    recovered          0
    deaths        157314
    Name: 2021-08-05 00:00:00, dtype: int64

    >>> get().loc['2021-08-04']
    confirmed    200758580
    recovered    130899061
    deaths         4283131
    Name: 2021-08-04 00:00:00, dtype: int64

    >>> get().loc['2021-08-05']
    confirmed    201444202
    recovered            0
    deaths         4294122
    Name: 2021-08-05 00:00:00, dtype: int64
    """
    # Dict order fixes the column order of the result.
    sources = {
        'confirmed': confirmed,
        'recovered': recovered,
        'deaths': deaths,
    }
    columns = [_parse(frame, country, name=label)
               for label, frame in sources.items()]
    return pd.concat(columns, axis='columns')


#%%

# Module-level frames built when the file runs.  The functions below read
# `world` and `poland`; the other frames are not referenced again in this section.
poland = get('Poland')
germany = get('Germany')
india = get('India')

uk = get('United Kingdom')
france = get('France')
china = get('China')

# No country given: totals over every row of the source frames.
world = get()


#%%

def liczba_potwierdzonych_oraz_smierci_w_tygodniowych_okresach():
    """
    Return the number of new confirmed cases and deaths in each weekly period.

    The 'confirmed' and 'deaths' columns of ``world`` hold running totals
    (consecutive days in the doctests of ``get`` only ever grow), so summing
    them directly would add the same cases again for every day of the week.
    The daily increments are summed instead.

    :return: frame with 'confirmed' and 'deaths' columns, one row per week

    The first row has no predecessor, so its increment is NaN and is ignored
    by the sum; the result is therefore floating point.
    """
    daily_increments = world.loc[:, ['confirmed', 'deaths']].diff()
    return daily_increments.resample('W').sum()

def liczba_zachorowan_na_jeden_przypadek_smiertelny():
    """Return, per date, the world confirmed count divided by the world deaths count."""
    cases, fatalities = world['confirmed'], world['deaths']
    return cases.div(fatalities)

def procent_smiertelnosci():
    """Return, per date, world deaths divided by world confirmed, scaled by 100*PROCENT."""
    share = world['deaths'].div(world['confirmed'])
    return share * 100 * PROCENT

def get_holidays(year: int, calendar: AbstractHolidayCalendar) -> pd.DatetimeIndex:
    """Return the holidays of ``calendar`` between 1 January and 31 December of ``year``."""
    first_day = date(year, 1, 1)
    last_day = date(year, 12, 31)
    return calendar.holidays(start=first_day, end=last_day)

def liczba_zachorowan_po_swietach(year, calendar=PLHolidayCalendar()):
    """
    Daily change of confirmed cases in ``poland`` after each holiday of ``year``.

    Only holidays earlier than the current timestamp are included.  Each
    column is labelled with the holiday date and holds the differences
    between consecutive rows of the window that starts on that holiday.

    >>> data = liczba_zachorowan_po_swietach(year=2022)
    >>> plot = data.plot(
    ...    kind='line',
    ...    subplots=True,
    ...    sharey=True,
    ...    sharex=True,
    ...    grid=True,
    ...    figsize=(10,20))
    >>> # plt.show()
    """
    now = pd.Timestamp('today')
    all_holidays = get_holidays(year, calendar)
    past_holidays = all_holidays[all_holidays < now]

    def _changes_after(holiday, days=10):
        window = poland.loc[holiday:, 'confirmed'].iloc[:days]
        changes = window.diff().reset_index(drop=True)
        # The first difference has no predecessor, so it is dropped before
        # the values are cast back to integers.
        return changes.iloc[1:].astype('int')

    result = {}
    for holiday in past_holidays:
        result[format(holiday, '%Y-%m-%d')] = _changes_after(holiday)
    return pd.DataFrame(result)

7.2.3. Case Study - 0x03

from doctest import testmod as run_tests
import pandas as pd
from matplotlib import pyplot as plt

# Console display limits for pandas output: total width, number of columns,
# number of rows (maximum and minimum shown) and items shown per sequence.
pd.set_option('display.width', 200)
pd.set_option('display.max_columns', 8)
pd.set_option('display.max_rows', 100)
pd.set_option('display.min_rows', 100)
pd.set_option('display.max_seq_items', 100)

#%%

# CONFIRMED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv'
# RECOVERED = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv'
# DEATHS = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv'

# NOTE(review): presumably mirrors of the CSSE files named in the commented-out
# URLs above -- confirm that the contents match.
CONFIRMED = 'https://python3.info/_static/covid19-confirmed.csv'
RECOVERED = 'https://python3.info/_static/covid19-recovered.csv'
DEATHS = 'https://python3.info/_static/covid19-deaths.csv'

# Short replacements for the two source column names; _get() filters on 'country'.
COLUMNS = {
    'Province/State': 'region',
    'Country/Region': 'country',
}

# Each frame is fetched from its URL when the line executes, then renamed.
confirmed = pd.read_csv(CONFIRMED).rename(columns=COLUMNS)
deaths = pd.read_csv(DEATHS).rename(columns=COLUMNS)
recovered = pd.read_csv(RECOVERED).rename(columns=COLUMNS)

#%%
def _get(df: pd.DataFrame, country: str, name: str) -> pd.Series:
    """
    >>> _get(confirmed, 'Poland', 'confirmed').loc['2021-01-01']
    1305774
    >>> _get(deaths, 'Poland', 'deaths').loc['2021-01-01']
    28956
    >>> _get(recovered, 'Poland', 'recovered').loc['2021-01-01']
    1046281
    """
    if country is not None:
        df = df.query('country == @country')
    return (df
        .transpose()
        .iloc[4:]
        .sum(axis='columns')
        .rename(name)
        .rename(index=pd.to_datetime)
        .astype('int64')
        .convert_dtypes())

def covid19(country: str = None) -> pd.DataFrame:
    """
    Combine confirmed, deaths and recovered totals into one date-indexed frame.

    Each module-level source frame is reduced with ``_get`` for the given
    ``country`` (None keeps every row) and the three series are joined
    side by side.

    >>> covid19('Poland').loc['2021-01-01']
    confirmed    1305774
    deaths         28956
    recovered    1046281
    Name: 2021-01-01 00:00:00, dtype: Int64

    >>> covid19('US').loc['2021-01-01']
    confirmed    20397400
    deaths         352804
    recovered           0
    Name: 2021-01-01 00:00:00, dtype: Int64

    >>> covid19('China').loc['2021-01-01']
    confirmed    102649
    deaths         4884
    recovered     90031
    Name: 2021-01-01 00:00:00, dtype: Int64
    """
    # Tuple order fixes the column order of the result.
    sources = (
        ('confirmed', confirmed),
        ('deaths', deaths),
        ('recovered', recovered),
    )
    series = [_get(frame, country, name=label) for label, frame in sources]
    return pd.concat(series, axis='columns')

# Runs the doctests above (run_tests is doctest.testmod) every time this file
# executes; they read the frames loaded from the URLs at the top of the section.
run_tests()

#%%
# Module-level frames built when the file runs.  Only `pl` is used by the
# plotting cells below.
pl = covid19('Poland')
us = covid19('US')
india = covid19('India')
china = covid19('China')
france = covid19('France')
# No country given: totals over every row of the source frames.
world = covid19()


#%%

# Line chart of the 'confirmed' column for Poland.
data = pl['confirmed']
plot_confirmed_total = data.plot(
    kind='line',
    label='Confirmed',
    title='Total confirmed cases in Poland',
    xlabel='Date',
    ylabel='Total confirmed cases',)

plt.tight_layout()
# plt.show()
#%%

# Line chart of the row-to-row difference of 'confirmed' for Poland.
# NOTE(review): no new figure is created here and plt.show() is commented out,
# so when the file runs top to bottom this may draw onto the axes of the
# previous cell -- confirm.
data = pl['confirmed'].diff()
plot_confirmed_daily = data.plot(
    kind='line',
    label='Confirmed',
    title='Daily confirmed cases in Poland',
    xlabel='Date',
    ylabel='Daily confirmed cases',)

plt.tight_layout()
# plt.show()

# %%

def mortality(df: pd.DataFrame, since='2020-04-01', until=None) -> pd.Series:
    """
    Return deaths as a percentage of confirmed cases for a date range.

    The ratio is computed for every row, cut to the ``since``..``until``
    label slice, multiplied by 100 and stripped of missing values.
    """
    ratio = df['deaths'].div(df['confirmed'])
    in_range = ratio.loc[since:until]
    percent = in_range * 100
    return percent.dropna()

# Mortality percentage for Poland as a line chart, with a horizontal red line
# at the mean of the plotted values spanning the whole date range.
data = mortality(pl)
plot_mortality = data.plot(
    kind='line',
    title='Mortality in Poland',
    ylabel='mortality [%]',
    label='Mortality',
    xlabel='date')
plt.hlines(data.mean(), xmin=data.index.min(), xmax=data.index.max(), color='red', label='Mean')
plt.legend()
plt.tight_layout()
# plt.show()
../../_images/covid19-c-poland-confirmed-total.png
../../_images/covid19-c-poland-confirmed-daily.png
../../_images/covid19-c-poland-mortality.png