"""Source code for ``perfana.datasets.base``: loaders for the sample datasets."""

import numpy as np
import pandas as pd

from ._file_handler import filepath

# Public API of this module: the names exported by ``from ... import *``.
__all__ = ["load_cube", "load_etf", "load_hist", "load_smi"]


def load_cube(*, download: bool = False) -> np.ndarray:
    """
    Loads a sample Monte Carlo simulation of 9 asset classes.

    The dimension of the cube is 80 * 1000 * 9. The first axis represents the time, the second
    represents the number of trials (simulations) and the third represents each asset class.

    Parameters
    ----------
    download: bool
        If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the
        stash folder

    Returns
    -------
    ndarray
        A data cube of simulated returns
    """
    # The `download` flag is forwarded untouched; `filepath` decides where the file comes from.
    cube_path = filepath('cube.npy', download)
    return np.load(cube_path)


def load_etf(*, date_as_index: bool = True, download: bool = False) -> pd.DataFrame:
    """
    Dataset contains prices of 4 ETF ranging from 2001-06-15 to 2019-03-01.

    Parameters
    ----------
    date_as_index: bool
        If True, sets the first column as the index of the DataFrame

    download: bool
        If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the
        stash folder

    Returns
    -------
    DataFrame
        A data frame containing the prices of 4 ETF
    """
    fp = filepath('etf.csv', download)

    # index_col=None is the pandas default, i.e. the first column stays a regular column.
    df = pd.read_csv(fp, index_col=0 if date_as_index else None, parse_dates=[0])

    # Header cells may carry padding; the index name can be None, so only strip real strings.
    if date_as_index and isinstance(df.index.name, str):
        df.index.name = df.index.name.strip()

    df.columns = df.columns.str.strip()
    for c in 'VBK', 'BND':
        # These columns are expected to load as padded text; if pandas already parsed
        # them as numbers, the `.str` accessor would raise, so leave them untouched.
        if not pd.api.types.is_numeric_dtype(df[c]):
            df[c] = pd.to_numeric(df[c].str.strip())

    return df


def load_hist(*, date_as_index: bool = True, download: bool = False) -> pd.DataFrame:
    """
    Dataset containing 20-years returns data from different asset classes spanning from 1988 to 2019.

    Parameters
    ----------
    date_as_index: bool
        If True, sets the first column as the index of the DataFrame

    download: bool
        If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the
        stash folder

    Returns
    -------
    DataFrame
        A data frame containing the historical returns of the different asset classes
    """
    fp = filepath('hist.csv', download)

    # index_col=None is the pandas default, i.e. the first column stays a regular column.
    df = pd.read_csv(fp, index_col=0 if date_as_index else None, parse_dates=[0])

    # Header cells may carry padding; the index name can be None, so only strip real strings.
    if date_as_index and isinstance(df.index.name, str):
        df.index.name = df.index.name.strip()

    df.columns = df.columns.str.strip()
    return df


def load_smi(*, as_returns: bool = False, download: bool = False) -> pd.DataFrame:
    """
    Dataset contains the close prices of all 20 constituents of the Swiss Market Index (SMI) from
    2011-09-09 to 2012-03-28.

    Parameters
    ----------
    as_returns: bool
        If true, transforms the price data to returns data

    download: bool
        If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the
        stash folder

    Returns
    -------
    DataFrame
        A data frame of the closing prices of all 20 constituents of the Swiss Market Index, or of their
        period-over-period percentage changes when ``as_returns`` is True
    """
    prices = pd.read_csv(filepath('smi.csv', download), index_col=0, parse_dates=[0])
    if not as_returns:
        return prices

    # pct_change() leaves the first row as NaN (no prior price); dropna() removes it,
    # along with any other row that contains a missing value.
    return prices.pct_change().dropna()