Source code for perfana.datasets.base

import numpy as np
import pandas as pd

from ._file_handler import filepath

__all__ = ["load_cube", "load_etf", "load_hist", "load_smi"]


[docs]def load_cube(*, download=False) -> np.ndarray: """ Loads a sample Monte Carlo simulation of 9 asset classes. The dimension of the cube is 80 * 1000 * 9. The first axis represents the time, the second represents the number of trials (simulations) and the third represents each asset class. Parameters ---------- download: bool If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the stash folder Returns ------- ndarray A data cube of simulated returns """ return np.load(filepath('cube.npy', download))
[docs]def load_etf(*, date_as_index: bool = True, download=False) -> pd.DataFrame: """ Dataset contains prices of 4 ETF ranging from 2001-06-15 to 2019-03-01. Parameters ---------- date_as_index: If True, sets the first column as the index of the DataFrame download: bool If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the stash folder Returns ------- DataFrame A data frame containing the prices of 4 ETF """ fp = filepath('etf.csv', download) if date_as_index: df = pd.read_csv(fp, index_col=0, parse_dates=[0]) df.index.name = df.index.name.strip() else: df = pd.read_csv(fp, parse_dates=[0]) df.columns = df.columns.str.strip() for c in 'VBK', 'BND': df[c] = pd.to_numeric(df[c].str.strip()) return df
def load_hist(*, date_as_index: bool = True, download=False) -> pd.DataFrame: """ Dataset containing 20-years returns data from different asset classes spanning from 1988 to 2019. Parameters ---------- date_as_index: If True, sets the first column as the index of the DataFrame download: bool If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the stash folder Returns ------- DataFrame A data frame containing the prices of 4 ETF """ fp = filepath('hist.csv', download) if date_as_index: df = pd.read_csv(fp, index_col=0, parse_dates=[0]) df.index.name = df.index.name.strip() else: df = pd.read_csv(fp, parse_dates=[0]) df.columns = df.columns.str.strip() return df
[docs]def load_smi(*, as_returns=False, download=False) -> pd.DataFrame: """ Dataset contains the close prices of all 20 constituents of the Swiss Market Index (SMI) from 2011-09-09 to 2012-03-28. Parameters ---------- as_returns: bool If true, transforms the price data to returns data download: bool If True, forces the data to be downloaded again from the repository. Otherwise, loads the data from the stash folder Returns ------- DataFrame A data frame of the closing prices of all 20 constituents of the Swiss Market Index """ df = pd.read_csv(filepath('smi.csv', download), index_col=0, parse_dates=[0]) if as_returns: df = df.pct_change().dropna() return df