Fund performance against index
Reading Warren Buffett, I wanted to test whether Polish equity funds are performing as badly as those in the '60s and '70s, when he was complaining about them.
I got fund pricing data from the inwestinfo.pl website and index prices from stooq.pl.
!pip install bs4 pandas fastcore seaborn
import datetime
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import urllib3
urllib3.disable_warnings() # disable ssl verification warnings
from fastcore.parallel import parallel
from functools import reduce
import seaborn as sns
# First day to fetch fund quotes for; a date literal is clearer than
# round-tripping a string through strptime().
start_date = datetime.date(1995, 9, 1)
def dates_since(start_date, end_date=None):
    """Yield consecutive dates from start_date (inclusive) up to end_date (exclusive).

    Parameters:
        start_date: first date to yield.
        end_date: stop boundary (exclusive); defaults to today's date when None,
            matching the original behavior.
    """
    end = end_date if end_date is not None else datetime.date.today()
    d = start_date
    while d < end:
        yield d
        d += datetime.timedelta(days=1)
def get_fund_prices_for_day(day):
    """Scrape stock-fund quotes from inwestinfo.pl for a single day.

    Parameters:
        day: date whose archived quote table should be fetched.

    Returns:
        list of (name, date, price) string tuples; empty when the page has no
        quote table for that day (e.g. weekends/holidays) or a row is malformed.
    """
    print(".", end="")  # cheap progress indicator while running in parallel
    # funduszeKategorie=10 selects stock funds; dataArchiwum gives the table as of `day`
    url = f'https://www.inwestinfo.pl/fundusze/fundusze-inwestycyjne/wszystkie/?funduszeKategorie%5B%5D=10&archiwum=1&dataArchiwum={day}'
    res = requests.get(url, verify=False) # do not verify the certificate
    quotes = []
    soup = BeautifulSoup(res.content, 'html.parser')
    # A bare `except:` here would also swallow KeyboardInterrupt/SystemExit;
    # catch only the errors that actually signal "no/partial table on this page":
    # AttributeError when find() returns None, IndexError when a row is short.
    try:
        table = soup.find(class_='table-data')
        table = table.find('tbody')
        for row in table.find_all('tr'):
            cells = row.find_all('td')
            # observed column layout: [2]=fund name, [3]=quote date, [4]=unit price
            name = cells[2].get_text()
            date = cells[3].get_text()
            price = cells[4].get_text()
            quotes += [(name, date, price)]
    except (AttributeError, IndexError):
        # No quotes for that day; return whatever was collected so far.
        pass
    return quotes
print("Getting fund pricing data ...", end="")
# Fetch each day's quote table concurrently; fastcore's parallel() returns one
# list of (name, date, price) tuples per day.
quotes = parallel( # run in parallel to speed it up
get_fund_prices_for_day,
dates_since(start_date),
n_workers=16,
progress=False
)
# Flatten the list-of-lists (one list per day) into a single list of tuples.
quotes = reduce(lambda q1, q2: q1 + q2, quotes) # drop unnecessary dimension
df = pd.DataFrame.from_records(quotes, columns=['name', 'date', 'price'])
# The scraped values are strings; convert to proper dtypes for analysis.
df['date'] = pd.to_datetime(df['date'])
df['price'] = df.price.astype(np.double)
print("Done")
# Benchmark indices to compare the funds against.
indexes = ['wig', 'wig20']
print(f"Getting pricing data for {', '.join(indexes)} ... ", end="")
for index in indexes:
    # stooq.pl serves yearly (i=y) quotes as CSV; 'Data'/'Zamkniecie' are
    # Polish for date/close.
    df_index = pd.read_csv(f'https://stooq.pl/q/d/l/?s={index}&i=y', parse_dates=['Data']) # get raw data as csv
    df_index['name'] = index.upper()
    df_index = df_index.rename(columns={'Data':'date', 'Zamkniecie': 'price'})[['name', 'date', 'price']]
    # DataFrame.append was removed in pandas 2.x; pd.concat is the supported
    # replacement (default keeps the original indices, same as append did).
    df = pd.concat([df, df_index]) # append index data to fund pricing data
print('Done')
# Year of each quote; the vectorized .dt accessor replaces a per-row apply().
df['year'] = df.date.dt.year
# Year-end price: for each fund and year, keep the price at the latest quote date.
df_fund_year_prices = df.groupby(['name', 'year'])\
    .apply(lambda gr: gr[gr['date'] == gr['date'].max()]['price'])\
    .reset_index()[['name', 'year', 'price']]\
    .drop_duplicates()
# Previous year's price, computed PER FUND. A plain .shift(1) over the whole
# frame is a bug: each fund's first year would pick up the last price of the
# preceding fund, producing a bogus price_change_pct for that row.
df_fund_year_prices['price_prev'] = df_fund_year_prices.groupby('name')['price'].shift(1)
# Year-over-year percentage change of the fund's unit price.
df_fund_year_prices['price_change_pct'] = (df_fund_year_prices['price'] - df_fund_year_prices['price_prev']) / df_fund_year_prices['price_prev'] * 100
years_on_market_threshold = 20 # limit to funds that are around long enough
# Count how many yearly price points each fund has, then keep only the funds
# exceeding the threshold.
years_per_fund = df_fund_year_prices.groupby('name')['year'].count().rename('years_on_market')
df_long_time_on_market = years_per_fund[years_per_fund > years_on_market_threshold].to_frame()
long_time_on_market_names = df_long_time_on_market.index.values
# Diverging red->green palette: losses render red, gains green.
cmap = sns.diverging_palette(h_neg=10, h_pos=134, n=5, center='light', as_cmap=True)
df_fund_year_prices_old = df_fund_year_prices[
df_fund_year_prices.name.isin(long_time_on_market_names) &
(df_fund_year_prices.price_change_pct < 100) & # remove some outliers - fund conversions
(df_fund_year_prices.price_change_pct > -70) & # remove some outliers
(df_fund_year_prices.year < datetime.date.today().year) # consider only full years
]
# Heat-map style table: one row per year, one column per fund, cells colored
# by yearly % change; years a fund has no quote for render as white.
df_fund_year_prices_old.pivot(index='year', columns='name', values='price_change_pct')\
.style.background_gradient(cmap=cmap, axis=1)\
.highlight_null('white')
# Same pivoted data as a line chart of yearly % change per fund.
plt = df_fund_year_prices_old.pivot(index='year', columns='name', values='price_change_pct')\
.plot(figsize=(15,10), ylabel="% difference from last year")
TODO:
- compounded results and comparison with indexes
- test if compounded results are similar to the real fund price at the end