# Macro economic indicators (mostly US) from the FRED database
# Detailed info on each indicator is available on the web at: https://fred.stlouisfed.org/series/<indicator_name>
# DOC with the metrics and external exploratory Colab: https://docs.google.com/document/d/1Cf4C3Xz4_yitlzPaLEknHoDlw7KMXey4c49kZ7ucQEE/edit?usp=sharing
# FRED series codes to download, grouped by topic (one topic per row below).
FRED_INDICATORS = [
    # 1. Growth
    "GDP", "GDPC1", "GDPPOT", "NYGDPMKTPCDWLD",
    # 2. Prices and Inflation
    "CPIAUCSL", "CPILFESL", "GDPDEF",
    # 3. Money Supply
    "M1SL", "WM1NS", "WM2NS", "M1V", "M2V", "WALCL",
    # 4. Interest Rates
    "DFF", "DTB3", "DGS5", "DGS10", "DGS30", "T5YIE", "T10YIE", "T5YIFR", "TEDRATE", "DPRIME",
    # 5. Employment
    "UNRATE", "NROU", "CIVPART", "EMRATIO", "UNEMPLOY", "PAYEMS", "MANEMP", "ICSA", "IC4WSA",
    # 6. Income and Expenditure
    "CDSP", "MDSP", "FODSP", "DSPIC96", "PCE", "PCEDG", "PSAVERT", "DSPI", "RSXFS",
    # 7. Other indicators
    "INDPRO", "TCU", "HOUST", "GPDI", "CP", "STLFSI2", "DCOILWTICO", "DTWEXAFEGS", "DTWEXBGS",
    # 8. Gov-t debt
    "GFDEBTN", "GFDEGDQ188S",
    # 9. Additional indicators from IVAN
    "DEXUSEU", "GVZCLS", "VIXCLS", "DIVIDEND", "MORTGAGE30US", "SPCS20RSA",
]
# Macro indicators from QUANDL, kept as a set of dataset codes
# (all of them belong to "9. Additional indicators from IVAN").
QUANDL_INDICATORS = {
    "BCHAIN/MKPRU",
    "USTREASURY/YIELD",
    "USTREASURY/REALYIELD",
    "MULTPL/SHILLER_PE_RATIO_MONTH",
    "LBMA/GOLD",
}
# Stock market indexes, kept as a set of Stooq symbols.
# All indexes: https://stooq.com/t/
STOOQ_INDICATORS = {
    "^DJI",
    "^SPX",
}
# Print the keys of macro_indicators, comma-separated, emitting a line break
# before every sixth key (including the very first one).
for position, name in enumerate(macro_indicators.keys()):
    starts_new_row = position % 6 == 0
    if starts_new_row:
        print('\n')
    print(name, end=", ")
# OUTPUT:
# GDP, GDPC1, GDPPOT, NYGDPMKTPCDWLD, CPIAUCSL, CPILFESL,
# GDPDEF, M1SL, WM1NS, WM2NS, M1V, M2V,
# WALCL, DFF, DTB3, DGS5, DGS10, DGS30,
# T5YIE, T10YIE, T5YIFR, TEDRATE, DPRIME, UNRATE,
# NROU, CIVPART, EMRATIO, UNEMPLOY, PAYEMS, MANEMP,
# ICSA, IC4WSA, CDSP, MDSP, FODSP, DSPIC96,
# PCE, PCEDG, PSAVERT, DSPI, RSXFS, INDPRO,
# TCU, HOUST, GPDI, CP, STLFSI2, DCOILWTICO,
# DTWEXAFEGS, DTWEXBGS, GFDEBTN, GFDEGDQ188S, DEXUSEU, GVZCLS,
# VIXCLS, DIVIDEND, MORTGAGE30US, SPCS20RSA, BCHAIN_MKPRU, USTREASURY_YIELD,
# MULTPL_SHILLER_PE_RATIO_MONTH, USTREASURY_REALYIELD, LBMA_GOLD, SPX, DJI,
# Print every macro_df key that does not contain 'future', comma-separated,
# emitting a line break after each group of eight printed keys.
printed = 0
for column in macro_df.keys():
    if 'future' in column:
        # skipped keys do not count towards the eight-per-row grouping
        continue
    printed += 1
    print(column, end=", ")
    if printed % 8 == 0:
        print('\n')
# OUTPUT:
# WM1NS_wow, WM1NS_mom, WM2NS_wow, WM2NS_mom, WALCL_wow, WALCL_mom, DFF, DTB3,
# DGS5, DGS10, DGS30, T5YIE, T10YIE, T5YIFR, TEDRATE, DPRIME,
# ICSA_wow, ICSA_mom, IC4WSA_wow, IC4WSA_mom, STLFSI2, STLFSI2_wow, STLFSI2_mom, DCOILWTICO,
# DCOILWTICO_growth_1d, DCOILWTICO_growth_3d, DCOILWTICO_growth_7d,
# DCOILWTICO_growth_30d, DCOILWTICO_growth_90d, DCOILWTICO_growth_365d, DTWEXAFEGS, DTWEXBGS,
# DEXUSEU, GVZCLS, VIXCLS, MORTGAGE30US, MORTGAGE30US_wow, MORTGAGE30US_mom, BCHAIN_MKPRU, BCHAIN_MKPRU_growth_1d,
# BCHAIN_MKPRU_growth_3d, BCHAIN_MKPRU_growth_7d, BCHAIN_MKPRU_growth_30d, BCHAIN_MKPRU_growth_90d, BCHAIN_MKPRU_growth_365d, LBMA_GOLD, LBMA_GOLD_growth_1d, LBMA_GOLD_growth_3d,
# LBMA_GOLD_growth_7d, LBMA_GOLD_growth_30d, LBMA_GOLD_growth_90d, LBMA_GOLD_growth_365d, SPX, SPX_growth_1d, SPX_growth_3d, SPX_growth_7d,
# SPX_growth_30d, SPX_growth_90d, SPX_growth_365d, DJI, DJI_growth_1d, DJI_growth_3d, DJI_growth_7d, DJI_growth_30d,
# DJI_growth_90d, DJI_growth_365d, GDP_qoq, GDP_yoy, GDPC1_qoq, GDPC1_yoy, GDPPOT_qoq, GDPPOT_yoy,
# NYGDPMKTPCDWLD_yoy, CPIAUCSL_mom, CPIAUCSL_yoy, CPILFESL_mom, CPILFESL_yoy, GDPDEF, GDPDEF_qoq, GDPDEF_yoy,
# M1SL_mom, M1SL_yoy, M1V, M1V_qoq, M1V_yoy, M2V, M2V_qoq, M2V_yoy,
# UNRATE, UNRATE_mom, UNRATE_yoy, NROU, NROU_qoq, NROU_yoy, CIVPART, CIVPART_mom,
# CIVPART_yoy, EMRATIO, EMRATIO_mom, EMRATIO_yoy, UNEMPLOY_mom, UNEMPLOY_yoy, PAYEMS_mom, PAYEMS_yoy,
# MANEMP_mom, MANEMP_yoy, CDSP, CDSP_qoq, CDSP_yoy, MDSP, MDSP_qoq, MDSP_yoy,
# FODSP, FODSP_qoq, FODSP_yoy, DSPIC96_mom, DSPIC96_yoy, PCE_mom, PCE_yoy, PCEDG_mom,
# PCEDG_yoy, PSAVERT, PSAVERT_mom, PSAVERT_yoy, DSPI_mom, DSPI_yoy, RSXFS_mom, RSXFS_yoy,
# INDPRO, INDPRO_mom, INDPRO_yoy, TCU, TCU_mom, TCU_yoy, HOUST_mom, HOUST_yoy,
# GPDI_qoq, GPDI_yoy, div_ratio, CP_qoq, CP_yoy, GFDEBTN_qoq, GFDEBTN_yoy, GFDEGDQ188S,
# GFDEGDQ188S_qoq, GFDEGDQ188S_yoy, DIVIDEND_qoq, DIVIDEND_yoy, SPCS20RSA, SPCS20RSA_mom, SPCS20RSA_yoy, MULTPL_SHILLER_PE_RATIO_MONTH,
# MULTPL_SHILLER_PE_RATIO_MONTH_mom, MULTPL_SHILLER_PE_RATIO_MONTH_yoy,
# Future growth indicators are mostly correlated with each other
# Collect, in their original order, the macro_df keys that contain 'future',
# then display the resulting list.
future_ind = [name for name in macro_df.keys() if 'future' in name]
print(future_ind)
# OUTPUT:
# ['SPX_future_growth_1d', 'SPX_future_growth_3d', 'SPX_future_growth_7d', 'SPX_future_growth_30d', 'SPX_future_growth_90d', 'SPX_future_growth_365d', 'DJI_future_growth_1d', 'DJI_future_growth_3d', 'DJI_future_growth_7d', 'DJI_future_growth_30d', 'DJI_future_growth_90d', 'DJI_future_growth_365d']
# Start from all macro_df keys and remove the future_ind ones: they must not be
# used when looking for correlations with the label (itself a future indicator).
macro_df_no_future_ind = macro_df.keys().drop(future_ind)
# imports
from collections import OrderedDict
from sklearn.tree import DecisionTreeRegressor
from matplotlib import pyplot
# Cast every macro_df column to float, one column at a time; this modifies
# macro_df itself and happens before the copy below is taken.
for column in macro_df.keys():
    as_float = macro_df[column].astype(float)
    macro_df[column] = as_float

# Feature names: all macro_df keys except the future indicators,
# which must not be used to predict.
X_keys = macro_df.keys().drop(future_ind)

# Deep-copy the dataframe so that replacing missing values with 0 does not
# touch the original df.
# (alternative kept for reference: macro_copy.dropna(inplace=True))
macro_copy = macro_df.copy(deep=True).fillna(0)

# y is the dependent variable, X holds all the features
y = macro_copy['SPX_future_growth_90d']
X = macro_copy[X_keys]
# Rank the features of a fitted model by importance, least important first.
# Sorting approach: https://stackoverflow.com/questions/613183/how-do-i-sort-a-dictionary-by-value
def get_importance_features(model, feature_names=None):
    """Return (feature name, importance) pairs sorted by ascending importance.

    Args:
        model: Object exposing a ``feature_importances_`` sequence.
        feature_names: Optional names, positionally aligned with
            ``model.feature_importances_``. When omitted, the keys of the
            module-level ``X`` are used, as before.

    Returns:
        A list of ``(name, importance)`` tuples, least important first.
        Note that this is a sorted list, not a dictionary.

    Raises:
        IndexError: If there are fewer names than importance values.
    """
    importances = model.feature_importances_
    if feature_names is None:
        # Backward-compatible default: names come from the global feature frame
        feature_names = X.keys()
    names = list(feature_names)

    # zip() would silently truncate, so fail loudly when names are missing
    if len(names) < len(importances):
        raise IndexError(
            f"got {len(names)} feature names for {len(importances)} importance values"
        )

    # A dict keeps the first-seen position (and last value) of a repeated name;
    # surplus names beyond the number of importances are ignored.
    feat_imp = dict(zip(names, importances))

    # sorted() is stable, so equally important features keep their input order
    return sorted(feat_imp.items(), key=lambda kv: kv[1])
# Create a decision-tree regressor with default settings and fit it on X, y
# (fit() hands back the estimator itself), then rank the features it used.
decision_tree_model = DecisionTreeRegressor().fit(X, y)
decision_feat_imp = get_importance_features(model=decision_tree_model)