import wrds
import pandas as pd
import numpy as np
from pandas.tseries.offsets import *
from scipy import stats
import datetime as dt
import matplotlib.pyplot as plt
###################
# Connect to WRDS #
###################
conn=wrds.Connection()
###################
# CRSP Block #
###################
# added exchcd=-2,-1,0 to address the issue that stocks temp stopped trading
# without exchcd=-2,-1, 0 the non-trading months will be tossed out in the output
# Code Definition
# -2 Halted by the NYSE or AMEX
# -1 Suspended by the NYSE, AMEX, or NASDAQ
# 0 Not Trading on NYSE, AMEX, or NASDAQ
# 1 New York Stock Exchange
# 2 American Stock Exchange
crsp_m = conn.raw_sql("""
select a.permno, a.date, a.ret,
b.shrcd, b.exchcd
from crspm.msf as a
left join crspm.msenames as b
on a.permno=b.permno
and b.namedt<=a.date
and a.date<=b.nameendt
where a.date between '01/01/1963' and '12/31/1989'
and b.exchcd between -2 and 2
and b.shrcd between 10 and 11
""", date_cols=['date'])
# Change variable format to int
crsp_m[['permno','shrcd','exchcd']]=\
crsp_m[['permno','shrcd','exchcd']].astype(int)
# fill in missing return with 0
crsp_m['ret'] = crsp_m['ret'].fillna(0)
# create log return for future usage
crsp_m['logret'] = np.log(1+crsp_m['ret'])
crsp_m.tail()
######################################################
# Create Momentum Portfolio #
# Measures Based on Past (J) Month Compounded Returns #
#######################################################
J = 6 # Formation Period Length: J can be between 3 to 12 months
_tmp_crsp = crsp_m[['permno','date','ret','logret']].sort_values(['permno','date']).set_index('date')
# Calculate Rolling Cumulative Return in the Formation Period
# By summing log returns over the J month formation period
umd = _tmp_crsp.groupby(['permno'])['logret'].rolling(J, min_periods=J).sum().reset_index()
umd = umd.rename(columns={'logret':'sumlogret'})
# Then exp the sum log return to get compound return (not necessary)
umd['cumret']=np.exp(umd['sumlogret'])-1
umd.tail()
########################################
# Formation of 10 Momentum Portfolios #
########################################
# For each date: assign ranking 1-10 based on cumret
# 1=lowest 10=highest cumret
umd=umd.dropna(axis=0, subset=['cumret'])
umd['momr']=umd.groupby('date')['cumret'].transform(lambda x: pd.qcut(x, 10, labels=False))
# shift momr from 0-9 to 1-10
umd.momr=1+umd.momr.astype(int)
umd.tail()
# Examine distribution of cumret by momr
umd.groupby('momr')['cumret'].mean()
# First lineup date to month end date medate
# Then calculate hdate1 and hdate2 using medate
K = 6 # Holding Period Length: K can be between 3 to 12 months
umd['form_date'] = umd['date']
umd['medate'] = umd['date']+MonthEnd(0)
umd['hdate1']=umd['medate']+MonthBegin(1)
umd['hdate2']=umd['medate']+MonthEnd(K)
umd = umd[['permno', 'form_date','momr','hdate1','hdate2']]
umd.tail()
# join rank and return data together
# note: this step mimicks the following proc sql statement from SAS code and takes a while to run
'''
proc sql;
create table umd2
as select distinct a.momr, a.form_date, a.permno, b.date, b.ret
from umd as a, crsp_m as b
where a.permno=b.permno
and a.HDATE1<=b.date<=a.HDATE2;
quit;
'''
port = pd.merge(crsp_m[['permno','date','ret']], umd, on=['permno'], how='inner')
port = port[(port['hdate1']<=port['date']) & (port['date']<=port['hdate2'])]
# Rearrange the columns;
port = port[['permno','form_date', 'momr', 'hdate1','hdate2', 'date', 'ret']]
Let's inspect the output:
# Example using
port.loc[(port.permno==93172) & (port.form_date == '03/31/1986')]
umd_port = port.groupby(['date','momr', 'form_date'])['ret'].mean().reset_index()
# Skip first two years of the sample
start_yr = umd_port.date.dt.year.min()+2
umd_port = umd_port.loc[umd_port.date.dt.year>=start_yr]
umd_port = umd_port.sort_values(by=['date','momr'])
umd_port.head(6)
Then for a given month and momentum rank, average again across all formation dates
# Create one return series per MOM group every month
ewret = umd_port.groupby(['date','momr'])['ret'].mean().reset_index()
ewstd = umd_port.groupby(['date','momr'])['ret'].std().reset_index()
ewret = ewret.rename(columns={'ret':'ewret'})
ewstd = ewstd.rename(columns={'ret':'ewretstd'})
ewretdf = pd.merge(ewret, ewstd, on=['date','momr'], how='inner')
ewretdf = ewretdf.sort_values(by=['momr', 'date'])
ewretdf.head()
# portfolio summary
ewretdf.groupby(['momr'])['ewret'].describe()[['count','mean', 'std']].reset_index()
#################################
# Long-Short Portfolio Returns #
#################################
# Transpose portfolio layout to have columns as portfolio returns
ewret_t = ewretdf.pivot(index='date', columns='momr', values='ewret')
# Add prefix port in front of each column
ewret_t = ewret_t.add_prefix('port')
ewret_t = ewret_t.rename(columns={'port1':'losers', 'port10':'winners'})
ewret_t['long_short'] = ewret_t.winners - ewret_t.losers
ewret_t.head()
# Compute Long-Short Portfolio Cumulative Returns
ewret_t['cumret_winners'] = (1+ewret_t.winners).cumprod()-1
ewret_t['cumret_losers'] = (1+ewret_t.losers).cumprod()-1
ewret_t['cumret_long_short']= (1+ewret_t.long_short).cumprod()-1
#################################
# Portfolio Summary Statistics #
#################################
# Mean
mom_mean = ewret_t[['winners', 'losers', 'long_short']].mean().to_frame()
mom_mean = mom_mean.rename(columns={0:'mean'}).reset_index()
mom_mean
# T-Value and P-Value
t_losers = pd.Series(stats.ttest_1samp(ewret_t['losers'],0.0)).to_frame().T
t_winners = pd.Series(stats.ttest_1samp(ewret_t['winners'],0.0)).to_frame().T
t_long_short = pd.Series(stats.ttest_1samp(ewret_t['long_short'],0.0)).to_frame().T
t_losers['momr']='losers'
t_winners['momr']='winners'
t_long_short['momr']='long_short'
t_output =pd.concat([t_winners, t_losers, t_long_short])\
.rename(columns={0:'t-stat', 1:'p-value'})
# Combine mean, t and p and format output
mom_output = pd.merge(mom_mean, t_output, on=['momr'], how='inner')
mom_output['mean'] = mom_output['mean'].map('{:.2%}'.format)
mom_output['t-stat'] = mom_output['t-stat'].map('{:.2f}'.format)
mom_output['p-value'] = mom_output['p-value'].map('{:.3f}'.format)
print('Momentum Strategy Summary:\n\n', mom_output)
plt.figure(figsize=(12,9))
plt.suptitle('Momentum Strategy', fontsize=20)
ax1 = plt.subplot(211)
ax1.set_title('Long/Short Momentum Strategy', fontsize=15)
ax1.set_xlim([dt.datetime(1965,1,1), dt.datetime(1989,12,31)])
ax1.plot(ewret_t['cumret_long_short'])
ax2 = plt.subplot(212)
ax2.set_title('Cumulative Momentum Portfolios', fontsize=15)
ax2.plot(ewret_t['cumret_winners'], 'b-', ewret_t['cumret_losers'], 'r--')
ax2.set_xlim([dt.datetime(1965,1,1), dt.datetime(1989,12,31)])
ax2.legend(('Winners','Losers'), loc='upper left', shadow=True)
plt.subplots_adjust(top=0.92, hspace=0.2)
Thank you!