import wrds
import saspy
from IPython.display import HTML
import pandas as pd
import datetime
# Start a SAS session and check configuration information
sas = saspy.SASsession(cfgname='default') # Available configurations: 'default', 'sasu8'
sas
# Set a library reference to home directory
home = sas.saslib(libref='home', path='~')
scratch_dir = '/scratch/' + 'your-instituion-nickname' # Please see: https://wrds-www.wharton.upenn.edu/pages/support/the-wrds-cloud/managing-data/storing-your-data-wrds-cloud/
scrt = sas.saslib(libref='scrt', path=scratch_dir)
# Check assinged libraries
assigned_librefs = sas.assigned_librefs() # Check all assigned libraries
print(assigned_librefs)
# Access SAS Datasets
## Set dataset options (SAS datastep options)
### Available options: where (str), keep (str or list), drop (str or list), firstobs (int or str), obs (int or str), format (dict)
options = {'where' : 'fyear = 2018 and indfmt="INDL" and datafmt="STD" and popsrc="D" and consol="C"',
'keep' : 'gvkey fyear fyr datadate indfmt datafmt popsrc consol at lt ni',
'format' : {'datadate':'YYMMDDN8.'}
}
sd = sas.sasdata(table='funda', libref='compa', dsopts=options)
# Summarize
sd.describe()
# First 5 obs
sd.head()
# Last 5 obs
sd.tail()
# Histogram
sd.hist(var='ni', title = 'Net Income histogram (Fiscal year 2018)', label = 'NI')
# To see the last SAS log
print(sas.lastlog())
# Sort
sd.sort(by = 'gvkey fyear', out='funda_copy') # This code raises an error!
By sd.sort, the syntax below runs in the SAS session.
which is invalide sort procedure syntax!
This is because SASPy's _dsopts() method generates dataset options that cannot be used properly in proc sort as shown below.
sd._dsopts()
Thus, use sort method for a SASdata object without dsopts to avoid the error or use submit method instead. Please see section 4 below for 'submit' method.
# Sort
new_sd = sas.sasdata(table='funda', libref='compa')
new_sd.sort(by = 'gvkey fyear', out = 'funda_copy') # This code does not raise any error!
print(sas.lastlog())
df_from_sd = sd.to_df()
df_from_sd.head()
type(df_from_sd), type(sd)
df_from_sas_ds = sas.sd2df(table = 'idx_index', libref = 'compa', method = 'memory') # method: [memory, CSV, DISK], DISK is recommended for large datasets
df_from_sas_ds.head()
db = wrds.Connection()
wrds_table = db.get_table(library = 'crsp', table = 'msi')
# Convert a pandas df to a SAS dataset
wrds_table_sd = sas.df2sd(df = wrds_table, table = 'wrds_table', libref='work')
# Check if 'wrds_table' is correctly created
print(sas.lastlog())
wrds_table_sd.head()
# Submit the job (Add code that gives both log and output)
sas_code = '''%let i=1; %let j=%eval(&i*100+3);
proc odstext;
title "SAS Output";
p "&j is calculated by the formula (i*100 + 3) where i is &i";
run;
'''
results = sas.submit(sas_code)
print(results['LOG'])
HTML(results['LST'])
# Submit and print LOG
sas.submitLOG(sas_code)
# Submit and show OUTPUT
sas_code = '''proc means data=compa.funda; var at lt; where fyear=2018;run; quit;'''
sas.submitLST(sas_code)
# Let's run a WRDS SAS macro for Event Study (SAS macro: https://wrds-www.wharton.upenn.edu/pages/support/applications/event-studies/event-study-research-application/)
# Run your SAS macro in the SASPy SAS Sessioin by submitting %include code
sas.submitLOG('''%include "~/SASPy_Application/SAS_macro/event_study.sas";''')
The first line of the cell must start with "%%SAS session_name"
%%SAS sas
/* Get S&P 500 companies*/
proc sql;
create table crspd500 as
select a.*, b.ret, b.date
from crsp.dsp500list as a,
crsp.dsf as b
where a.permno=b.permno
and b.date >= a.start and b.date<= a.ending
order by date;
quit;
#From SAS to Python (int, float, str types are available)
sas.submitLST('''%let today = %sysfunc(today(),date9.);
%let num_var=10;
proc odstext;
title SAS Macro Variables;
mvar today num_var;
p "SAS Macro variable ''today'': "|| today;
p "SAS Macro variable ''num_var'': "|| num_var;
run;
''')
today = sas.symget('today')
num_var = sas.symget('num_var')
print ('Transfer SAS macro variables to Python session\n','today:', today, 'type:', type(today), 'num_var:', num_var, 'type:', type(num_var))
# Create and assign value to a SAS macro var from Python
tomorrow = datetime.date.today() + datetime.timedelta(days = 1)
sas.symput(name='tomorrow_py', value=tomorrow)
sas.submitLST('''proc odstext;
title "Python to SAS";
p "&tomorrow_py from Python, &today from SAS";
run;
''')
# Change value of a SAS macro var
sas.symput(name='tomorrow_py', value='06262020')
sas.submitLST('''proc odstext;
title "Change value from Python";
p "&tomorrow_py from Python, &today from SAS";
run;
''')
# PC-SAS connect
PC_SAS = '''%let wrds = wrds.wharton.upenn.edu 4016;
options comamid=TCP remote=wrds;
signon username="&username" password="&pw";
'''
prompt = {'username':True, 'pw': True}
sas.submitLOG(code=PC_SAS, prompt=prompt)
# Submit SAS code by 'submit' method
main_code = '''your main sas code to run on WRDS server'''
main_code_remote = ' '.join['rsubmit;', main_code, 'endrsubmit;']
sas.submit(main_code_remote)
# Disconnect from the remote SAS session. Note that this does not end your LOCAL SASPY SAS session.
sas.submit('''signoff;''')
# Finish the SAS
sas.endsas()
db.close()