Creates OptionMetrics-CRSP Link Table
Example
/* ********************************************************************************* */
/* ******************** W R D S R E S E A R C H M A C R O S ******************** */
/* ********************************************************************************* */
/* WRDS Macro: OCLINK */
/* Summary : Creates OptionMetrics-CRSP Link Table */
/* Date : November 1, 2010 */
/* Author : Rabih Moussawi, WRDS */
/* Variables : - OPTIONMID and CRSPID are OptionMetrics and CRSP Names Datasets */
/* - OUTSET: OptionMetrics-CRSP link table output dataset */
/* ********************************************************************************* */
%MACRO OCLINK (OPTIONMID=OPTIONM.SECNMD,CRSPID=CRSP.MSENAMES,OUTSET=WORK.OCLINK);
options nonotes;
/* Check Validity of Library Assignments */
%if (%sysfunc(libref(crsp))) %then %do;
%let cs=/wrds/crsp/sasdata/;
libname crsp ("&cs/m_stock","&cs/q_stock","&cs/a_stock");
%end;
%if (%sysfunc(libref(optionm))) %then %do; libname optionm "/wrds/optionm/sasdata"; %end;
%put; %put ## START. ;
/* Step 1: Link by CUSIP between CRSP's PERMNO and OptionMetrics' SECID */
proc sort data=&OPTIONMID out=_OPT1 (keep=secid cusip issuer effect_date);
by secid cusip effect_date;
run;
/* Create first and last 'start dates' for CUSIP link */
proc sql;
create table _OPT2
as select *, min(effect_date) as fdate, max(effect_date) as ldate
from _OPT1
group by secid, cusip
order by secid, cusip, effect_date;
quit;
/* Label date range variables and keep only most recent company name for CUSIP link */
data _OPT2;
set _OPT2;
by secid cusip;
if last.cusip;
label fdate="First Start date of CUSIP record";
label ldate="Last Start date of CUSIP record";
format fdate ldate date9.;
drop effect_date;
run;
/* CRSP: Get all PERMNO-NCUSIP combinations */
proc sort data=&CRSPID out=_CRSP1 (keep=PERMNO NCUSIP comnam namedt nameendt);
where not missing(NCUSIP);
by PERMNO NCUSIP namedt;
run;
/* Arrange effective dates for CUSIP link */
proc sql;
create table _CRSP2
as select PERMNO,NCUSIP,comnam,min(namedt)as namedt,max(nameendt) as nameenddt
from _CRSP1
group by PERMNO, NCUSIP
order by PERMNO, NCUSIP, NAMEDT;
quit;
/* Label date range variables and keep only most recent company name */
data _CRSP2;
set _CRSP2;
by permno ncusip;
if last.ncusip;
label namedt="Start date of CUSIP record";
label nameenddt="End date of CUSIP record";
format namedt nameenddt date9.;
run;
/* Create CUSIP Link Table */
/* CUSIP date ranges are only used in scoring as CUSIPs are not reused for
different companies overtime */
proc sql;
create table _LINK1_1
as select *
from _OPT2 as a, _CRSP2 as b
where a.CUSIP = b.NCUSIP
order by SECID, PERMNO, ldate;
quit;
/* Score links using CUSIP date range and company name spelling distance */
/* Idea: date ranges the same cusip was used in CRSP and OptionMetrics should intersect */
data _LINK1_2;
set _LINK1_1;
by SECID PERMNO;
if last.permno; /* Keep link with most recent company name */
name_dist = min(spedis(issuer,comnam),spedis(comnam,issuer));
if (not ((ldate < namedt) or (fdate > nameenddt))) and name_dist < 30 then SCORE = 0;
else if (not ((ldate < namedt) or (fdate > nameenddt))) then score = 1;
else if name_dist < 30 then SCORE = 2;
else SCORE = 3;
keep SECID PERMNO issuer comnam score;
run;
%put ## # Step2: Linking using TICKERs... ;
/* Step 2: Find links for the remaining unmatched cases using Exchange Ticker */
/* Identify remaining unmatched cases */
proc sql;
create table _NOMATCH1
as select distinct a.*
from _OPT1 (keep=secid) as a
where a.secid NOT in (select distinct secid from _LINK1_2)
order by a.secid;
quit;
/* Drop Step1 Tables*/
proc sql; drop table _OPT1,_OPT2,_CRSP1,_CRSP2; quit;
/* Add OptionMetrics identifying information & drop tickers that have ? or ZZZZ */
proc sql;
create table _NOMATCH2
as select b.SECID, b.issuer, b.ticker, b.effect_date, b.cusip
from _NOMATCH1 as a, &OPTIONMID as b
where a.SECID = b.SECID and not (missing(b.ticker))
and ticker not like '%?%' and ticker not like '%ZZZZ%'
order by secid, ticker, effect_date;
quit;
/* Create first and last 'start dates' for Exchange Tickers */
proc sql;
create table _NOMATCH3
as select *, min(effect_date) as fdate, max(effect_date) as ldate
from _NOMATCH2
group by secid, ticker
order by secid, ticker, effect_date;
quit;
/* Label date range variables and keep only most recent company name */
data _NOMATCH3;
set _NOMATCH3;
by secid ticker;
if last.ticker;
label fdate="First Start date of OFTIC record";
label ldate="Last Start date of OFTIC record";
format fdate ldate date9.;
drop effect_date;
run;
/* Get entire list of CRSP stocks with Exchange Ticker information */
/* Give CRSP's Trading Ticker precedence over CRSP Standardized Ticker */
proc sql;
create table _CRSP1
as select coalesce(tsymbol,ticker) as ticker, comnam, permno,
ncusip, namedt, nameendt
from &CRSPID
order by permno, ticker, namedt;
run;
/* Arrange effective dates for link by Exchange Ticker */
proc sql;
create table _CRSP2
as select permno,comnam,ticker,ncusip,
min(namedt)as namedt,max(nameendt) as nameenddt
from _CRSP1
where not missing(ticker)
group by permno, ticker
order by permno, ticker, namedt;
quit;
/* Label date range variables and keep only most recent company name */
data _CRSP2;
set _CRSP2;
by permno ticker;
if last.ticker;
label namedt="Start date of exch. ticker record";
label nameenddt="End date of exch. ticker record";
format namedt nameenddt date9.;
run;
/* Merge remaining unmatched cases using Exchange Ticker */
/* Note: Use ticker date ranges as exchange tickers are reused overtime */
proc sql;
create table _LINK2_1
as select a.secid,a.ticker, b.permno, a.issuer, b.comnam, a.cusip, b.ncusip, a.ldate
from _NOMATCH3 as a, _CRSP2 as b
where strip(a.ticker) = strip(b.ticker) and
(ldate >= namedt) and (fdate <= nameenddt)
order by secid, ticker, ldate;
quit;
/* Score using company name using 6-digit CUSIP and company name spelling distance */
data _LINK2_2;
set _LINK2_1;
name_dist = min(spedis(issuer,comnam),spedis(comnam,issuer));
if substr(cusip,1,6)=substr(ncusip,1,6) and name_dist < 30 then SCORE=0;
else if substr(cusip,1,6)=substr(ncusip,1,6) then score = 4;
else if name_dist < 30 then SCORE = 5;
else SCORE = 6;
run;
/* Some companies may have more than one SECID-PERMNO link, */
/* so re-sort and keep the case (PERMNO & Company name from CRSP) */
/* that gives the lowest score for each OptionM SECID (first.secid=1) */
proc sort data=_LINK2_2; by secid score; run;
data _LINK2_3;
set _LINK2_2;
by secid score;
if first.secid;
keep secid permno issuer comnam permno score;
run;
%put ## # Step3: Finalizing Links and Scores... ;
/* Step 3: Add Exchange Ticker links to CUSIP links */
/* Create Labels for OCLINK dataset and variables */
/* Create final link table and save it in prespecified directory */
data &OUTSET (label="OptionMetrics-CRSP Link Table");
set _LINK1_2 _LINK2_3;
label ISSUER = "Company Name in OptionMetrics";
label COMNAM= "Company Name in CRSP";
label SCORE= "Link Score: 0(best) - 6";
run;
/* Final Sort */
proc sort data=&OUTSET; by SECID SCORE PERMNO; run;
%put ## # Step4: Link Table &OUTSET Ready... ;
/* House Cleaning */
proc sql;
drop table _CRSP1,_CRSP2,
_LINK1_1,_LINK1_2,_LINK2_1,_LINK2_2,_LINK2_3,
_NOMATCH1,_NOMATCH2,_NOMATCH3;
quit;
%put ## DONE .; %put;
options notes;
%MEND OCLINK;
/* ********************************************************************************* */
/* ************* Material Copyright Wharton Research Data Services *************** */
/* ****************************** All Rights Reserved ****************************** */
/* ********************************************************************************* */