WRDS Macros: VW_AVGPRICE

VW_AVGPRICE

Example

/* ********************************************************************************* */
/* ******************** W R D S   R E S E A R C H   M A C R O S ******************** */
/* ********************************************************************************* */
/* WRDS Macro: VW_AVGPRICE                                                           */
/* Summary   :                                                                       */
/*                                                                                   */
/*                                                                                   */
/*                                                                                   */
/* Date      :                                                                       */
/* Version:   1.0                                                                    */
/* Author    : Mark Keintz, WRDS                                                     */
/* Variables : -                                                                     */
/*             -                                                                     */
/* ***********************************************************************************/


%macro vw_avgprice(indsn=,outdsn=
     ,begdate=,enddate=,beghms=09:30:00,endhms=16:00:00,inthms=00:00:60
     ,symlist=_ALL_,symdsn=
     ,p_var=price,v_var=size,d_var=date,t_var=time,s_var=symbol,nt_var=
     ,help=no)
   / des="Generate Volume-weighted average price over user-specified time intervals";

  %local vrs;
  %let vrs=1.2;

  %if %lowcase(&help)=yes %then %do;

    %put *********************************************************************************;
    %put ** VW_AVGPRICE: Volume Weighted Average Trade Price.                           **;
    %put **                                                                             **;
    %put **  Version &vrs                                                               **;
    %put **                                                                             **;
    %put **     Generates a dataset of volume-weighted average trade prices             **;
    %put **     for user-specified time intervals (e.g. minute-by-minute,               **;
    %put **     30 seconds, 5 minutes, etc.).                                           **;
    %put **                                                                             **;
    %put **     The output dataset will have the following variables:                   **;
    %put **       SYMBOL       (Or user-designated trading ticker var)                  **;
    %put **       DATE         (Or the user-designated date var).  Note                 **;
    %put **                    that if the D_VAR argument (see below) is                **;
    %put **                    set to null, then no date variable will be               **;
    %put **                    written to the output dataset.                           **;
    %put **       INTRVL_BEGTIME (SAS time stamp for beginning of each time             **;
    %put **                      interval)                                              **;
    %put **       VW_AVG_PRICE (Vol-weighted avg price for the interval)                **;
    %put **       TOTAL_VOL    (Total trade-vol for this time interval)                 **;
    %put **       N_TRADES     (Number of trades for this time interval)                **;
    %put **                                                                             **;
    %put **     The output will be sorted by SYMBOL DATE INTRVL_BEGTIME.                **;
    %put **                                                                             **;
    %put **  Parameter list:  (Note all parameters are in the "name=value"              **;
    %put **     form, and may be used in any order).                                    **;
    %put **                                                                             **;
    %put **  INDSN= (Required)  Name of input dataset. Must have variablles             **;
    %put **    SYMBOL DATE TIME PRICE and SIZE (the "volume" variable).                 **;
    %put **    (Other varnames can be substitued - see parameters below).               **;
    %put **    Must be sorted by SYMBOL DATE TIME.                                      **;
    %put **                                                                             **;
    %put **  OUTDSN= (Required) Name of output dataset to generate.  It                 **;
    %put **    will be sorted by SYMBOL DATE INTRVL_BEGTIME (time at                    **;
    %put **    beginning of interval) and will also have vars TOTAL_VOL                 **;
    %put **    VW_AVG_PRICE and N_TRADES.                                               **;
    %put **                                                                             **;
    %put **  BEGDATE= (Optional, default=blank).  Beginning date, in date9              **;
    %put **    form, without the quotes (e.g. 02DEC2008).  Accept no INDSN              **;
    %put **    record PRECEDING begdate.  If blank, then no BEGDATE                     **;
    %put **    filtering needed.  Also if D_VAR is blank, no BEGDATE filtering          **;
    %put **    will be done, regardless of BEGDATE value.                               **;
    %put **                                                                             **;
    %put **  ENDDATE= (Optional, default=blank).  Ending date in date9 form,            **;
    %put **    without the quotes (e.g. 02DEC2008).  Accept no INDSN                    **;
    %put **    records after enddate.  If blank then no ENDDATE filtering               **;
    %put **    needed.  Also if D_VAR is blank, no ENDDATE filtering will be            **;
    %put **    done, regardless of ENDDATE value.                                       **;
    %put **                                                                             **;
    %put **  BEGHMS= (Optional, default=09:30:00).  Beginning time, in                  **;
    %put **    hh:mm:ss form. Accept only records with                                  **;
    %put **    TIME >= BEGHMS.  Note BEGHMS is treated differently                      **;
    %put **    than ENDHMS.  Records with time=BEGHMS are included,                     **;
    %put **    while records with time=ENDHMS are excluded.                             **;
    %put **                                                                             **;
    %put **  ENDHMS= (Optional, default=16:00:00).  Ending time, in                     **;
    %put **    hh:mm:ss form.  Accept only records with                                 **;
    %put **    TIME <= ENDHMS. Note ENDHMS is treated differently                       **;
    %put **    than BEGHMS.  Records with time=BEGHMS are included,                     **;
    %put **    while records with time=ENDHMS are excluded.                             **;
    %put **                                                                             **;
    %put **  INTHMS= (Optional, default=00:00:60)  Interval size in                     **;
    %put **    hh:mm:ss form.  Length of intervals over which volume-                   **;
    %put **    weighted means will be calculated.                                       **;
    %put **                                                                             **;
    %put **  SYMLIST= (Optional, default=_ALL_). Space-separated list of                **;
    %put **    stock SYMBOLS (without quote marks) to accept from INDSN.                **;
    %put **    If symlist=_ALL_, then take all symbols from INDSN.                      **;
    %put **                                                                             **;
    %put **  SYMDSN= (Optional, default=blank).  For use when the SYMLISST              **;
    %put **    parameter becomes too large.  SYMDSN names a dataset with the            **;
    %put **    set of symbols to use in filtering INDSN.  Note this dataset             **;
    %put **    must use the same variable name for trading symbol as in INDSN.          **;
    %put **    NOTE: If SYMLIST has any value other than _ALL_, then SYMDSN             **;
    %put **    is ignored.                                                              **;
    %put **                                                                             **;
    %put **  HELP= (optional, default=no). If help=yes (any case), generate             **;
    %put **     these notes.                                                            **;
    %put **                                                                             **;
    %put **                                                                             **;
    %put **  The parameters below specify the actual variable names used in             **;
    %put **    this macro.  Note that they default to the varnames used in              **;
    %put **    the TAQ trades datasets (i.e. the "ct" datasets).                        **;
    %put **                                                                             **;
    %put **  P_VAR= (Optional, default=price).  Name of the trading price               **;
    %put **    price variable in the INDSN dataset.                                     **;
    %put **                                                                             **;
    %put **  V_VAR= (Optional, default=size).  Name of the trading volume               **;
    %put **    variable in the INDSN dataset.                                           **;
    %put **                                                                             **;
    %put **  D_VAR= (Optional, default=date).  Name of the date variable                **;
    %put **    in the INDSN dataset.  Must be stored as a SAS data value.               **;
    %put **    Note that if you set D_VAR to a null value, then no date                 **;
    %put **    variable is read from INDSN nor output to OUTDSN, and no                 **;
    %put **    date filtering will be done.                                             **;
    %put **                                                                             **;
    %put **  T_VAR= (Optional, default=time).  Name of the time variable in             **;
    %put **    the INDSN dataset.  Must be stored as a SAS time value.                  **;
    %put **                                                                             **;
    %put **  S_VAR. (Optional, default=symbol).  Name of the trading ticker             **;
    %put **    variable in the INDSN dataset. Must be a character variable.             **;
    %put **                                                                             **;
    %put **  NT_VAR. (Optional, default=blank). Name of var (if any) that               **;
    %put **    contains the number of trades represented by the current                 **;
    %put **    INDSN record.  If blank then assume each incoming record                 **;
    %put **    represents one trade.                                                    **;
    %put **                                                                             **;
    %put **                                                                             **;
    %put ** Usage Examples:                                                             **;
    %put %nrstr(    %VW_AVGPRICE(indsn=taq.ct_20081202,outdsn=mydata,beghms=12:00:00,inthms=05:00)) ;
    %put **     generates dataset MYDATA, with volume-weighted average                  **;
    %put **     prices for 5-minutes intervals, for all trades between noon             **;
    %put **     noon and 4PM from taq.ct_20081202                                       **;
    %put **                                                                             **;
    %put **    Note that all parameters are entered in the form                         **;
    %put **      param_name1=param_value1,param_name2=param_value2,...                  **;
    %put **                                                                             **;
    %put **    There are no positional parameters, and the "name="                      **;
    %put **    parameters may be entered in any order.                                  **;
    %put **                                                                             **;
    %put ** DEPENDENCIES:                                                               **;
    %put **    DBLQUOTELIST (a WRDS macro)                                              **;
    %put **                                                                             **;
    %put ** VERSION 1.0: Initial version.                                               **;
    %put **                                                                             **;
    %put ** VERSION 1.1: Allow specification of different price, volume,                **;
    %put **   date, and time variables.  Also add provision for the NT_VAR              **;
    %put **   parameter.                                                                **;
    %put **                                                                             **;
    %put ** VERSION 1.2: Permit specification of user-specified dataset of              **;
    %put **   requested symbols, instead of a user-specified list.                      **;
    %put **                                                                             **;
    %put ** ******************************************************************************;
    %goto done;
  %end;

   %local        /****************************************************************/
                 /* Other local macrovars:                                       */
                 /*                                                              */
     sym_method  /* =1 if symbol filtering to be done via a WHERE clause.        */
                 /* =2 if symbol filtering to be done via a JOIN/MERGE.          */
                 /* =3 if no symbol filtering to be done.                        */
                 /*                                                              */
     qsymlen     /* Estimated length of comma-separated list of quoted symbols.  */
                 /*                                                              */
     qsymlist    /* symlist with symbols quoted, comma-separated.                */
                 /*                                                              */
     wh_filter   /* Where filter to apply to current record.                     */
                 /*                                                              */
     wh_text     /* Utility, starts as a blank, becomes "and", for building      */
                 /* compound where expressions.                                  */
                 /*                                                              */
     int_secs    /* Interval size, in seconds.                                   */
                 /*                                                              */
     int_text    /* Interval size as text (e.g. minute, 5-minute, 30 seconds),   */
                 /*   to be used in dataset and variable labels.                 */
                 /*                                                              */
     nt_yn       /* If nt_var is blank, then NT_YN=no, else NT_YN=yes.           */
                 /* **************************************************************/
     ;


/* First determine SYM_METHOD */

  %if %upcase(&symlist) ^= _ALL_ %then %let sym_method=1;
  %else %if &symdsn =            %then %let sym_method=3;
  %else %do;
    proc sql noprint;
      select sum(length(trim(&s_var))+3) into : qsymlen from (select distinct &s_var from &symdsn);
    %if &qsymlen > 32700 %then %do;
      %let sym_method=2;
      %put +----------------------------------------------------------+ ;
      %put |  Too many &s_var values (total length=&qsymlen.).        | ;
      %put |  Will use SYM_METHOD 2 (Merge of &indsn with &symdsn).     ;
      %put +----------------------------------------------------------+ ;
    %end;
  %else %do;
      %let sym_method=1;
      %put +----------------------------------------------------------+ ;
      %put |  Length of all &s_var values is &qsymlen..               | ;
      %put |  Will convert to SYM_METHOD 1 (i.e. WHERE clause).       | ;
      %put +----------------------------------------------------------+ ;
      select distinct &s_var into : symlist separated by ' ' from &symdsn;
    %end;
    quit;
  %end;

  %let int_secs = %sysfunc(inputn(&inthms,time8.));/* Interval Size, in seconds */

/* Construct interval size in text form (e.g. "2 Minutes", "30 Seconds", "03:30") */
  %if       &int_secs < 60                %then %let int_text = &int_secs Seconds ;
  %else %if &int_secs = 60                %then %let int_text = Minute ;
  %else %if %sysfunc(mod(&int_secs,60))=0 %then %let int_text = %eval(&int_secs/60) Minutes;
  %else                                         %let int_text = %sysfunc(putn(time8.)) ;

  %let wh_text =  ;     /* change  to "and" after setting first filter component */
  %let wh_filter= ;     /* Initialize where filter for trade data */

/* Begin building the compound where conditions */
/* Add "&S_VAR in ("AAA" "BBB" ... ) " filter, if requested */
  %if       &sym_method=3 %then;        /* &S_VAR filter NOT needed */
  %else %if &sym_method=2 %then;        /* &S_VAR filter wlll be done via MERGE */
  %else %if &sym_method=1 %then %do;    /* &S_VAR filter using WHERE is needed */
     %let qsymlist  = %dblquotelist(&symlist)  ;
     %let wh_filter = &wh_filter &wh_text ( &s_var in ( &qsymlist ) );
     %let wh_text   = %str(and);
  %end;

/* Add a DATE RANGE filter, if requested */
  %if &d_var = %then ;                      /* DATE filter not possible */
  %else %if &begdate= and &enddate= %then ; /** DATE filter not needed */
  %else %do;                            /* DATE filter IS needed */
    %if &begdate = &enddate %then %let wh_filter = &wh_filter &wh_text ( &d_var = "&begdate"d );
    %else %if &begdate =    %then %let wh_filter = &wh_filter &wh_text ( &d_var <= "&enddate"d );
    %else %if &enddate =    %then %let wh_filter = &wh_filter &wh_text ( &d_var >= "&begdate"d );
    %else                         %let wh_filter = &wh_filter &wh_text ( &d_var between "&begdate"d and "&enddate"d );
    %let wh_text = %str(and);
  %end;

/* Add a TIME RANGE filter, if requested */
  %if &beghms=00:00:00 and &endhms=24:00:00 %then;/* TIME filter NOT needed */
  %else %do;                                      /* TIME filer IS needed */
    %if       &beghms = 00:00:00 %then %let wh_filter = &wh_filter &wh_text ( &t_var <= ("&endhmst"t -1) );
    %else %if &endhms = 24:00:00 %then %let wh_filter = &wh_filter &wh_text ( &t_var >= "&beghms"t );
    %else                              %let wh_filter = &wh_filter &wh_text ( &t_var between "&beghms"t and ("&endhms"t - 1) );
    %let wh_text = %str(and);
  %end;

/* With completed WHERE clause, convert it to dataset name parameter format */
  %if wh_text ^= %then %let wh_filter= %str( where= ( &wh_filter ) );
  %else                %let wh_filter= %str( where=(1));

/* Set up nt_yn, to be used a few times below */
  %if &nt_var= %then %let nt_yn=no;
  %else %let nt_yn=yes;

/* If sym_method requires join or merge, prepare dataset of requested symbols */
  %if &sym_method=2 %then %do;
    proc sort data=&symdsn (keep=&s_var) out=_symlist_ nodupkeys;
      by &s_var;
    run;
  %end;

/* Make a data view, in which each specific time is mapped to its time-interval */
  data vtemp (keep=&s_var &d_var intrvl_begtime &p_var &v_var &nt_var )  / view=vtemp ;
    %if &sym_method=2 %then %str(
      merge &indsn ( in=indata &wh_filter ) _symlist_ (in=inkeep);
      by &s_var;
      if inkeep=indata;
      );
    %else %str(
      set &indsn ( &wh_filter );
      );

    /* If NT_VAR exists, divide by Volume, so weighted PROC MEANS yields right N_TRADES */
    %if &nt_yn=yes %then %str(&nt_var = &nt_var / &v_var;);

    /* Establish beginning of each time interval */
    INTRVL_BEGTIME= &t_var - mod(&t_var,&int_secs);
    attrib INTRVL_BEGTIME  label = "Current &int_text Start Time" format=time8.0 length=4;
  run;

  proc means data=vtemp noprint;
    by &s_var &d_var intrvl_begtime;
    var &p_var &nt_var ;
    weight &v_var ;
    output out=&outdsn (
        label="%upcase(&v_var)-Weighted Average %upcase(&p_var), &int_text BY &int_text"
        drop=_TYPE_ _FREQ_
        sortedby = &s_var &d_var intrvl_begtime
        )
       mean(&p_var)=VW_AVG_PRICE  sumwgt(&p_var)=TOTAL_VOL
       %if &nt_yn=yes %then sum(&nt_var) = N_TRADES ;
       %else                           n = N_TRADES ;
       ;
  run;

  /* Assign Attributes of new variables */
  /* NB: macrovar SYSDSN is the library and name of the last dataset created */
  proc datasets nolist library=%scan(&sysdsn,1,%str( ));
    modify %scan(&sysdsn,2,%str( )) ;
      label TOTAL_VOL    = "Total Shares Traded This &int_text" ;
      label VW_AVG_PRICE = "Volume Weighted Average &p_var this &int_text" ;
      label N_TRADES     = "Number of Trades This &int_text" ;
    run;
  quit;

%done: ;
%mend vw_avgprice;


/* ********************************************************************************* */
/* *************  Material Copyright Wharton Research Data Services  *************** */
/* ****************************** All Rights Reserved ****************************** */
/* ********************************************************************************* */

Top of Section

Top