%dataset_quarterly.m

%reads in data and transforms to desired dataset

function [Y, X, vars, lev]=dataset_quarterly(esty1,estq1,esty2,estq2,nlags,shock_extract,use_yields,slope)
%DATASET_QUARTERLY  Load quarterly yield/macro data and build the VAR matrices.
%
%   [Y, X, vars, lev] = dataset_quarterly(esty1, estq1, esty2, estq2, ...
%                                         nlags, shock_extract, use_yields, slope)
%
%   Inputs
%     esty1, estq1   year and quarter of the first observation of the sample
%     esty2, estq2   year and quarter of the last observation of the sample
%     nlags          number of lags stacked in X
%     shock_extract  1      : term-structure variables ordered before macro variables
%                    2 or 3 : macro variables ordered before term-structure variables
%     use_yields     0 : macro variables only (slope is ignored)
%                    1 : 5-year yield and (5-year - 3-month) spread
%                    2 : 5-year yield (slope == 0) or (5-year - fed funds) spread (slope == 1)
%                    3 : Diebold-Li level and slope factors
%     slope          for use_yields 1 and 3: 0 keeps the order [first second] of the
%                    series listed above, 1 reverses it; for use_yields 2 it picks
%                    the single series as described above
%
%   Outputs
%     Y     (T-nlags) x nvars matrix of the selected series, first nlags rows dropped
%     X     (T-nlags) x (nvars*nlags) matrix of lags: all variables at lag 1,
%           then all variables at lag 2, and so on
%     vars  char matrix with one row of variable name per column of Y
%     lev   nvars x 1 vector of ones (prior for the first own lag when estimating
%           with a Minnesota prior)
%
%   Sample rows are computed relative to 1959:Q2, i.e. row 1 of every
%   spreadsheet range read below is taken to be 1959:Q2.
%
%   Errors are raised (instead of returning with unassigned outputs) when
%   shock_extract, use_yields or slope is invalid, or when the selected data
%   contain a value above 99999.

%Step 1: load yield data and select yields specified in use_yields
%-----------------------------------------------------------------

if use_yields == 1

    yield3m = xlsread('YieldData.xls','CRSPme quarterly','D3:D196');     % 3-month yield in annualized basis; 1959:2 - 2007:3; *** last 3 years need to be updated ***
    yield5y = xlsread('YieldData.xls','CRSPme quarterly','L3:L196');     % 5-year yield in annualized basis; 1959:2 - 2007:3;
    % The 10-year yield is not used by any specification below, so the
    % (slow) spreadsheet read is disabled; re-enable it if it is needed:
    % yield10y = xlsread('YieldData.xls','GSW quarterly','K3:K196');     % 10-year yield in annualized basis; 1971:4-2009:3 from GSW; read in for 1959:2 - 2007:3;

    yields = [yield5y (yield5y - yield3m) yield3m]; % long-rate, spread and short-rate
    yieldvars=['    5-year bond yield      '
               ' Spread (5-year - 3-month) '
               '     3-month bond yield    '];

    if slope == 0
        ts_select=[1 2];
    elseif slope == 1
        ts_select=[2 1];
    else
        error('dataset_quarterly:invalidSlope','slope must be 0 or 1');
    end

elseif use_yields == 2

    ffr = xlsread('YieldData.xls','CRSPme quarterly','N3:N196');
    yield5y = xlsread('YieldData.xls','CRSPme quarterly','L3:L196');     % quarter average 5-year yield in annualized basis; 1959:2 - 2007:3;
    % The 10-year yield is not used by any specification below, so the
    % (slow) spreadsheet read is disabled; re-enable it if it is needed:
    % yield10y = xlsread('YieldData.xls','GSW quarterly','K3:K196');     % quarter average 10-year yield in annualized basis; 1971:4-2009:3 from GSW; read in for 1959:2 - 2007:3;

    yields = [yield5y (yield5y - ffr)]; % long-rate and spread
    yieldvars=['     5-year bond yield     '
               'Spread (5-year - Fedfunds) '];

    if slope == 0
        ts_select=1;
    elseif slope == 1
        ts_select=2;
    else
        error('dataset_quarterly:invalidSlope','slope must be 0 or 1');
    end

elseif use_yields == 3

    yields=xlsread('factors.xls','factors_quarterly','F4:H188');     %Diebold-Li factor extraction based on 3-60 months yields
                                                                    %level, slope, curvature; 1959:2 - 2005:2;

    % yields=xlsread('factors.xls','factors_quarterly','B4:D188');     %Diebold-Li factor extraction based on 3-120 months yields from DRA
                                                                      %level, slope, curvature; 1972:1 - 2000:4;
    yieldvars=['           Level           '
               '           Slope           '
               '         Curvature         '];

    if slope == 0
        ts_select=[1 2];
    elseif slope == 1
        ts_select=[2 1];
    else
        error('dataset_quarterly:invalidSlope','slope must be 0 or 1');
    end

elseif use_yields ~= 0
    % Without this check an unknown value only surfaced later as an
    % "undefined variable" error on yields/ts_select.
    error('dataset_quarterly:invalidUseYields','use_yields must be 0, 1, 2 or 3');
end

%Step 2: load macro data and select macro variables in VAR
%---------------------------------------------------------
macro=xlsread('macrodata_AER.xls',1,'B8:M192');  %see xls sheet for details on data

macrovars =   ['  Gross domestic product   '      %1) real gdp
               '         Consumption       '      %2) real c
               '          Investment       '      %3) real i
               '     Federal Funds rate    '      %4) fed funds rate
               ' Total factor productivity '      %5) TFP from Fernald
               '     Real S&P500 index     '      %6) Shiller's s&p comp index, cpi deflated, per capita
               ' Equip&Soft price index    '      %7) rel price of equipment and software (as in Cummins-Violante-Fisher)
               '   investment price index  '      %8) rel price of investment (same source)
               '         Inflation         '      %9) GDP inflation
               '       CPI inflation       '      %10) CPI inflation
               '       PCE inflation       '      %11) PCE inflation
               '    Consumption (nd&s)     '];    %12) c nondurables & services deflated by PCE

%rescaling: every series is multiplied by 100, then the fed funds rate (4)
%and the three inflation series (9-11) are divided by 100 again
   macro=100*macro;
   macro(:,4)=macro(:,4)/100;
   macro(:,9)=macro(:,9)/100;
   macro(:,10)=macro(:,10)/100;
   macro(:,11)=macro(:,11)/100;

%select macro variables in VAR
macro_select=[5 2 9 4];  %baseline VAR
% macro_select=[5 8 7 2 9 4];  %VAR with investment and e&s price deflators
% macro_select=[5 2 1 3 6 9 4]; %large VAR


%Step 3: setup VAR and compute lags
%----------------------------------

%select sample period: convert (year, quarter) into row indices, where
%row 1 corresponds to daty1:datq1
daty1=1959;       % First Year of Data Set
datq1=2;          % First Quarter of Data Set
n1 = (esty1-daty1)*4 + ( estq1 - datq1 + 1);
n2 = (esty2-daty1)*4 + ( estq2 - datq1 + 1);

if shock_extract == 1
    %-----------------------------------------------------
    % data structure: term structure var + macro var
    %-----------------------------------------------------
    if use_yields == 0
        data = macro(n1:n2,macro_select);
        vars = macrovars(macro_select,:);
    else
        data = [yields(n1:n2,ts_select) macro(n1:n2,macro_select)];
        vars = [yieldvars(ts_select,:); macrovars(macro_select,:)];
    end

elseif shock_extract == 2 || shock_extract == 3
    %-----------------------------------------------------
    % data structure: macro var(TFP ordered first)
    %                      + term structure var
    %-----------------------------------------------------
    if use_yields == 0
        data = macro(n1:n2,macro_select);
        vars = macrovars(macro_select,:);
    else
        data = [macro(n1:n2,macro_select) yields(n1:n2,ts_select)];
        vars = [macrovars(macro_select,:); yieldvars(ts_select,:)];
%     data=[macro(n1:n2,macro_select(1:3)) ffr(n1:n2) yields(n1:n2,ts_select)]; % for end-of-quarter i-rates
    end

else
    % Raise an error rather than returning: a plain return would leave all
    % outputs unassigned and hide the real cause from the caller.
    error('dataset_quarterly:invalidShockExtract','must select a shock extraction method');
end

%checking for out-of-range values (any entry above 99999; presumably a
%missing-data code in the spreadsheets -- TODO confirm)
if any(data(:) > 99999)
    error('dataset_quarterly:outOfRange','dataseries out of range');
end

[T,nvars]=size(data);

%set prior for first lag of each variable (if estimation with Minnesota
%prior)
lev=ones(nvars,1);

%compute nlags lags
%X is preallocated so that it is defined even when nlags == 0 and is not
%regrown on every pass of the loop
X=zeros(T-nlags,nvars*nlags);
for p=1:nlags
    X(:,1+(p-1)*nvars:p*nvars)=data((nlags+1-p):(T-p),:);
    %gives (T-nlags) x nvars*nlags matrix of lags for all variables
    %first lag of all variables first, then second lag of all variables
    %and so on...
end

%drop the first nlags observations, which are lost through the lagging,
%so that Y lines up row by row with X
Y=data((nlags+1):T,:);