/************************************************************************
 
           %MVN macro:  Generating multivariate normal data
                                   
 
    DISCLAIMER:
      THIS INFORMATION IS PROVIDED BY SAS INSTITUTE INC. AS A SERVICE TO
      ITS USERS.  IT IS PROVIDED "AS IS".  THERE ARE NO WARRANTIES,
      EXPRESSED OR IMPLIED, AS TO MERCHANTABILITY OR FITNESS FOR A
      PARTICULAR PURPOSE REGARDING THE ACCURACY OF THE MATERIALS OR CODE
      CONTAINED HEREIN.
 
    PURPOSE:
      The %MVN macro generates multivariate normal data using the
      Cholesky root of the variance-covariance matrix.  Bivariate normal
      data can be generated using the DATA step code that follows the
      macro.
                                                                       
    REQUIRES:
      The %MVN macro requires Version 6.06 or later of SAS/IML software.
      The DATA step code for generating bivariate normal data requires
      only Version 6.06 Base SAS software.
 
    USAGE:
      The macro input/output paramters are:

           VARCOV= SAS data set that contains the variance-covariance
                   (and only the variance covariance) matrix.  The macro
                   expects m variables and m observations in the data
                   set, where m is the number of variables to generate.

           MEANS=  SAS data set that contains the mean vector.  The
                   macro expects a single variable with m observations
                   containing the m means for the variables generated.

           N=      Number of observations to generate.

           SEED=   Starting seed value for the random number generator.
                   Default value is 0, which will use the system clock
                   to generate a seed.

           SAMPLE= SAS data set name for the resulting multivariate
                   normal data.  The variable names will be COL1-COLm.

    LIMITATIONS:
      No error checking is done.  The macro assumes that dataset
      names entered are valid, and exist in the case of the VARCOV=
      and MEANS= options.

    EXAMPLE:
      This example generates 20000 observations from a 3 variable
      multivariate normal distribution with specified mean vector and
      covariance matrix.

        * Store the variance-covariance matrix in a data set ;
        data varcov;
           input m1-m3;
           cards;
          4 1.8   4
        1.8   9 3.6
          4 3.6  16
        ;

        * Store the mean vector in a data set ;
        data means;
           input m1;
           cards;
        10
        20
        30
        ;

        %mvn(varcov=varcov,
             means=means,
             n=20000,
             sample=sample)

        proc corr data=sample noprob cov; 
          run;

 
************************************************************************/

%macro mvn(varcov=,       /* dataset for variance-covariance matrix */
           means=,        /* dataset for mean vector */
           n=,            /* sample size */
           seed=0,        /* seed for random number generator */
           sample=);      /* output dataset name */


 /* Get initial seed value.  If seed<=0, then generate seed from the
    system clock. */

data _null_;
   if &seed le 0 then do;
      seed = int(time());   /* get clock time in integer seconds */
      put seed=;
      call symput('seed',seed);   /* store seed as macro variable */
   end;
run;


 /* Generate the multivariate normal data in SAS/IML */

proc iml worksize=100;
   use &varcov;            /* read variance-covariance matrix */
   read all into cov;
   use &means;             /* read means */
   read all into mu;
   v=nrow(cov);            /* calculate number of variables */
   n=&n;
   seed = &seed;
   l=t(root(cov));         /* calculate cholesky root of cov matrix */
   z=normal(j(v,&n,&seed));/* generate nvars*samplesize normals */
   x=l*z;                  /* premultiply by cholesky root */
   x=repeat(mu,1,&n)+x;    /* add in the means */
   tx=t(x);
   create sample from tx;  /* write out sample data to sas dataset */
   append from tx;
quit;

%mend mvn;


/********************************************************************

   Generating bivariate normal data using the DATA step

   Three methods are presented in increasing order of efficiency.

********************************************************************/

data a;
  keep x y;
  mu1=10; mu2=20; var1=4; var2=9; rho=.5;
  do i = 1 to 10000;
    x = mu1+sqrt(var1)*rannor(123);
    y = (mu2+rho*(sqrt(var2)/sqrt(var1))*(x-mu1)) +
        sqrt(var2*(1-rho**2))*rannor(123);
      output;
  end;
run;
proc corr noprob; run;


data b;
  keep x y;
  mu1=10; mu2=20; var1=4; var2=9; rho=.5;
  do i = 1 to 10000;
     /*  generate standard normal variates with corr of rho  */
     x = rannor(123);
     y = rho*x+sqrt(1-rho**2)*rannor(123);

     /*  transform to designated mean and variance  */
     x = mu1 + sqrt(var1)*x;
     y = mu2 + sqrt(var2)*y;
       output;
  end;
run;
proc corr noprob; run;


data c;
  keep x y;
  mu1=10; mu2=20; var1=4; var2=9; rho=.5;
  std1=sqrt(var1);  std2=sqrt(var2);
  c=sqrt(1-rho**2);
    do i = 1 to 10000;
       x = rannor(123);
       y = rho*x+c*rannor(123);
       x = mu1 + sqrt(var1)*x;
       y = mu2 + sqrt(var2)*y;
         output;
    end;
run;
proc corr noprob; run;