# x contains n = 30 random standard normal values
n<- 30
x<- rnorm(n)
se.x<- sqrt(var(x)/n)
cat("mean =",round(mean(x),3)," variance =",round(var(x),3),
   " se =",round(se.x,3),"\n")

# bootstrap the mean by sampling subscripts with replacement
iter<- 5000
x1<- NULL
for (i in 1:iter) {x1[i]<- mean(x[ceiling(n*runif(n))])}
cat("mean =",round(mean(x1),4),"standard error =",
   round(sqrt(var(x1)),4),"\n")

# equivalently, generate all iter replicates at once with sample()
xtemp<- matrix(sample(x,n*iter,replace=T),n,iter)
x2<- apply(xtemp,2,mean)
cat("mean =",round(mean(x2),4),"standard error =",
   round(sqrt(var(x2)),4),"\n")

# general pattern -- x contains the n sampled data values and
# fcn is any S-function computing the statistic of interest
temp<- matrix(sample(x,n*iter,replace=T),n,iter)
boot.temp<- apply(temp,2,fcn)
cat("mean =",round(mean(boot.temp),4),"standard error =",
   round(sqrt(var(boot.temp)),4),"\n")

# bootstrap estimate of the harmonic mean
# the S-vector d contains the n = 9 sample distances
d<- c(43.64,50.67,33.56,27.75,43.35,29.56,38.83,35.95,20.01)
fcn<- function(x){length(x)/sum(1/x)}
iter<- 2000
n<- length(d)
temp<- matrix(sample(d,n*iter,replace=T),n,iter)
boot.hm<- apply(temp,2,fcn)
cat("mean =",round(mean(boot.hm),4),"standard error =",
   round(sqrt(var(boot.hm)),4),"\n")
quantile(boot.hm)
quantile(boot.hm,c(0.025,0.975))

# bootstrap estimate and standard error for a relative risk
a<- 178
b<- 79
na<- 1589
nb<- 1565
# x = type-A, y = type-B, 0 = no chd and 1 = chd
x<- c(rep(1,a),rep(0,na-a))
y<- c(rep(1,b),rep(0,nb-b))
iter<- 2000
rr<- NULL
for(i in 1:iter) {
   tempx<- x[ceiling(na*runif(na))]
   tempy<- y[ceiling(nb*runif(nb))]
   rr[i]<- (sum(tempx)/na)/(sum(tempy)/nb)
   }
mean(rr)
#[1] 2.22345
# 95% confidence interval (percentile method)
quantile(rr,c(0.025,0.975))
#     2.5%    97.5%
# 1.757939 2.877773

# MEDIAN
# parametric estimate
n<- 25
x<- rnorm(n)
median.x<- median(x)
#[1] -0.099666187
# estimated standard error (variance of the median is approximately 1.571/n)
med.se<- sqrt(1.571/n)
#[1] 0.2506628
# bootstrap estimate
iter<- 5000
d<- matrix(sample(x,n*iter,replace=T),n,iter)
xx<- apply(d,2,median)
cat("median =",round(mean(xx),4),"standard error =",
   round(sqrt(var(xx)),4),"\n")
#median = -0.3882 standard error = 0.1786

# bootstrap estimates of simple linear regression coefficients
bp<- read.table("bp.dat",header=T)
attach(bp)
n<- length(age)
iter<- 2000
age0<- 50
est1<- NULL
est2<- NULL
est3<- NULL
for(i in 1:iter) {
   v<- ceiling(n*runif(n))
   xtemp<- age[v]
   ytemp<- sbp[v]
   b<- lsfit(xtemp,ytemp)$coef
   est1[i]<- b[1]
   est2[i]<- b[2]
   est3[i]<- b[1]+b[2]*age0
   }
se1<- sqrt(var(est1))
cat("intercept =",round(mean(est1),3)," se1 =",round(se1,3),"\n")
#intercept = 93.726 se1 = 17.136
se2<- sqrt(var(est2))
cat("slope =",round(mean(est2),3)," se2 =",round(se2,3),"\n")
#slope = 0.878 se2 = 0.354
se3<- sqrt(var(est3))
cat("y=sbp at age 50 =",round(mean(est3),3)," se3 =",round(se3,3),"\n")
#y=sbp at age 50 = 137.602 se3 = 2.320
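# a minimal sketch (not in the original text): the same bootstrap
# replicates also yield percentile limits directly, here for the
# estimated sbp at age0 = 50, analogous to quantile(rr,...) above
quantile(est3,c(0.025,0.975))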
# bootstrap estimates of the slope by resampling residuals
otemp<- glm(sbp~age)
b<- otemp$coefficients
e<- sbp-(b[1]+b[2]*age)
iter<- 2000
bb<- NULL
for(i in 1:iter) {
   v<- ceiling(n*runif(n))
   yy<- b[1]+b[2]*age+e[v]
   bb[i]<- lsfit(age,yy)$coef[2]
   }
mean.bb<- mean(bb)
se.bb<- sqrt(var(bb))

# bootstrap estimate and 95% confidence interval for R^2
# read data from exterior file bwt.data
dframe<- read.table("bwt.data",header=T)
attach(dframe)
x<- cbind(ht,wt,gain)
y<- bwt
otemp<- glm(y~x,family=gaussian)
r2<- cor(predict(otemp),y)^2
cat("squared multiple correlation coefficient =",round(r2,4),"\n")
#squared multiple correlation coefficient = 0.1838
# bootstrap estimation
iter<- 1000
r<- NULL
for (i in 1:iter) {
   v<- ceiling(nrow(x)*runif(nrow(x)))
   xtemp<- x[v,]
   ytemp<- y[v]
   otemp<- glm(ytemp~xtemp,family=gaussian)
   r[i]<- cor(predict(otemp),ytemp)^2
   }
rbar<- mean(r)
se<- sqrt(var(r))
cat("bootstrap R-squared =",round(rbar,4)," se =",round(se,4),"\n")
# bootstrap R-squared = 0.1863 se = 0.0224
# quantile: 95% limits
round(quantile(r,c(0.025,0.975)),4)
#   2.5%  97.5%
# 0.1436 0.2301
# normal-based 95% confidence limits
lower<- rbar-1.96*se
upper<- rbar+1.96*se
cat("lower =",round(lower,4)," upper =",round(upper,4),"\n")
#lower = 0.1424 upper = 0.2301

# jackknife: leave-one-out samples -- input data contained in x0
jack<- function(x0,n) {
   x<- matrix(rep(x0,length(x0)),n,n)
   matrix(x[row(x)!=col(x)],n-1,n)
   }
n<- 10
x0<- rnorm(n,4,2)
jdata<- jack(x0,n)
round(x0,2)
round(jdata,2)
jmean<- apply(jdata,2,mean)
cat("mean(jack) =",round(mean(jmean),3),"mean =",round(mean(x0),3),"\n")
#mean(jack)= 3.77 mean = 3.77
cat("se =",round(sqrt(sum((n-1)*(jmean-mean(jmean))^2)/n),3),
   "se =",round(sqrt(var(x0)/n),3),"\n")
#se = 0.88 se = 0.88

# bootstrap estimate of the bias of the standard deviation
sd<- function(x){sqrt(var(x))}
n<- 10
x<- rnorm(n)
iter<- 2000
temp<- matrix(sample(x,n*iter,replace=T),n,iter)
sx<- apply(temp,2,sd)
# bias estimate
bias.x<- sd(x)-mean(sx)

# two-sample bootstrap test
# atype contains n1 = 20 observations for type-A individuals
# btype contains n2 = 20 observations for type-B individuals
d<- NULL
for (i in 1:iter) {
   xtemp<- sample(atype,n1,replace=T)
   ytemp<- sample(btype,n2,replace=T)
   d[i]<- mean(xtemp)-mean(ytemp)
   }
dbar<- mean(d)
se<- sqrt(var(d))
# distance from 0 in standard deviations
z<- dbar/se
pvalue<- 2*(1-pnorm(abs(z)))
cbind(dbar,se,z,pvalue)
# 95% limits of the mean difference in cholesterol levels
quantile(d,c(0.025,0.975))
# proportion of bootstrap differences below zero
p<- sum(ifelse(d<0.0,1,0))/iter
# previous parametric t-value = 2.562
pvalue<- 1-pt(2.562,38)
cbind(p,pvalue)

# two-sample test -- randomization approach
# atype contains n1 = 20 observations for type-A individuals
# btype contains n2 = 20 observations for type-B individuals
d<- c(atype,btype)
# randomization -- null distribution
iter<- 5000
difbar<- NULL
for (i in 1:iter) {
   v<- sample(1:length(d),length(atype),replace=F)
   difbar[i]<- mean(d[v])-mean(d[-v])
   }
quantile(difbar,seq(0,1,0.1))
dmean<- mean(atype)-mean(btype)
p<- sum(ifelse(difbar > dmean,1,0))/iter
cat("observed difference =",round(dmean,3),"\n",
   "p-value (randomization test) =",round(p,4),"\n")

# multiple linear regression from first principles
# read array: bwt, height, weight and gain (columns)
indata<- read.table("bwt.data",header=T)
# changes the data frame into a conventional n by k+1 matrix
xtemp<- data.matrix(indata)
n<- nrow(xtemp)
k<- ncol(xtemp)-1
x<- cbind(rep(1,n),xtemp[,-1])
y<- xtemp[,1]
# estimates of the regression coefficients
b<- solve(t(x)%*%x)%*%t(x)%*%y
round(b,4)
# calculation of the estimated values
yhat<- x%*%b
# calculation of the residual values
res<- y-yhat
quantile(res,c(0,0.25,0.5,0.75,1))
ssr<- sum(res^2)
s2<- ssr/(n-k-1)
cat("\n","estimated variance of y =",round(s2,4),"\n")
# variance/covariance array associated with the estimates b
v<- solve(t(x)%*%x)*s2
round(v,8)
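# a minimal check (not in the original text): the closed-form estimates
# b should agree, apart from rounding, with S's built-in least-squares fit
round(cbind(b,lsfit(xtemp[,-1],y)$coef),4)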
cat("residual sum of squares =",round(ssr,4),"degrees of freedom",n-k-1,"\n") ss0<- sum((y-mean(y))^2) cat("total sum of squares =",round(ss0,4),"degrees of freedom",n-1,"\n") df1<- n-k-1 f<- ((ss0-ssr)/k)/(ssr/df1) pvalue<-1-pf(f,k,df1) round(cbind(f,pvalue),k) # squared multiple correlations coefficient r2<- cor(yhat,y)^2 cat("\n","squared multiple correlation coefficient =", round(r2,4),"\n") sb<- sqrt(diag(v)) tvalues<- b/sb pvalues<- 2*(1-pt(abs(tvalues),n-k-1)) round(cbind(b,sb,tvalues,pvalues),4) # gdata = data frame with 28 pairs (x=weeks, y=gain) otemp<- nls(~y-a*b^x,gdata,start=list(a=3,b=1)) summary(otemp) read.table("ridge.data",header=T) otemp<- nls(rate~(1+b*msv)*exp(c*age^2),xdata, start=list(b=0.01,c=0.005)) summary(otemp) rvalues<- function(b,c,msv,age){rate-(1+b*msv)*exp(c*age^2)} otemp<- nls(~rvalues(b,c,msv,age),xdata, start=list(b=0.01,c=0.005)) f<- coef(otemp) rate0<- (1+f[1]*msv)*exp(f[2]*age^2) round(t(matrix(rate0,7,8)),2) round(cor(rate0,rate),3) ex<- pyears*rate0/100000 x2<- sum((deaths-ex)^2/ex) pvalue<- 1-pchisq(x2,54) cbind(x2,pvalue) # a0 and m0 = xy-coordinates a0<- seq(min(age),max(age),1) m0<- seq(min(msv),max(msv),1) # surface height = z z<- outer(a0,m0,function(a0,m0){(1+f[1]*m0)*exp(f[2]*a0^2)}) persp(a0,m0,z,xlab="age",ylab="mSv",zlab="rate", zlim=c(0,1500)) title("Nonlinear fit -- Oak Ridge data") otemp0<- ms(~(rate-(1+b*msv)*exp(c*age^2))^2,xdata,start=list(b=0.03,c=0.001)) # exponential distribution "data" n<- 50 x<- rexp(n,2) # maximum likelihood estimate (closed-form) mle.x<-1/mean(x) # ms-estimation edata<- data.frame(x=x) temp<- ms(~(-n*log(u)+u*sum(x)),edata,start=list(u=2)) # equivalently, the sum is implicit temp<- ms((-log(u)+u*x),edata,start=list(u=2)) # standard normal distribution "data" n<- 50 x<- rnorm(n) # maximum likelihood estimates closed-form mean.x<-mean(x) var.x<-((n-1)/n)*var(x) # ms-estimation ndata<- data.frame(x=x) temp<- ms(~(n*log(v)/2+sum(0.5*(x-m)^2/v)),ndata, start=list(m=0,v=1)) # equivalently, the sum is implicit temp<- ms(log(v)/2+0.5*(x-m)^2/v,ndata, start=list(m=0,v=1)) clike<- function(x,n,k,c0,m,s){ z<- (c0-m)/s -(n-k)*log(s)-0.5*sum(((x-m)/s)^2)+k*log(pnorm(z)) } # data x<- c(0.02,0.18,0.06,0.09,0.03,0.04,0.6,0.04,0.02,0.04,0.09,0.10) # number of censored values k<- 5 n<- k+length(x) # limit of detection c0<- 0.015 cat("xbar =",round(mean(x),4),"estimated variance =", round(var(x),4),"\n") #maximum likelihood estimation xin<- data.frame(x=x) est<- ms(-clike(x,n,k,c0,m,s),list(m=mean(x), s=sgrt(var(x))),data=xin) cat("estimated mean =",round(est$parameters[1],4),"\n","estimated standard deviation =", round(est$parameters[2],4),"\n") #coffee consumption data n<- c(41,213,127,142,67,211,133,76) r<- c(9,94,53,60,11,59,53,28) #S-likelihood function like<- function(n,r,a,b1,b2){ x1<- c(0,2,4,5,0,2,4,5) x2<- c(1,1,1,1,0,0,0,0) q<- a+b1*x1+b2*x2 -(sum((n-r)*q)+sum(n*log(1+exp(-q)))) datain<- data.frame(r=r,n=n) ms(~-like(n,r,a,b1,b2),list(a=-2.0,b1=0.5,b2=0.5) data=datain) # repeat of the glm/logistic analysis p<- r/n x1<- c(0,2,4,5,0,2,4,5) x2<- c(1,1,1,1,0,0,0,0) otemp<- glm(p~x1+x2,family=binomial,weights=n) otemp$coefficients