# x contains n = 30 random standard normal values
n<- 30
x<- rnorm(n)
se.x<- sqrt(var(x)/n)
cat("mean =",round(mean(x),3)," variance =",round(var(x),3),
   " se =",round(se.x,3),"\n")

# bootstrap the mean by sampling subscripts with replacement
iter<- 5000
x1<- NULL
for (i in 1:iter) {x1[i]<- mean(x[ceiling(n*runif(n))])}
cat("mean =",round(mean(x1),4),"standard error =",
   round(sqrt(var(x1)),4),"\n")

# equivalently, generate all iter replicates at once with sample()
xtemp<- matrix(sample(x,n*iter,replace=T),n,iter)
x2<- apply(xtemp,2,mean)
cat("mean =",round(mean(x2),4),"standard error =",
   round(sqrt(var(x2)),4),"\n")

# general pattern -- x contains the n sampled data values and
# fcn is any S-function computing the statistic of interest
temp<- matrix(sample(x,n*iter,replace=T),n,iter)
boot.temp<- apply(temp,2,fcn)
cat("mean =",round(mean(boot.temp),4),"standard error =",
   round(sqrt(var(boot.temp)),4),"\n")

# bootstrap estimate of the harmonic mean
# the S-vector d contains the n = 9 sample distances
d<- c(43.64,50.67,33.56,27.75,43.35,29.56,38.83,35.95,20.01)
fcn<- function(x){length(x)/sum(1/x)}
iter<- 2000
n<- length(d)
temp<- matrix(sample(d,n*iter,replace=T),n,iter)
boot.hm<- apply(temp,2,fcn)
cat("mean =",round(mean(boot.hm),4),"standard error =",
   round(sqrt(var(boot.hm)),4),"\n")
quantile(boot.hm)
quantile(boot.hm,c(0.025,0.975))

# bootstrap estimate and standard error for a relative risk
a<- 178
b<- 79
na<- 1589
nb<- 1565
# x = type-A, y = type-B, 0 = no chd and 1 = chd
x<- c(rep(1,a),rep(0,na-a))
y<- c(rep(1,b),rep(0,nb-b))
iter<- 2000
rr<- NULL
for(i in 1:iter) {
   tempx<- x[ceiling(na*runif(na))]
   tempy<- y[ceiling(nb*runif(nb))]
   rr[i]<- (sum(tempx)/na)/(sum(tempy)/nb)
   }
mean(rr)
#[1] 2.22345
# 95% confidence interval (percentile method)
quantile(rr,c(0.025,0.975))
#     2.5%    97.5%
# 1.757939 2.877773

# MEDIAN
# parametric estimate
n<- 25
x<- rnorm(n)
median.x<- median(x)
#[1] -0.099666187
# estimated standard error (variance of the median is approximately 1.571/n)
med.se<- sqrt(1.571/n)
#[1] 0.2506628
# bootstrap estimate
iter<- 5000
d<- matrix(sample(x,n*iter,replace=T),n,iter)
xx<- apply(d,2,median)
cat("median =",round(mean(xx),4),"standard error =",
   round(sqrt(var(xx)),4),"\n")
#median = -0.3882 standard error = 0.1786

# bootstrap estimates of simple linear regression coefficients
bp<- read.table("bp.dat",header=T)
attach(bp)
n<- length(age)
iter<- 2000
age0<- 50
est1<- NULL
est2<- NULL
est3<- NULL
for(i in 1:iter) {
   v<- ceiling(n*runif(n))
   xtemp<- age[v]
   ytemp<- sbp[v]
   b<- lsfit(xtemp,ytemp)$coef
   est1[i]<- b[1]
   est2[i]<- b[2]
   est3[i]<- b[1]+b[2]*age0
   }
se1<- sqrt(var(est1))
cat("intercept =",round(mean(est1),3)," se1 =",round(se1,3),"\n")
#intercept = 93.726 se1 = 17.136
se2<- sqrt(var(est2))
cat("slope =",round(mean(est2),3)," se2 =",round(se2,3),"\n")
#slope = 0.878 se2 = 0.354
se3<- sqrt(var(est3))
cat("y=sbp at age 50 =",round(mean(est3),3)," se3 =",round(se3,3),"\n")
#y=sbp at age 50 = 137.602 se3 = 2.320
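# a minimal sketch (not in the original text): the same bootstrap
# replicates also yield percentile limits directly, here for the
# estimated sbp at age0 = 50, analogous to quantile(rr,...) above
quantile(est3,c(0.025,0.975))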
# bootstrap estimates of the slope by resampling residuals
otemp<- glm(sbp~age)
b<- otemp$coefficients
e<- sbp-(b[1]+b[2]*age)
iter<- 2000
bb<- NULL
for(i in 1:iter) {
   v<- ceiling(n*runif(n))
   yy<- b[1]+b[2]*age+e[v]
   bb[i]<- lsfit(age,yy)$coef[2]
   }
mean.bb<- mean(bb)
se.bb<- sqrt(var(bb))

# bootstrap estimate and 95% confidence interval for R^2
# read data from exterior file bwt.data
dframe<- read.table("bwt.data",header=T)
attach(dframe)
x<- cbind(ht,wt,gain)
y<- bwt
otemp<- glm(y~x,family=gaussian)
r2<- cor(predict(otemp),y)^2
cat("squared multiple correlation coefficient =",round(r2,4),"\n")
#squared multiple correlation coefficient = 0.1838
# bootstrap estimation
iter<- 1000
r<- NULL
for (i in 1:iter) {
   v<- ceiling(nrow(x)*runif(nrow(x)))
   xtemp<- x[v,]
   ytemp<- y[v]
   otemp<- glm(ytemp~xtemp,family=gaussian)
   r[i]<- cor(predict(otemp),ytemp)^2
   }
rbar<- mean(r)
se<- sqrt(var(r))
cat("bootstrap R-squared =",round(rbar,4)," se =",round(se,4),"\n")
# bootstrap R-squared = 0.1863 se = 0.0224
# quantile: 95% limits
round(quantile(r,c(0.025,0.975)),4)
#   2.5%  97.5%
# 0.1436 0.2301
# normal-based 95% confidence limits
lower<- rbar-1.96*se
upper<- rbar+1.96*se
cat("lower =",round(lower,4)," upper =",round(upper,4),"\n")
#lower = 0.1424 upper = 0.2301

# jackknife: leave-one-out samples -- input data contained in x0
jack<- function(x0,n) {
   x<- matrix(rep(x0,length(x0)),n,n)
   matrix(x[row(x)!=col(x)],n-1,n)
   }
n<- 10
x0<- rnorm(n,4,2)
jdata<- jack(x0,n)
round(x0,2)
round(jdata,2)
jmean<- apply(jdata,2,mean)
cat("mean(jack) =",round(mean(jmean),3),"mean =",round(mean(x0),3),"\n")
#mean(jack)= 3.77 mean = 3.77
cat("se =",round(sqrt(sum((n-1)*(jmean-mean(jmean))^2)/n),3),
   "se =",round(sqrt(var(x0)/n),3),"\n")
#se = 0.88 se = 0.88

# bootstrap estimate of the bias of the standard deviation
sd<- function(x){sqrt(var(x))}
n<- 10
x<- rnorm(n)
iter<- 2000
temp<- matrix(sample(x,n*iter,replace=T),n,iter)
sx<- apply(temp,2,sd)
# bias estimate
bias.x<- sd(x)-mean(sx)

# two-sample bootstrap test
# atype contains n1 = 20 observations for type-A individuals
# btype contains n2 = 20 observations for type-B individuals
d<- NULL
for (i in 1:iter) {
   xtemp<- sample(atype,n1,replace=T)
   ytemp<- sample(btype,n2,replace=T)
   d[i]<- mean(xtemp)-mean(ytemp)
   }
dbar<- mean(d)
se<- sqrt(var(d))
# distance from 0 in standard deviations
z<- dbar/se
pvalue<- 2*(1-pnorm(abs(z)))
cbind(dbar,se,z,pvalue)
# 95% limits of the mean difference in cholesterol levels
quantile(d,c(0.025,0.975))
# proportion of bootstrap differences below zero
p<- sum(ifelse(d<0.0,1,0))/iter
# previous parametric t-value = 2.562
pvalue<- 1-pt(2.562,38)
cbind(p,pvalue)

# two-sample test -- randomization approach
# atype contains n1 = 20 observations for type-A individuals
# btype contains n2 = 20 observations for type-B individuals
d<- c(atype,btype)
# randomization -- null distribution
iter<- 5000
difbar<- NULL
for (i in 1:iter) {
   v<- sample(1:length(d),length(atype),replace=F)
   difbar[i]<- mean(d[v])-mean(d[-v])
   }
quantile(difbar,seq(0,1,0.1))
dmean<- mean(atype)-mean(btype)
p<- sum(ifelse(difbar > dmean,1,0))/iter
cat("observed difference =",round(dmean,3),"\n",
   "p-value (randomization test) =",round(p,4),"\n")

# multiple linear regression from first principles
# read array: bwt, height, weight and gain (columns)
indata<- read.table("bwt.data",header=T)
# changes the data frame into a conventional n by k+1 matrix
xtemp<- data.matrix(indata)
n<- nrow(xtemp)
k<- ncol(xtemp)-1
x<- cbind(rep(1,n),xtemp[,-1])
y<- xtemp[,1]
# estimates of the regression coefficients
b<- solve(t(x)%*%x)%*%t(x)%*%y
round(b,4)
# calculation of the estimated values
yhat<- x%*%b
# calculation of the residual values
res<- y-yhat
quantile(res,c(0,0.25,0.5,0.75,1))
ssr<- sum(res^2)
s2<- ssr/(n-k-1)
cat("\n","estimated variance of y =",round(s2,4),"\n")
# variance/covariance array associated with the estimates b
v<- solve(t(x)%*%x)*s2
round(v,8)
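# a minimal check (not in the original text): the closed-form estimates
# b should agree, apart from rounding, with S's built-in least-squares fit
round(cbind(b,lsfit(xtemp[,-1],y)$coef),4)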
cat("residual sum of squares =",round(ssr,4),"degrees of freedom",n-k-1,"\n") ss0<- sum((y-mean(y))^2) cat("total sum of squares =",round(ss0,4),"degrees of freedom",n-1,"\n") df1<- n-k-1 f<- ((ss0-ssr)/k)/(ssr/df1) pvalue<-1-pf(f,k,df1) round(cbind(f,pvalue),k) # squared multiple correlations coefficient r2<- cor(yhat,y)^2 cat("\n","squared multiple correlation coefficient =", round(r2,4),"\n") sb<- sqrt(diag(v)) tvalues<- b/sb pvalues<- 2*(1-pt(abs(tvalues),n-k-1)) round(cbind(b,sb,tvalues,pvalues),4) # gdata = data frame with 28 pairs (x=weeks, y=gain) otemp<- nls(~y-a*b^x,gdata,start=list(a=3,b=1)) summary(otemp) read.table("ridge.data",header=T) otemp<- nls(rate~(1+b*msv)*exp(c*age^2),xdata, start=list(b=0.01,c=0.005)) summary(otemp) rvalues<- function(b,c,msv,age){rate-(1+b*msv)*exp(c*age^2)} otemp<- nls(~rvalues(b,c,msv,age),xdata, start=list(b=0.01,c=0.005)) f<- coef(otemp) rate0<- (1+f[1]*msv)*exp(f[2]*age^2) round(t(matrix(rate0,7,8)),2) round(cor(rate0,rate),3) ex<- pyears*rate0/100000 x2<- sum((deaths-ex)^2/ex) pvalue<- 1-pchisq(x2,54) cbind(x2,pvalue) # a0 and m0 = xy-coordinates a0<- seq(min(age),max(age),1) m0<- seq(min(msv),max(msv),1) # surface height = z z<- outer(a0,m0,function(a0,m0){(1+f[1]*m0)*exp(f[2]*a0^2)}) persp(a0,m0,z,xlab="age",ylab="mSv",zlab="rate", zlim=c(0,1500)) title("Nonlinear fit -- Oak Ridge data") otemp0<- ms(~(rate-(1+b*msv)*exp(c*age^2))^2,xdata,start=list(b=0.03,c=0.001)) # exponential distribution "data" n<- 50 x<- rexp(n,2) # maximum likelihood estimate (closed-form) mle.x<-1/mean(x) # ms-estimation edata<- data.frame(x=x) temp<- ms(~(-n*log(u)+u*sum(x)),edata,start=list(u=2)) # equivalently, the sum is implicit temp<- ms((-log(u)+u*x),edata,start=list(u=2)) # standard normal distribution "data" n<- 50 x<- rnorm(n) # maximum likelihood estimates closed-form mean.x<-mean(x) var.x<-((n-1)/n)*var(x) # ms-estimation ndata<- data.frame(x=x) temp<- ms(~(n*log(v)/2+sum(0.5*(x-m)^2/v)),ndata, start=list(m=0,v=1)) # equivalently, the sum is implicit temp<- ms(log(v)/2+0.5*(x-m)^2/v,ndata, start=list(m=0,v=1)) clike<- function(x,n,k,c0,m,s){ z<- (c0-m)/s -(n-k)*log(s)-0.5*sum(((x-m)/s)^2)+k*log(pnorm(z)) } # data x<- c(0.02,0.18,0.06,0.09,0.03,0.04,0.6,0.04,0.02,0.04,0.09,0.10) # number of censored values k<- 5 n<- k+length(x) # limit of detection c0<- 0.015 cat("xbar =",round(mean(x),4),"estimated variance =", round(var(x),4),"\n") #maximum likelihood estimation xin<- data.frame(x=x) est<- ms(-clike(x,n,k,c0,m,s),list(m=mean(x), s=sgrt(var(x))),data=xin) cat("estimated mean =",round(est$parameters[1],4),"\n","estimated standard deviation =", round(est$parameters[2],4),"\n") #coffee consumption data n<- c(41,213,127,142,67,211,133,76) r<- c(9,94,53,60,11,59,53,28) #S-likelihood function like<- function(n,r,a,b1,b2){ x1<- c(0,2,4,5,0,2,4,5) x2<- c(1,1,1,1,0,0,0,0) q<- a+b1*x1+b2*x2 -(sum((n-r)*q)+sum(n*log(1+exp(-q)))) datain<- data.frame(r=r,n=n) ms(~-like(n,r,a,b1,b2),list(a=-2.0,b1=0.5,b2=0.5) data=datain) # repeat of the glm/logistic analysis p<- r/n x1<- c(0,2,4,5,0,2,4,5) x2<- c(1,1,1,1,0,0,0,0) otemp<- glm(p~x1+x2,family=binomial,weights=n) otemp$coefficients