#####################################################################
# weiranalysis.R
# mary.martin@unh.edu
#
# script to merge together chart and weir data, and to
# run regressions on weir pairs for use in gap filling
# also makes lots of diagnostic graphs
#####################################################################

# add setwd here
#setwd("C:/R")

# read in merged QC'd file
dat=read.csv("allweirsthru20140703_QC.csv",na.strings="-9999")

# create posix date columh
dat$DATETIME = as.POSIXct(strptime(dat[,1], "%Y-%m-%d %H:%M:%S"))

# look at data structure and first/last lines
str(dat)
head(dat)
tail(dat)


# parse out all individual weirs
weir1=dat[,c(70,7)]
weir2=dat[,c(70,14)]
weir3=dat[,c(70,21)]
weir4=dat[,c(70,28)]
weir5=dat[,c(70,35)]
weir6=dat[,c(70,42)]
weir7=dat[,c(70,49)]
weir8=dat[,c(70,56)]
weir9=dat[,c(70,63)]

# create a vector of times from the original file
sensortime=dat$DATETIME

############################################################
# chart data
############################################################

# get list of chart files
files <- list.files(pattern=("*charts.out"))

# make name for each chart file
names <- gsub(pattern='\\.out$', '', files, ignore.case=T)

# vector for col headers in interpolated files
names.chart=c("DATETIME","height")

# for each chart file, read, add DATETIME, interpolate to timestamps in sensor file
for( i in 1:length(names)){
	print(i)

	# read in the chart data
	a <- paste(names[i], ' = read.csv( file=\'', files[i], '\', header=F)', sep='')
	eval(parse( text=a ))
	b <- paste(names[i],'$DATETIME =strptime(',names[i],'[,1], \"%Y-%m-%d %H:%M:%S\")',sep="") 
	eval(parse( text=b ))

	# interpolate each chart file to the primary timestamp of sensor file
	print("start approx routine")
	c=paste(names[i],".i = data.frame(approx(as.POSIXct(",names[i],"[,1]),",names[i],"[,2],as.POSIXct(sensortime)))",sep="")
	eval(parse( text = c))
	d=paste("names(",names[i],".i)=c(\"DATETIME\",\"height\")",sep="")
	eval(parse( text = d ))

  }

############################################################
# merge chart with weir data
############################################################
w1.m=merge(weir1charts.i,weir1,by="DATETIME")
w2.m=merge(weir2charts.i,weir2,by="DATETIME")
w3.m=merge(weir3charts.i,weir3,by="DATETIME")
w4.m=merge(weir4charts.i,weir4,by="DATETIME")
w5.m=merge(weir5charts.i,weir5,by="DATETIME")
w6.m=merge(weir6charts.i,weir6,by="DATETIME")
w7.m=merge(weir7charts.i,weir7,by="DATETIME")
w8.m=merge(weir8charts.i,weir8,by="DATETIME")
w9.m=merge(weir9charts.i,weir9,by="DATETIME")

rm(weir1charts.i,weircharts.i,weir3charts.i,weir4charts.i,weir5charts.i,weir6charts.i,weir7charts.i,weir8charts.i,weir9charts.i)
rm(weir1charts,weircharts,weir3charts,weir4charts,weir5charts,weir6charts,weir7charts,weir8charts,weir9charts)

#make plots for each weir showing chart and sensor
pdf("weir_chartsV3.pdf",16,6)

plot(w1.m[,1],w1.m[,2],type='l',col="blue",main="Weir1",xlab="Date",ylab="Height(ft)",lwd=.5, cex=.1)
lines(w1.m[,1],w1.m[,3],col="red",lwd=.5, cex=.1)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w2.m[,1],w2.m[,2],type='l',col="blue",main="Weir2",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w2.m[,1],w2.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w3.m[,1],w3.m[,2],type='l',col="blue",main="Weir3",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w3.m[,1],w3.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w4.m[,1],w4.m[,2],type='l',col="blue",main="Weir4",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w4.m[,1],w4.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w5.m[,1],w5.m[,2],type='l',col="blue",main="Weir5",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w5.m[,1],w5.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w6.m[,1],w6.m[,2],type='l',col="blue",main="Weir6",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w6.m[,1],w6.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w7.m[,1],w7.m[,2],type='l',col="blue",main="Weir7",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w7.m[,1],w7.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w8.m[,1],w8.m[,2],type='l',col="blue",main="Weir8",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w8.m[,1],w8.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

plot(w9.m[,1],w9.m[,2],type='l',col="blue",main="Weir9",xlab="Date",ylab="Height(ft)",lwd=.5)
lines(w9.m[,1],w9.m[,3],col="red",lwd=.5)
legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red"))

dev.off()


# make plots of weir ht vs maxht
pdf("weirht_vs_weirmaxht.pdf")
plot(dat[,8],dat[,9],main="weir1",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
plot(dat[,15],dat[,16],main="weir2",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,13)
grid()
plot(dat[,22],dat[,23],main="weir3",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
plot(dat[,29],dat[,30],main="weir4",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
plot(dat[,36],dat[,37],main="weir5",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
plot(dat[,43],dat[,44],main="weir6",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
plot(dat[,50],dat[,51],main="weir7",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
plot(dat[,57],dat[,58],main="weir8",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
plot(dat[,64],dat[,65],main="weir9",xlab="height",ylab="maxheight",cex=.3,col="red")
abline(0,1)
grid()
dev.off()


############################################################
# run regressions to look for best gap filling candidates
############################################################

# assign each month to a season
# note that 0 denotes all data
seasoncolors=c(1,1,2,2,2,3,3,3,4,4,4,1)

# num will be number of weir that needs to be gap filled
num=c(rep(1,8),rep(2,8),rep(3,8),rep(4,8),rep(5,8),rep(6,8),rep(7,8),rep(8,8),rep(9,8)) 

# numfill will be number of weir to be used for the gap filling
numfill=c(2,3,4,5,6,7,8,9,1,3,4,5,6,7,8,9,1,2,4,5,6,7,8,9,1,2,3,5,6,7,8,9,1,2,3,4,6,7,8,9,1,2,3,4,5,7,8,9,1,2,3,4,5,6,8,9,1,2,3,4,5,6,7,9,1,2,3,4,5,6,7,8)

# modnames are num.numfill and are  used to label models
modnames=c("1.2","1.3","1.4","1.5","1.6","1.7","1.8","1.9","2.1","2.3","2.4","2.5","2.6","2.7","2.8","2.9","3.1","3.2","3.4","3.5","3.6","3.7","3.8","3.9","4.1","4.2","4.3","4.5","4.6","4.7","4.8","4.9","5.1","5.2","5.3","5.4","5.6","5.7","5.8","5.9","6.1","6.2","6.3","6.4","6.5","6.7","6.8","6.9","7.1","7.2","7.3","7.4","7.5","7.6","7.8","7.9","8.1","8.2","8.3","8.4","8.5","8.6","8.7","8.9","9.1","9.2","9.3","9.4","9.5","9.6","9.7","9.8")

# create a dataframe to hold results of models
modeloutput=data.frame(target=numeric(0),fillweir=numeric(0),modelname=character(0),season=numeric(0),coef=numeric(0),slope=numeric(0),rsq=numeric(0),adj.rsq=numeric(0),stringsAsFactors=FALSE)

# begin a model number counter and create %03d format to be used to label files
z=181
index=sprintf("%03d",z)

# turn off scientific notation 
options(scipen=999)

# run regression for weir pairs
# note that R runs out of memory with all models, so I split this here and run twice
#for(i in seq(1,36)){
for(i in seq(37,72)){

  wnum= c()
  wfill=c()

  s=0 # set season to 0 for full  year
  modtype="annual"

  # subset the weir files for weir and fill-weir
  # note - for the annual runs, this could just be a simple assignment
  str=paste("wnum=w",num[i],".m",sep="")
  eval(parse( text = str))
  str2=paste("wfill=w",numfill[i],".m",sep="")
  eval(parse( text = str2))
  print(dim(wnum))
  print(dim(wfill))
  
  # assign
  modelname=paste("lms",num[i],".",numfill[i],".",s,sep="")
  print(modelname)
  
  # run the model
  assign(paste("lms",num[i],".",numfill[i],".",s,sep=""),eval(parse(text=paste("lm(wnum[,3] ~ wfill[,3])",sep="")) )) 

  # pull out info from the model run
  intercept=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[1]",sep="")))
  slope=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[2]",sep="")))
  rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$r.squared",sep="")))
  adj.rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$adj.r.squared",sep="")))
  pval=eval(parse(text=paste("format.pval(summary(lms",num[i],".",numfill[i],".",s,")$coefficients[2,4])",sep="")))
  
  # print results into the model summary table
  print(index)
  modeloutput[index,1]=num[i]
  modeloutput[index,2]=numfill[i]
  modeloutput[index,3]=as.character(modelname)
  modeloutput[index,4]=s
  modeloutput[index,5]=intercept
  modeloutput[index,6]=slope
  modeloutput[index,7]=rsq
  modeloutput[index,8]=adj.rsq
  #modeloutput[index,9]=pval

  # get cols of data to use below in graphs
  xs=wnum[,3]
  ys=wfill[,3]
 
  # find upper limit to axes using max of both weirs
  axlimit=max(c(xs,ys),na.rm=TRUE)
  
  # set graph format to one per page
  par(mfrow=c(1,1))
  
  # define graph labels
  ylabel=paste("Weir ",num[i]," stage height (ft)",sep="")
  xlabel=paste("Weir ",numfill[i]," stage height (ft)",sep="")
  maintitle=paste("Weir ",num[i]," predicted by ",numfill[i]," Season = ",s,sep="")
  
  # open png device
  png(paste(index,".png",sep=""))

  # scatterplot of the two weirs
  plot(ys,xs,pch=20,cex=.3,xlab=xlabel,ylab=ylabel,xlim=c(0,axlimit),ylim=c(0,axlimit),col=seasoncolors[as.POSIXlt(w1.m$DATETIME)$mon + 1])
  title(main=maintitle)
  abline(0,1)

  # add the regression line and the equations and rsq
  str3=paste("abline(lms",num[i],".",numfill[i],".",s,",col=\"red\")",sep="")
  eval(parse( text = str3))
  mtext(padj=.5,cex=.75,side=3,line=-1,bquote(y== .(slope) * x + .(intercept) ))
  mtext(padj=.5,cex=.75,side=3,line=-2,bquote(Rsq == .(adj.rsq)))

  grid() #  plot grid
  dev.off() # turn off annual scatterplot device
  
  # increment model number counter
  index=sprintf("%03d",as.numeric(index)+1)
   
  # plot full timeline of both weirs 
  pdf(paste("sensor_timeline.",num[i],".",numfill[i],".pdf",sep=""),16,6)
    plot(wnum[,1],wnum[,3],type='l',col="blue",main="Weir1",xlab="Date",ylab="Height(ft)",lwd=.5, cex=.1)
    lines(wfill[,1],wfill[,3],col="red",lwd=.5, cex=.1)
    legend("topleft",c(paste("Weir ",num[i],sep=""),paste("Weir ",numfill[i]),sep=""),lty=c(1,1), col=c("blue","red"))
  dev.off()

  ###################################################################### 
  # now run models for each season
  ###################################################################### 
  
  #set 2x2 plot layout for season loop to follow
  png(paste(index,".png",sep=""))
  par(mfrow=c(2,2))

  # create an inner loop from 1 to 4 to create conditions subsets by season
  # run models for all seasons
  for(s in 1:4){
  
  # get list of months that fall in current season, decrement by one since POSIX month counts from 0
  months=which(seasoncolors==s)-1
  wnum= c()
  wfill=c()
  modtype="seasonal"
  
  # subset the weir files for weir and fill-weir for each season
  # this is a little messy since the conditional statement has to be used 3 times
  # to get the three months in the season, and then the data are concatenated with rbind
  for(c in 1:3){
    print(paste("subsetting ",months[c]))
    str=paste("wnum",c,"=w",num[i],".m[as.POSIXlt(w",num[i],".m$DATETIME)$mon %in% ",months[c],",]",sep="")
    eval(parse( text = str))
    str2=paste("wfill",c,"=w",numfill[i],".m[as.POSIXlt(w",numfill[i],".m$DATETIME)$mon %in% ",months[c],",]",sep="")
    eval(parse( text = str2))
  }

  wnum=rbind(wnum1,wnum2,wnum3)
  wfill=rbind(wfill1,wfill2,wfill3)
  
  print(dim(wnum))
  print(dim(wfill))

  modelname=paste("lms",num[i],".",numfill[i],".",s,sep="")
  print(modelname)

  # run the regression model
  assign(paste("lms",num[i],".",numfill[i],".",s,sep=""),eval(parse(text=paste("lm(wnum[,3] ~ wfill[,3])",sep="")) )) 

  # pull out summary info from the models
  intercept=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[1]",sep="")))
  slope=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[2]",sep="")))
  rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$r.squared",sep="")))
  adj.rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$adj.r.squared",sep="")))
  pval=eval(parse(text=paste("format.pval(summary(lms",num[i],".",numfill[i],".",s,")$coefficients[2,4])",sep="")))

  print(index)
  # print values to model summary output table
  modeloutput[index,1]=num[i]
  modeloutput[index,2]=numfill[i]
  modeloutput[index,3]=as.character(modelname)
  modeloutput[index,4]=s
  modeloutput[index,5]=intercept
  modeloutput[index,6]=slope
  modeloutput[index,7]=rsq
  modeloutput[index,8]=adj.rsq
  #modeloutput[index,9]=pval
  
  # get cols of data to use below in graphs
  xs=wnum[,3]
  ys=wfill[,3]
  axlimit=max(c(xs,ys),na.rm=TRUE)
 
  # define graph labels
  ylabel=paste("Weir ",num[i]," stage height (ft)",sep="")
  xlabel=paste("Weir ",numfill[i]," stage height (ft)",sep="")
  maintitle=paste("Weir ",num[i]," predicted by ",numfill[i]," Season = ",s,sep="")

  # scatterplot of the two weirs
  plot(ys,xs,pch=20,cex=.3,xlab=xlabel,ylab=ylabel,xlim=c(0,axlimit),ylim=c(0,axlimit),col=s)
  title(main=maintitle)
  abline(0,1)

  # add the regression line and the equations and rsq
  str3=paste("abline(lms",num[i],".",numfill[i],".",s,",col=\"red\")",sep="")
  eval(parse( text = str3))
  mtext(padj=.5,cex=.75,side=3,line=-1,bquote(y== .(slope) * x + .(intercept) ))
  mtext(padj=.5,cex=.75,side=3,line=-2,bquote(Rsq == .(adj.rsq)))

  grid()

  # increment model number counter
  index=sprintf("%03d",as.numeric(index)+1)


 } #end season loop
dev.off() # close the device with the 4 seasonal graphs on a single page

}# end model loop

# write out the model summary data
# NOTE - change this from PART1 to PART 2 when splitting up the 360 models
#write.csv(modeloutput,"model_summary_table_PART1.csv", row.names=FALSE)
write.csv(modeloutput,"model_summary_table_PART2.csv", row.names=FALSE)