##################################################################### # weiranalysis.R # mary.martin@unh.edu # # script to merge together chart and weir data, and to # run regressions on weir pairs for use in gap filling # also makes lots of diagnostic graphs ##################################################################### # add setwd here #setwd("C:/R") # read in merged QC'd file dat=read.csv("allweirsthru20140703_QC.csv",na.strings="-9999") # create posix date columh dat$DATETIME = as.POSIXct(strptime(dat[,1], "%Y-%m-%d %H:%M:%S")) # look at data structure and first/last lines str(dat) head(dat) tail(dat) # parse out all individual weirs weir1=dat[,c(70,7)] weir2=dat[,c(70,14)] weir3=dat[,c(70,21)] weir4=dat[,c(70,28)] weir5=dat[,c(70,35)] weir6=dat[,c(70,42)] weir7=dat[,c(70,49)] weir8=dat[,c(70,56)] weir9=dat[,c(70,63)] # create a vector of times from the original file sensortime=dat$DATETIME ############################################################ # chart data ############################################################ # get list of chart files files <- list.files(pattern=("*charts.out")) # make name for each chart file names <- gsub(pattern='\\.out$', '', files, ignore.case=T) # vector for col headers in interpolated files names.chart=c("DATETIME","height") # for each chart file, read, add DATETIME, interpolate to timestamps in sensor file for( i in 1:length(names)){ print(i) # read in the chart data a <- paste(names[i], ' = read.csv( file=\'', files[i], '\', header=F)', sep='') eval(parse( text=a )) b <- paste(names[i],'$DATETIME =strptime(',names[i],'[,1], \"%Y-%m-%d %H:%M:%S\")',sep="") eval(parse( text=b )) # interpolate each chart file to the primary timestamp of sensor file print("start approx routine") c=paste(names[i],".i = data.frame(approx(as.POSIXct(",names[i],"[,1]),",names[i],"[,2],as.POSIXct(sensortime)))",sep="") eval(parse( text = c)) d=paste("names(",names[i],".i)=c(\"DATETIME\",\"height\")",sep="") eval(parse( text = d )) } ############################################################ # merge chart with weir data ############################################################ w1.m=merge(weir1charts.i,weir1,by="DATETIME") w2.m=merge(weir2charts.i,weir2,by="DATETIME") w3.m=merge(weir3charts.i,weir3,by="DATETIME") w4.m=merge(weir4charts.i,weir4,by="DATETIME") w5.m=merge(weir5charts.i,weir5,by="DATETIME") w6.m=merge(weir6charts.i,weir6,by="DATETIME") w7.m=merge(weir7charts.i,weir7,by="DATETIME") w8.m=merge(weir8charts.i,weir8,by="DATETIME") w9.m=merge(weir9charts.i,weir9,by="DATETIME") rm(weir1charts.i,weircharts.i,weir3charts.i,weir4charts.i,weir5charts.i,weir6charts.i,weir7charts.i,weir8charts.i,weir9charts.i) rm(weir1charts,weircharts,weir3charts,weir4charts,weir5charts,weir6charts,weir7charts,weir8charts,weir9charts) #make plots for each weir showing chart and sensor pdf("weir_chartsV3.pdf",16,6) plot(w1.m[,1],w1.m[,2],type='l',col="blue",main="Weir1",xlab="Date",ylab="Height(ft)",lwd=.5, cex=.1) lines(w1.m[,1],w1.m[,3],col="red",lwd=.5, cex=.1) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w2.m[,1],w2.m[,2],type='l',col="blue",main="Weir2",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w2.m[,1],w2.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w3.m[,1],w3.m[,2],type='l',col="blue",main="Weir3",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w3.m[,1],w3.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w4.m[,1],w4.m[,2],type='l',col="blue",main="Weir4",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w4.m[,1],w4.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w5.m[,1],w5.m[,2],type='l',col="blue",main="Weir5",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w5.m[,1],w5.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w6.m[,1],w6.m[,2],type='l',col="blue",main="Weir6",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w6.m[,1],w6.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w7.m[,1],w7.m[,2],type='l',col="blue",main="Weir7",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w7.m[,1],w7.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w8.m[,1],w8.m[,2],type='l',col="blue",main="Weir8",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w8.m[,1],w8.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) plot(w9.m[,1],w9.m[,2],type='l',col="blue",main="Weir9",xlab="Date",ylab="Height(ft)",lwd=.5) lines(w9.m[,1],w9.m[,3],col="red",lwd=.5) legend("topleft",c("chart","sensor"),lty=c(1,1), col=c("blue","red")) dev.off() # make plots of weir ht vs maxht pdf("weirht_vs_weirmaxht.pdf") plot(dat[,8],dat[,9],main="weir1",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() plot(dat[,15],dat[,16],main="weir2",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,13) grid() plot(dat[,22],dat[,23],main="weir3",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() plot(dat[,29],dat[,30],main="weir4",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() plot(dat[,36],dat[,37],main="weir5",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() plot(dat[,43],dat[,44],main="weir6",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() plot(dat[,50],dat[,51],main="weir7",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() plot(dat[,57],dat[,58],main="weir8",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() plot(dat[,64],dat[,65],main="weir9",xlab="height",ylab="maxheight",cex=.3,col="red") abline(0,1) grid() dev.off() ############################################################ # run regressions to look for best gap filling candidates ############################################################ # assign each month to a season # note that 0 denotes all data seasoncolors=c(1,1,2,2,2,3,3,3,4,4,4,1) # num will be number of weir that needs to be gap filled num=c(rep(1,8),rep(2,8),rep(3,8),rep(4,8),rep(5,8),rep(6,8),rep(7,8),rep(8,8),rep(9,8)) # numfill will be number of weir to be used for the gap filling numfill=c(2,3,4,5,6,7,8,9,1,3,4,5,6,7,8,9,1,2,4,5,6,7,8,9,1,2,3,5,6,7,8,9,1,2,3,4,6,7,8,9,1,2,3,4,5,7,8,9,1,2,3,4,5,6,8,9,1,2,3,4,5,6,7,9,1,2,3,4,5,6,7,8) # modnames are num.numfill and are used to label models modnames=c("1.2","1.3","1.4","1.5","1.6","1.7","1.8","1.9","2.1","2.3","2.4","2.5","2.6","2.7","2.8","2.9","3.1","3.2","3.4","3.5","3.6","3.7","3.8","3.9","4.1","4.2","4.3","4.5","4.6","4.7","4.8","4.9","5.1","5.2","5.3","5.4","5.6","5.7","5.8","5.9","6.1","6.2","6.3","6.4","6.5","6.7","6.8","6.9","7.1","7.2","7.3","7.4","7.5","7.6","7.8","7.9","8.1","8.2","8.3","8.4","8.5","8.6","8.7","8.9","9.1","9.2","9.3","9.4","9.5","9.6","9.7","9.8") # create a dataframe to hold results of models modeloutput=data.frame(target=numeric(0),fillweir=numeric(0),modelname=character(0),season=numeric(0),coef=numeric(0),slope=numeric(0),rsq=numeric(0),adj.rsq=numeric(0),stringsAsFactors=FALSE) # begin a model number counter and create %03d format to be used to label files z=181 index=sprintf("%03d",z) # turn off scientific notation options(scipen=999) # run regression for weir pairs # note that R runs out of memory with all models, so I split this here and run twice #for(i in seq(1,36)){ for(i in seq(37,72)){ wnum= c() wfill=c() s=0 # set season to 0 for full year modtype="annual" # subset the weir files for weir and fill-weir # note - for the annual runs, this could just be a simple assignment str=paste("wnum=w",num[i],".m",sep="") eval(parse( text = str)) str2=paste("wfill=w",numfill[i],".m",sep="") eval(parse( text = str2)) print(dim(wnum)) print(dim(wfill)) # assign modelname=paste("lms",num[i],".",numfill[i],".",s,sep="") print(modelname) # run the model assign(paste("lms",num[i],".",numfill[i],".",s,sep=""),eval(parse(text=paste("lm(wnum[,3] ~ wfill[,3])",sep="")) )) # pull out info from the model run intercept=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[1]",sep=""))) slope=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[2]",sep=""))) rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$r.squared",sep=""))) adj.rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$adj.r.squared",sep=""))) pval=eval(parse(text=paste("format.pval(summary(lms",num[i],".",numfill[i],".",s,")$coefficients[2,4])",sep=""))) # print results into the model summary table print(index) modeloutput[index,1]=num[i] modeloutput[index,2]=numfill[i] modeloutput[index,3]=as.character(modelname) modeloutput[index,4]=s modeloutput[index,5]=intercept modeloutput[index,6]=slope modeloutput[index,7]=rsq modeloutput[index,8]=adj.rsq #modeloutput[index,9]=pval # get cols of data to use below in graphs xs=wnum[,3] ys=wfill[,3] # find upper limit to axes using max of both weirs axlimit=max(c(xs,ys),na.rm=TRUE) # set graph format to one per page par(mfrow=c(1,1)) # define graph labels ylabel=paste("Weir ",num[i]," stage height (ft)",sep="") xlabel=paste("Weir ",numfill[i]," stage height (ft)",sep="") maintitle=paste("Weir ",num[i]," predicted by ",numfill[i]," Season = ",s,sep="") # open png device png(paste(index,".png",sep="")) # scatterplot of the two weirs plot(ys,xs,pch=20,cex=.3,xlab=xlabel,ylab=ylabel,xlim=c(0,axlimit),ylim=c(0,axlimit),col=seasoncolors[as.POSIXlt(w1.m$DATETIME)$mon + 1]) title(main=maintitle) abline(0,1) # add the regression line and the equations and rsq str3=paste("abline(lms",num[i],".",numfill[i],".",s,",col=\"red\")",sep="") eval(parse( text = str3)) mtext(padj=.5,cex=.75,side=3,line=-1,bquote(y== .(slope) * x + .(intercept) )) mtext(padj=.5,cex=.75,side=3,line=-2,bquote(Rsq == .(adj.rsq))) grid() # plot grid dev.off() # turn off annual scatterplot device # increment model number counter index=sprintf("%03d",as.numeric(index)+1) # plot full timeline of both weirs pdf(paste("sensor_timeline.",num[i],".",numfill[i],".pdf",sep=""),16,6) plot(wnum[,1],wnum[,3],type='l',col="blue",main="Weir1",xlab="Date",ylab="Height(ft)",lwd=.5, cex=.1) lines(wfill[,1],wfill[,3],col="red",lwd=.5, cex=.1) legend("topleft",c(paste("Weir ",num[i],sep=""),paste("Weir ",numfill[i]),sep=""),lty=c(1,1), col=c("blue","red")) dev.off() ###################################################################### # now run models for each season ###################################################################### #set 2x2 plot layout for season loop to follow png(paste(index,".png",sep="")) par(mfrow=c(2,2)) # create an inner loop from 1 to 4 to create conditions subsets by season # run models for all seasons for(s in 1:4){ # get list of months that fall in current season, decrement by one since POSIX month counts from 0 months=which(seasoncolors==s)-1 wnum= c() wfill=c() modtype="seasonal" # subset the weir files for weir and fill-weir for each season # this is a little messy since the conditional statement has to be used 3 times # to get the three months in the season, and then the data are concatenated with rbind for(c in 1:3){ print(paste("subsetting ",months[c])) str=paste("wnum",c,"=w",num[i],".m[as.POSIXlt(w",num[i],".m$DATETIME)$mon %in% ",months[c],",]",sep="") eval(parse( text = str)) str2=paste("wfill",c,"=w",numfill[i],".m[as.POSIXlt(w",numfill[i],".m$DATETIME)$mon %in% ",months[c],",]",sep="") eval(parse( text = str2)) } wnum=rbind(wnum1,wnum2,wnum3) wfill=rbind(wfill1,wfill2,wfill3) print(dim(wnum)) print(dim(wfill)) modelname=paste("lms",num[i],".",numfill[i],".",s,sep="") print(modelname) # run the regression model assign(paste("lms",num[i],".",numfill[i],".",s,sep=""),eval(parse(text=paste("lm(wnum[,3] ~ wfill[,3])",sep="")) )) # pull out summary info from the models intercept=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[1]",sep=""))) slope=eval(parse(text=paste("lms",num[i],".",numfill[i],".",s,"$coefficients[2]",sep=""))) rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$r.squared",sep=""))) adj.rsq=eval(parse(text=paste("summary(lms",num[i],".",numfill[i],".",s,")$adj.r.squared",sep=""))) pval=eval(parse(text=paste("format.pval(summary(lms",num[i],".",numfill[i],".",s,")$coefficients[2,4])",sep=""))) print(index) # print values to model summary output table modeloutput[index,1]=num[i] modeloutput[index,2]=numfill[i] modeloutput[index,3]=as.character(modelname) modeloutput[index,4]=s modeloutput[index,5]=intercept modeloutput[index,6]=slope modeloutput[index,7]=rsq modeloutput[index,8]=adj.rsq #modeloutput[index,9]=pval # get cols of data to use below in graphs xs=wnum[,3] ys=wfill[,3] axlimit=max(c(xs,ys),na.rm=TRUE) # define graph labels ylabel=paste("Weir ",num[i]," stage height (ft)",sep="") xlabel=paste("Weir ",numfill[i]," stage height (ft)",sep="") maintitle=paste("Weir ",num[i]," predicted by ",numfill[i]," Season = ",s,sep="") # scatterplot of the two weirs plot(ys,xs,pch=20,cex=.3,xlab=xlabel,ylab=ylabel,xlim=c(0,axlimit),ylim=c(0,axlimit),col=s) title(main=maintitle) abline(0,1) # add the regression line and the equations and rsq str3=paste("abline(lms",num[i],".",numfill[i],".",s,",col=\"red\")",sep="") eval(parse( text = str3)) mtext(padj=.5,cex=.75,side=3,line=-1,bquote(y== .(slope) * x + .(intercept) )) mtext(padj=.5,cex=.75,side=3,line=-2,bquote(Rsq == .(adj.rsq))) grid() # increment model number counter index=sprintf("%03d",as.numeric(index)+1) } #end season loop dev.off() # close the device with the 4 seasonal graphs on a single page }# end model loop # write out the model summary data # NOTE - change this from PART1 to PART 2 when splitting up the 360 models #write.csv(modeloutput,"model_summary_table_PART1.csv", row.names=FALSE) write.csv(modeloutput,"model_summary_table_PART2.csv", row.names=FALSE)