# Interactive scratch history, part 1: basic R warm-up, a regression / ANOVA /
# graphics walk-through, and first attempts at plotting exceptionRec counts
# over time.
#
# Cleaned up so the file can be source()d from top to bottom:
#   * statements are one per line again (the history had been collapsed),
#   * mistyped calls (`deatch`, `atatch`), bare look-ups of undefined names
#     (`chron`, `e`) and a mid-script `q()` are removed,
#   * attach()/detach() is replaced by with(),
#   * packages are attached once, before their first use,
#   * arguments are spelled out instead of relying on partial matching.

# ---- packages ----
# One-off installs; skipped when the package is already available.
for (pkg in c("chron", "psych")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(chron)
library(tidyverse)
library(lubridate)
library(xts)
library(zoo)
library(ggplot2)
.libPaths()

# ---- warm-up ----
1 + 2
x <- c(3, 4, 45, 56, 7, 7)
x
x * 3

mydata <- read.csv(
  "c:/kewoo/eai/d20171024.0930-1055.AcurityConnector.with-header.csv",
  header = TRUE
)
head(mydata)
head(mydata, 2)
str(mydata)
rm(x)

# ---- random scatter ----
x <- rnorm(50)
y <- rnorm(x)
plot(x, y)
ls()
rm(x, y)

# ---- unweighted vs weighted regression ----
x <- 1:20
w <- 1 + sqrt(x) / 2
dummy <- data.frame(x = x, y = x + rnorm(x) * w)
dummy
fm <- lm(y ~ x, data = dummy)
summary(fm)
fm1 <- lm(y ~ x, data = dummy, weights = 1 / w^2)
summary(fm1)

plot(fitted(fm), resid(fm),
  xlab = "Fitted values",
  ylab = "Residuals",
  main = "Residuals vs Fitted"
)
abline(coef(fm))

# with() replaces attach(dummy): `y` exists only as a column of `dummy`.
# The attach/detach experiments with `xdummy` were dropped; they removed
# `dummy` and then referred to the no-longer-visible `x1`.
with(dummy, plot(x, y))
lrf <- with(dummy, lowess(x, y))
lines(dummy$x, lrf$y)
abline(0, 1, lty = 3)
abline(coef(fm))
abline(coef(fm1), col = "red")

# ---- section 2: morley speed-of-light data ----
filepath <- system.file("data", "morley.tab", package = "datasets")
filepath
file.show(filepath)
mm <- read.table(filepath)
mm
mm$Expt <- factor(mm$Expt)
mm$Run <- factor(mm$Run)
with(mm, plot(Expt, Speed, main = "Speed of Light Data", xlab = "Experiment No."))
fm <- aov(Speed ~ Run + Expt, data = mm)
summary(fm)
fm0 <- update(fm, . ~ . - Run)
anova(fm0, fm)
rm(fm, fm0)

# ---- contour and image plots ----
pi
exp(1) # R has no built-in constant `e`
x <- seq(-pi, pi, length.out = 50)
y <- x
f <- outer(x, y, function(x, y) cos(y) / (1 + x^2))
f
oldpar <- par(no.readonly = TRUE)
par(pty = "s")
contour(x, y, f)
contour(x, y, f, nlevels = 15, add = TRUE)
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 15)
par(oldpar)
image(x, y, f)
image(x, y, fa)
objects()
rm(x, y, f, fa)

# ---- complex-plane plots ----
th <- seq(-pi, pi, length.out = 100)
z <- exp(1i * th)
par(pty = "s")
plot(z, type = "l")
w <- rnorm(100) + rnorm(100) * 1i
w <- ifelse(Mod(w) > 1, 1 / w, w)
plot(w, xlim = c(-1, 1), ylim = c(-1, 1), pch = "+", xlab = "x", ylab = "y")
lines(z)
w <- sqrt(runif(100)) * exp(2 * pi * runif(100) * 1i)
plot(w, xlim = c(-1, 1), ylim = c(-1, 1), pch = "+", xlab = "x", ylab = "y")
lines(z)
rm(th, w, z)
par(oldpar) # restore the square plotting region set above
# (the history called q() here, which would end the session before the rest ran)

# ---- time series pasted from the clipboard ----
# The history read the clipboard into `x` but then worked on `data`, which was
# never assigned (the name resolves to the utils::data function), so every
# `data[...]` / `data$...` expression failed. `x` is used throughout instead.
# NOTE(review): assumes the first clipboard column is TIME, formatted as
# "%Y-%m-%d %H:%M:%S" - confirm against the copied sheet.
x <- read.table(
  file = "clipboard", sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# Parse TIME before building sequences from it. The original format string
# "%Y-%m%-%d %H:%M:%s" was malformed, and strptime() returns POSIXlt, which
# does not belong in a data frame column.
x <- mutate(x, TIME = as.POSIXct(TIME, format = "%Y-%m-%d %H:%M:%S"))
x
df1.zoo <- zoo(x[, -1], x[, 1])
df2 <- as.data.frame(as.zoo(merge(
  as.xts(df1.zoo),
  as.xts(zoo(, seq(start(df1.zoo), end(df1.zoo), by = 10)))
)))
data.length <- length(x$TIME)
time.min <- x$TIME[1]
time.max <- x$TIME[data.length]
all.dates <- seq(time.min, time.max, by = "min")

start <- now()
seq(start, start + days(3), by = "15 min")

# ---- 2017-11-01: graph exceptionRec count after adding extra data ----
x <- read.table(
  file = "c:/kewoo/eai/exceptionRec.counts.txt",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
vdata <- mutate(x, ts = ymd_hms(ts))
plot(x = vdata$ts, y = vdata$exceptionrec_ct)
qplot(ts, exceptionrec_ct, data = vdata)

# Pad the series onto a regular 10-second grid; grid points with no
# observation become NA after the merge and are then set to 0.
df1.zoo <- zoo(vdata[, -1], vdata[, 1])
xts01 <- merge(
  as.xts(df1.zoo),
  as.xts(zoo(, seq(start(df1.zoo), end(df1.zoo), by = 10)))
)
xts01[is.na(xts01)] <- 0
autoplot(xts01)
plot(xts01)

# INTERESTING: from https://stackoverflow.com/questions/3386850/how-can-i-change-xts-to-data-frame-and-keep-index-in-r
index(xts01)
coredata(xts01)
fxts01 <- fortify(xts01)
# Scratch history, part 2: bar plot of the padded exceptionRec series, the
# "legacy curiosity" data-frame conversion, and the first look at the
# 2017-10-24 all-EAI transaction extract.
#
# The history had been pasted into itself at this point: a truncated
# `ggplot(rd20171024, aes(interval(start)))` was fused with a second full copy
# of everything above (`...)))1 + 2 x <- c(...)`), which is a syntax error.
# The repeated copy only re-ran statements that already appear earlier, so it
# is removed; the more complete second version of the 2017-11-02 notes is kept.

barplot(fxts01$as.xts.df1.zoo., names.arg = fxts01$Index)

## START LEGACY CURIOSITY
merged01 <- merge(
  as.xts(df1.zoo),
  as.xts(zoo(, seq(start(df1.zoo), end(df1.zoo), by = 10)))
)
df2 <- as.data.frame(as.zoo(merged01))
df2
df2[is.na(df2)] <- 0
# NOTE(review): after as.data.frame(as.zoo(...)) the time index appears to
# live in the row names, not in a `ts` column, and the single value column is
# not called `exceptionrec_ct` (fortify() above names it `as.xts.df1.zoo.`).
# class()/str() below show what is actually there; the plot therefore takes
# the times from the merged series and the values from the first column.
class(df2$ts)
str(df2)
plot(index(merged01), df2[[1]])
if (interactive()) {
  View(df2)
}
## END LEGACY CURIOSITY: CARRY ON

## 2017-11-02: C:/kewoo/eai/d20171019.0950-1200.allEAI.csv using tidyverse
## OLD WAY
rd20171019 <- read.table(
  file = "C:/kewoo/eai/d20171019.0950-1200.allEAI.csv",
  sep = "\t", header = TRUE, stringsAsFactors = FALSE
)
# rd20171019 <- read_csv("C:/kewoo/eai/d20171019.0950-1200.allEAI.csv")
rd20171024 <- read_csv("C:/kewoo/eai/d20171024.0930-1055.allEAI.csv")

plot(rd20171024$start, rd20171024$duration_ms)
qplot(`start`, duration_ms, data = rd20171024)
hist(rd20171024$start, "mins", format = "%H:%M")

# can change width unlike base::hist
ggplot(rd20171024, aes(start)) +
  geom_histogram(binwidth = 5)

# NOTE(review): the history mapped the Interval object itself
# (`aes(interval(rd20171024$start, rd20171024$endt))`). Presumably a histogram
# of interval lengths in seconds was intended - confirm.
ggplot(rd20171024, aes(int_length(interval(start, endt)))) +
  geom_histogram(binwidth = 1)

interval(rd20171024$start, rd20171024$endt)

# something to match the simul.jy
# transmute(rd20171024,
#   ts = ??? 10-sec-interval times ???,
#   inflight_count = ??? count( ts %within% interval(rd20171024$start, rd20171024$endt) ???)
# 2017-11-03: count in-flight transactions per time window with lubridate
# intervals. Each fxNN(arg1) takes one Interval window and returns how many
# transaction intervals overlap it.

library(stringr) # str_detect(); attached before its first use below

# Apply a per-window counting function `fx` to every element of the Interval
# vector `windows`, returning one integer per window. Indexing with `[` keeps
# each element an Interval; lapply() over an Interval goes through as.list(),
# which is not guaranteed to do so, and it returned a list that plot() then
# had to cope with.
count_per_window <- function(windows, fx) {
  vapply(seq_along(windows), function(i) fx(windows[i]), integer(1))
}

tspan01 <- seq(min(rd20171024$start), max(rd20171024$endt), by = 1000)
intervals01 <- interval(tspan01, tspan01 + 1000)
sum(int_overlaps(intervals01[1], interval(rd20171024$start, rd20171024$endt)))
# NOTE(review): the two Interval vectors have different lengths, so this
# compares them element-wise with recycling rather than window-by-window;
# kept only because the history assigned it.
overlap_count01 <- sum(
  int_overlaps(intervals01, interval(rd20171024$start, rd20171024$endt))
)

# TODO: check https://stackoverflow.com/questions/28195996/count-number-of-rows-matching-a-criteria
# https://www.statmethods.net/input/contents.html
fx01 <- function(arg1) {
  sum(int_overlaps(arg1, interval(rd20171024$start, rd20171024$endt)))
}
tspan01 <- seq(min(rd20171024$start), max(rd20171024$endt), by = 10)
# The history kept the 1000-second windows here, so tspan01 and counts01 had
# different lengths; the windows are rebuilt to match the 10-second grid.
intervals01 <- interval(tspan01, tspan01 + 10)
counts01 <- count_per_window(intervals01, fx01)
# the counts are very high because rd20171024 includes all EAI transactions from 0930-1055
plot(tspan01, counts01)

fx02 <- function(arg1) {
  ac_rows <- grep("AcurityConnector", rd20171024$componentname)
  sum(int_overlaps(
    arg1,
    interval(rd20171024$start[ac_rows], rd20171024$endt[ac_rows])
  ))
}
tspan01 <- seq(min(rd20171024$start), max(rd20171024$endt), by = 60)
intervals01 <- interval(tspan01, tspan01 + 60)
counts01 <- count_per_window(intervals01, fx02)
plot(tspan01, counts01)

# AcurityConnector subset; defined before fx03, which the history used first.
rd20171024.AC <- dplyr::filter(
  rd20171024, str_detect(componentname, "AcurityConnector")
)

fx03 <- function(arg1) {
  sum(int_overlaps(arg1, interval(rd20171024.AC$start, rd20171024.AC$endt)))
}
tspan03 <- seq(min(rd20171024.AC$start), max(rd20171024.AC$endt), by = 10)
intervals03 <- interval(tspan03, tspan03 + 10)
counts03 <- count_per_window(intervals03, fx03)
plot(tspan03, counts03)

# Same counts, with the transaction intervals precomputed as a column.
rd20171024.ACwithIntervals <- dplyr::mutate(
  rd20171024.AC,
  cInt01 = interval(start, endt)
)
fx04 <- function(arg1) {
  sum(int_overlaps(arg1, rd20171024.ACwithIntervals$cInt01))
}
tspan04 <- seq(
  min(rd20171024.ACwithIntervals$start),
  max(rd20171024.ACwithIntervals$endt),
  by = 10
)
intervals04 <- interval(tspan04, tspan04 + 10)
counts04 <- count_per_window(intervals04, fx04)
plot(tspan04, counts04)

rd20171024.withIntervals <- dplyr::mutate(
  rd20171024,
  cInt01 = interval(start, endt)
)
fx05 <- function(arg1) {
  sum(int_overlaps(arg1, rd20171024.withIntervals$cInt01))
}
tspan05 <- seq(
  min(rd20171024.withIntervals$start),
  max(rd20171024.withIntervals$endt),
  by = 10
)
intervals05 <- interval(tspan05, tspan05 + 10)
counts05 <- count_per_window(intervals05, fx05)
plot(tspan05, counts05)

# Attempts to vectorise without a per-window call. Kept as comments: they
# overwrote the working counts04 with a single number (or failed outright),
# after which plot(tspan04, counts04) could not work.
# counts04 <- sum(int_overlaps(intervals04, rd20171024.ACwithIntervals$cInt01)) ### DOESN'T WORK
# counts04 <- sum(int_overlaps(tspan04, rd20171024.ACwithIntervals$cInt01)) ### DOESN'T WORK

# dplyr spelling of "count the rows overlapping one window".
# NOTE(review): the history referred to `intervals01.t$int_col[4]` and `col1`,
# neither of which is defined anywhere visible. The first is presumably the
# 4th window, so intervals01[4] is used; the second is left as a comment.
dplyr::summarise(
  dplyr::filter(
    rd20171024,
    int_overlaps(interval(start, endt), intervals01[4])
  ),
  sum = dplyr::n()
)$sum[1]
# summarise(filter(rd20171024, int_overlaps(interval(start, endt), col1)), sum = n())$sum[1]

# seq() needs scalar from/to, so it cannot expand a whole column at once
# (see the "'from' must be of length 1" note below):
# rd20171024.withIntervalList <- mutate(rd20171024, ints = seq(start, endt, by=10))
qplot(`start`, duration_ms, data = rd20171024)

# 20171108
# plyr is attached for ddply(). Attaching it after dplyr masks mutate(),
# summarise() and count(), so the dplyr verbs in this section are
# namespace-qualified.
library(plyr) ## for ddply
library(tidyverse)
library(lubridate)
library(xts)
library(zoo)
library(ggplot2)

rd20171024 <- read_csv("C:/kewoo/eai/d20171024.0930-1055.allEAI.csv")
rd20171024.AC <- dplyr::filter(
  rd20171024, str_detect(componentname, "AcurityConnector")
)

# google search "lubridate round time"
# round_date courtesy of https://rdrr.io/cran/lubridate/man/round_date.html
# r10endt is rounded from `endt`. The history rounded `start` for both
# columns, which made every transaction span exactly one 10-second tick.
rd20171024.AC.round10sec <- dplyr::mutate(
  rd20171024.AC,
  r10start = round_date(start, "10 seconds"),
  r10endt = round_date(endt, "10 seconds")
)

# the commented seq() attempts failed, but brought me crucial error message:
# 'from' must be of length 1
df01 <- data.frame(st = rd20171024$start, et = rd20171024$endt) # , ints = seq(st,et,by=10))
# df01$ints <- seq(df01$st, df01$et, by=10)
# desperate google search "r seq from must be of length 1"
# start and end date expansion courtesy of
# https://stackoverflow.com/questions/11494511/expand-ranges-defined-by-from-and-to-columns
#
# Expand every transaction into the 10-second ticks between its rounded start
# and end, then count transactions per tick.
# plyr and dplyr both export summarise() and count(); which one a bare call
# reaches depends on attach order (see the timing note below), so every verb
# here is namespace-qualified.
tsa01 <- plyr::ddply(
  rd20171024.AC.round10sec, "transactionid", plyr::summarise,
  ints = seq(r10start, r10endt, by = 10)
)
tsa02 <- plyr::count(as_tibble(tsa01), "ints")
ggplot(tsa02, aes(x = ints, y = freq)) +
  geom_line()

# If loading plyr after dplyr (bad thing), runs <41secs and gives desired result
# If loading plyr before dplyr, takes 3-4 minutes and has incorrect result
# UNLESS using plyr::summarise and plyr::count
#
# r10endt is rounded from `endt`. The history rounded `start` for both
# columns, so each transaction was counted only in its starting tick.
rd20171024.freqInts <- rd20171024 %>%
  dplyr::transmute(
    transactionid,
    r10start = round_date(start, "10 seconds"),
    r10endt = round_date(endt, "10 seconds")
  ) %>%
  plyr::ddply(
    "transactionid", plyr::summarise,
    ints = seq(r10start, r10endt, by = 10)
  ) %>%
  as_tibble() %>%
  plyr::count("ints")

rd20171024.ACfreqInts <- rd20171024 %>%
  dplyr::filter(str_detect(componentname, "AcurityConnector")) %>%
  dplyr::transmute(
    transactionid,
    r10start = round_date(start, "10 seconds"),
    r10endt = round_date(endt, "10 seconds")
  ) %>%
  plyr::ddply(
    "transactionid", plyr::summarise,
    ints = seq(r10start, r10endt, by = 10)
  ) %>%
  as_tibble() %>%
  plyr::count("ints")

# google search "overlay plots in r ggplot2"
# from https://stackoverflow.com/questions/9109156/ggplot-combining-two-plots-from-different-data-frames
ggplot() +
  geom_line(data = rd20171024.ACfreqInts, aes(x = ints, y = freq), color = "green") +
  geom_line(data = rd20171024.freqInts, aes(x = ints, y = freq), color = "red")
ggplot() +
  geom_line(data = rd20171024, aes(x = start, y = duration_ms), color = "blue")

# NOTE(review): `tmp02` is never assigned anywhere in this history. It is
# presumably the rounded start/end table, so it is rebuilt here - confirm.
tmp02 <- dplyr::transmute(
  rd20171024,
  transactionid,
  r10start = round_date(start, "10 seconds"),
  r10endt = round_date(endt, "10 seconds")
)

# this doesn't work to expand intervals across transactionids even though it's supposed to be equivalent
# according to https://blog.rstudio.com/2014/01/17/introducing-dplyr/
# (kept as a comment so the script keeps running; `.$r10end` was a typo for r10endt)
# tmp02 %>% group_by(transactionid) %>% summarise(ints = seq(.$r10start, .$r10endt, by = 10))

# dplyr solution from https://stackoverflow.com/questions/11494511/expand-ranges-defined-by-from-and-to-columns
# it MIGHT work, but I didn't let it run beyond 40sec (19% complete, ~ 3 m remaining?!)
# It creates a tibble with name and ints, but I can't apply count(int) to it
# -> the row-wise grouping is dropped first, and dplyr::count() is named
#    explicitly: the plot below reads column `n`, which plyr::count() does
#    not produce.
tmp03 <- tmp02 %>%
  dplyr::rowwise() %>%
  dplyr::do(data.frame(
    name = .$transactionid,
    ints = seq(.$r10start, .$r10endt, by = 10)
  ))
tmp04 <- tmp03 %>%
  dplyr::ungroup() %>%
  dplyr::count(ints)
ggplot() +
  geom_line(data = tmp04, aes(x = ints, y = n), color = "purple")

# data.table way this stuff feels faster than dplyr but isn't very FP when using := methods
# alternatively, use the .() aka list() feature and create a new table. Still faster than dplyr or plyr
# https://mran.microsoft.com/web/packages/data.table/vignettes/datatable-intro.html
library(data.table) # for fread and other data.table functions
library(tidyverse) # for as_tibble to feed into ggplot
library(lubridate) # for round_date
library(fasttime) # for fastPOSIXct

dt01 <- fread("C:/kewoo/eai/d20171024.0930-1055.allEAI.csv")

# exploratory
str(dt01)
nrow(dt01)
names(dt01)

# `:=` adds the rounded columns to dt01 by reference
dt01[, startPct := round_date(as.POSIXct(start), "10 seconds")]
dt01[, endtPct := round_date(as.POSIXct(endt), "10 seconds")]

# create two new columns in the same statement, a bit hard to read though because the column names
# are separated from their definitions by the := token
dt01[, c("startPct", "endtPct") := list(
  round_date(as.POSIXct(start), "10 seconds"),
  round_date(as.POSIXct(endt), "10 seconds")
)]

# gain speed using fasttime - what happens?
# Takes ~ 3 sec instead of ~ 11 sec
# https://stackoverflow.com/questions/29140416/r-data-table-fread-read-column-as-date
# https://stackoverflow.com/questions/12786335/why-is-as-date-slow-on-a-character-vector
# https://cran.r-project.org/web/packages/fasttime/fasttime.pdf
# replace as.POSIXct() with fastPOSIXct()
dt01[, c("startPct", "endtPct") := list(
  round_date(fastPOSIXct(start), "10 seconds"),
  round_date(fastPOSIXct(endt), "10 seconds")
)]

# new columns without using :=
dt01b <- dt01[, list(
  transactionid,
  startPct = round_date(fastPOSIXct(start), "10 seconds"),
  endtPct = round_date(fastPOSIXct(endt), "10 seconds")
)]

# some magic happens here: grouping by transactionid hands seq() one
# start/end pair at a time
dt02 <- dt01[, list(ints = seq(startPct, endtPct, by = 10)), by = transactionid]
# dt02 = dt01[, list(ints = seq(startPct, endtPct, by=10)), by = correlationid]
# ERROR: 'from' must be of length 1 because correlationid isn't unique for start/endt pairs

# filter on componentname, list ints and componentname, group by transactionid
dt02 <- dt01[
  componentname %like% "AcurityConnector",
  list(ints = seq(startPct, endtPct, by = 10), componentname),
  by = transactionid
]

# Wrapping "freq = .N" in a list ensures a data.table object is returned (https://mran.microsoft.com/web/packages/data.table/vignettes/datatable-intro.html)
dt03 <- dt02[, list(freq = .N), by = ints]
tb01 <- as_tibble(dt03)
ggplot() +
  geom_line(data = tb01, aes(x = ints, y = freq), color = "blue")

# PS: tidyverse+plyr returns different results to data.table
# possibly because different date conversions are being used at the time of data load
# NOTE(review): when these results were compared, the plyr pipeline in this
# history derived its rounded end time from `start` rather than `endt`, which
# would also explain the difference.

# 20171109: chain data.tables, split over multiple lines
# show transaction flight behaviours over time
tb01.allEAI <- dt01[, list(
  transactionid,
  startPct = round_date(fastPOSIXct(start), "10 seconds"),
  endtPct = round_date(fastPOSIXct(endt), "10 seconds")
)][
  , list(intervals = seq(startPct, endtPct, by = 10)),
  by = transactionid
][
  , list(txCount = .N),
  by = intervals
] %>%
  as_tibble()

tb01.AC <- dt01[componentname %like% "AcurityConnector", list(
  transactionid,
  startPct = round_date(fastPOSIXct(start), "10 seconds"),
  endtPct = round_date(fastPOSIXct(endt), "10 seconds")
)][
  , list(intervals = seq(startPct, endtPct, by = 10)),
  by = transactionid
][
  , list(txCount = .N),
  by = intervals
] %>%
  as_tibble()

ggplot() +
  geom_line(data = tb01.AC, aes(x = intervals, y = txCount), color = "blue") +
  geom_line(data = tb01.allEAI, aes(x = intervals, y = txCount), color = "red")

# 20171110
dt01[, list(
  transactionid,
  startPct = round_date(fastPOSIXct(start), "10 seconds"),
  endtPct = round_date(fastPOSIXct(endt), "10 seconds")
)][
  , list(intervals = seq(startPct, endtPct, by = 10)),
  by = transactionid
]

# The history called seq() here without `by = transactionid`, which passes
# whole columns as from/to and fails with "'from' must be of length 1".
dt01[1:100, list(
  transactionid,
  startPct = round_date(fastPOSIXct(start), "10 seconds"),
  endtPct = round_date(fastPOSIXct(endt), "10 seconds")
)][
  , seq(startPct, endtPct, by = 10),
  by = transactionid
]

# toy example of the same per-group seq() expansion
data.table(a = 0:200)[, .(a, b = a * 10)][, seq(a, b, by = 3), by = a]

dt01[, .N, by = transactiontype][order(-N)]
tb01.allEAI

dt02 <- dt01[, list(
  transactionid,
  transactiontype,
  startPct = round_date(fastPOSIXct(start), "10 seconds"),
  endtPct = round_date(fastPOSIXct(endt), "10 seconds")
)]

# find interval with greatest count of transactions
# (filt01 holds more than one value when the maximum count is tied)
filt01 <- dt02[
  , list(intervals = seq(startPct, endtPct, by = 10)),
  by = list(transactionid, transactiontype)
][
  , .N,
  by = list(intervals)
][N == max(N), intervals]

dt03.allEAI <- dt02[
  , list(intervals = seq(startPct, endtPct, by = 10)),
  by = list(transactionid, transactiontype)
]

# %in% instead of ==, so a tied maximum does not silently recycle filt01
# against startPct. This keeps the transactions that START in the busiest
# interval(s), as the history did.
dt03.filt01 <- dt02[
  startPct %in% filt01,
  list(intervals = seq(startPct, endtPct, by = 10)),
  by = list(transactionid, transactiontype)
]
dt03.filt01[
  , list(txCount = .N),
  by = list(transactiontype, intervals)
][order(-txCount)]

# 20171115