File: gfapy-plot-benchmarkdata.R

package info (click to toggle)
gfapy 1.0.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,932 kB
  • sloc: python: 11,549; sh: 167; makefile: 66
file content (120 lines) | stat: -rwxr-xr-x 4,040 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env Rscript
# (c) Giorgio Gonnella, ZBH, Uni Hamburg, 2017

# Command-line handling. Three positional arguments are expected:
#   <inputfile>  path of the benchmark table to read
#   <outpfx>     prefix used for every output file name
#   <variable>   which x variable to analyse: 'segments' or 'connectivity'
script.name <- "./gfapy-plot-benchmarkdata.R"
args <- commandArgs(trailingOnly = TRUE)

# Print the usage text and abort unless all three arguments were supplied.
if (length(args) < 3) {
  cat("Usage: ", script.name, " <inputfile> <outpfx> <variable>", "\n")
  cat("variable: either 'segments' or 'connectivity'\n")
  stop("Too few command-line parameters")
}

infname <- args[1]
outpfx <- args[2]
xvar <- args[3]

# Echo the chosen input file and output prefix.
cat("input data: ", infname, "\n")
cat("output prefix:", outpfx, "\n")

# Only the two known analysis modes are accepted.
if (!(xvar %in% c("segments", "connectivity"))) {
  stop("variable must be one of: segments, connectivity")
}

library("ggplot2")

#
# The following function is described here:
# http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/#Helper%20functions
# Licence: CC0 (https://creativecommons.org/publicdomain/zero/1.0/)
#
## Gives count, mean, standard deviation, standard error of the mean, and
## confidence interval (default 95%) of one column, computed per group.
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be
##               summarized
##   groupvars: a vector containing names of columns that contain grouping
##              variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the confidence level of the interval (default .95)
##   .drop: forwarded unchanged to the .drop argument of plyr::ddply
## Returns a data frame with one row per group containing the grouping
## columns followed by N (count), <measurevar> (the group mean), sd, se
## (standard error of the mean) and ci (half-width of the confidence
## interval, i.e. se times the t quantile with N-1 degrees of freedom).
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
  # plyr is used through its namespace (plyr::) instead of being attached
  # with library(): the search path is left untouched, and the calls cannot
  # be hijacked by another function named ddply or rename (e.g. dplyr's).

  # New version of length which can handle NA's: if na.rm==TRUE, don't count
  # them
  length2 <- function (x, na.rm=FALSE) {
    if (na.rm) sum(!is.na(x))
    else       length(x)
  }

  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd
  datac <- plyr::ddply(data, groupvars, .drop=.drop,
                       .fun = function(xx, col) {
                         c(N    = length2(xx[[col]], na.rm=na.rm),
                           mean = mean   (xx[[col]], na.rm=na.rm),
                           sd   = sd     (xx[[col]], na.rm=na.rm)
                           )
                       },
                       measurevar
                       )

  # Rename the "mean" column so that it carries the name of the measured
  # variable
  datac <- plyr::rename(datac, c("mean" = measurevar))

  datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval:
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval/2 + .5, datac$N-1)
  datac$ci <- datac$se * ciMult

  datac
}

# Read the tab-separated benchmark table; the first row holds column names.
data <- read.table(infname, header=TRUE, sep="\t")

# Fail early, with a readable message, when a column used further down is
# absent: the x variable of the selected mode plus the two measurements.
required.cols <- c(if (xvar == "segments") "lines" else "mult",
                   "time", "space")
missing.cols <- setdiff(required.cols, names(data))
if (length(missing.cols) > 0) {
  stop("input file ", infname, " lacks required column(s): ",
       paste(missing.cols, collapse=", "))
}

# Select the column used as x variable (and its axis label) for the mode
# requested on the command line.
if (xvar == "segments") {
  xvarname <- "lines"
  xlab <- "Lines (segments 1/3; dovetails 2/3)"
} else {
  xvarname <- "mult"
  xlab <- "Dovetails/segment (segments=4000)"
  # The regressions below are always fitted against "lines"; in this mode
  # the column is derived from "mult" as (mult + 1) * 4000.
  data[c("lines")] <- (data[c("mult")]+1)*4000
}

# Per-group summary of the "time" column (one row per value of the x
# variable); also used later for the time plot.
time.data <- summarySE(data, measurevar="time", groupvars=c(xvarname))
outfname <- paste0(outpfx,"_time.log")
# Everything printed until the matching sink() goes to the log file. The
# tryCatch/finally guarantees that output is restored even if a fit fails,
# and invisible() keeps the block's value from being auto-printed.
sink(outfname)
invisible(tryCatch({
  print(time.data)
  # Linear regression of time on the number of lines. The summary is printed
  # explicitly so it reaches the log regardless of how the script is run
  # (auto-printing only happens for top-level expressions).
  time.lm <- lm(time ~ lines, data=data)
  print(summary(time.lm))
  # Same linear model, fitted with both coefficients bounded below by 0.
  time.nls <- nls(time ~ b + a * lines,
                  data=data, start=list(a=0,b=0),
                  algorithm="port", lower=c(0,0))
  print(time.nls)
}, finally = sink()))

# Per-group summary of the "space" column (one row per value of the x
# variable); also used later for the space plot.
space.data <- summarySE(data, measurevar="space", groupvars=c(xvarname))
outfname <- paste0(outpfx,"_space.log")
# Everything printed until the matching sink() goes to the log file. The
# tryCatch/finally guarantees that output is restored even if a fit fails,
# and invisible() keeps the block's value from being auto-printed.
sink(outfname)
invisible(tryCatch({
  print(space.data)
  # Linear regression of space on the number of lines. The summary is printed
  # explicitly so it reaches the log regardless of how the script is run
  # (auto-printing only happens for top-level expressions).
  space.lm <- lm(space ~ lines, data=data)
  print(summary(space.lm))
  # Same linear model, fitted with both coefficients bounded below by 0.
  space.nls <- nls(space ~ b + a * lines,
                   data=data, start=list(a=0,b=0),
                   algorithm="port", lower=c(0,0))
  print(space.nls)
}, finally = sink()))

# Plot of the per-group mean time with +/- one standard error bars.
# Each PDF device is closed right after its plot is drawn, so that the file
# is completed before the next device is opened; invisible() suppresses the
# device name/number that dev.off() would otherwise auto-print.
outfname <- paste0(outpfx,"_time.pdf")
pdf(outfname)
print(ggplot(time.data, aes_string(x=xvarname, y="time")) +
    geom_errorbar(aes(ymin=time-se, ymax=time+se), width=2) +
        geom_line(size=0.2) + geom_point(size=3) +
        ylab("Total elapsed time (s)") +
        xlab(xlab))
invisible(dev.off())

# Plot of the per-group mean space with +/- one standard error bars.
outfname <- paste0(outpfx,"_space.pdf")
pdf(outfname)
print(ggplot(space.data, aes_string(x=xvarname, y="space")) +
    geom_errorbar(aes(ymin=space-se, ymax=space+se), width=2) +
        geom_line(size=0.2) + geom_point(size=3) +
        ylab("Memory peak (MB)") +
        xlab(xlab))
invisible(dev.off())