File: speedtest.R

package info (click to toggle)
xgboost 1.7.4-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 11,432 kB
  • sloc: cpp: 47,464; python: 26,479; java: 3,868; xml: 1,236; sh: 1,095; ansic: 919; makefile: 323; javascript: 19
file content (70 lines) | stat: -rw-r--r-- 1,945 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# install xgboost package, see R-package in root folder
require(xgboost)
require(gbm)
require(methods)

# Target size used to rescale the training weights (see `weight` below).
testsize <- 550000

# Load the training data and recode the Label column: "s" -> 1, anything
# else -> 0.
dtrain <- read.csv("data/training.csv", header = TRUE, nrows = 350001)
dtrain$Label <- as.numeric(dtrain$Label == "s")
# gbm.time = system.time({
#   gbm.model <- gbm(Label ~ ., data = dtrain[, -c(1,32)], n.trees = 120, 
#                    interaction.depth = 6, shrinkage = 0.1, bag.fraction = 1,
#                    verbose = TRUE)
# })
# print(gbm.time)
# Test result: 761.48 secs

# `label` must be defined before it is used for the weights and statistics
# below (and by the training code later in the script). The Label column was
# already converted to 0/1 above, so it can be used directly.
label <- dtrain[["Label"]]
# Columns 2..31 are taken as the feature matrix and column 32 as the
# per-row weight.
# NOTE(review): column positions are assumed from the indexing used here;
# confirm against the CSV header.
data <- as.matrix(dtrain[2:31])
# Rescale the weights so they are expressed relative to `testsize` rows
# rather than the number of rows actually loaded.
weight <- as.numeric(dtrain[[32]]) * testsize / length(label)

# Total weight of the positive (label 1) and negative (label 0) rows; their
# ratio is used later as scale_pos_weight.
sumwpos <- sum(weight * (label == 1.0))
sumwneg <- sum(weight * (label == 0.0))
print(paste(
  "weight statistics: wpos=", sumwpos,
  "wneg=", sumwneg,
  "ratio=", sumwneg / sumwpos
))

# Benchmark xgboost for several thread counts. Each timed run covers building
# the DMatrix, training, and saving the model, so the recorded times include
# all three steps.
threads <- c(1, 2, 4, 8, 16)
# Number of boosting rounds; the same for every run, so set once up front.
nrounds <- 120
# Preallocate one result slot per thread setting instead of growing the list.
xgboost.time <- vector("list", length(threads))
for (i in seq_along(threads)) {
  thread <- threads[i]
  xgboost.time[[i]] <- system.time({
    # -999.0 is passed as the marker for missing feature values.
    xgmat <- xgb.DMatrix(data, label = label, weight = weight, missing = -999.0)
    # "eval_metric" appears twice on purpose: both entries are passed through
    # to xgb.train (presumably so that both metrics are reported).
    # NOTE(review): "bst:eta" / "bst:max_depth" are legacy-prefixed parameter
    # names; confirm the installed xgboost version still honours them.
    param <- list("objective" = "binary:logitraw",
                  "scale_pos_weight" = sumwneg / sumwpos,
                  "bst:eta" = 0.1,
                  "bst:max_depth" = 6,
                  "eval_metric" = "auc",
                  "eval_metric" = "ams@0.15",
                  "nthread" = thread)
    # Evaluate on the training set itself during boosting.
    watchlist <- list("train" = xgmat)
    print("loading data end, start to boost trees")
    bst <- xgb.train(param, xgmat, nrounds, watchlist)
    # save out model
    xgb.save(bst, "higgs.model")
    print("finish training")
  })
}

# Print explicitly: a bare top-level expression is only auto-printed in
# interactive/Rscript use, not when the file is run through source().
print(xgboost.time)
# [[1]]
# user  system elapsed 
# 99.015   0.051  98.982 
# 
# [[2]]
# user  system elapsed 
# 100.268   0.317  55.473 
# 
# [[3]]
# user  system elapsed 
# 111.682   0.777  35.963 
# 
# [[4]]
# user  system elapsed 
# 149.396   1.851  32.661 
# 
# [[5]]
# user  system elapsed 
# 157.390   5.988  40.949