File: make-thread-bench

package info (click to toggle)
sbcl 2%3A2.6.2-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 52,008 kB
  • sloc: lisp: 535,135; ansic: 42,629; sh: 5,737; asm: 2,406; pascal: 717; makefile: 432; python: 56; cpp: 27
file content (143 lines) | stat: -rw-r--r-- 6,649 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
(defglobal *counter* 0
  "Tally of benchmark threads that have run.  Each test resets it to 0 and
afterwards asserts that it equals the number of threads it created.")
;; Proclaim the type in long form; the value is only ever a small count.
(declaim (type fixnum *counter*))

(defparameter howmany-threads 20000
  "Default number of threads each test creates in total.  They are not all
created at once: every test joins threads as it goes.")

(defun thread-lambda ()
  "Function run by every benchmark thread: atomically add 1 to *COUNTER*."
  (atomic-incf *counter* 1))

(defun test1 (&optional (n howmany-threads))
  "Create N threads strictly one after another, joining each thread before
the next one is made, then assert that *COUNTER* equals N."
  (setf *counter* 0)
  (dotimes (k n)
    ;; Name the thread first so the create and join steps read separately.
    (let ((worker (sb-thread:make-thread #'thread-lambda)))
      (sb-thread:join-thread worker)))
  (assert (= *counter* n)))

(defun test2 (&optional (n howmany-threads))
  "Create N threads, overlapping each thread's startup with the join of the
thread made just before it, then assert that *COUNTER* equals N.
N may be 0, in which case no thread is created or joined."
  (setq *counter* 0)
  (let ((prev-thread nil))
    (dotimes (i n)
      ;; while starting the new thread, join the previously made.
      (let ((new-thread (sb-thread:make-thread #'thread-lambda)))
        (when prev-thread
          (sb-thread:join-thread prev-thread))
        (setq prev-thread new-thread)))
    ;; Join the last thread made.  When N is 0 there is none, and calling
    ;; JOIN-THREAD on NIL would signal an error, so guard the call.
    (when prev-thread
      (sb-thread:join-thread prev-thread)))
  (assert (= *counter* n)))

(defun test3 (&optional (n howmany-threads))
  "Create N threads in batches of 10, joining every thread of a batch before
starting the next batch, then assert that *COUNTER* equals N.
When N is not a multiple of 10 a final, smaller batch makes up the remainder."
  (setq *counter* 0)
  ;; (/ n 10) would be a ratio for N not divisible by 10, which is not a valid
  ;; DOTIMES count, and full batches alone could never add up to N.  Split N
  ;; into whole batches plus a remainder instead.
  (multiple-value-bind (full-batches leftover) (floor n 10)
    (flet ((run-batch (size)
             ;; Start SIZE threads, then wait for all of them.
             (let ((threads '()))
               (dotimes (j size)
                 (push (sb-thread:make-thread #'thread-lambda) threads))
               (dolist (thread threads)
                 (sb-thread:join-thread thread)))))
      (dotimes (i full-batches)
        (run-batch 10))
      (when (plusp leftover)
        (run-batch leftover))))
  (assert (= *counter* n)))

Without :pauseless-threadstart
-------------------------------

$ perf stat ... --eval '(test1)' --quit
          6,435.72 msec task-clock:u              #    1.103 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
           219,735      page-faults:u             #    0.034 M/sec
     1,484,016,696      cycles:u                  #    0.231 GHz
    17,008,247,359      stalled-cycles-frontend:u # 1146.10% frontend cycles idle
       525,232,002      instructions:u            #    0.35  insn per cycle
                                                  #   32.38  stalled cycles per insn
       145,098,224      branches:u                #   22.546 M/sec
         4,045,481      branch-misses:u           #    2.79% of all branches

       5.834005191 seconds time elapsed
       0.684522000 seconds user
       6.391622000 seconds sys

$ perf stat ... --eval '(test2)' --quit
          6,383.26 msec task-clock:u              #    1.182 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
           220,625      page-faults:u             #    0.035 M/sec
     1,435,852,231      cycles:u                  #    0.225 GHz
    16,839,228,290      stalled-cycles-frontend:u # 1172.77% frontend cycles idle
       524,776,029      instructions:u            #    0.37  insn per cycle
                                                  #   32.09  stalled cycles per insn
       144,951,979      branches:u                #   22.708 M/sec
         4,018,908      branch-misses:u           #    2.77% of all branches

       5.401308431 seconds time elapsed
       0.773771000 seconds user
       6.235207000 seconds sys

$ perf stat ... --eval '(test3)' --quit
          7,813.04 msec task-clock:u              #    1.133 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
           131,699      page-faults:u             #    0.017 M/sec
     1,477,424,012      cycles:u                  #    0.189 GHz
    20,769,787,623      stalled-cycles-frontend:u # 1405.81% frontend cycles idle
       525,748,318      instructions:u            #    0.36  insn per cycle
                                                  #   39.51  stalled cycles per insn
       145,134,345      branches:u                #   18.576 M/sec
         4,412,369      branch-misses:u           #    3.04% of all branches

       6.894458409 seconds time elapsed
       0.641120000 seconds user
       7.864134000 seconds sys

Conclusion: approximately 300 microseconds to start a thread (5.4 to 6.9 seconds elapsed for 20,000 threads, i.e. about 270 to 345 microseconds each)

With :pauseless-threadstart
----------------------------

$ perf stat ... --eval '(test1)' --quit
          1,541.91 msec task-clock:u              #    1.068 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               633      page-faults:u             #    0.411 K/sec
       801,317,853      cycles:u                  #    0.520 GHz
     3,751,130,586      stalled-cycles-frontend:u #  468.12% frontend cycles idle
       352,170,311      instructions:u            #    0.44  insn per cycle
                                                  #   10.65  stalled cycles per insn
        81,884,155      branches:u                #   53.106 M/sec
         1,334,916      branch-misses:u           #    1.63% of all branches

       1.443664147 seconds time elapsed
       0.425407000 seconds user
       1.471034000 seconds sys

$ perf stat ... --eval '(test2)' --quit
          1,249.01 msec task-clock:u              #    1.550 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
               573      page-faults:u             #    0.459 K/sec
       668,480,325      cycles:u                  #    0.535 GHz
     3,086,418,550      stalled-cycles-frontend:u #  461.71% frontend cycles idle
       376,966,208      instructions:u            #    0.56  insn per cycle
                                                  #    8.19  stalled cycles per insn
        95,790,671      branches:u                #   76.694 M/sec
           918,120      branch-misses:u           #    0.96% of all branches

       0.805563424 seconds time elapsed
       0.325539000 seconds user
       1.162641000 seconds sys

$ perf stat ... --eval '(test3)' --quit
          1,308.09 msec task-clock:u              #    1.455 CPUs utilized
                 0      context-switches:u        #    0.000 K/sec
                 0      cpu-migrations:u          #    0.000 K/sec
             4,838      page-faults:u             #    0.004 M/sec
       660,514,151      cycles:u                  #    0.505 GHz
     3,230,465,234      stalled-cycles-frontend:u #  489.08% frontend cycles idle
       364,307,811      instructions:u            #    0.55  insn per cycle
                                                  #    8.87  stalled cycles per insn
        92,966,084      branches:u                #   71.070 M/sec
           730,714      branch-misses:u           #    0.79% of all branches

       0.899161220 seconds time elapsed
       0.339090000 seconds user
       1.227741000 seconds sys

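Conclusion: 40 to 100 microseconds to start a thread (0.81 to 1.44 seconds elapsed for 20,000 threads, i.e. about 40 to 72 microseconds each in these runs)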