1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package json
import (
"bytes"
"fmt"
"io"
"testing"
)
func TestIntern(t *testing.T) {
var sc stringCache
const alphabet = "abcdefghijklmnopqrstuvwxyz"
for i := 0; i <= len(alphabet); i++ {
want := alphabet[i:]
if got := sc.make([]byte(want)); got != want {
t.Fatalf("make = %v, want %v", got, want)
}
}
for i := 0; i < 1000; i++ {
want := fmt.Sprintf("test%b", i)
if got := sc.make([]byte(want)); got != want {
t.Fatalf("make = %v, want %v", got, want)
}
}
}
var sink string
func BenchmarkIntern(b *testing.B) {
datasetStrings := func(name string) (out [][]byte) {
var data []byte
for _, ts := range jsonTestdata() {
if ts.name == name {
data = ts.data
}
}
dec := NewDecoder(bytes.NewReader(data))
for {
k, n := dec.StackIndex(dec.StackDepth())
isObjectName := k == '{' && n%2 == 0
tok, err := dec.ReadToken()
if err != nil {
if err == io.EOF {
break
}
b.Fatalf("ReadToken error: %v", err)
}
if tok.Kind() == '"' && !isObjectName {
out = append(out, []byte(tok.String()))
}
}
return out
}
tests := []struct {
label string
data [][]byte
}{
// Best is the best case scenario where every string is the same.
{"Best", func() (out [][]byte) {
for i := 0; i < 1000; i++ {
out = append(out, []byte("hello, world!"))
}
return out
}()},
// Repeat is a sequence of the same set of names repeated.
// This commonly occurs when unmarshaling a JSON array of JSON objects,
// where the set of all names is usually small.
{"Repeat", func() (out [][]byte) {
for i := 0; i < 100; i++ {
for _, s := range []string{"first_name", "last_name", "age", "address", "street_address", "city", "state", "postal_code", "phone_numbers", "gender"} {
out = append(out, []byte(s))
}
}
return out
}()},
// Synthea is all string values encountered in the Synthea FHIR dataset.
{"Synthea", datasetStrings("SyntheaFhir")},
// Twitter is all string values encountered in the Twitter dataset.
{"Twitter", datasetStrings("TwitterStatus")},
// Worst is the worst case scenario where every string is different
// resulting in wasted time looking up a string that will never match.
{"Worst", func() (out [][]byte) {
for i := 0; i < 1000; i++ {
out = append(out, []byte(fmt.Sprintf("%016x", i)))
}
return out
}()},
}
for _, tt := range tests {
b.Run(tt.label, func(b *testing.B) {
// Alloc simply heap allocates each string.
// This provides an upper bound on the number of allocations.
b.Run("Alloc", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
for _, b := range tt.data {
sink = string(b)
}
}
})
// Cache interns strings using stringCache.
// We want to optimize for having a faster runtime than Alloc,
// and also keeping the number of allocations closer to GoMap.
b.Run("Cache", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
var sc stringCache
for _, b := range tt.data {
sink = sc.make(b)
}
}
})
// GoMap interns all strings in a simple Go map.
// This provides a lower bound on the number of allocations.
b.Run("GoMap", func(b *testing.B) {
b.ReportAllocs()
for i := 0; i < b.N; i++ {
m := make(map[string]string)
for _, b := range tt.data {
s, ok := m[string(b)]
if !ok {
s = string(b)
m[s] = s
}
sink = s
}
}
})
})
}
}
|