fork of https://github.com/sourcegraph/zoekt
1package zoekt
2
3import (
4 "math/rand"
5 "reflect"
6 "testing"
7 "testing/quick"
8
9 "golang.org/x/exp/slices"
10)
11
12const exampleQuery = "const data: Event = { ...JSON.parse(message.data), type: message.event }"
13
14func genFrequencies(ngramOffs []runeNgramOff, max int) []uint32 {
15 seen := map[ngram]uint32{}
16 var frequencies []uint32
17 for _, n := range ngramOffs {
18 freq, ok := seen[n.ngram]
19 if !ok {
20 freq = uint32(rand.Intn(max))
21 seen[n.ngram] = freq
22 }
23 frequencies = append(frequencies, freq)
24 }
25 return frequencies
26}
27
28func BenchmarkMinFrequencyNgramOffsets(b *testing.B) {
29 ngramOffs := splitNGrams([]byte(exampleQuery))
30 slices.SortFunc(ngramOffs, func(a, b runeNgramOff) bool {
31 return a.ngram < b.ngram
32 })
33 frequencies := genFrequencies(ngramOffs, 100)
34 for i := 0; i < b.N; i++ {
35 x0, x1 := minFrequencyNgramOffsets(ngramOffs, frequencies)
36 if x0 == x1 {
37 b.Fatal("should not be the same")
38 }
39 }
40}
41
42func TestMinFrequencyNgramOffsets(t *testing.T) {
43 // Our implementation has ill-defined tie breaks when the 2nd smallest
44 // frequency can be tied with others. Fixing that would make the CPU perf
45 // worse, so what we do instead is just validate that what we get back is
46 // acceptable.
47 if err := quick.Check(func(s string, maxFreq uint16) bool {
48 ngramOffs := splitNGrams([]byte(s))
49 if len(ngramOffs) == 0 {
50 return true
51 }
52
53 slices.SortFunc(ngramOffs, func(a, b runeNgramOff) bool {
54 return a.ngram < b.ngram
55 })
56 frequencies := genFrequencies(ngramOffs, int(maxFreq))
57 x0, x1 := minFrequencyNgramOffsets(ngramOffs, frequencies)
58
59 if x0.index > x1.index {
60 t.Log("x0 should be before x1")
61 return false
62 }
63
64 if len(ngramOffs) <= 1 {
65 return true
66 }
67
68 // Now we just assert that we found two items with the smallest
69 // frequencies.
70 idx0 := slices.Index[runeNgramOff](ngramOffs, x0)
71 idx1 := slices.Index[runeNgramOff](ngramOffs, x1)
72 start := []uint32{frequencies[idx0], frequencies[idx1]}
73 slices.Sort(start)
74 slices.Sort(frequencies)
75 return reflect.DeepEqual(start, frequencies[:2])
76 }, nil); err != nil {
77 t.Fatal(err)
78 }
79}