fork of https://github.com/sourcegraph/zoekt
1package zoekt
2
3import (
4 "math/rand"
5 "reflect"
6 "slices"
7 "testing"
8 "testing/quick"
9)
10
11const exampleQuery = "const data: Event = { ...JSON.parse(message.data), type: message.event }"
12
13func genFrequencies(ngramOffs []runeNgramOff, max int) []uint32 {
14 seen := map[ngram]uint32{}
15 var frequencies []uint32
16 for _, n := range ngramOffs {
17 freq, ok := seen[n.ngram]
18 if !ok {
19 freq = uint32(rand.Intn(max))
20 seen[n.ngram] = freq
21 }
22 frequencies = append(frequencies, freq)
23 }
24 return frequencies
25}
26
27func BenchmarkMinFrequencyNgramOffsets(b *testing.B) {
28 ngramOffs := splitNGrams([]byte(exampleQuery))
29 slices.SortFunc(ngramOffs, runeNgramOff.Compare)
30 frequencies := genFrequencies(ngramOffs, 100)
31 for i := 0; i < b.N; i++ {
32 x0, x1 := minFrequencyNgramOffsets(ngramOffs, frequencies)
33 if x0 == x1 {
34 b.Fatal("should not be the same")
35 }
36 }
37}
38
39func TestMinFrequencyNgramOffsets(t *testing.T) {
40 // Our implementation has ill-defined tie breaks when the 2nd smallest
41 // frequency can be tied with others. Fixing that would make the CPU perf
42 // worse, so what we do instead is just validate that what we get back is
43 // acceptable.
44 if err := quick.Check(func(s string, maxFreq uint16) bool {
45 ngramOffs := splitNGrams([]byte(s))
46 if len(ngramOffs) == 0 {
47 return true
48 }
49
50 slices.SortFunc(ngramOffs, runeNgramOff.Compare)
51 frequencies := genFrequencies(ngramOffs, int(maxFreq))
52 x0, x1 := minFrequencyNgramOffsets(ngramOffs, frequencies)
53
54 if x0.index > x1.index {
55 t.Log("x0 should be before x1")
56 return false
57 }
58
59 if len(ngramOffs) <= 1 {
60 return true
61 }
62
63 // Now we just assert that we found two items with the smallest
64 // frequencies.
65 idx0 := slices.IndexFunc(ngramOffs, func(a runeNgramOff) bool { return a == x0 })
66 idx1 := slices.IndexFunc(ngramOffs, func(a runeNgramOff) bool { return a == x1 })
67 start := []uint32{frequencies[idx0], frequencies[idx1]}
68 slices.Sort(start)
69 slices.Sort(frequencies)
70 return reflect.DeepEqual(start, frequencies[:2])
71 }, nil); err != nil {
72 t.Fatal(err)
73 }
74}