fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 2.9 kB View raw
1package index 2 3import ( 4 "math/rand" 5 "reflect" 6 "slices" 7 "testing" 8 "testing/quick" 9) 10 11const exampleQuery = "const data: Event = { ...JSON.parse(message.data), type: message.event }" 12 13func genFrequencies(ngramOffs []runeNgramOff, max int) []uint32 { 14 seen := map[ngram]uint32{} 15 var frequencies []uint32 16 for _, n := range ngramOffs { 17 freq, ok := seen[n.ngram] 18 if !ok { 19 freq = uint32(rand.Intn(max)) 20 seen[n.ngram] = freq 21 } 22 frequencies = append(frequencies, freq) 23 } 24 return frequencies 25} 26 27func BenchmarkMinFrequencyNgramOffsets(b *testing.B) { 28 ngramOffs := splitNGrams([]byte(exampleQuery)) 29 slices.SortFunc(ngramOffs, runeNgramOff.Compare) 30 frequencies := genFrequencies(ngramOffs, 100) 31 for i := 0; i < b.N; i++ { 32 x0, x1 := minFrequencyNgramOffsets(ngramOffs, frequencies) 33 if x0 == x1 { 34 b.Fatal("should not be the same") 35 } 36 } 37} 38 39func TestMinFrequencyNgramOffsets(t *testing.T) { 40 // Our implementation has ill-defined tie breaks when the 2nd smallest 41 // frequency can be tied with others. Fixing that would make the CPU perf 42 // worse, so what we do instead is just validate that what we get back is 43 // acceptable. 44 if err := quick.Check(func(s string, maxFreq uint16) bool { 45 // Ensure maximum frequency is nonzero so that random sampling will work 46 maxFreq = max(maxFreq, 1) 47 48 ngramOffs := splitNGrams([]byte(s)) 49 if len(ngramOffs) == 0 { 50 return true 51 } 52 53 slices.SortFunc(ngramOffs, runeNgramOff.Compare) 54 frequencies := genFrequencies(ngramOffs, int(maxFreq)) 55 x0, x1 := minFrequencyNgramOffsets(ngramOffs, frequencies) 56 57 if x0.index > x1.index { 58 t.Log("x0 should be before x1") 59 return false 60 } 61 62 if len(ngramOffs) <= 1 { 63 return true 64 } 65 66 // Now we just assert that we found two items with the smallest 67 // frequencies. 68 idx0 := slices.IndexFunc(ngramOffs, func(a runeNgramOff) bool { return a == x0 }) 69 idx1 := slices.IndexFunc(ngramOffs, func(a runeNgramOff) bool { return a == x1 }) 70 start := []uint32{frequencies[idx0], frequencies[idx1]} 71 slices.Sort(start) 72 slices.Sort(frequencies) 73 return reflect.DeepEqual(start, frequencies[:2]) 74 }, nil); err != nil { 75 t.Fatal(err) 76 } 77} 78 79func TestFindSelectiveNGrams(t *testing.T) { 80 if err := quick.Check(func(s string, maxFreq uint16) bool { 81 // Ensure maximum frequency is nonzero so that random sampling will work 82 maxFreq = max(maxFreq, 1) 83 84 ngramOffs := splitNGrams([]byte(s)) 85 if len(ngramOffs) == 0 { 86 return true 87 } 88 89 slices.SortFunc(ngramOffs, runeNgramOff.Compare) 90 indexMap := make([]int, len(ngramOffs)) 91 for i, n := range ngramOffs { 92 indexMap[n.index] = i 93 } 94 95 frequencies := genFrequencies(ngramOffs, int(maxFreq)) 96 x0, x1 := findSelectiveNgrams(ngramOffs, indexMap, frequencies) 97 98 if len(ngramOffs) <= 1 { 99 return true 100 } 101 102 // Just assert the invariant that x0 is before x1. This test mostly checks 103 // for out-of-bounds errors. 104 return x0.index < x1.index 105 }, nil); err != nil { 106 t.Fatal(err) 107 } 108}