fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "encoding/binary"
19 "log"
20 "math"
21 "math/rand"
22 "reflect"
23 "slices"
24 "strconv"
25 "testing"
26 "testing/quick"
27
28 "github.com/google/go-cmp/cmp"
29)
30
31var _ = log.Println
32
33func TestNgram(t *testing.T) {
34 in := "abc"
35 n := stringToNGram(in)
36 if n.String() != "abc" {
37 t.Errorf("got %q, want %q", n, "abc")
38 }
39
40 f := func(b ngramRunes) bool {
41 n := runesToNGram(b)
42 got := ngramRunes(ngramToRunes(n))
43 if !reflect.DeepEqual(b, got) {
44 t.Log(cmp.Diff(b, got))
45 return false
46 }
47 return true
48 }
49 if err := quick.Check(f, nil); err != nil {
50 t.Error(err)
51 }
52}
53
54type ngramRunes [ngramSize]rune
55
56func (ngramRunes) Generate(rand *rand.Rand, size int) reflect.Value {
57 // Same implementation used by testing/quick to generate strings. But we
58 // force it to ngramSize runes.
59 var b ngramRunes
60 for i := range b {
61 b[i] = rune(rand.Intn(0x10ffff))
62 }
63 return reflect.ValueOf(b)
64}
65
66func TestDocSection(t *testing.T) {
67 in := []DocumentSection{{1, 2}, {3, 4}}
68 serialized := marshalDocSections(in)
69 roundtrip := unmarshalDocSections(serialized, nil)
70 if !reflect.DeepEqual(in, roundtrip) {
71 t.Errorf("got %v, want %v", roundtrip, in)
72 }
73}
74
75func TestGenerateCaseNgrams(t *testing.T) {
76 ng := stringToNGram("aB1")
77 gotNG := generateCaseNgrams(ng)
78
79 got := map[string]bool{}
80 for _, n := range gotNG {
81 got[string(ngramToBytes(n))] = true
82 }
83
84 want := map[string]bool{
85 "aB1": true,
86 "AB1": true,
87 "ab1": true,
88 "Ab1": true,
89 }
90
91 if !reflect.DeepEqual(got, want) {
92 t.Errorf("got %v, want %v", got, want)
93 }
94}
95
96func TestNextFileIndex(t *testing.T) {
97 for _, tc := range []struct {
98 off, curFile uint32
99 ends []uint32
100 want uint32
101 }{
102 {math.MaxUint32, 0, []uint32{34}, 1},
103 {9, 0, []uint32{34}, 0},
104 {450, 0, []uint32{100, 200, 300, 400, 500, 600}, 4},
105 } {
106 got := nextFileIndex(tc.off, tc.curFile, tc.ends)
107 if got != tc.want {
108 t.Errorf("%v: got %d, want %d", tc, got, tc.want)
109 }
110 }
111}
112
113func TestDeltas(t *testing.T) {
114 in := []uint32{1, 72, 0xfff}
115 out := toSizedDeltas(in)
116 round := fromSizedDeltas(out, nil)
117 if !reflect.DeepEqual(in, round) {
118 t.Errorf("got %v, want %v", round, in)
119 }
120}
121
122func TestSizedDeltas(t *testing.T) {
123 encode := func(nums []uint32) []byte {
124 return toSizedDeltas(nums)
125 }
126 decode := func(data []byte) []uint32 {
127 if len(data) == 0 {
128 return nil
129 }
130 return fromSizedDeltas(data, nil)
131 }
132 testIncreasingIntCoder(t, encode, decode)
133}
134
135func TestFromDeltas(t *testing.T) {
136 decode := func(data []byte) []uint32 {
137 if len(data) == 0 {
138 return nil
139 }
140 return fromDeltas(data, nil)
141 }
142 testIncreasingIntCoder(t, toDeltas, decode)
143}
144
145func TestCompressedPostingIterator(t *testing.T) {
146 decode := func(data []byte) []uint32 {
147 if len(data) == 0 {
148 return nil
149 }
150
151 var nums []uint32
152 i := newCompressedPostingIterator(data, stringToNGram("abc"))
153 for i.first() != math.MaxUint32 {
154 nums = append(nums, i.first())
155 i.next(i.first())
156 }
157 return nums
158 }
159 testIncreasingIntCoder(t, toDeltas, decode)
160}
161
162func toDeltas(offsets []uint32) []byte {
163 var enc [8]byte
164
165 deltas := make([]byte, 0, len(offsets)*2)
166
167 var last uint32
168 for _, p := range offsets {
169 delta := p - last
170 last = p
171
172 m := binary.PutUvarint(enc[:], uint64(delta))
173 deltas = append(deltas, enc[:m]...)
174 }
175 return deltas
176}
177
178func testIncreasingIntCoder(t *testing.T, encode func([]uint32) []byte, decode func([]byte) []uint32) {
179 f := func(nums []uint32) bool {
180 nums = sortedUnique(nums)
181 b := encode(nums)
182 got := decode(b)
183 if len(nums) == len(got) && len(nums) == 0 {
184 return true
185 }
186 if !reflect.DeepEqual(got, nums) {
187 t.Log(cmp.Diff(nums, got))
188 return false
189 }
190 return true
191 }
192 if err := quick.Check(f, nil); err != nil {
193 t.Error(err)
194 }
195}
196
197func sortedUnique(nums []uint32) []uint32 {
198 if len(nums) == 0 {
199 return nums
200 }
201 slices.Sort(nums)
202 filtered := nums[:1]
203 for _, n := range nums[1:] {
204 if filtered[len(filtered)-1] != n {
205 filtered = append(filtered, n)
206 }
207 }
208 return filtered
209}
210
211func TestCondenseRuneOffsets(t *testing.T) {
212 for i, tc := range []struct {
213 arr []uint32
214 want []runeOffsetCorrection
215 }{
216 {[]uint32{}, []runeOffsetCorrection{}},
217 {[]uint32{0, 100, 200, 300, 400, 500}, []runeOffsetCorrection{}},
218 {[]uint32{0, 105, 205, 310, 420}, []runeOffsetCorrection{{100, 105}, {300, 310}, {400, 420}}},
219 } {
220 got := makeRuneOffsetMap(tc.arr)
221 if !reflect.DeepEqual(got, runeOffsetMap(tc.want)) {
222 t.Errorf("#%d: got %v, want %v", i, got, tc.want)
223 }
224 for j, byteOffset := range tc.arr {
225 runeOffset := uint32(j * runeOffsetFrequency)
226 gotByteOffset, _ := got.lookup(runeOffset)
227 if gotByteOffset != byteOffset {
228 t.Errorf("#%d: lookup(%v) got %v, want %v", i, runeOffset, gotByteOffset, byteOffset)
229 }
230 }
231 }
232}
233
234func TestRuneOffsetLookup(t *testing.T) {
235 for i, tc := range []struct {
236 r uint32
237 wantOff, wantLeft uint32
238 offsets []runeOffsetCorrection
239 }{
240 {0, 0, 0, nil},
241 {1234, 1200, 34, nil},
242 {5, 0, 5, []runeOffsetCorrection{{100, 105}, {400, 430}}},
243 {120, 105, 20, []runeOffsetCorrection{{100, 105}, {400, 430}}},
244 {1234, 1230, 34, []runeOffsetCorrection{{100, 105}, {400, 430}}},
245 } {
246 gotOff, gotLeft := runeOffsetMap(tc.offsets).lookup(tc.r)
247 if gotLeft != tc.wantLeft {
248 t.Errorf("#%d: got left=%v, want left=%v", i, gotLeft, tc.wantLeft)
249 }
250 if gotOff != tc.wantOff {
251 t.Errorf("#%d: got off=%v, want off=%v", i, gotOff, tc.wantOff)
252 }
253 }
254
255 m := runeOffsetMap([]runeOffsetCorrection{{100, 105}, {200, 210}, {400, 430}})
256 inputs := []uint32{0, 1, 99, 100, 101, 199, 200, 201, 300, 399, 400, 401, 510, 610}
257 wanted := []uint32{0, 0, 0, 105, 105, 105, 210, 210, 310, 310, 430, 430, 530, 630}
258 for i, v := range inputs {
259 got, _ := m.lookup(v)
260 if got != wanted[i] {
261 t.Errorf("got off=%v, want off=%v for map=%v", got, wanted[i], m)
262 }
263 }
264}
265
266func TestUnmarshalDocSections(t *testing.T) {
267 f := func(nums []uint32) bool {
268 nums = sortedUnique(nums)
269
270 ds := make([]DocumentSection, 0, len(nums)/2)
271 // Drop the unmatched last one in case of odd len(nums).
272 // Better than skipping the test entirely.
273 for i := 0; i+1 < len(nums); i += 2 {
274 ds = append(ds, DocumentSection{Start: nums[i], End: nums[i+1]})
275 }
276
277 data := marshalDocSections(ds)
278 got := unmarshalDocSections(data, nil)
279 if len(ds) == len(got) && len(ds) == 0 {
280 return true
281 }
282 if !reflect.DeepEqual(ds, got) {
283 t.Log(cmp.Diff(ds, got))
284 return false
285 }
286 return true
287 }
288 if err := quick.Check(f, nil); err != nil {
289 t.Error(err)
290 }
291}
292
293func BenchmarkUnmarshalDocSections(b *testing.B) {
294 rng := rand.New(rand.NewSource(0))
295
296 for size := 10; size <= 10000; size *= 10 {
297 ds := make([]DocumentSection, 0, size)
298 var last uint32
299 for range size {
300 var d DocumentSection
301 last += 1 + uint32(rng.Int31n(200))
302 d.Start = last
303 last += 1 + uint32(rng.Int31n(10))
304 d.End = last
305 ds = append(ds, d)
306 }
307 data := marshalDocSections(ds)
308
309 b.Run(strconv.Itoa(size), func(b *testing.B) {
310 b.ReportAllocs()
311 for i := 0; i < b.N; i++ {
312 if ds := unmarshalDocSections(data, nil); len(ds) != size {
313 b.Fatalf("wrong size: got %d, want %d", len(ds), size)
314 }
315 }
316 })
317 }
318}