fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package index
16
17import (
18 "encoding/binary"
19 "log"
20 "math/rand"
21 "reflect"
22 "sort"
23 "strconv"
24 "testing"
25 "testing/quick"
26
27 "github.com/google/go-cmp/cmp"
28)
29
30var _ = log.Println
31
32func TestNgram(t *testing.T) {
33 in := "abc"
34 n := stringToNGram(in)
35 if n.String() != "abc" {
36 t.Errorf("got %q, want %q", n, "abc")
37 }
38
39 f := func(b ngramRunes) bool {
40 n := runesToNGram(b)
41 got := ngramRunes(ngramToRunes(n))
42 if !reflect.DeepEqual(b, got) {
43 t.Log(cmp.Diff(b, got))
44 return false
45 }
46 return true
47 }
48 if err := quick.Check(f, nil); err != nil {
49 t.Error(err)
50 }
51}
52
53type ngramRunes [ngramSize]rune
54
55func (ngramRunes) Generate(rand *rand.Rand, size int) reflect.Value {
56 // Same implementation used by testing/quick to generate strings. But we
57 // force it to ngramSize runes.
58 var b ngramRunes
59 for i := range b {
60 b[i] = rune(rand.Intn(0x10ffff))
61 }
62 return reflect.ValueOf(b)
63}
64
65func TestDocSection(t *testing.T) {
66 in := []DocumentSection{{1, 2}, {3, 4}}
67 serialized := marshalDocSections(in)
68 roundtrip := unmarshalDocSections(serialized, nil)
69 if !reflect.DeepEqual(in, roundtrip) {
70 t.Errorf("got %v, want %v", roundtrip, in)
71 }
72}
73
74func TestGenerateCaseNgrams(t *testing.T) {
75 ng := stringToNGram("aB1")
76 gotNG := generateCaseNgrams(ng)
77
78 got := map[string]bool{}
79 for _, n := range gotNG {
80 got[string(ngramToBytes(n))] = true
81 }
82
83 want := map[string]bool{
84 "aB1": true,
85 "AB1": true,
86 "ab1": true,
87 "Ab1": true,
88 }
89
90 if !reflect.DeepEqual(got, want) {
91 t.Errorf("got %v, want %v", got, want)
92 }
93}
94
95func TestNextFileIndex(t *testing.T) {
96 for _, tc := range []struct {
97 off, curFile uint32
98 ends []uint32
99 want uint32
100 }{
101 {maxUInt32, 0, []uint32{34}, 1},
102 {9, 0, []uint32{34}, 0},
103 {450, 0, []uint32{100, 200, 300, 400, 500, 600}, 4},
104 } {
105 got := nextFileIndex(tc.off, tc.curFile, tc.ends)
106 if got != tc.want {
107 t.Errorf("%v: got %d, want %d", tc, got, tc.want)
108 }
109 }
110}
111
112func TestDeltas(t *testing.T) {
113 in := []uint32{1, 72, 0xfff}
114 out := toSizedDeltas(in)
115 round := fromSizedDeltas(out, nil)
116 if !reflect.DeepEqual(in, round) {
117 t.Errorf("got %v, want %v", round, in)
118 }
119}
120
121func TestSizedDeltas(t *testing.T) {
122 encode := func(nums []uint32) []byte {
123 return toSizedDeltas(nums)
124 }
125 decode := func(data []byte) []uint32 {
126 if len(data) == 0 {
127 return nil
128 }
129 return fromSizedDeltas(data, nil)
130 }
131 testIncreasingIntCoder(t, encode, decode)
132}
133
134func TestFromDeltas(t *testing.T) {
135 decode := func(data []byte) []uint32 {
136 if len(data) == 0 {
137 return nil
138 }
139 return fromDeltas(data, nil)
140 }
141 testIncreasingIntCoder(t, toDeltas, decode)
142}
143
144func TestCompressedPostingIterator(t *testing.T) {
145 decode := func(data []byte) []uint32 {
146 if len(data) == 0 {
147 return nil
148 }
149
150 var nums []uint32
151 i := newCompressedPostingIterator(data, stringToNGram("abc"))
152 for i.first() != maxUInt32 {
153 nums = append(nums, i.first())
154 i.next(i.first())
155 }
156 return nums
157 }
158 testIncreasingIntCoder(t, toDeltas, decode)
159}
160
161func toDeltas(offsets []uint32) []byte {
162 var enc [8]byte
163
164 deltas := make([]byte, 0, len(offsets)*2)
165
166 var last uint32
167 for _, p := range offsets {
168 delta := p - last
169 last = p
170
171 m := binary.PutUvarint(enc[:], uint64(delta))
172 deltas = append(deltas, enc[:m]...)
173 }
174 return deltas
175}
176
177func testIncreasingIntCoder(t *testing.T, encode func([]uint32) []byte, decode func([]byte) []uint32) {
178 f := func(nums []uint32) bool {
179 nums = sortedUnique(nums)
180 b := encode(nums)
181 got := decode(b)
182 if len(nums) == len(got) && len(nums) == 0 {
183 return true
184 }
185 if !reflect.DeepEqual(got, nums) {
186 t.Log(cmp.Diff(nums, got))
187 return false
188 }
189 return true
190 }
191 if err := quick.Check(f, nil); err != nil {
192 t.Error(err)
193 }
194}
195
196func sortedUnique(nums []uint32) []uint32 {
197 if len(nums) == 0 {
198 return nums
199 }
200 sort.Slice(nums, func(i, j int) bool { return nums[i] < nums[j] })
201 filtered := nums[:1]
202 for _, n := range nums[1:] {
203 if filtered[len(filtered)-1] != n {
204 filtered = append(filtered, n)
205 }
206 }
207 return filtered
208}
209
210func TestCondenseRuneOffsets(t *testing.T) {
211 for i, tc := range []struct {
212 arr []uint32
213 want []runeOffsetCorrection
214 }{
215 {[]uint32{}, []runeOffsetCorrection{}},
216 {[]uint32{0, 100, 200, 300, 400, 500}, []runeOffsetCorrection{}},
217 {[]uint32{0, 105, 205, 310, 420}, []runeOffsetCorrection{{100, 105}, {300, 310}, {400, 420}}},
218 } {
219 got := makeRuneOffsetMap(tc.arr)
220 if !reflect.DeepEqual(got, runeOffsetMap(tc.want)) {
221 t.Errorf("#%d: got %v, want %v", i, got, tc.want)
222 }
223 for j, byteOffset := range tc.arr {
224 runeOffset := uint32(j * runeOffsetFrequency)
225 gotByteOffset, _ := got.lookup(runeOffset)
226 if gotByteOffset != byteOffset {
227 t.Errorf("#%d: lookup(%v) got %v, want %v", i, runeOffset, gotByteOffset, byteOffset)
228 }
229 }
230 }
231}
232
233func TestRuneOffsetLookup(t *testing.T) {
234 for i, tc := range []struct {
235 r uint32
236 wantOff, wantLeft uint32
237 offsets []runeOffsetCorrection
238 }{
239 {0, 0, 0, nil},
240 {1234, 1200, 34, nil},
241 {5, 0, 5, []runeOffsetCorrection{{100, 105}, {400, 430}}},
242 {120, 105, 20, []runeOffsetCorrection{{100, 105}, {400, 430}}},
243 {1234, 1230, 34, []runeOffsetCorrection{{100, 105}, {400, 430}}},
244 } {
245 gotOff, gotLeft := runeOffsetMap(tc.offsets).lookup(tc.r)
246 if gotLeft != tc.wantLeft {
247 t.Errorf("#%d: got left=%v, want left=%v", i, gotLeft, tc.wantLeft)
248 }
249 if gotOff != tc.wantOff {
250 t.Errorf("#%d: got off=%v, want off=%v", i, gotOff, tc.wantOff)
251 }
252 }
253
254 m := runeOffsetMap([]runeOffsetCorrection{{100, 105}, {200, 210}, {400, 430}})
255 inputs := []uint32{0, 1, 99, 100, 101, 199, 200, 201, 300, 399, 400, 401, 510, 610}
256 wanted := []uint32{0, 0, 0, 105, 105, 105, 210, 210, 310, 310, 430, 430, 530, 630}
257 for i, v := range inputs {
258 got, _ := m.lookup(v)
259 if got != wanted[i] {
260 t.Errorf("got off=%v, want off=%v for map=%v", got, wanted[i], m)
261 }
262 }
263}
264
265func TestUnmarshalDocSections(t *testing.T) {
266 f := func(nums []uint32) bool {
267 nums = sortedUnique(nums)
268
269 ds := make([]DocumentSection, 0, len(nums)/2)
270 // Drop the unmatched last one in case of odd len(nums).
271 // Better than skipping the test entirely.
272 for i := 0; i+1 < len(nums); i += 2 {
273 ds = append(ds, DocumentSection{Start: nums[i], End: nums[i+1]})
274 }
275
276 data := marshalDocSections(ds)
277 got := unmarshalDocSections(data, nil)
278 if len(ds) == len(got) && len(ds) == 0 {
279 return true
280 }
281 if !reflect.DeepEqual(ds, got) {
282 t.Log(cmp.Diff(ds, got))
283 return false
284 }
285 return true
286 }
287 if err := quick.Check(f, nil); err != nil {
288 t.Error(err)
289 }
290}
291
292func BenchmarkUnmarshalDocSections(b *testing.B) {
293 rng := rand.New(rand.NewSource(0))
294
295 for size := 10; size <= 10000; size *= 10 {
296 ds := make([]DocumentSection, 0, size)
297 var last uint32
298 for i := 0; i < size; i++ {
299 var d DocumentSection
300 last += 1 + uint32(rng.Int31n(200))
301 d.Start = last
302 last += 1 + uint32(rng.Int31n(10))
303 d.End = last
304 ds = append(ds, d)
305 }
306 data := marshalDocSections(ds)
307
308 b.Run(strconv.Itoa(size), func(b *testing.B) {
309 b.ReportAllocs()
310 for i := 0; i < b.N; i++ {
311 if ds := unmarshalDocSections(data, nil); len(ds) != size {
312 b.Fatalf("wrong size: got %d, want %d", len(ds), size)
313 }
314 }
315 })
316 }
317}