fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

at main 12 kB View raw
1package index 2 3import ( 4 "bytes" 5 "fmt" 6 "testing" 7 "testing/quick" 8 "unicode/utf8" 9 10 "github.com/google/go-cmp/cmp" 11) 12 13func getNewlines(data []byte) newlines { 14 var locs []uint32 15 for i, c := range data { 16 if c == '\n' { 17 locs = append(locs, uint32(i)) 18 } 19 } 20 return newlines{ 21 locs: locs, 22 fileSize: uint32(len(data)), 23 } 24} 25 26func TestGetLines(t *testing.T) { 27 contents := [][]byte{ 28 []byte("one\ntwo\nthree\nfour"), 29 []byte("one\ntwo\nthree\nfour\n"), 30 []byte("one"), 31 []byte(""), 32 } 33 34 for _, content := range contents { 35 t.Run("", func(t *testing.T) { 36 newLines := getNewlines(content) 37 lines := bytes.SplitAfter(content, []byte{'\n'}) 38 if len(lines) > 0 && len(lines[len(lines)-1]) == 0 { 39 // A trailing newline does not delimit an empty line at the end of a file 40 lines = lines[:len(lines)-1] 41 } 42 wantGetLines := func(low, high int) []byte { 43 low-- 44 high-- 45 if low < 0 { 46 low = 0 47 } 48 if low >= len(lines) { 49 return nil 50 } 51 if high <= 0 { 52 return nil 53 } 54 if high > len(lines) { 55 high = len(lines) 56 } 57 return bytes.Join(lines[low:high], nil) 58 } 59 60 for low := -1; low <= len(lines)+2; low++ { 61 for high := low; high <= len(lines)+2; high++ { 62 want := wantGetLines(low, high) 63 got := newLines.getLines(content, low, high) 64 if d := cmp.Diff(string(want), string(got)); d != "" { 65 t.Fatal(d) 66 } 67 } 68 } 69 }) 70 } 71} 72 73func TestAtOffset(t *testing.T) { 74 cases := []struct { 75 data []byte 76 offset uint32 77 lineNumber int 78 lineStart uint32 79 lineEnd uint32 80 }{{ 81 data: []byte("0.2.4.\n7.9.11.\n"), 82 offset: 0, 83 lineNumber: 1, lineStart: 0, lineEnd: 7, 84 }, { 85 data: []byte("0.2.4.\n7.9.11.\n"), 86 offset: 6, 87 lineNumber: 1, lineStart: 0, lineEnd: 7, 88 }, { 89 data: []byte("0.2.4.\n7.9.11.\n"), 90 offset: 2, 91 lineNumber: 1, lineStart: 0, lineEnd: 7, 92 }, { 93 data: []byte("0.2.4.\n7.9.11.\n"), 94 offset: 2, 95 lineNumber: 1, lineStart: 0, lineEnd: 7, 96 }, { 97 data: []byte("0.2.4.\n7.9.11.\n"), 98 offset: 7, 99 lineNumber: 2, lineStart: 7, lineEnd: 15, 100 }, { 101 data: []byte("0.2.4.\n7.9.11.\n"), 102 offset: 11, 103 lineNumber: 2, lineStart: 7, lineEnd: 15, 104 }, { 105 data: []byte("0.2.4.\n7.9.11.\n"), 106 offset: 15, 107 lineNumber: 3, lineStart: 15, lineEnd: 15, 108 }, { 109 data: []byte("0.2.4.\n7.9.11."), 110 offset: 7, 111 lineNumber: 2, lineStart: 7, lineEnd: 14, 112 }, { 113 data: []byte("\n\n"), 114 offset: 0, 115 lineNumber: 1, lineStart: 0, lineEnd: 1, 116 }, { 117 data: []byte("\n\n"), 118 offset: 1, 119 lineNumber: 2, lineStart: 1, lineEnd: 2, 120 }, { 121 data: []byte("\n\n"), 122 offset: 3, 123 lineNumber: 3, lineStart: 2, lineEnd: 2, 124 }, { 125 data: []byte("line with no newlines"), 126 offset: 3, 127 lineNumber: 1, lineStart: 0, lineEnd: 21, 128 }} 129 130 for _, tt := range cases { 131 t.Run("", func(t *testing.T) { 132 nls := getNewlines(tt.data) 133 gotLineNumber := nls.atOffset(tt.offset) 134 if gotLineNumber != tt.lineNumber { 135 t.Fatalf("expected line number %d, got %d", tt.lineNumber, gotLineNumber) 136 } 137 if gotLineStart := nls.lineStart(gotLineNumber); gotLineStart != tt.lineStart { 138 t.Fatalf("expected line start %d, got %d", tt.lineStart, gotLineStart) 139 } 140 if gotLineEnd := nls.lineStart(gotLineNumber + 1); gotLineEnd != tt.lineEnd { 141 t.Fatalf("expected line end %d, got %d", tt.lineEnd, gotLineEnd) 142 } 143 }) 144 } 145} 146 147func TestLineBounds(t *testing.T) { 148 cases := []struct { 149 data []byte 150 lineNumber int 151 start uint32 152 end uint32 153 }{{ 154 data: []byte("0.2.4.\n7.9.11.\n"), 155 lineNumber: 1, 156 start: 0, end: 7, 157 }, { 158 data: []byte("0.2.4.\n7.9.11.\n"), 159 lineNumber: 2, 160 start: 7, end: 15, 161 }, { 162 data: []byte("0.2.4.\n7.9.11.\n"), 163 lineNumber: 0, 164 start: 0, end: 0, 165 }, { 166 data: []byte("0.2.4.\n7.9.11.\n"), 167 lineNumber: -1, 168 start: 0, end: 0, 169 }, { 170 data: []byte("0.2.4.\n7.9.11.\n"), 171 lineNumber: 202002, 172 start: 15, end: 15, 173 }, { 174 data: []byte("\n\n"), 175 lineNumber: 1, 176 start: 0, end: 1, 177 }, { 178 data: []byte("\n\n"), 179 lineNumber: 2, 180 start: 1, end: 2, 181 }, { 182 data: []byte("\n\n"), 183 lineNumber: 3, 184 start: 2, end: 2, 185 }} 186 187 for _, tt := range cases { 188 t.Run("", func(t *testing.T) { 189 nls := getNewlines(tt.data) 190 gotStart := nls.lineStart(tt.lineNumber) 191 if gotStart != tt.start { 192 t.Fatalf("expected line start %d, got %d", tt.start, gotStart) 193 } 194 gotEnd := nls.lineStart(tt.lineNumber + 1) 195 if gotEnd != tt.end { 196 t.Fatalf("expected line end %d, got %d", tt.end, gotEnd) 197 } 198 }) 199 } 200} 201 202func TestChunkMatches(t *testing.T) { 203 content := []byte(`0.2.4.6.8.10. 20413.16.19.22. 20526.29.32.35. 20639.42.45.48. 20752.55.58.61. 20865.68.71.74. 20978.81.84.87. 210`) 211 match_0_2 := &candidateMatch{byteOffset: 0, byteMatchSz: 2} 212 match_6_10 := &candidateMatch{byteOffset: 6, byteMatchSz: 4} 213 match_10_16 := &candidateMatch{byteOffset: 10, byteMatchSz: 6} 214 match_19_42 := &candidateMatch{byteOffset: 19, byteMatchSz: 23} 215 match_45_48 := &candidateMatch{byteOffset: 45, byteMatchSz: 3} 216 match_71_72 := &candidateMatch{byteOffset: 71, byteMatchSz: 1} 217 218 cases := []struct { 219 candidateMatches []*candidateMatch 220 numContextLines int 221 want []candidateChunk 222 }{{ 223 candidateMatches: []*candidateMatch{match_0_2}, 224 numContextLines: 0, 225 want: []candidateChunk{{ 226 firstLine: 1, 227 minOffset: 0, 228 lastLine: 1, 229 maxOffset: 2, 230 candidates: []*candidateMatch{match_0_2}, 231 }}, 232 }, { 233 candidateMatches: []*candidateMatch{match_0_2}, 234 numContextLines: 5, 235 want: []candidateChunk{{ 236 firstLine: 1, 237 minOffset: 0, 238 lastLine: 1, 239 maxOffset: 2, 240 candidates: []*candidateMatch{match_0_2}, 241 }}, 242 }, { 243 candidateMatches: []*candidateMatch{match_0_2, match_6_10}, 244 numContextLines: 0, 245 want: []candidateChunk{{ 246 firstLine: 1, 247 minOffset: 0, 248 lastLine: 1, 249 maxOffset: 10, 250 candidates: []*candidateMatch{match_0_2, match_6_10}, 251 }}, 252 }, { 253 candidateMatches: []*candidateMatch{match_0_2, match_10_16}, 254 numContextLines: 0, 255 want: []candidateChunk{{ 256 firstLine: 1, 257 minOffset: 0, 258 lastLine: 2, 259 maxOffset: 16, 260 candidates: []*candidateMatch{match_0_2, match_10_16}, 261 }}, 262 }, { 263 candidateMatches: []*candidateMatch{match_0_2, match_19_42}, 264 numContextLines: 0, 265 want: []candidateChunk{{ 266 firstLine: 1, 267 minOffset: 0, 268 lastLine: 1, 269 maxOffset: 2, 270 candidates: []*candidateMatch{match_0_2}, 271 }, { 272 firstLine: 2, 273 minOffset: 19, 274 lastLine: 4, 275 maxOffset: 42, 276 candidates: []*candidateMatch{match_19_42}, 277 }}, 278 }, { 279 candidateMatches: []*candidateMatch{match_0_2, match_19_42}, 280 numContextLines: 1, 281 want: []candidateChunk{{ 282 firstLine: 1, 283 minOffset: 0, 284 lastLine: 4, 285 maxOffset: 42, 286 candidates: []*candidateMatch{match_0_2, match_19_42}, 287 }}, 288 }, { 289 candidateMatches: []*candidateMatch{ 290 match_0_2, match_19_42, match_45_48, match_71_72, 291 }, 292 numContextLines: 0, 293 want: []candidateChunk{{ 294 firstLine: 1, 295 minOffset: 0, 296 lastLine: 1, 297 maxOffset: 2, 298 candidates: []*candidateMatch{match_0_2}, 299 }, { 300 firstLine: 2, 301 minOffset: 19, 302 lastLine: 4, 303 maxOffset: 48, 304 candidates: []*candidateMatch{match_19_42, match_45_48}, 305 }, { 306 firstLine: 6, 307 minOffset: 71, 308 lastLine: 6, 309 maxOffset: 72, 310 candidates: []*candidateMatch{match_71_72}, 311 }}, 312 }, { 313 candidateMatches: []*candidateMatch{ 314 match_0_2, match_19_42, match_45_48, match_71_72, 315 }, 316 numContextLines: 100, 317 want: []candidateChunk{{ 318 firstLine: 1, 319 minOffset: 0, 320 lastLine: 6, 321 maxOffset: 72, 322 candidates: []*candidateMatch{match_0_2, match_19_42, match_45_48, match_71_72}, 323 }}, 324 }} 325 326 newlines := getNewlines(content) 327 for _, tt := range cases { 328 t.Run("", func(t *testing.T) { 329 got := chunkCandidates(tt.candidateMatches, newlines, tt.numContextLines) 330 if diff := cmp.Diff(fmt.Sprintf("%#v\n", tt.want), fmt.Sprintf("%#v\n", got)); diff != "" { 331 t.Fatal(diff) 332 } 333 }) 334 } 335} 336 337func BenchmarkColumnHelper(b *testing.B) { 338 // We simulate looking up columns of evenly spaced matches 339 const matches = 10_000 340 const match = "match" 341 const space = " " 342 const dist = uint32(len(match) + len(space)) 343 data := bytes.Repeat([]byte(match+space), matches) 344 345 b.ResetTimer() 346 347 for i := 0; i < b.N; i++ { 348 columnHelper := columnHelper{data: data} 349 350 lineOffset := 0 351 offset := uint32(0) 352 for offset < uint32(len(data)) { 353 col := columnHelper.get(lineOffset, offset) 354 if col != offset+1 { 355 b.Fatal("column is not offset even though data is ASCII") 356 } 357 offset += dist 358 } 359 } 360} 361 362func TestColumnHelper(t *testing.T) { 363 f := func(line0, line1 string) bool { 364 data := []byte(line0 + line1) 365 lineOffset := len(line0) 366 367 columnHelper := columnHelper{data: data} 368 369 // We check every second rune returns the correct answer 370 offset := lineOffset 371 column := 1 372 for offset < len(data) { 373 if column%2 == 0 { 374 got := columnHelper.get(lineOffset, uint32(offset)) 375 if got != uint32(column) { 376 return false 377 } 378 } 379 _, size := utf8.DecodeRune(data[offset:]) 380 offset += size 381 column++ 382 } 383 384 return true 385 } 386 387 if err := quick.Check(f, nil); err != nil { 388 t.Fatal(err) 389 } 390 391 // Corner cases 392 393 // empty data, shouldn't happen but just in case it slips through 394 ch := columnHelper{data: nil} 395 if got := ch.get(0, 0); got != 1 { 396 t.Fatal("empty data didn't return 1", got) 397 } 398 399 // Repeating a call to get should return the same value 400 // empty data, shouldn't happen but just in case it slips through 401 ch = columnHelper{data: []byte("hello\nworld")} 402 if got := ch.get(6, 8); got != 3 { 403 t.Fatal("unexpected value for third column on second line", got) 404 } 405 if got := ch.get(6, 8); got != 3 { 406 t.Fatal("unexpected value for repeated call for third column on second line", got) 407 } 408 409 // Now make sure if we go backwards we do not incorrectly use the cache 410 if got := ch.get(6, 6); got != 1 { 411 t.Fatal("unexpected value for backwards call for first column on second line", got) 412 } 413} 414 415func TestFindMaxOverlappingSection(t *testing.T) { 416 secs := []DocumentSection{ 417 {Start: 0, End: 5}, 418 {Start: 8, End: 19}, 419 {Start: 22, End: 26}, 420 } 421 // 012345678901234567890123456 422 // [....[ 423 // [..........[ 424 // [...[ 425 426 testcases := []struct { 427 name string 428 off uint32 429 sz uint32 430 wantSecIdx uint32 431 wantOverlap bool 432 }{ 433 {off: 0, sz: 1, wantSecIdx: 0, wantOverlap: true}, 434 {off: 0, sz: 5, wantSecIdx: 0, wantOverlap: true}, 435 {off: 2, sz: 5, wantSecIdx: 0, wantOverlap: true}, 436 {off: 2, sz: 50, wantSecIdx: 1, wantOverlap: true}, 437 {off: 4, sz: 10, wantSecIdx: 1, wantOverlap: true}, 438 {off: 5, sz: 15, wantSecIdx: 1, wantOverlap: true}, 439 {off: 18, sz: 10, wantSecIdx: 2, wantOverlap: true}, 440 441 // Prefer full overlap, break ties by preferring the earlier section 442 {off: 10, sz: 20, wantSecIdx: 2, wantOverlap: true}, 443 {off: 0, sz: 100, wantSecIdx: 0, wantOverlap: true}, 444 {off: 8, sz: 100, wantSecIdx: 1, wantOverlap: true}, 445 {off: 0, sz: 10, wantSecIdx: 0, wantOverlap: true}, 446 {off: 16, sz: 10, wantSecIdx: 2, wantOverlap: true}, 447 448 // No overlap 449 {off: 5, sz: 2, wantOverlap: false}, 450 {off: 20, sz: 1, wantOverlap: false}, 451 {off: 99, sz: 1, wantOverlap: false}, 452 {off: 0, sz: 0, wantOverlap: false}, 453 } 454 455 for _, tt := range testcases { 456 t.Run(fmt.Sprintf("off=%d size=%d", tt.off, tt.sz), func(t *testing.T) { 457 got, haveOverlap := findMaxOverlappingSection(secs, tt.off, tt.sz) 458 if haveOverlap != tt.wantOverlap { 459 t.Fatalf("expected overlap %v, got %v", tt.wantOverlap, haveOverlap) 460 } 461 if got != tt.wantSecIdx { 462 t.Fatalf("expected section %d, got %d", tt.wantSecIdx, got) 463 } 464 }) 465 } 466}