fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1package zoekt 2 3import ( 4 "bytes" 5 "fmt" 6 "testing" 7 "testing/quick" 8 "unicode/utf8" 9 10 "github.com/google/go-cmp/cmp" 11) 12 13func getNewlines(data []byte) newlines { 14 var locs []uint32 15 for i, c := range data { 16 if c == '\n' { 17 locs = append(locs, uint32(i)) 18 } 19 } 20 return newlines{ 21 locs: locs, 22 fileSize: uint32(len(data)), 23 } 24} 25 26func TestGetLines(t *testing.T) { 27 contents := [][]byte{ 28 []byte("one\ntwo\nthree\nfour"), 29 []byte("one\ntwo\nthree\nfour\n"), 30 []byte("one"), 31 []byte(""), 32 } 33 34 for _, content := range contents { 35 t.Run("", func(t *testing.T) { 36 newLines := getNewlines(content) 37 // Trim the last newline before splitting because a trailing newline does not constitute a new line 38 lines := bytes.Split(bytes.TrimSuffix(content, []byte{'\n'}), []byte{'\n'}) 39 wantGetLines := func(low, high int) []byte { 40 low-- 41 high-- 42 if low < 0 { 43 low = 0 44 } 45 if low >= len(lines) { 46 return nil 47 } 48 if high <= 0 { 49 return nil 50 } 51 if high > len(lines) { 52 high = len(lines) 53 } 54 return bytes.Join(lines[low:high], []byte{'\n'}) 55 } 56 57 for low := -1; low <= len(lines)+2; low++ { 58 for high := low; high <= len(lines)+2; high++ { 59 want := wantGetLines(low, high) 60 got := newLines.getLines(content, low, high) 61 if d := cmp.Diff(string(want), string(got)); d != "" { 62 t.Fatal(d) 63 } 64 } 65 } 66 }) 67 } 68} 69 70func TestAtOffset(t *testing.T) { 71 cases := []struct { 72 data []byte 73 offset uint32 74 lineNumber int 75 lineStart int 76 lineEnd int 77 }{{ 78 data: []byte("0.2.4.\n7.9.11.\n"), 79 offset: 0, 80 lineNumber: 1, lineStart: 0, lineEnd: 6, 81 }, { 82 data: []byte("0.2.4.\n7.9.11.\n"), 83 offset: 6, 84 lineNumber: 1, lineStart: 0, lineEnd: 6, 85 }, { 86 data: []byte("0.2.4.\n7.9.11.\n"), 87 offset: 2, 88 lineNumber: 1, lineStart: 0, lineEnd: 6, 89 }, { 90 data: []byte("0.2.4.\n7.9.11.\n"), 91 offset: 2, 92 lineNumber: 1, lineStart: 0, lineEnd: 6, 93 }, { 94 data: []byte("0.2.4.\n7.9.11.\n"), 95 offset: 7, 96 lineNumber: 2, lineStart: 7, lineEnd: 14, 97 }, { 98 data: []byte("0.2.4.\n7.9.11.\n"), 99 offset: 11, 100 lineNumber: 2, lineStart: 7, lineEnd: 14, 101 }, { 102 data: []byte("0.2.4.\n7.9.11.\n"), 103 offset: 15, 104 lineNumber: 3, lineStart: 15, lineEnd: 15, 105 }, { 106 data: []byte("0.2.4.\n7.9.11."), 107 offset: 7, 108 lineNumber: 2, lineStart: 7, lineEnd: 14, 109 }, { 110 data: []byte("\n\n"), 111 offset: 0, 112 lineNumber: 1, lineStart: 0, lineEnd: 0, 113 }, { 114 data: []byte("\n\n"), 115 offset: 1, 116 lineNumber: 2, lineStart: 1, lineEnd: 1, 117 }, { 118 data: []byte("\n\n"), 119 offset: 3, 120 lineNumber: 3, lineStart: 2, lineEnd: 2, 121 }, { 122 data: []byte("line with no newlines"), 123 offset: 3, 124 lineNumber: 1, lineStart: 0, lineEnd: 21, 125 }} 126 127 for _, tt := range cases { 128 t.Run("", func(t *testing.T) { 129 nls := getNewlines(tt.data) 130 gotLineNumber, gotLineStart, gotLineEnd := nls.atOffset(tt.offset) 131 if gotLineNumber != tt.lineNumber { 132 t.Fatalf("expected line number %d, got %d", tt.lineNumber, gotLineNumber) 133 } 134 if gotLineStart != tt.lineStart { 135 t.Fatalf("expected line start %d, got %d", tt.lineStart, gotLineStart) 136 } 137 if gotLineEnd != tt.lineEnd { 138 t.Fatalf("expected line end %d, got %d", tt.lineEnd, gotLineEnd) 139 } 140 }) 141 } 142} 143 144func TestLineBounds(t *testing.T) { 145 cases := []struct { 146 data []byte 147 lineNumber int 148 start uint32 149 end uint32 150 }{{ 151 data: []byte("0.2.4.\n7.9.11.\n"), 152 lineNumber: 1, 153 start: 0, end: 6, 154 }, { 155 data: []byte("0.2.4.\n7.9.11.\n"), 156 lineNumber: 2, 157 start: 7, end: 14, 158 }, { 159 data: []byte("0.2.4.\n7.9.11.\n"), 160 lineNumber: 0, 161 start: 0, end: 0, 162 }, { 163 data: []byte("0.2.4.\n7.9.11.\n"), 164 lineNumber: -1, 165 start: 0, end: 0, 166 }, { 167 data: []byte("0.2.4.\n7.9.11.\n"), 168 lineNumber: 202002, 169 start: 15, end: 15, 170 }, { 171 data: []byte("\n\n"), 172 lineNumber: 1, 173 start: 0, end: 0, 174 }, { 175 data: []byte("\n\n"), 176 lineNumber: 2, 177 start: 1, end: 1, 178 }, { 179 data: []byte("\n\n"), 180 lineNumber: 3, 181 start: 2, end: 2, 182 }} 183 184 for _, tt := range cases { 185 t.Run("", func(t *testing.T) { 186 nls := getNewlines(tt.data) 187 gotStart, gotEnd := nls.lineBounds(tt.lineNumber) 188 if gotStart != tt.start { 189 t.Fatalf("expected line start %d, got %d", tt.start, gotStart) 190 } 191 if gotEnd != tt.end { 192 t.Fatalf("expected line end %d, got %d", tt.end, gotEnd) 193 } 194 }) 195 } 196} 197 198func TestChunkMatches(t *testing.T) { 199 content := []byte(`0.2.4.6.8.10. 20013.16.19.22. 20126.29.32.35. 20239.42.45.48. 20352.55.58.61. 20465.68.71.74. 20578.81.84.87. 206`) 207 match_0_2 := &candidateMatch{byteOffset: 0, byteMatchSz: 2} 208 match_6_10 := &candidateMatch{byteOffset: 6, byteMatchSz: 4} 209 match_10_16 := &candidateMatch{byteOffset: 10, byteMatchSz: 6} 210 match_19_42 := &candidateMatch{byteOffset: 19, byteMatchSz: 23} 211 match_45_48 := &candidateMatch{byteOffset: 45, byteMatchSz: 3} 212 match_71_72 := &candidateMatch{byteOffset: 71, byteMatchSz: 1} 213 214 cases := []struct { 215 candidateMatches []*candidateMatch 216 numContextLines int 217 want []candidateChunk 218 }{{ 219 candidateMatches: []*candidateMatch{match_0_2}, 220 numContextLines: 0, 221 want: []candidateChunk{{ 222 firstLine: 1, 223 minOffset: 0, 224 lastLine: 1, 225 maxOffset: 2, 226 candidates: []*candidateMatch{match_0_2}, 227 }}, 228 }, { 229 candidateMatches: []*candidateMatch{match_0_2}, 230 numContextLines: 5, 231 want: []candidateChunk{{ 232 firstLine: 1, 233 minOffset: 0, 234 lastLine: 1, 235 maxOffset: 2, 236 candidates: []*candidateMatch{match_0_2}, 237 }}, 238 }, { 239 candidateMatches: []*candidateMatch{match_0_2, match_6_10}, 240 numContextLines: 0, 241 want: []candidateChunk{{ 242 firstLine: 1, 243 minOffset: 0, 244 lastLine: 1, 245 maxOffset: 10, 246 candidates: []*candidateMatch{match_0_2, match_6_10}, 247 }}, 248 }, { 249 candidateMatches: []*candidateMatch{match_0_2, match_10_16}, 250 numContextLines: 0, 251 want: []candidateChunk{{ 252 firstLine: 1, 253 minOffset: 0, 254 lastLine: 2, 255 maxOffset: 16, 256 candidates: []*candidateMatch{match_0_2, match_10_16}, 257 }}, 258 }, { 259 candidateMatches: []*candidateMatch{match_0_2, match_19_42}, 260 numContextLines: 0, 261 want: []candidateChunk{{ 262 firstLine: 1, 263 minOffset: 0, 264 lastLine: 1, 265 maxOffset: 2, 266 candidates: []*candidateMatch{match_0_2}, 267 }, { 268 firstLine: 2, 269 minOffset: 19, 270 lastLine: 4, 271 maxOffset: 42, 272 candidates: []*candidateMatch{match_19_42}, 273 }}, 274 }, { 275 candidateMatches: []*candidateMatch{match_0_2, match_19_42}, 276 numContextLines: 1, 277 want: []candidateChunk{{ 278 firstLine: 1, 279 minOffset: 0, 280 lastLine: 4, 281 maxOffset: 42, 282 candidates: []*candidateMatch{match_0_2, match_19_42}, 283 }}, 284 }, { 285 candidateMatches: []*candidateMatch{ 286 match_0_2, match_19_42, match_45_48, match_71_72, 287 }, 288 numContextLines: 0, 289 want: []candidateChunk{{ 290 firstLine: 1, 291 minOffset: 0, 292 lastLine: 1, 293 maxOffset: 2, 294 candidates: []*candidateMatch{match_0_2}, 295 }, { 296 firstLine: 2, 297 minOffset: 19, 298 lastLine: 4, 299 maxOffset: 48, 300 candidates: []*candidateMatch{match_19_42, match_45_48}, 301 }, { 302 firstLine: 6, 303 minOffset: 71, 304 lastLine: 6, 305 maxOffset: 72, 306 candidates: []*candidateMatch{match_71_72}, 307 }}, 308 }, { 309 candidateMatches: []*candidateMatch{ 310 match_0_2, match_19_42, match_45_48, match_71_72, 311 }, 312 numContextLines: 100, 313 want: []candidateChunk{{ 314 firstLine: 1, 315 minOffset: 0, 316 lastLine: 6, 317 maxOffset: 72, 318 candidates: []*candidateMatch{match_0_2, match_19_42, match_45_48, match_71_72}, 319 }}, 320 }} 321 322 newlines := getNewlines(content) 323 for _, tt := range cases { 324 t.Run("", func(t *testing.T) { 325 got := chunkCandidates(tt.candidateMatches, newlines, tt.numContextLines) 326 if diff := cmp.Diff(fmt.Sprintf("%#v\n", tt.want), fmt.Sprintf("%#v\n", got)); diff != "" { 327 t.Fatal(diff) 328 } 329 }) 330 } 331} 332 333func BenchmarkColumnHelper(b *testing.B) { 334 // We simulate looking up columns of evenly spaced matches 335 const matches = 10_000 336 const match = "match" 337 const space = " " 338 const dist = uint32(len(match) + len(space)) 339 data := bytes.Repeat([]byte(match+space), matches) 340 341 b.ResetTimer() 342 343 for i := 0; i < b.N; i++ { 344 columnHelper := columnHelper{data: data} 345 346 lineOffset := 0 347 offset := uint32(0) 348 for offset < uint32(len(data)) { 349 col := columnHelper.get(lineOffset, offset) 350 if col != offset+1 { 351 b.Fatal("column is not offset even though data is ASCII") 352 } 353 offset += dist 354 } 355 } 356} 357 358func TestColumnHelper(t *testing.T) { 359 f := func(line0, line1 string) bool { 360 data := []byte(line0 + line1) 361 lineOffset := len(line0) 362 363 columnHelper := columnHelper{data: data} 364 365 // We check every second rune returns the correct answer 366 offset := lineOffset 367 column := 1 368 for offset < len(data) { 369 if column%2 == 0 { 370 got := columnHelper.get(lineOffset, uint32(offset)) 371 if got != uint32(column) { 372 return false 373 } 374 } 375 _, size := utf8.DecodeRune(data[offset:]) 376 offset += size 377 column++ 378 } 379 380 return true 381 } 382 383 if err := quick.Check(f, nil); err != nil { 384 t.Fatal(err) 385 } 386 387 // Corner cases 388 389 // empty data, shouldn't happen but just in case it slips through 390 ch := columnHelper{data: nil} 391 if got := ch.get(0, 0); got != 1 { 392 t.Fatal("empty data didn't return 1", got) 393 } 394 395 // Repeating a call to get should return the same value 396 // empty data, shouldn't happen but just in case it slips through 397 ch = columnHelper{data: []byte("hello\nworld")} 398 if got := ch.get(6, 8); got != 3 { 399 t.Fatal("unexpected value for third column on second line", got) 400 } 401 if got := ch.get(6, 8); got != 3 { 402 t.Fatal("unexpected value for repeated call for third column on second line", got) 403 } 404 405 // Now make sure if we go backwards we do not incorrectly use the cache 406 if got := ch.get(6, 6); got != 1 { 407 t.Fatal("unexpected value for backwards call for first column on second line", got) 408 } 409} 410 411func TestFindMaxOverlappingSection(t *testing.T) { 412 secs := []DocumentSection{ 413 {Start: 0, End: 5}, 414 {Start: 8, End: 19}, 415 {Start: 22, End: 26}, 416 } 417 // 012345678901234567890123456 418 // [....[ 419 // [..........[ 420 // [...[ 421 422 testcases := []struct { 423 name string 424 off uint32 425 sz uint32 426 wantSecIdx uint32 427 wantOverlap bool 428 }{ 429 {off: 0, sz: 1, wantSecIdx: 0, wantOverlap: true}, 430 {off: 0, sz: 5, wantSecIdx: 0, wantOverlap: true}, 431 {off: 2, sz: 5, wantSecIdx: 0, wantOverlap: true}, 432 {off: 2, sz: 50, wantSecIdx: 1, wantOverlap: true}, 433 {off: 4, sz: 10, wantSecIdx: 1, wantOverlap: true}, 434 {off: 5, sz: 15, wantSecIdx: 1, wantOverlap: true}, 435 {off: 18, sz: 10, wantSecIdx: 2, wantOverlap: true}, 436 437 // Prefer full overlap, break ties by preferring the earlier section 438 {off: 10, sz: 20, wantSecIdx: 2, wantOverlap: true}, 439 {off: 0, sz: 100, wantSecIdx: 0, wantOverlap: true}, 440 {off: 8, sz: 100, wantSecIdx: 1, wantOverlap: true}, 441 {off: 0, sz: 10, wantSecIdx: 0, wantOverlap: true}, 442 {off: 16, sz: 10, wantSecIdx: 2, wantOverlap: true}, 443 444 // No overlap 445 {off: 5, sz: 2, wantOverlap: false}, 446 {off: 20, sz: 1, wantOverlap: false}, 447 {off: 99, sz: 1, wantOverlap: false}, 448 {off: 0, sz: 0, wantOverlap: false}, 449 } 450 451 for _, tt := range testcases { 452 t.Run(fmt.Sprintf("off=%d size=%d", tt.off, tt.sz), func(t *testing.T) { 453 got, haveOverlap := findMaxOverlappingSection(secs, tt.off, tt.sz) 454 if haveOverlap != tt.wantOverlap { 455 t.Fatalf("expected overlap %v, got %v", tt.wantOverlap, haveOverlap) 456 } 457 if got != tt.wantSecIdx { 458 t.Fatalf("expected section %d, got %d", tt.wantSecIdx, got) 459 } 460 }) 461 } 462}