fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Simplify trigram selection in distanceHitIterator (#782)

Follow-up to #779. This PR removes the tie-breaking logic for trigrams with the same
frequency (which maximised their distance in the query string), because it will no longer have a big effect.

+16 -36
+16 -36
indexdata.go
··· 320 320 return sz 321 321 } 322 322 323 - const maxUInt32 = 0xffffffff 324 - 325 - func min2Index(xs []uint32) (idx0, idx1 int) { 326 - min0, min1 := uint32(maxUInt32), uint32(maxUInt32) 327 - for i, x := range xs { 328 - if x <= min0 { 329 - idx0, idx1 = i, idx0 330 - min0, min1 = x, min0 331 - } else if x <= min1 { 332 - idx1 = i 333 - min1 = x 334 - } 335 - } 336 - return 337 - } 338 - 339 323 // findSelectiveNgrams returns two ngrams to pass to the distance iterator, chosen to 340 - // produce a small file intersection. It finds the two lowest frequency ngrams, making 341 - // sure to maximize the distance between them in case of ties. It avoids overlapping 342 - // trigrams to keep their intersection as small as possible. 324 + // produce a small file intersection. It finds the two lowest frequency ngrams, but avoids 325 + // overlapping trigrams to keep their intersection as small as possible. 343 326 // 344 327 // Invariant: first will always have a smaller index than last. 345 328 func findSelectiveNgrams(ngramOffs []runeNgramOff, indexMap []int, frequencies []uint32) (first, last runeNgramOff) { ··· 361 344 return 362 345 } 363 346 347 + const maxUInt32 = 0xffffffff 348 + 364 349 func minFrequencyNgramOffsets(ngramOffs []runeNgramOff, frequencies []uint32) (first, last runeNgramOff) { 365 - firstI, lastI := min2Index(frequencies) 366 - // If the frequencies are equal lets maximise distance in the query 367 - // string. This optimization normally triggers for long repeated trigrams 368 - // in a string, eg a query like "AAAAA..." 369 - if frequencies[firstI] == frequencies[lastI] { 370 - for i, freq := range frequencies { 371 - if freq != frequencies[firstI] { 372 - continue 373 - } 374 - if ngramOffs[i].index < ngramOffs[firstI].index { 375 - firstI = i 376 - } 377 - if ngramOffs[i].index > ngramOffs[lastI].index { 378 - lastI = i 379 - } 350 + // Find the two lowest frequency ngrams. 
351 + idx0, idx1 := 0, 0 352 + min0, min1 := uint32(maxUInt32), uint32(maxUInt32) 353 + for i, x := range frequencies { 354 + if x <= min0 { 355 + idx0, idx1 = i, idx0 356 + min0, min1 = x, min0 357 + } else if x <= min1 { 358 + idx1 = i 359 + min1 = x 380 360 } 381 361 } 382 362 383 - first = ngramOffs[firstI] 384 - last = ngramOffs[lastI] 363 + first = ngramOffs[idx0] 364 + last = ngramOffs[idx1] 385 365 386 366 // Ensure first appears before last as a helpful invariant. 387 367 if first.index > last.index {