···320320 return sz
321321}
322322323323-const maxUInt32 = 0xffffffff
324324-325325-func min2Index(xs []uint32) (idx0, idx1 int) {
326326- min0, min1 := uint32(maxUInt32), uint32(maxUInt32)
327327- for i, x := range xs {
328328- if x <= min0 {
329329- idx0, idx1 = i, idx0
330330- min0, min1 = x, min0
331331- } else if x <= min1 {
332332- idx1 = i
333333- min1 = x
334334- }
335335- }
336336- return
337337-}
338338-339323// findSelectiveNgrams returns two ngrams to pass to the distance iterator, chosen to
340340-// produce a small file intersection. It finds the two lowest frequency ngrams, making
341341-// sure to maximize the distance between them in case of ties. It avoids overlapping
342342-// trigrams to keep their intersection as small as possible.
324324+// produce a small file intersection. It finds the two lowest frequency ngrams, but avoids
325325+// overlapping trigrams to keep their intersection as small as possible.
343326//
344327// Invariant: first will always have a smaller index than last.
345328func findSelectiveNgrams(ngramOffs []runeNgramOff, indexMap []int, frequencies []uint32) (first, last runeNgramOff) {
···361344 return
362345}
363346347347+const maxUInt32 = 0xffffffff
348348+364349func minFrequencyNgramOffsets(ngramOffs []runeNgramOff, frequencies []uint32) (first, last runeNgramOff) {
365365- firstI, lastI := min2Index(frequencies)
366366- // If the frequencies are equal, let's maximise distance in the query
367367- // string. This optimization normally triggers for long repeated trigrams
368368- // in a string, e.g. a query like "AAAAA..."
369369- if frequencies[firstI] == frequencies[lastI] {
370370- for i, freq := range frequencies {
371371- if freq != frequencies[firstI] {
372372- continue
373373- }
374374- if ngramOffs[i].index < ngramOffs[firstI].index {
375375- firstI = i
376376- }
377377- if ngramOffs[i].index > ngramOffs[lastI].index {
378378- lastI = i
379379- }
350350+ // Find the two lowest frequency ngrams.
351351+ idx0, idx1 := 0, 0
352352+ min0, min1 := uint32(maxUInt32), uint32(maxUInt32)
353353+ for i, x := range frequencies {
354354+ if x <= min0 {
355355+ idx0, idx1 = i, idx0
356356+ min0, min1 = x, min0
357357+ } else if x <= min1 {
358358+ idx1 = i
359359+ min1 = x
380360 }
381361 }
382362383383- first = ngramOffs[firstI]
384384- last = ngramOffs[lastI]
363363+ first = ngramOffs[idx0]
364364+ last = ngramOffs[idx1]
385365386366 // Ensure first appears before last as a helpful invariant.
387367 if first.index > last.index {