fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bytes" 19 "fmt" 20) 21 22// candidateMatch is a candidate match for a substring. 23type candidateMatch struct { 24 caseSensitive bool 25 fileName bool 26 symbol bool 27 symbolIdx uint32 28 29 substrBytes []byte 30 substrLowered []byte 31 32 file uint32 33 34 // Offsets are relative to the start of the filename or file contents. 35 runeOffset uint32 36 byteOffset uint32 37 byteMatchSz uint32 38} 39 40// Matches content against the substring, and populates byteMatchSz on success 41func (m *candidateMatch) matchContent(content []byte) bool { 42 if m.caseSensitive { 43 comp := bytes.Equal(m.substrBytes, content[m.byteOffset:m.byteOffset+uint32(len(m.substrBytes))]) 44 45 m.byteMatchSz = uint32(len(m.substrBytes)) 46 return comp 47 } else { 48 // It is tempting to try a simple ASCII based 49 // comparison if possible, but we need more 50 // information. Simple ASCII chars have unicode upper 51 // case variants (the ASCII 'k' has the Kelvin symbol 52 // as upper case variant). We can only degrade to 53 // ASCII if we are sure that both the corpus and the 54 // query is ASCII only 55 sz, ok := caseFoldingEqualsRunes(m.substrLowered, content[m.byteOffset:]) 56 m.byteMatchSz = uint32(sz) 57 return ok 58 } 59} 60 61// matchIterator is a docIterator that produces candidateMatches for a given document 62type matchIterator interface { 63 docIterator 64 65 candidates() []*candidateMatch 66 updateStats(*Stats) 67} 68 69// noMatchTree is both matchIterator and matchTree that matches nothing. 70type noMatchTree struct { 71 Why string 72} 73 74func (t *noMatchTree) String() string { 75 return fmt.Sprintf("not(%q)", t.Why) 76} 77 78func (t *noMatchTree) candidates() []*candidateMatch { 79 return nil 80} 81 82func (t *noMatchTree) nextDoc() uint32 { 83 return maxUInt32 84} 85 86func (t *noMatchTree) prepare(uint32) {} 87 88func (t *noMatchTree) matches(cp *contentProvider, cost int, known map[matchTree]bool) (bool, bool) { 89 return false, true 90} 91 92func (t *noMatchTree) updateStats(*Stats) {} 93 94func (m *candidateMatch) String() string { 95 return fmt.Sprintf("%d:%d", m.file, m.runeOffset) 96} 97 98type ngramDocIterator struct { 99 leftPad uint32 100 rightPad uint32 101 102 iter hitIterator 103 ends []uint32 104 105 // mutable 106 fileIdx uint32 107 matchCount int 108} 109 110// nextFileIndex returns the smallest index j of ends such that 111// ends[j] > offset, assuming ends[f] <= offset. 112func nextFileIndex(offset, f uint32, ends []uint32) uint32 { 113 d := uint32(1) 114 for f < uint32(len(ends)) && ends[f] <= offset { 115 if f+d < uint32(len(ends)) && ends[f+d] <= offset { 116 f += d 117 d *= 2 118 } else if d > 1 { 119 d = d/4 + 1 120 } else { 121 f++ 122 } 123 } 124 return f 125} 126 127func (i *ngramDocIterator) nextDoc() uint32 { 128 i.fileIdx = nextFileIndex(i.iter.first(), i.fileIdx, i.ends) 129 if i.fileIdx >= uint32(len(i.ends)) { 130 return maxUInt32 131 } 132 return i.fileIdx 133} 134 135func (i *ngramDocIterator) String() string { 136 return fmt.Sprintf("ngram(L=%d,R=%d,%v)", i.leftPad, i.rightPad, i.iter) 137} 138 139func (i *ngramDocIterator) prepare(nextDoc uint32) { 140 var start uint32 141 if nextDoc > 0 { 142 start = i.ends[nextDoc-1] 143 } 144 if start > 0 { 145 i.iter.next(start + i.leftPad - 1) 146 } 147 i.fileIdx = nextDoc 148} 149 150func (i *ngramDocIterator) updateStats(s *Stats) { 151 i.iter.updateStats(s) 152 s.NgramMatches += i.matchCount 153} 154 155func (i *ngramDocIterator) candidates() []*candidateMatch { 156 if i.fileIdx >= uint32(len(i.ends)) { 157 return nil 158 } 159 160 var fileStart uint32 161 if i.fileIdx > 0 { 162 fileStart = i.ends[i.fileIdx-1] 163 } 164 fileEnd := i.ends[i.fileIdx] 165 166 var candidates []*candidateMatch 167 for { 168 p1 := i.iter.first() 169 if p1 == maxUInt32 || p1 >= i.ends[i.fileIdx] { 170 break 171 } 172 i.iter.next(p1) 173 174 if p1 < i.leftPad+fileStart || p1+i.rightPad > fileEnd { 175 continue 176 } 177 178 candidates = append(candidates, &candidateMatch{ 179 file: uint32(i.fileIdx), 180 runeOffset: p1 - fileStart - i.leftPad, 181 }) 182 } 183 i.matchCount += len(candidates) 184 return candidates 185}