fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

build: better support for symbols on the same line (#397)

The order in which universal-ctags outputs symbols reflects the
hierarchy defined by the detected language. In general, we cannot assume
that symbols on the same line appear in the ctags output ordered by
their byte offset.

With this change we tolerate any order of symbols as long as they don't
overlap.

We should now detect more symbols at the cost of more CPU.

author
Stefan Hengl
committer
GitHub
date (Jul 14, 2022, 11:14 PM +0200) commit 94ab14fe parent d441288d
+184 -31
+43 -27
build/ctags.go
··· 206 206 return nil 207 207 } 208 208 209 + // overlaps finds the proper position to insert a zoekt.DocumentSection with 210 + // "start and "end" into "symOffsets". It returns -1 if the new section overlaps 211 + // with one of the existing ones. 212 + func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int { 213 + var i = 0 214 + for i = len(symOffsets) - 1; i >= 0; i-- { 215 + // The most common case is that we exit here, because symOffsets is sorted by 216 + // construction and start is in many cases monotonically increasing. 217 + if start >= symOffsets[i].End { 218 + break 219 + } 220 + if end <= symOffsets[i].Start { 221 + continue 222 + } 223 + // overlap 224 + return -1 225 + } 226 + return i + 1 227 + } 228 + 229 + // tagsToSections converts ctags entries to byte ranges (zoekt.DocumentSection) 230 + // with corresponding metadata (zoekt.Symbol). 209 231 func tagsToSections(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) { 210 232 nls := newLinesIndices(content) 211 233 nls = append(nls, uint32(len(content))) 212 234 var symOffsets []zoekt.DocumentSection 213 235 var symMetaData []*zoekt.Symbol 214 - var lastEnd uint32 215 - var lastLine int 216 - var lastIntraEnd int 236 + 217 237 for _, t := range tags { 218 238 if t.Line <= 0 { 219 239 // Observed this with a .JS file. ··· 231 251 232 252 end := nls[lineIdx] 233 253 line := content[lineOff:end] 234 - if lastLine == lineIdx { 235 - line = line[lastIntraEnd:] 236 - } else { 237 - lastIntraEnd = 0 238 - } 239 254 240 - intraOff := lastIntraEnd + bytes.Index(line, []byte(t.Name)) 255 + // This is best-effort only. For short symbol names, we will often determine the 256 + // wrong offset. 
257 + intraOff := bytes.Index(line, []byte(t.Name)) 241 258 if intraOff < 0 { 242 259 // for Go code, this is very common, since 243 260 // ctags barfs on multi-line declarations 244 261 continue 245 262 } 263 + 246 264 start := lineOff + uint32(intraOff) 247 - if start < lastEnd { 248 - // This can happen if we have multiple tags on the same line. 249 - // Give up. 265 + endSym := start + uint32(len(t.Name)) 266 + 267 + i := overlaps(symOffsets, start, endSym) 268 + if i == -1 { 269 + // Detected an overlap. Give up. 250 270 continue 251 271 } 252 272 253 - endSym := start + uint32(len(t.Name)) 254 - 255 - symOffsets = append(symOffsets, zoekt.DocumentSection{ 256 - Start: start, 257 - End: endSym, 258 - }) 259 - symMetaData = append(symMetaData, &zoekt.Symbol{ 260 - Sym: t.Name, 261 - Kind: t.Kind, 262 - Parent: t.Parent, 263 - ParentKind: t.ParentKind, 264 - }) 265 - lastEnd = endSym 266 - lastLine = lineIdx 267 - lastIntraEnd = intraOff + len(t.Name) 273 + symOffsets = append( 274 + symOffsets[:i], 275 + append([]zoekt.DocumentSection{{Start: start, End: endSym}}, symOffsets[i:]...)..., 276 + ) 277 + symMetaData = append( 278 + symMetaData[:i], 279 + append( 280 + []*zoekt.Symbol{{Sym: t.Name, Kind: t.Kind, Parent: t.Parent, ParentKind: t.ParentKind}}, 281 + symMetaData[i:]..., 282 + )..., 283 + ) 268 284 } 269 285 270 286 return symOffsets, symMetaData, nil
+141 -4
build/ctags_test.go
··· 44 44 } 45 45 46 46 func TestTagsToSectionsMultiple(t *testing.T) { 47 - c := []byte("class Foob { int x; int b; }") 48 - // ----------012345678901234567890123456789 47 + c := []byte("class Foo { int x; int b; }") 48 + // ----------012345678901234567890123456 49 49 50 50 tags := []*ctags.Entry{ 51 51 { ··· 64 64 } 65 65 66 66 want := []zoekt.DocumentSection{ 67 - {Start: 17, End: 18}, 68 - {Start: 24, End: 25}, 67 + {Start: 16, End: 17}, 68 + {Start: 23, End: 24}, 69 + } 70 + if !reflect.DeepEqual(got, want) { 71 + t.Errorf("got %v, want %v", got, want) 72 + } 73 + } 74 + 75 + func TestTagsToSectionsReverse(t *testing.T) { 76 + c := []byte("typedef enum { FOO, BAR } bas\n") 77 + // ----------01234567890123456789012345678 78 + 79 + tags := []*ctags.Entry{ 80 + { 81 + Name: "bas", 82 + Line: 1, 83 + }, 84 + { 85 + Name: "FOO", 86 + Line: 1, 87 + }, 88 + { 89 + Name: "BAR", 90 + Line: 1, 91 + }, 92 + } 93 + 94 + got, _, err := tagsToSections(c, tags) 95 + if err != nil { 96 + t.Fatal("tagsToSections", err) 97 + } 98 + 99 + want := []zoekt.DocumentSection{ 100 + {Start: 15, End: 18}, 101 + {Start: 20, End: 23}, 102 + {Start: 26, End: 29}, 69 103 } 70 104 if !reflect.DeepEqual(got, want) { 71 105 t.Errorf("got %v, want %v", got, want) ··· 92 126 t.Fatalf("got %#v, want 1 section (17,20)", secs) 93 127 } 94 128 } 129 + 130 + func TestOverlaps(t *testing.T) { 131 + tests := []struct { 132 + documentSections []zoekt.DocumentSection 133 + start uint32 134 + end uint32 135 + pos int 136 + }{ 137 + // 138 + // overlap 139 + // 140 + { 141 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 142 + start: 6, 143 + end: 9, 144 + pos: -1, 145 + }, 146 + { 147 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 148 + start: 6, 149 + end: 12, 150 + pos: -1, 151 + }, 152 + { 153 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 154 + start: 4, 155 + end: 9, 156 + pos: -1, 157 + }, 158 + { 159 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 
10}}, 160 + start: 1, 161 + end: 9, 162 + pos: -1, 163 + }, 164 + { 165 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 166 + start: 0, 167 + end: 25, 168 + pos: -1, 169 + }, 170 + { 171 + documentSections: []zoekt.DocumentSection{{0, 3}}, 172 + start: 0, 173 + end: 1, 174 + pos: -1, 175 + }, 176 + // 177 + // NO overlap 178 + // 179 + { 180 + documentSections: []zoekt.DocumentSection{{2, 3}, {5, 10}}, 181 + start: 0, 182 + end: 2, 183 + pos: 0, 184 + }, 185 + { 186 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 187 + start: 3, 188 + end: 4, 189 + pos: 1, 190 + }, 191 + { 192 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 193 + start: 3, 194 + end: 5, 195 + pos: 1, 196 + }, 197 + { 198 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 199 + start: 11, 200 + end: 14, 201 + pos: 2, 202 + }, 203 + { 204 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, 205 + start: 11, 206 + end: 13, 207 + pos: 2, 208 + }, 209 + { 210 + documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, 211 + start: 18, 212 + end: 19, 213 + pos: 3, 214 + }, 215 + { 216 + documentSections: nil, 217 + start: 1, 218 + end: 3, 219 + pos: 0, 220 + }, 221 + } 222 + 223 + for _, tt := range tests { 224 + t.Run("", func(t *testing.T) { 225 + got := overlaps(tt.documentSections, tt.start, tt.end) 226 + if got != tt.pos { 227 + t.Fatalf("want %d, got %d", tt.pos, got) 228 + } 229 + }) 230 + } 231 + }