fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

make LineMatches only contain single lines

Fixes https://github.com/google/zoekt/issues/88.

Change-Id: Ida958551f018b197c7c71412f4f737f6b9b43e95

+212 -3
+1
contentprovider.go
··· 150 150 result = []LineMatch{res} 151 151 } 152 152 } else { 153 + ms = breakMatchesOnNewlines(ms, p.data(false)) 153 154 result = p.fillContentMatches(ms) 154 155 } 155 156
+18 -1
index_test.go
··· 176 176 } 177 177 } 178 178 179 + // A result spanning multiple lines should have LineMatches that only cover 180 + // single lines. 181 + func TestQueryNewlines(t *testing.T) { 182 + text := "line1\nline2\nbla" 183 + b := testIndexBuilder(t, nil, 184 + Document{Name: "filename", Content: []byte(text)}) 185 + sres := searchForTest(t, b, &query.Substring{Pattern: "ine2\nbla"}) 186 + matches := sres.Files 187 + if len(matches) != 1 { 188 + t.Fatalf("got %d file matches, want exactly one", len(matches)) 189 + } 190 + m := matches[0] 191 + if len(m.LineMatches) != 2 { 192 + t.Fatalf("got %d line matches, want exactly two", len(m.LineMatches)) 193 + } 194 + } 195 + 179 196 func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 180 197 searcher := searcherForTest(t, b) 181 198 var opts SearchOptions ··· 1197 1214 sres := searchForTest(t, b, &query.Regexp{Regexp: re, CaseSensitive: true}) 1198 1215 if len(sres.Files) != 1 { 1199 1216 t.Errorf("got %v, wanted 1 matches", sres.Files) 1200 - } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content) { 1217 + } else if l := sres.Files[0].LineMatches[0].Line; !bytes.Equal(l, content[len("pqr\n"):]) { 1201 1218 t.Errorf("got match line %q, want %q", l, content) 1202 1219 } 1203 1220 }
+39 -2
matchtree.go
··· 391 391 idxs := t.regexp.FindAllIndex(cp.data(t.fileName), -1) 392 392 found := t.found[:0] 393 393 for _, idx := range idxs { 394 - found = append(found, &candidateMatch{ 394 + cm := &candidateMatch{ 395 395 byteOffset: uint32(idx[0]), 396 396 byteMatchSz: uint32(idx[1] - idx[0]), 397 397 fileName: t.fileName, 398 - }) 398 + } 399 + 400 + found = append(found, cm) 399 401 } 400 402 t.found = found 401 403 t.reEvaluated = true 402 404 403 405 return len(t.found) > 0, true 406 + } 407 + 408 + // breakMatchesOnNewlines returns matches resulting from breaking each element 409 + // of cms on newlines within text. 410 + func breakMatchesOnNewlines(cms []*candidateMatch, text []byte) []*candidateMatch { 411 + var lineCMs []*candidateMatch 412 + for _, cm := range cms { 413 + lineCMs = append(lineCMs, breakOnNewlines(cm, text)...) 414 + } 415 + return lineCMs 416 + } 417 + 418 + // breakOnNewlines returns matches resulting from breaking cm on newlines 419 + // within text. 420 + func breakOnNewlines(cm *candidateMatch, text []byte) []*candidateMatch { 421 + var cms []*candidateMatch 422 + addMe := &candidateMatch{} 423 + *addMe = *cm 424 + for i := uint32(cm.byteOffset); i < cm.byteOffset+cm.byteMatchSz; i++ { 425 + if text[i] == '\n' { 426 + addMe.byteMatchSz = i - addMe.byteOffset 427 + if addMe.byteMatchSz != 0 { 428 + cms = append(cms, addMe) 429 + } 430 + 431 + addMe = &candidateMatch{} 432 + *addMe = *cm 433 + addMe.byteOffset = i + 1 434 + } 435 + } 436 + addMe.byteMatchSz = cm.byteOffset + cm.byteMatchSz - addMe.byteOffset 437 + if addMe.byteMatchSz != 0 { 438 + cms = append(cms, addMe) 439 + } 440 + return cms 404 441 } 405 442 406 443 func evalMatchTree(cp *contentProvider, cost int, known map[matchTree]bool, mt matchTree) (bool, bool) {
+154
matchtree_test.go
··· 1 + // Copyright 2018 Google Inc. All rights reserved. 2 + // 3 + // Licensed under the Apache License, Version 2.0 (the "License"); 4 + // you may not use this file except in compliance with the License. 5 + // You may obtain a copy of the License at 6 + // 7 + // http://www.apache.org/licenses/LICENSE-2.0 8 + // 9 + // Unless required by applicable law or agreed to in writing, software 10 + // distributed under the License is distributed on an "AS IS" BASIS, 11 + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 + // See the License for the specific language governing permissions and 13 + // limitations under the License. 14 + 15 + package zoekt 16 + 17 + import ( 18 + "reflect" 19 + "testing" 20 + ) 21 + 22 + func Test_breakOnNewlines(t *testing.T) { 23 + type args struct { 24 + cm *candidateMatch 25 + text []byte 26 + } 27 + tests := []struct { 28 + name string 29 + args args 30 + want []*candidateMatch 31 + }{ 32 + { 33 + name: "trivial case", 34 + args: args{ 35 + cm: &candidateMatch{ 36 + byteOffset: 0, 37 + byteMatchSz: 0, 38 + }, 39 + text: nil, 40 + }, 41 + want: nil, 42 + }, 43 + { 44 + name: "no newlines", 45 + args: args{ 46 + cm: &candidateMatch{ 47 + byteOffset: 0, 48 + byteMatchSz: 1, 49 + }, 50 + text: []byte("a"), 51 + }, 52 + want: []*candidateMatch{ 53 + { 54 + byteOffset: 0, 55 + byteMatchSz: 1, 56 + }, 57 + }, 58 + }, 59 + { 60 + name: "newline at start", 61 + args: args{ 62 + cm: &candidateMatch{ 63 + byteOffset: 0, 64 + byteMatchSz: 2, 65 + }, 66 + text: []byte("\na"), 67 + }, 68 + want: []*candidateMatch{ 69 + { 70 + byteOffset: 1, 71 + byteMatchSz: 1, 72 + }, 73 + }, 74 + }, 75 + { 76 + name: "newline at end", 77 + args: args{ 78 + cm: &candidateMatch{ 79 + byteOffset: 0, 80 + byteMatchSz: 2, 81 + }, 82 + text: []byte("a\n"), 83 + }, 84 + want: []*candidateMatch{ 85 + { 86 + byteOffset: 0, 87 + byteMatchSz: 1, 88 + }, 89 + }, 90 + }, 91 + { 92 + name: "newline in middle", 93 + args: args{ 94 + cm: &candidateMatch{ 95 + byteOffset: 0, 96 + byteMatchSz: 3, 97 + }, 98 + text: []byte("a\nb"), 99 + }, 100 + want: []*candidateMatch{ 101 + { 102 + byteOffset: 0, 103 + byteMatchSz: 1, 104 + }, 105 + { 106 + byteOffset: 2, 107 + byteMatchSz: 1, 108 + }, 109 + }, 110 + }, 111 + { 112 + name: "two newlines", 113 + args: args{ 114 + cm: &candidateMatch{ 115 + byteOffset: 0, 116 + byteMatchSz: 5, 117 + }, 118 + text: []byte("a\nb\nc"), 119 + }, 120 + want: []*candidateMatch{ 121 + { 122 + byteOffset: 0, 123 + byteMatchSz: 1, 124 + }, 125 + { 126 + byteOffset: 2, 127 + byteMatchSz: 1, 128 + }, 129 + { 130 + byteOffset: 4, 131 + byteMatchSz: 1, 132 + }, 133 + }, 134 + }, 135 + } 136 + for _, tt := range tests { 137 + t.Run(tt.name, func(t *testing.T) { 138 + if got := breakOnNewlines(tt.args.cm, tt.args.text); !reflect.DeepEqual(got, tt.want) { 139 + type PrintableCm struct { 140 + byteOffset uint32 141 + byteMatchSz uint32 142 + } 143 + var got2, want2 []PrintableCm 144 + for _, g := range got { 145 + got2 = append(got2, PrintableCm{byteOffset: g.byteOffset, byteMatchSz: g.byteMatchSz}) 146 + } 147 + for _, w := range tt.want { 148 + want2 = append(want2, PrintableCm{byteOffset: w.byteOffset, byteMatchSz: w.byteMatchSz}) 149 + } 150 + t.Errorf("breakMatchOnNewlines() = %+v, want %+v", got2, want2) 151 + } 152 + }) 153 + } 154 + }