Move root-level index code to index package (#902) · boltless.me/zoekt@d4c60f2

+17 -3

api.go

··· 33 33 sliceHeaderBytes uint64 = 24 34 34 stringHeaderBytes uint64 = 16 35 35 pointerSize uint64 = 8 36 - interfaceBytes uint64 = 16 37 36 ) 38 37 39 38 // FileMatch contains all the matches within a file. ··· 136 135 return 137 136 } 138 137 139 - // addScore increments the score of the FileMatch by the computed score. If 138 + // AddScore increments the score of the FileMatch by the computed score. If 140 139 // debugScore is true, it also adds a debug string to the FileMatch. If raw is 141 140 // -1, it is ignored. Otherwise, it is added to the debug string. 142 - func (m *FileMatch) addScore(what string, computed float64, raw float64, debugScore bool) { 141 + func (m *FileMatch) AddScore(what string, computed float64, raw float64, debugScore bool) { 143 142 if computed != 0 && debugScore { 144 143 var b strings.Builder 145 144 fmt.Fprintf(&b, "%s", what) ··· 695 694 return nil 696 695 } 697 696 697 + func (r *Repository) GetPriority() float64 { 698 + return r.priority 699 + } 700 + 698 701 // monthsSince1970 returns the number of months since 1970. It returns values in 699 702 // the range [0, maxUInt16]. The upper bound is reached in the year 7431, the 700 703 // lower bound for all dates before 1970. ··· 1012 1015 1013 1016 // SpanContext is the opentracing span context, if it exists, from the zoekt client 1014 1017 SpanContext map[string]string 1018 + } 1019 + 1020 + func (o *SearchOptions) SetDefaults() { 1021 + if o.ShardMaxMatchCount == 0 { 1022 + // We cap the total number of matches, so overly broad 1023 + // searches don't crash the machine. 1024 + o.ShardMaxMatchCount = 100000 1025 + } 1026 + if o.TotalMaxMatchCount == 0 { 1027 + o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount 1028 + } 1015 1029 } 1016 1030 1017 1031 // String returns a succinct representation of the options. This is meant for

-6

api_test.go

··· 150 150 }, { 151 151 v: ChunkMatch{}, 152 152 size: 120, 153 - }, { 154 - v: candidateMatch{}, 155 - size: 80, 156 - }, { 157 - v: candidateChunk{}, 158 - size: 40, 159 153 }} 160 154 for _, c := range cases { 161 155 got := reflect.TypeOf(c.v).Size()

+1 -1

bits.go index/bits.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "cmp"

+1 -1

bits_test.go index/bits_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "encoding/binary"

+6 -1

btree.go index/btree.go

··· 28 28 // Corpora, Proceedings of the ACL-HLT 2011 System Demonstrations, pages 29 29 // 103-108 30 30 31 - package zoekt 31 + package index 32 32 33 33 import ( 34 34 "encoding/binary" ··· 45 45 // 46 46 // On linux "getconf PAGESIZE" returns the number of bytes in a memory page. 47 47 const btreeBucketSize = (4096 * 2) / ngramEncoding 48 + 49 + const ( 50 + interfaceBytes uint64 = 16 51 + pointerSize uint64 = 8 52 + ) 48 53 49 54 type btree struct { 50 55 root node

+1 -1

btree_test.go index/btree_test.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "fmt"

+47 -25

build/builder.go index/builder.go

··· 14 14 15 15 // package build implements a more convenient interface for building 16 16 // zoekt indices. 17 - package build 17 + package index 18 18 19 19 import ( 20 20 "cmp" ··· 40 40 "github.com/dustin/go-humanize" 41 41 "github.com/go-enry/go-enry/v2" 42 42 "github.com/rs/xid" 43 + 43 44 "github.com/sourcegraph/zoekt" 44 45 "github.com/sourcegraph/zoekt/internal/ctags" 45 46 ) ··· 246 247 throttle chan int 247 248 248 249 nextShardNum int 249 - todo []*zoekt.Document 250 - docChecker zoekt.DocChecker 250 + todo []*Document 251 + docChecker DocChecker 251 252 size int 252 253 253 254 parserBins ctags.ParserBinMap ··· 334 335 335 336 // ShardName returns the name the given index shard. 336 337 func (o *Options) shardName(n int) string { 337 - return o.shardNameVersion(zoekt.IndexFormatVersion, n) 338 + return o.shardNameVersion(IndexFormatVersion, n) 338 339 } 339 340 340 341 func (o *Options) shardNameVersion(version, n int) string { 341 - return zoekt.ShardName(o.IndexDir, cmp.Or(o.ShardPrefix, o.RepositoryDescription.Name), version, n) 342 + return ShardName(o.IndexDir, cmp.Or(o.ShardPrefix, o.RepositoryDescription.Name), version, n) 342 343 } 343 344 344 345 type IndexState string ··· 357 358 IndexFormatVersion int 358 359 FeatureVersion int 359 360 }{{ 360 - IndexFormatVersion: zoekt.IndexFormatVersion, 361 - FeatureVersion: zoekt.FeatureVersion, 361 + IndexFormatVersion: IndexFormatVersion, 362 + FeatureVersion: FeatureVersion, 362 363 }, { 363 - IndexFormatVersion: zoekt.NextIndexFormatVersion, 364 - FeatureVersion: zoekt.FeatureVersion, 364 + IndexFormatVersion: NextIndexFormatVersion, 365 + FeatureVersion: FeatureVersion, 365 366 }} 366 367 367 368 // IncrementalSkipIndexing returns true if the index present on disk matches ··· 380 381 return IndexStateMissing, fn 381 382 } 382 383 383 - repos, index, err := zoekt.ReadMetadataPathAlive(fn) 384 + repos, index, err := ReadMetadataPathAlive(fn) 384 385 if os.IsNotExist(err) { 385 386 return IndexStateMissing, fn 386 387 } else if err != nil { ··· 435 436 return nil, nil, false, nil 436 437 } 437 438 438 - repositories, metadata, err := zoekt.ReadMetadataPathAlive(shard) 439 + repositories, metadata, err := ReadMetadataPathAlive(shard) 439 440 if err != nil { 440 441 return nil, nil, false, fmt.Errorf("reading metadata for shard %q: %w", shard, err) 441 442 } ··· 475 476 return "" 476 477 } 477 478 for _, fn := range compoundShards { 478 - repos, _, err := zoekt.ReadMetadataPathAlive(fn) 479 + repos, _, err := ReadMetadataPathAlive(fn) 479 480 if err != nil { 480 481 continue 481 482 } ··· 591 592 592 593 // AddFile is a convenience wrapper for the Add method 593 594 func (b *Builder) AddFile(name string, content []byte) error { 594 - return b.Add(zoekt.Document{Name: name, Content: content}) 595 + return b.Add(Document{Name: name, Content: content}) 595 596 } 596 597 597 - func (b *Builder) Add(doc zoekt.Document) error { 598 + func (b *Builder) Add(doc Document) error { 598 599 if b.finishCalled { 599 600 return nil 600 601 } ··· 673 674 // Delta shard builds need to update FileTombstone and branch commit information for all 674 675 // existing shards 675 676 for _, shard := range oldShards { 676 - repositories, _, err := zoekt.ReadMetadataPathAlive(shard) 677 + repositories, _, err := ReadMetadataPathAlive(shard) 677 678 if err != nil { 678 679 return fmt.Errorf("reading metadata from shard %q: %w", shard, err) 679 680 } ··· 718 719 719 720 repository.LatestCommitDate = b.opts.RepositoryDescription.LatestCommitDate 720 721 721 - tempPath, finalPath, err := zoekt.JsonMarshalRepoMetaTemp(shard, repository) 722 + tempPath, finalPath, err := JsonMarshalRepoMetaTemp(shard, repository) 722 723 if err != nil { 723 724 return fmt.Errorf("writing repository metadta for shard %q: %w", shard, err) 724 725 } ··· 746 747 747 748 toDelete = make(map[string]struct{}) 748 749 for _, name := range oldShards { 749 - paths, err := zoekt.IndexFilePaths(name) 750 + paths, err := IndexFilePaths(name) 750 751 if err != nil { 751 752 b.buildError = fmt.Errorf("failed to find old paths for %s: %w", name, err) 752 753 } ··· 773 774 if !strings.HasSuffix(p, ".zoekt") { 774 775 continue 775 776 } 776 - err := zoekt.SetTombstone(p, b.opts.RepositoryDescription.ID) 777 + err := SetTombstone(p, b.opts.RepositoryDescription.ID) 777 778 b.buildError = err 778 779 continue 779 780 } ··· 870 871 } 871 872 872 873 type rankedDoc struct { 873 - *zoekt.Document 874 + *Document 874 875 rank []float64 875 876 } 876 877 ··· 878 879 // before writing them to disk. The order of documents in the shard is important 879 880 // at query time, because earlier documents receive a boost at query time and 880 881 // have a higher chance of being searched before limits kick in. 881 - func rank(d *zoekt.Document, origIdx int) []float64 { 882 + func rank(d *Document, origIdx int) []float64 { 882 883 skipped := 0.0 883 884 if d.SkipReason != "" { 884 885 skipped = 1.0 ··· 930 931 } 931 932 } 932 933 933 - func sortDocuments(todo []*zoekt.Document) { 934 + func sortDocuments(todo []*Document) { 934 935 rs := make([]rankedDoc, 0, len(todo)) 935 936 for i, t := range todo { 936 937 rd := rankedDoc{t, rank(t, i)} ··· 955 956 } 956 957 } 957 958 958 - func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) { 959 + func (b *Builder) buildShard(todo []*Document, nextShardNum int) (*finishedShard, error) { 959 960 if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") { 960 961 err := parseSymbols(todo, b.opts.LanguageMap, b.parserBins) 961 962 if b.opts.CTagsMustSucceed && err != nil { ··· 1019 1020 } 1020 1021 } 1021 1022 1022 - func (b *Builder) newShardBuilder() (*zoekt.IndexBuilder, error) { 1023 + func (b *Builder) newShardBuilder() (*IndexBuilder, error) { 1023 1024 desc := b.opts.RepositoryDescription 1024 1025 desc.HasSymbols = !b.opts.DisableCTags && b.opts.CTagsPath != "" 1025 1026 desc.SubRepoMap = b.opts.SubRepositories 1026 1027 desc.IndexOptions = b.opts.GetHash() 1027 1028 1028 - shardBuilder, err := zoekt.NewIndexBuilder(&desc) 1029 + shardBuilder, err := NewIndexBuilder(&desc) 1029 1030 if err != nil { 1030 1031 return nil, err 1031 1032 } ··· 1034 1035 return shardBuilder, nil 1035 1036 } 1036 1037 1037 - func (b *Builder) writeShard(fn string, ib *zoekt.IndexBuilder) (*finishedShard, error) { 1038 + func (b *Builder) writeShard(fn string, ib *IndexBuilder) (*finishedShard, error) { 1038 1039 dir := filepath.Dir(fn) 1039 1040 if err := os.MkdirAll(dir, 0o700); err != nil { 1040 1041 return nil, err ··· 1091 1092 1092 1093 // umask holds the Umask of the current process 1093 1094 var umask os.FileMode 1095 + 1096 + // Document holds a document (file) to index. 1097 + type Document struct { 1098 + Name string 1099 + Content []byte 1100 + Branches []string 1101 + SubRepositoryPath string 1102 + Language string 1103 + 1104 + // If set, something is wrong with the file contents, and this 1105 + // is the reason it wasn't indexed. 1106 + SkipReason string 1107 + 1108 + // Document sections for symbols. Offsets should use bytes. 1109 + Symbols []DocumentSection 1110 + SymbolsMetaData []*zoekt.Symbol 1111 + } 1112 + 1113 + type DocumentSection struct { 1114 + Start, End uint32 1115 + }

+143 -11

build/builder_test.go index/builder_test.go

··· 1 - package build 1 + package index 2 2 3 3 import ( 4 4 "errors" 5 5 "flag" 6 + "fmt" 6 7 "io" 7 8 "log" 8 9 "os" 9 10 "path/filepath" 11 + "reflect" 10 12 "strconv" 11 13 "strings" 12 14 "testing" ··· 14 16 15 17 "github.com/google/go-cmp/cmp" 16 18 "github.com/google/go-cmp/cmp/cmpopts" 17 - 18 19 "github.com/sourcegraph/zoekt" 19 20 ) 20 21 ··· 53 54 54 55 // fields indexTime and id depend on time. For this test, we copy the fields from 55 56 // the old shard. 56 - _, wantMetadata, err := zoekt.ReadMetadataPath(wantP) 57 + _, wantMetadata, err := ReadMetadataPath(wantP) 57 58 if err != nil { 58 59 t.Fatal(err) 59 60 } ··· 235 236 236 237 // content with at least 100 bytes 237 238 binary := append([]byte("abc def \x00"), make([]byte, 100)...) 238 - err = b.Add(zoekt.Document{ 239 + err = b.Add(Document{ 239 240 Name: "f1", 240 241 Content: binary, 241 242 }) ··· 253 254 } 254 255 } 255 256 257 + func TestPartialSuccess(t *testing.T) { 258 + dir := t.TempDir() 259 + 260 + opts := Options{ 261 + IndexDir: dir, 262 + ShardMax: 1024, 263 + SizeMax: 1 << 20, 264 + Parallelism: 1, 265 + } 266 + opts.RepositoryDescription.Name = "repo" 267 + opts.SetDefaults() 268 + 269 + b, err := NewBuilder(opts) 270 + if err != nil { 271 + t.Fatalf("NewBuilder: %v", err) 272 + } 273 + 274 + for i := 0; i < 4; i++ { 275 + nm := fmt.Sprintf("F%d", i) 276 + _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) 277 + } 278 + b.buildError = fmt.Errorf("any error") 279 + 280 + // No error checking. 281 + _ = b.Finish() 282 + 283 + // Finish cleans up temporary files. 284 + if fs, err := filepath.Glob(dir + "/*"); err != nil { 285 + t.Errorf("glob(%s): %v", dir, err) 286 + } else if len(fs) != 0 { 287 + t.Errorf("got shards %v, want []", fs) 288 + } 289 + } 290 + 256 291 func TestOptions_FindAllShards(t *testing.T) { 257 292 type simpleShard struct { 258 293 Repository zoekt.Repository ··· 371 406 if tt.expectedShardCount > 0 { 372 407 for _, s := range shards { 373 408 // all shards should contain the metadata for the desired repository 374 - repos, _, err := zoekt.ReadMetadataPathAlive(s) 409 + repos, _, err := ReadMetadataPathAlive(s) 375 410 if err != nil { 376 411 t.Fatalf("reading metadata from shard %q: %s", s, err) 377 412 } ··· 620 655 } 621 656 622 657 for _, s := range shards { 623 - repositories, _, err := zoekt.ReadMetadataPathAlive(s) 658 + repositories, _, err := ReadMetadataPathAlive(s) 624 659 if err != nil { 625 660 t.Fatalf("reading repository metadata from shard %q", s) 626 661 } ··· 766 801 "builder_test.go", 767 802 "test/TestQuery.java", 768 803 "search/vendor/thirdparty.cc", 769 - "search/node_modules/search/index.js", 804 + "search/node_modules/search/js", 770 805 "search.min.js", 771 806 "internal/search.js.map", 772 807 } ··· 838 873 // This (along with our shardMax setting of 75 bytes) means that each shard 839 874 // will contain at most one of these. 840 875 fileName := strconv.Itoa(i) 841 - document := zoekt.Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} 876 + document := Document{Name: fileName, Content: []byte(strings.Repeat("A", 100))} 842 877 for _, branch := range o.RepositoryDescription.Branches { 843 878 document.Branches = append(document.Branches, branch.Name) 844 879 } ··· 879 914 } 880 915 881 916 // load the normal shards that we created 882 - var files []zoekt.IndexFile 917 + var files []IndexFile 883 918 for _, shard := range shardNames { 884 919 f, err := os.Open(shard) 885 920 if err != nil { ··· 887 922 } 888 923 defer f.Close() 889 924 890 - indexFile, err := zoekt.NewIndexFile(f) 925 + indexFile, err := NewIndexFile(f) 891 926 if err != nil { 892 927 t.Fatalf("creating index file: %s", err) 893 928 } ··· 897 932 } 898 933 899 934 // merge all the simple shards into a compound shard 900 - tmpName, dstName, err := zoekt.Merge(indexDir, files...) 935 + tmpName, dstName, err := Merge(indexDir, files...) 901 936 if err != nil { 902 937 t.Fatalf("merging index files into compound shard: %s", err) 903 938 } ··· 1000 1035 }) 1001 1036 } 1002 1037 } 1038 + 1039 + type filerankCase struct { 1040 + name string 1041 + docs []*Document 1042 + want []int 1043 + } 1044 + 1045 + func testFileRankAspect(t *testing.T, c filerankCase) { 1046 + var want []*Document 1047 + for _, j := range c.want { 1048 + want = append(want, c.docs[j]) 1049 + } 1050 + 1051 + got := make([]*Document, len(c.docs)) 1052 + copy(got, c.docs) 1053 + sortDocuments(got) 1054 + 1055 + print := func(ds []*Document) string { 1056 + r := "" 1057 + for _, d := range ds { 1058 + r += fmt.Sprintf("%v, ", d) 1059 + } 1060 + return r 1061 + } 1062 + if !reflect.DeepEqual(got, want) { 1063 + t.Errorf("got docs [%v], want [%v]", print(got), print(want)) 1064 + } 1065 + } 1066 + 1067 + func TestFileRank(t *testing.T) { 1068 + for _, c := range []filerankCase{{ 1069 + name: "filename", 1070 + docs: []*Document{ 1071 + { 1072 + Name: "longlonglong", 1073 + Content: []byte("bla"), 1074 + }, 1075 + { 1076 + Name: "short", 1077 + Content: []byte("bla"), 1078 + }, 1079 + }, 1080 + want: []int{1, 0}, 1081 + }, { 1082 + name: "test", 1083 + docs: []*Document{ 1084 + { 1085 + Name: "foo_test.go", 1086 + Content: []byte("bla"), 1087 + }, 1088 + { 1089 + Name: "longlonglong", 1090 + Content: []byte("bla"), 1091 + }, 1092 + }, 1093 + want: []int{1, 0}, 1094 + }, { 1095 + name: "content", 1096 + docs: []*Document{ 1097 + { 1098 + Content: []byte("bla"), 1099 + }, 1100 + { 1101 + Content: []byte("blablablabla"), 1102 + }, 1103 + { 1104 + Content: []byte("blabla"), 1105 + }, 1106 + }, 1107 + want: []int{0, 2, 1}, 1108 + }, { 1109 + name: "skipped docs", 1110 + docs: []*Document{ 1111 + { 1112 + Name: "binary_file", 1113 + SkipReason: "binary file", 1114 + }, 1115 + { 1116 + Name: "some_test.go", 1117 + Content: []byte("bla"), 1118 + }, 1119 + { 1120 + Name: "large_file.go", 1121 + SkipReason: "too large", 1122 + }, 1123 + { 1124 + Name: "file.go", 1125 + Content: []byte("blabla"), 1126 + }, 1127 + }, 1128 + want: []int{3, 1, 0, 2}, 1129 + }} { 1130 + t.Run(c.name, func(t *testing.T) { 1131 + testFileRankAspect(t, c) 1132 + }) 1133 + } 1134 + }

+1 -1

build/builder_unix.go index/builder_unix.go

··· 15 15 //go:build !windows && !wasm 16 16 // +build !windows,!wasm 17 17 18 - package build 18 + package index 19 19 20 20 import ( 21 21 "os"

+8 -8

build/ctags.go index/ctags.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package build 15 + package index 16 16 17 17 import ( 18 18 "bytes" ··· 41 41 return normalized 42 42 } 43 43 44 - func parseSymbols(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { 44 + func parseSymbols(todo []*Document, languageMap ctags.LanguageMap, parserBins ctags.ParserBinMap) error { 45 45 monitor := newMonitor() 46 46 defer monitor.Stop() 47 47 ··· 55 55 continue 56 56 } 57 57 58 - zoekt.DetermineLanguageIfUnknown(doc) 58 + DetermineLanguageIfUnknown(doc) 59 59 60 60 parserType := languageMap[normalizeLanguage(doc.Language)] 61 61 if parserType == ctags.NoCTags { ··· 92 92 // overlaps finds the proper position to insert a zoekt.DocumentSection with 93 93 // "start and "end" into "symOffsets". It returns -1 if the new section overlaps 94 94 // with one of the existing ones. 95 - func overlaps(symOffsets []zoekt.DocumentSection, start, end uint32) int { 95 + func overlaps(symOffsets []DocumentSection, start, end uint32) int { 96 96 i := 0 97 97 for i = len(symOffsets) - 1; i >= 0; i-- { 98 98 // The most common case is that we exit here, because symOffsets is sorted by ··· 120 120 // corresponding metadata (zoekt.Symbol). 121 121 // 122 122 // This can not be called concurrently. 123 - func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]zoekt.DocumentSection, []*zoekt.Symbol, error) { 123 + func (t *tagsToSections) Convert(content []byte, tags []*ctags.Entry) ([]DocumentSection, []*zoekt.Symbol, error) { 124 124 nls := t.newLinesIndices(content) 125 - symOffsets := make([]zoekt.DocumentSection, 0, len(tags)) 125 + symOffsets := make([]DocumentSection, 0, len(tags)) 126 126 symMetaData := make([]*zoekt.Symbol, 0, len(tags)) 127 127 128 128 for _, t := range tags { ··· 162 162 continue 163 163 } 164 164 165 - symOffsets = slices.Insert(symOffsets, i, zoekt.DocumentSection{ 165 + symOffsets = slices.Insert(symOffsets, i, DocumentSection{ 166 166 Start: start, 167 167 End: endSym, 168 168 }) ··· 242 242 return m 243 243 } 244 244 245 - func (m *monitor) BeginParsing(doc *zoekt.Document) { 245 + func (m *monitor) BeginParsing(doc *Document) { 246 246 now := time.Now() 247 247 m.mu.Lock() 248 248 m.lastUpdate = now

+16 -17

build/ctags_test.go index/ctags_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package build 15 + package index 16 16 17 17 import ( 18 18 "os" 19 19 "reflect" 20 20 "testing" 21 21 22 - "github.com/sourcegraph/zoekt" 23 22 "github.com/sourcegraph/zoekt/internal/ctags" 24 23 ) 25 24 ··· 64 63 t.Fatal("tagsToSections", err) 65 64 } 66 65 67 - want := []zoekt.DocumentSection{ 66 + want := []DocumentSection{ 68 67 {Start: 16, End: 17}, 69 68 {Start: 23, End: 24}, 70 69 } ··· 97 96 t.Fatal("tagsToSections", err) 98 97 } 99 98 100 - want := []zoekt.DocumentSection{ 99 + want := []DocumentSection{ 101 100 {Start: 15, End: 18}, 102 101 {Start: 20, End: 23}, 103 102 {Start: 26, End: 29}, ··· 152 151 153 152 func TestOverlaps(t *testing.T) { 154 153 tests := []struct { 155 - documentSections []zoekt.DocumentSection 154 + documentSections []DocumentSection 156 155 start uint32 157 156 end uint32 158 157 pos int ··· 161 160 // overlap 162 161 // 163 162 { 164 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 163 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 165 164 start: 6, 166 165 end: 9, 167 166 pos: -1, 168 167 }, 169 168 { 170 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 169 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 171 170 start: 6, 172 171 end: 12, 173 172 pos: -1, 174 173 }, 175 174 { 176 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 175 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 177 176 start: 4, 178 177 end: 9, 179 178 pos: -1, 180 179 }, 181 180 { 182 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 181 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 183 182 start: 1, 184 183 end: 9, 185 184 pos: -1, 186 185 }, 187 186 { 188 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 187 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 189 188 start: 0, 190 189 end: 25, 191 190 pos: -1, 192 191 }, 193 192 { 194 - documentSections: []zoekt.DocumentSection{{0, 3}}, 193 + documentSections: []DocumentSection{{0, 3}}, 195 194 start: 0, 196 195 end: 1, 197 196 pos: -1, ··· 200 199 // NO overlap 201 200 // 202 201 { 203 - documentSections: []zoekt.DocumentSection{{2, 3}, {5, 10}}, 202 + documentSections: []DocumentSection{{2, 3}, {5, 10}}, 204 203 start: 0, 205 204 end: 2, 206 205 pos: 0, 207 206 }, 208 207 { 209 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 208 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 210 209 start: 3, 211 210 end: 4, 212 211 pos: 1, 213 212 }, 214 213 { 215 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 214 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 216 215 start: 3, 217 216 end: 5, 218 217 pos: 1, 219 218 }, 220 219 { 221 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}}, 220 + documentSections: []DocumentSection{{0, 3}, {5, 10}}, 222 221 start: 11, 223 222 end: 14, 224 223 pos: 2, 225 224 }, 226 225 { 227 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, 226 + documentSections: []DocumentSection{{0, 3}, {5, 10}, {14, 15}}, 228 227 start: 11, 229 228 end: 13, 230 229 pos: 2, 231 230 }, 232 231 { 233 - documentSections: []zoekt.DocumentSection{{0, 3}, {5, 10}, {14, 15}}, 232 + documentSections: []DocumentSection{{0, 3}, {5, 10}, {14, 15}}, 234 233 start: 18, 235 234 end: 19, 236 235 pos: 3,

+63 -193

build/e2e_test.go internal/e2e/e2e_index_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package build 15 + package e2e 16 16 17 17 import ( 18 18 "bytes" ··· 22 22 "log" 23 23 "os" 24 24 "path/filepath" 25 - "reflect" 26 25 "runtime" 27 26 "sort" 28 27 "strconv" ··· 34 33 "github.com/google/go-cmp/cmp/cmpopts" 35 34 "github.com/grafana/regexp" 36 35 "github.com/sourcegraph/zoekt" 36 + "github.com/sourcegraph/zoekt/index" 37 37 "github.com/sourcegraph/zoekt/internal/shards" 38 38 "github.com/sourcegraph/zoekt/internal/tenant" 39 39 "github.com/sourcegraph/zoekt/internal/tenant/tenanttest" ··· 41 41 "github.com/stretchr/testify/require" 42 42 ) 43 43 44 - func TestBasic(t *testing.T) { 44 + func TestBasicIndexing(t *testing.T) { 45 45 dir := t.TempDir() 46 46 47 - opts := Options{ 47 + opts := index.Options{ 48 48 IndexDir: dir, 49 49 ShardMax: 1024, 50 50 RepositoryDescription: zoekt.Repository{ ··· 54 54 SizeMax: 1 << 20, 55 55 } 56 56 57 - b, err := NewBuilder(opts) 57 + b, err := index.NewBuilder(opts) 58 58 if err != nil { 59 59 t.Fatalf("NewBuilder: %v", err) 60 60 } ··· 75 75 t.Fatalf("want multiple shards, got %v", fs) 76 76 } 77 77 78 - _, md0, err := zoekt.ReadMetadataPath(fs[0]) 78 + _, md0, err := index.ReadMetadataPath(fs[0]) 79 79 if err != nil { 80 80 t.Fatal(err) 81 81 } 82 82 for _, f := range fs[1:] { 83 - _, md, err := zoekt.ReadMetadataPath(f) 83 + _, md, err := index.ReadMetadataPath(f) 84 84 if err != nil { 85 85 t.Fatal(err) 86 86 } ··· 126 126 // "repo-mutated". We do this inside retry helper since we have noticed 127 127 // some flakiness on github CI. 128 128 for _, p := range fs { 129 - repos, _, err := zoekt.ReadMetadataPath(p) 129 + repos, _, err := index.ReadMetadataPath(p) 130 130 if err != nil { 131 131 t.Fatal(err) 132 132 } ··· 166 166 tnt1, err := tenant.FromContext(ctx1) 167 167 require.NoError(t, err) 168 168 169 - opts := Options{ 169 + opts := index.Options{ 170 170 IndexDir: dir, 171 171 ShardMax: 1024, 172 172 RepositoryDescription: zoekt.Repository{ ··· 177 177 SizeMax: 1 << 20, 178 178 } 179 179 180 - b, err := NewBuilder(opts) 180 + b, err := index.NewBuilder(opts) 181 181 if err != nil { 182 182 t.Fatalf("NewBuilder: %v", err) 183 183 } ··· 198 198 t.Fatalf("want multiple shards, got %v", fs) 199 199 } 200 200 201 - _, md0, err := zoekt.ReadMetadataPath(fs[0]) 201 + _, md0, err := index.ReadMetadataPath(fs[0]) 202 202 if err != nil { 203 203 t.Fatal(err) 204 204 } 205 205 for _, f := range fs[1:] { 206 - _, md, err := zoekt.ReadMetadataPath(f) 206 + _, md, err := index.ReadMetadataPath(f) 207 207 if err != nil { 208 208 t.Fatal(err) 209 209 } ··· 249 249 tnt1, err := tenant.FromContext(ctx1) 250 250 require.NoError(t, err) 251 251 252 - opts := Options{ 252 + opts := index.Options{ 253 253 IndexDir: dir, 254 254 RepositoryDescription: zoekt.Repository{ 255 255 Name: "repo", ··· 258 258 } 259 259 opts.SetDefaults() 260 260 261 - b, err := NewBuilder(opts) 261 + b, err := index.NewBuilder(opts) 262 262 if err != nil { 263 263 t.Fatalf("NewBuilder: %v", err) 264 264 } ··· 334 334 dir := t.TempDir() 335 335 336 336 sizeMax := 1000 337 - opts := Options{ 337 + opts := index.Options{ 338 338 IndexDir: dir, 339 339 LargeFiles: []string{"F0", "F1", "F2", "!F1"}, 340 340 RepositoryDescription: zoekt.Repository{ ··· 343 343 SizeMax: sizeMax, 344 344 } 345 345 346 - b, err := NewBuilder(opts) 346 + b, err := index.NewBuilder(opts) 347 347 if err != nil { 348 348 t.Fatalf("NewBuilder: %v", err) 349 349 } ··· 385 385 func TestUpdate(t *testing.T) { 386 386 dir := t.TempDir() 387 387 388 - opts := Options{ 388 + opts := index.Options{ 389 389 IndexDir: dir, 390 390 ShardMax: 1024, 391 391 RepositoryDescription: zoekt.Repository{ ··· 396 396 SizeMax: 1 << 20, 397 397 } 398 398 399 - if b, err := NewBuilder(opts); err != nil { 399 + if b, err := index.NewBuilder(opts); err != nil { 400 400 t.Fatalf("NewBuilder: %v", err) 401 401 } else { 402 402 if err := b.AddFile("F", []byte("hoi")); err != nil { ··· 431 431 FileURLTemplate: "url2", 432 432 } 433 433 434 - if b, err := NewBuilder(opts); err != nil { 434 + if b, err := index.NewBuilder(opts); err != nil { 435 435 t.Fatalf("NewBuilder: %v", err) 436 436 } else { 437 437 if err := b.AddFile("F", []byte("hoi")); err != nil { ··· 478 478 func TestDeleteOldShards(t *testing.T) { 479 479 dir := t.TempDir() 480 480 481 - opts := Options{ 481 + opts := index.Options{ 482 482 IndexDir: dir, 483 483 ShardMax: 1024, 484 484 RepositoryDescription: zoekt.Repository{ ··· 489 489 } 490 490 opts.SetDefaults() 491 491 492 - b, err := NewBuilder(opts) 492 + b, err := index.NewBuilder(opts) 493 493 if err != nil { 494 494 t.Fatalf("NewBuilder: %v", err) 495 495 } ··· 520 520 521 521 // Do again, without sharding. 522 522 opts.ShardMax = 1 << 20 523 - b, err = NewBuilder(opts) 523 + b, err = index.NewBuilder(opts) 524 524 if err != nil { 525 525 t.Fatalf("NewBuilder: %v", err) 526 526 } ··· 542 542 } 543 543 544 544 // Again, but don't index anything; should leave old shards intact. 545 - b, err = NewBuilder(opts) 545 + b, err = index.NewBuilder(opts) 546 546 if err != nil { 547 547 t.Fatalf("NewBuilder: %v", err) 548 548 } ··· 558 558 } 559 559 } 560 560 561 - func TestPartialSuccess(t *testing.T) { 562 - dir := t.TempDir() 563 - 564 - opts := Options{ 565 - IndexDir: dir, 566 - ShardMax: 1024, 567 - SizeMax: 1 << 20, 568 - Parallelism: 1, 569 - } 570 - opts.RepositoryDescription.Name = "repo" 571 - opts.SetDefaults() 572 - 573 - b, err := NewBuilder(opts) 574 - if err != nil { 575 - t.Fatalf("NewBuilder: %v", err) 576 - } 577 - 578 - for i := 0; i < 4; i++ { 579 - nm := fmt.Sprintf("F%d", i) 580 - _ = b.AddFile(nm, []byte(strings.Repeat("01234567\n", 128))) 581 - } 582 - b.buildError = fmt.Errorf("any error") 583 - 584 - // No error checking. 585 - _ = b.Finish() 586 - 587 - // Finish cleans up temporary files. 588 - if fs, err := filepath.Glob(dir + "/*"); err != nil { 589 - t.Errorf("glob(%s): %v", dir, err) 590 - } else if len(fs) != 0 { 591 - t.Errorf("got shards %v, want []", fs) 592 - } 593 - } 594 - 595 - type filerankCase struct { 596 - name string 597 - docs []*zoekt.Document 598 - want []int 599 - } 600 - 601 - func testFileRankAspect(t *testing.T, c filerankCase) { 602 - var want []*zoekt.Document 603 - for _, j := range c.want { 604 - want = append(want, c.docs[j]) 605 - } 606 - 607 - got := make([]*zoekt.Document, len(c.docs)) 608 - copy(got, c.docs) 609 - sortDocuments(got) 610 - 611 - print := func(ds []*zoekt.Document) string { 612 - r := "" 613 - for _, d := range ds { 614 - r += fmt.Sprintf("%v, ", d) 615 - } 616 - return r 617 - } 618 - if !reflect.DeepEqual(got, want) { 619 - t.Errorf("got docs [%v], want [%v]", print(got), print(want)) 620 - } 621 - } 622 - 623 - func TestFileRank(t *testing.T) { 624 - for _, c := range []filerankCase{{ 625 - name: "filename", 626 - docs: []*zoekt.Document{ 627 - { 628 - Name: "longlonglong", 629 - Content: []byte("bla"), 630 - }, 631 - { 632 - Name: "short", 633 - Content: []byte("bla"), 634 - }, 635 - }, 636 - want: []int{1, 0}, 637 - }, { 638 - name: "test", 639 - docs: []*zoekt.Document{ 640 - { 641 - Name: "foo_test.go", 642 - Content: []byte("bla"), 643 - }, 644 - { 645 - Name: "longlonglong", 646 - Content: []byte("bla"), 647 - }, 648 - }, 649 - want: []int{1, 0}, 650 - }, { 651 - name: "content", 652 - docs: []*zoekt.Document{ 653 - { 654 - Content: []byte("bla"), 655 - }, 656 - { 657 - Content: []byte("blablablabla"), 658 - }, 659 - { 660 - Content: []byte("blabla"), 661 - }, 662 - }, 663 - want: []int{0, 2, 1}, 664 - }, { 665 - name: "skipped docs", 666 - docs: []*zoekt.Document{ 667 - { 668 - Name: "binary_file", 669 - SkipReason: "binary file", 670 - }, 671 - { 672 - Name: "some_test.go", 673 - Content: []byte("bla"), 674 - }, 675 - { 676 - Name: "large_file.go", 677 - SkipReason: "too large", 678 - }, 679 - { 680 - Name: "file.go", 681 - Content: []byte("blabla"), 682 - }, 683 - }, 684 - want: []int{3, 1, 0, 2}, 685 - }} { 686 - t.Run(c.name, func(t *testing.T) { 687 - testFileRankAspect(t, c) 688 - }) 689 - } 690 - } 691 - 692 561 func TestEmptyContent(t *testing.T) { 693 562 dir := t.TempDir() 694 563 695 - opts := Options{ 564 + opts := index.Options{ 696 565 IndexDir: dir, 697 566 RepositoryDescription: zoekt.Repository{ 698 567 Name: "repo", ··· 700 569 } 701 570 opts.SetDefaults() 702 571 703 - b, err := NewBuilder(opts) 572 + b, err := index.NewBuilder(opts) 704 573 if err != nil { 705 574 t.Fatalf("NewBuilder: %v", err) 706 575 } ··· 734 603 // TODO: Need to write a test for compound shards as well. 735 604 type step struct { 736 605 name string 737 - documents []zoekt.Document 738 - optFn func(t *testing.T, o *Options) 606 + documents []index.Document 607 + optFn func(t *testing.T, o *index.Options) 739 608 740 609 query string 741 - expectedDocuments []zoekt.Document 610 + changedFile string 611 + expectedDocuments []index.Document 742 612 } 743 613 744 614 var ( 745 - fooAtMain = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")} 746 - fooAtMainV2 = zoekt.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")} 615 + fooAtMain = index.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v1")} 616 + fooAtMainV2 = index.Document{Name: "foo.go", Branches: []string{"main"}, Content: []byte("common foo-main-v2")} 747 617 748 - fooAtMainAndRelease = zoekt.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")} 618 + fooAtMainAndRelease = index.Document{Name: "foo.go", Branches: []string{"main", "release"}, Content: []byte("common foo-main-and-release")} 749 619 750 - barAtMain = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")} 751 - barAtMainV2 = zoekt.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")} 620 + barAtMain = index.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main")} 621 + barAtMainV2 = index.Document{Name: "bar.go", Branches: []string{"main"}, Content: []byte("common bar-main-v2")} 752 622 ) 753 623 754 624 for _, test := range []struct { ··· 760 630 steps: []step{ 761 631 { 762 632 name: "setup", 763 - documents: []zoekt.Document{barAtMain, fooAtMain}, 633 + documents: []index.Document{barAtMain, fooAtMain}, 764 634 query: "common", 765 - expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, 635 + expectedDocuments: []index.Document{barAtMain, fooAtMain}, 766 636 }, 767 637 { 768 638 name: "add new version of foo, tombstone older ones", 769 - documents: []zoekt.Document{fooAtMainV2}, 770 - optFn: func(t *testing.T, o *Options) { 639 + documents: []index.Document{fooAtMainV2}, 640 + optFn: func(t *testing.T, o *index.Options) { 771 641 o.IsDelta = true 772 - o.changedOrRemovedFiles = []string{"foo.go"} 773 642 }, 774 643 query: "common", 775 - expectedDocuments: []zoekt.Document{barAtMain, fooAtMainV2}, 644 + changedFile: "foo.go", 645 + expectedDocuments: []index.Document{barAtMain, fooAtMainV2}, 776 646 }, 777 647 { 778 648 name: "add new version of bar, tombstone older ones", 779 - documents: []zoekt.Document{barAtMainV2}, 780 - optFn: func(t *testing.T, o *Options) { 649 + documents: []index.Document{barAtMainV2}, 650 + optFn: func(t *testing.T, o *index.Options) { 781 651 o.IsDelta = true 782 - o.changedOrRemovedFiles = []string{"bar.go"} 783 652 }, 784 653 query: "common", 785 - expectedDocuments: []zoekt.Document{barAtMainV2, fooAtMainV2}, 654 + changedFile: "bar.go", 655 + expectedDocuments: []index.Document{barAtMainV2, fooAtMainV2}, 786 656 }, 787 - }, 788 - }, 657 + }}, 789 658 { 790 659 name: "tombstone older documents even if the latest shard has no documents", 791 660 steps: []step{ 792 661 { 793 662 name: "setup", 794 - documents: []zoekt.Document{barAtMain, fooAtMain}, 663 + documents: []index.Document{barAtMain, fooAtMain}, 795 664 query: "common", 796 - expectedDocuments: []zoekt.Document{barAtMain, fooAtMain}, 665 + expectedDocuments: []index.Document{barAtMain, fooAtMain}, 797 666 }, 798 667 { 799 668 // a build with no documents could represent a deletion 800 669 name: "tombstone older documents", 801 670 documents: nil, 802 - optFn: func(t *testing.T, o *Options) { 671 + optFn: func(t *testing.T, o *index.Options) { 803 672 o.IsDelta = true 804 - o.changedOrRemovedFiles = []string{"foo.go"} 805 673 }, 806 674 query: "common", 807 - expectedDocuments: []zoekt.Document{barAtMain}, 675 + changedFile: "foo.go", 676 + expectedDocuments: []index.Document{barAtMain}, 808 677 }, 809 678 }, 810 679 }, ··· 813 682 steps: []step{ 814 683 { 815 684 name: "setup", 816 - documents: []zoekt.Document{barAtMain, fooAtMainAndRelease}, 685 + documents: []index.Document{barAtMain, fooAtMainAndRelease}, 817 686 query: "common", 818 - expectedDocuments: []zoekt.Document{barAtMain, fooAtMainAndRelease}, 687 + expectedDocuments: []index.Document{barAtMain, fooAtMainAndRelease}, 819 688 }, 820 689 { 821 690 name: "tombstone foo", 822 691 documents: nil, 823 - optFn: func(t *testing.T, o *Options) { 692 + optFn: func(t *testing.T, o *index.Options) { 824 693 o.IsDelta = true 825 - o.changedOrRemovedFiles = []string{"foo.go"} 826 694 }, 827 695 query: "common", 828 - expectedDocuments: []zoekt.Document{barAtMain}, 696 + changedFile: "foo.go", 697 + expectedDocuments: []index.Document{barAtMain}, 829 698 }, 830 699 }, 831 700 }, ··· 856 725 return a.Name < b.Name 857 726 }) 858 727 859 - buildOpts := Options{ 728 + buildOpts := index.Options{ 860 729 IndexDir: indexDir, 861 730 RepositoryDescription: repository, 862 731 } ··· 866 735 step.optFn(t, &buildOpts) 867 736 } 868 737 869 - b, err := NewBuilder(buildOpts) 738 + b, err := index.NewBuilder(buildOpts) 739 + b.MarkFileAsChangedOrRemoved(step.changedFile) 870 740 if err != nil { 871 741 t.Fatalf("step %q: NewBuilder: %s", step.name, err) 872 742 } ··· 893 763 } 894 764 895 765 state, _ := buildOpts.IndexState() 896 - if diff := cmp.Diff(IndexStateEqual, state); diff != "" { 766 + if diff := cmp.Diff(index.IndexStateEqual, state); diff != "" { 897 767 t.Errorf("unexpected diff in index state (-want +got):\n%s", diff) 898 768 } 899 769 ··· 911 781 t.Fatalf("step %q: Search(%q): %s", step.name, step.query, err) 912 782 } 913 783 914 - var receivedDocuments []zoekt.Document 784 + var receivedDocuments []index.Document 915 785 for _, f := range result.Files { 916 - receivedDocuments = append(receivedDocuments, zoekt.Document{ 786 + receivedDocuments = append(receivedDocuments, index.Document{ 917 787 Name: f.FileName, 918 788 Content: f.Content, 919 789 }) 920 790 } 921 791 922 792 cmpOpts := []cmp.Option{ 923 - cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), 924 - cmpopts.SortSlices(func(a, b zoekt.Document) bool { 793 + cmpopts.IgnoreFields(index.Document{}, "Branches"), 794 + cmpopts.SortSlices(func(a, b index.Document) bool { 925 795 if a.Name < b.Name { 926 796 return true 927 797 }

+18 -19

build/scoring_test.go internal/e2e/scoring_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package build 15 + package e2e 16 16 17 17 import ( 18 18 "context" ··· 21 21 "testing" 22 22 23 23 "github.com/sourcegraph/zoekt" 24 + "github.com/sourcegraph/zoekt/index" 24 25 "github.com/sourcegraph/zoekt/internal/ctags" 25 26 "github.com/sourcegraph/zoekt/internal/shards" 26 27 "github.com/sourcegraph/zoekt/query" ··· 66 67 } 67 68 68 69 func TestBM25(t *testing.T) { 69 - exampleJava, err := os.ReadFile("./testdata/example.java") 70 + exampleJava, err := os.ReadFile("./examples/example.java") 70 71 if err != nil { 71 72 t.Fatal(err) 72 73 } ··· 97 98 // line 54: private static <A, B> B runInnerInterface(InnerInterface<A, B> fn, A a) { 98 99 wantBestLineMatch: 54, 99 100 }, { 100 - // Another content-only match 101 + // another content-only match 101 102 fileName: "example.java", 102 103 query: &query.And{Children: []query.Q{ 103 104 &query.Substring{Pattern: "system"}, ··· 135 136 } 136 137 137 138 func TestJava(t *testing.T) { 138 - exampleJava, err := os.ReadFile("./testdata/example.java") 139 + exampleJava, err := os.ReadFile("./examples/example.java") 139 140 if err != nil { 140 141 t.Fatal(err) 141 142 } ··· 300 301 } 301 302 302 303 func TestKotlin(t *testing.T) { 303 - exampleKotlin, err := os.ReadFile("./testdata/example.kt") 304 + exampleKotlin, err := os.ReadFile("./examples/example.kt") 304 305 if err != nil { 305 306 t.Fatal(err) 306 307 } ··· 365 366 } 366 367 367 368 func TestCpp(t *testing.T) { 368 - exampleCpp, err := os.ReadFile("./testdata/example.cc") 369 + exampleCpp, err := os.ReadFile("./examples/example.cc") 369 370 if err != nil { 370 371 t.Fatal(err) 371 372 } ··· 422 423 } 423 424 424 425 func TestPython(t *testing.T) { 425 - examplePython, err := os.ReadFile("./testdata/example.py") 426 + examplePython, err := os.ReadFile("./examples/example.py") 426 427 if err != nil { 427 428 t.Fatal(err) 428 429 } ··· 466 467 } 467 468 468 469 func TestRuby(t *testing.T) { 469 - exampleRuby, err := os.ReadFile("./testdata/example.rb") 470 + exampleRuby, err := os.ReadFile("./examples/example.rb") 470 471 if err != nil { 471 472 t.Fatal(err) 472 473 } ··· 506 507 } 507 508 508 509 func TestScala(t *testing.T) { 509 - exampleScala, err := os.ReadFile("./testdata/example.scala") 510 + exampleScala, err := os.ReadFile("./examples/example.scala") 510 511 if err != nil { 511 512 t.Fatal(err) 512 513 } ··· 641 642 t.Run(name, func(t *testing.T) { 642 643 dir := t.TempDir() 643 644 644 - opts := Options{ 645 + opts := index.Options{ 645 646 IndexDir: dir, 646 647 RepositoryDescription: zoekt.Repository{ 647 648 Name: "repo", 648 649 }, 649 - LanguageMap: ctags.LanguageMap{ 650 - normalizeLanguage(c.language): parserType, 651 - }, 650 + LanguageMap: ctags.LanguageMap{c.language: parserType}, 652 651 } 653 652 654 653 epsilon := 0.01 655 654 656 - b, err := NewBuilder(opts) 655 + b, err := index.NewBuilder(opts) 657 656 if err != nil { 658 657 t.Fatalf("NewBuilder: %v", err) 659 658 } ··· 707 706 if useBM25 { 708 707 return fullScore 709 708 } 710 - return math.Trunc(fullScore / zoekt.ScoreOffset) 709 + return math.Trunc(fullScore / index.ScoreOffset) 711 710 } 712 711 713 712 func TestRepoRanks(t *testing.T) { 714 713 requireCTags(t) 715 714 dir := t.TempDir() 716 715 717 - opts := Options{ 716 + opts := index.Options{ 718 717 IndexDir: dir, 719 718 RepositoryDescription: zoekt.Repository{ 720 719 Name: "repo", ··· 722 721 } 723 722 724 723 searchQuery := &query.Substring{Content: true, Pattern: "Inner"} 725 - exampleJava, err := os.ReadFile("./testdata/example.java") 724 + exampleJava, err := os.ReadFile("./examples/example.java") 726 725 if err != nil { 727 726 t.Fatal(err) 728 727 } ··· 758 757 Rank: c.repoRank, 759 758 } 760 759 761 - b, err := NewBuilder(opts) 760 + b, err := index.NewBuilder(opts) 762 761 if err != nil { 763 762 t.Fatalf("NewBuilder: %v", err) 764 763 } 765 764 766 - err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava}) 765 + err = b.Add(index.Document{Name: "example.java", Content: exampleJava}) 767 766 if err != nil { 768 767 t.Fatal(err) 769 768 }

build/testdata/example.cc internal/e2e/examples/example.cc

build/testdata/example.java internal/e2e/examples/example.java

build/testdata/example.kt internal/e2e/examples/example.kt

build/testdata/example.py internal/e2e/examples/example.py

build/testdata/example.rb internal/e2e/examples/example.rb

build/testdata/example.scala internal/e2e/examples/example.scala

build/testdata/large_file.cc internal/e2e/examples/large_file.cc

+4 -5

cmd/flags.go

··· 20 20 "os" 21 21 "path/filepath" 22 22 23 - "github.com/sourcegraph/zoekt" 24 - "github.com/sourcegraph/zoekt/build" 23 + "github.com/sourcegraph/zoekt/index" 25 24 ) 26 25 27 26 var ( 28 27 version = flag.Bool("version", false, "Print version number") 29 - opts = &build.Options{} 28 + opts = &index.Options{} 30 29 ) 31 30 32 31 func init() { 33 32 opts.Flags(flag.CommandLine) 34 33 } 35 34 36 - func OptionsFromFlags() *build.Options { 35 + func OptionsFromFlags() *index.Options { 37 36 if *version { 38 37 name := filepath.Base(os.Args[0]) 39 - fmt.Printf("%s version %q\n", name, zoekt.Version) 38 + fmt.Printf("%s version %q\n", name, index.Version) 40 39 os.Exit(0) 41 40 } 42 41

+4 -5

cmd/zoekt-index/main.go

··· 23 23 "runtime/pprof" 24 24 "strings" 25 25 26 - "github.com/sourcegraph/zoekt" 27 - "github.com/sourcegraph/zoekt/build" 28 26 "github.com/sourcegraph/zoekt/cmd" 27 + "github.com/sourcegraph/zoekt/index" 29 28 "go.uber.org/automaxprocs/maxprocs" 30 29 ) 31 30 ··· 103 102 } 104 103 } 105 104 106 - func indexArg(arg string, opts build.Options, ignore map[string]struct{}) error { 105 + func indexArg(arg string, opts index.Options, ignore map[string]struct{}) error { 107 106 dir, err := filepath.Abs(filepath.Clean(arg)) 108 107 if err != nil { 109 108 return err 110 109 } 111 110 112 111 opts.RepositoryDescription.Name = filepath.Base(dir) 113 - builder, err := build.NewBuilder(opts) 112 + builder, err := index.NewBuilder(opts) 114 113 if err != nil { 115 114 return err 116 115 } ··· 135 134 for f := range comm { 136 135 displayName := strings.TrimPrefix(f.name, dir+"/") 137 136 if f.size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(displayName) { 138 - if err := builder.Add(zoekt.Document{ 137 + if err := builder.Add(index.Document{ 139 138 Name: displayName, 140 139 SkipReason: fmt.Sprintf("document size %d larger than limit %d", f.size, opts.SizeMax), 141 140 }); err != nil {

+3 -3

cmd/zoekt-indexserver/main.go

··· 33 33 "strings" 34 34 "time" 35 35 36 - "github.com/sourcegraph/zoekt" 36 + "github.com/sourcegraph/zoekt/index" 37 37 "github.com/sourcegraph/zoekt/internal/gitindex" 38 38 ) 39 39 ··· 206 206 } 207 207 defer f.Close() 208 208 209 - ifile, err := zoekt.NewIndexFile(f) 209 + ifile, err := index.NewIndexFile(f) 210 210 if err != nil { 211 211 return nil 212 212 } 213 213 defer ifile.Close() 214 214 215 - repos, _, err := zoekt.ReadMetadata(ifile) 215 + repos, _, err := index.ReadMetadata(ifile) 216 216 if err != nil { 217 217 return nil 218 218 }

+8 -8

cmd/zoekt-merge-index/main.go

··· 8 8 "path/filepath" 9 9 "strings" 10 10 11 - "github.com/sourcegraph/zoekt" 11 + "github.com/sourcegraph/zoekt/index" 12 12 ) 13 13 14 14 // merge merges the input shards into a compound shard in dstDir. It returns the 15 15 // full path to the compound shard. The input shards are removed on success. 16 16 func merge(dstDir string, names []string) (string, error) { 17 - var files []zoekt.IndexFile 17 + var files []index.IndexFile 18 18 for _, fn := range names { 19 19 f, err := os.Open(fn) 20 20 if err != nil { ··· 22 22 } 23 23 defer f.Close() 24 24 25 - indexFile, err := zoekt.NewIndexFile(f) 25 + indexFile, err := index.NewIndexFile(f) 26 26 if err != nil { 27 27 return "", err 28 28 } ··· 31 31 files = append(files, indexFile) 32 32 } 33 33 34 - tmpName, dstName, err := zoekt.Merge(dstDir, files...) 34 + tmpName, dstName, err := index.Merge(dstDir, files...) 35 35 if err != nil { 36 36 return "", err 37 37 } 38 38 39 39 // Delete input shards. 40 40 for _, name := range names { 41 - paths, err := zoekt.IndexFilePaths(name) 41 + paths, err := index.IndexFilePaths(name) 42 42 if err != nil { 43 43 return "", fmt.Errorf("zoekt-merge-index: %w", err) 44 44 } ··· 83 83 } 84 84 defer f.Close() 85 85 86 - indexFile, err := zoekt.NewIndexFile(f) 86 + indexFile, err := index.NewIndexFile(f) 87 87 if err != nil { 88 88 return err 89 89 } 90 90 defer indexFile.Close() 91 91 92 - exploded, err := zoekt.Explode(dstDir, indexFile) 92 + exploded, err := index.Explode(dstDir, indexFile) 93 93 defer func() { 94 94 // best effort removal of tmp files. If os.Remove fails, indexserver will delete 95 95 // the leftover tmp files during the next cleanup. ··· 104 104 // remove the input shard first to avoid duplicate indexes. In the worst case, 105 105 // the process is interrupted just after we delete the compound shard, in which 106 106 // case we have to reindex the lost repos. 107 - paths, err := zoekt.IndexFilePaths(inputShard) 107 + paths, err := index.IndexFilePaths(inputShard) 108 108 if err != nil { 109 109 return err 110 110 }

+5 -5

cmd/zoekt-repo-index/main.go

··· 41 41 42 42 "github.com/google/slothfs/manifest" 43 43 "github.com/sourcegraph/zoekt" 44 - "github.com/sourcegraph/zoekt/build" 45 44 "github.com/sourcegraph/zoekt/ignore" 45 + "github.com/sourcegraph/zoekt/index" 46 46 "github.com/sourcegraph/zoekt/internal/gitindex" 47 47 "go.uber.org/automaxprocs/maxprocs" 48 48 ··· 127 127 revPrefix := flag.String("rev_prefix", "refs/remotes/origin/", "prefix for references") 128 128 baseURLStr := flag.String("base_url", "", "base url to interpret repository names") 129 129 repoCacheDir := flag.String("repo_cache", "", "root for repository cache") 130 - indexDir := flag.String("index", build.DefaultDir, "index directory for *.zoekt files") 130 + indexDir := flag.String("index", index.DefaultDir, "index directory for *.zoekt files") 131 131 manifestRepoURL := flag.String("manifest_repo_url", "", "set a URL for a git repository holding manifest XML file. Provide the BRANCH:XML-FILE as further command-line arguments") 132 132 manifestRevPrefix := flag.String("manifest_rev_prefix", "refs/remotes/origin/", "prefixes for branches in manifest repository") 133 133 repoName := flag.String("name", "", "set repository name") ··· 150 150 *repoName = filepath.Join(u.Host, u.Path) 151 151 } 152 152 153 - opts := build.Options{ 153 + opts := index.Options{ 154 154 Parallelism: *parallelism, 155 155 SizeMax: *sizeMax, 156 156 ShardMax: *shardLimit, ··· 258 258 return 259 259 } 260 260 261 - builder, err := build.NewBuilder(opts) 261 + builder, err := index.NewBuilder(opts) 262 262 if err != nil { 263 263 log.Fatal(err) 264 264 } ··· 269 269 log.Fatal(err) 270 270 } 271 271 272 - doc := zoekt.Document{ 272 + doc := index.Document{ 273 273 Name: k.FullPath(), 274 274 Content: data, 275 275 SubRepositoryPath: k.SubRepoPath,

+16 -16

cmd/zoekt-sourcegraph-indexserver/cleanup.go

··· 12 12 "github.com/grafana/regexp" 13 13 "github.com/prometheus/client_golang/prometheus" 14 14 "github.com/prometheus/client_golang/prometheus/promauto" 15 - 16 15 "github.com/sourcegraph/zoekt" 16 + "github.com/sourcegraph/zoekt/index" 17 17 ) 18 18 19 19 var metricCleanupDuration = promauto.NewHistogram(prometheus.HistogramOpts{ ··· 35 35 36 36 trash := getShards(trashDir) 37 37 tombtones := getTombstonedRepos(indexDir) 38 - index := getShards(indexDir) 38 + indexShards := getShards(indexDir) 39 39 40 40 // trash: Remove old shards and conflicts with index 41 41 minAge := now.Add(-24 * time.Hour) ··· 50 50 } 51 51 } 52 52 53 - if _, conflicts := index[repo]; !conflicts && !old { 53 + if _, conflicts := indexShards[repo]; !conflicts && !old { 54 54 continue 55 55 } 56 56 ··· 62 62 // tombstones: Remove tombstones that conflict with index or trash. After this, 63 63 // tombstones only contain repos that are neither in the trash nor in the index. 64 64 for repo := range tombtones { 65 - if _, conflicts := index[repo]; conflicts { 65 + if _, conflicts := indexShards[repo]; conflicts { 66 66 delete(tombtones, repo) 67 67 } 68 68 // Trash takes precedence over tombstones. ··· 75 75 // shards that have the same ID but different names delete and start over. 76 76 // This can happen when a repository is renamed. In future we should make 77 77 // shard file names based on ID. 78 - for repo, shards := range index { 78 + for repo, shards := range indexShards { 79 79 if consistentRepoName(shards) { 80 80 continue 81 81 } 82 82 83 83 // prevent further processing since we will delete 84 - delete(index, repo) 84 + delete(indexShards, repo) 85 85 86 86 // This should be rare, so give an informative log message. 87 87 var paths []string ··· 113 113 for _, repo := range repos { 114 114 // Delete from index so that index will only contain shards to be 115 115 // trashed. 116 - delete(index, repo) 116 + delete(indexShards, repo) 117 117 118 118 if shards, ok := trash[repo]; ok { 119 119 infoLog.Printf("restoring shards from trash for %v", repo) ··· 123 123 124 124 if s, ok := tombtones[repo]; ok { 125 125 infoLog.Printf("removing tombstone for %v", repo) 126 - err := zoekt.UnsetTombstone(s.Path, repo) 126 + err := index.UnsetTombstone(s.Path, repo) 127 127 if err != nil { 128 128 errorLog.Printf("error removing tombstone for %v: %s", repo, err) 129 129 } ··· 131 131 } 132 132 133 133 // index: Move non-existent repos into trash 134 - for repo, shards := range index { 134 + for repo, shards := range indexShards { 135 135 // Best-effort touch. If touch fails, we will just remove from the 136 136 // trash sooner. 137 137 for _, shard := range shards { ··· 197 197 continue 198 198 } 199 199 200 - repos, _, err := zoekt.ReadMetadataPathAlive(path) 200 + repos, _, err := index.ReadMetadataPathAlive(path) 201 201 if err != nil { 202 202 debugLog.Printf("failed to read shard: %v", err) 203 203 continue ··· 231 231 m := make(map[uint32]shard) 232 232 233 233 for _, p := range paths { 234 - repos, _, err := zoekt.ReadMetadataPath(p) 234 + repos, _, err := index.ReadMetadataPath(p) 235 235 if err != nil { 236 236 continue 237 237 } ··· 285 285 // exceedingly rare due to it being a mix of partial failure on something in 286 286 // trash + an admin re-adding a repository. 287 287 for _, shard := range shards { 288 - paths, err := zoekt.IndexFilePaths(shard.Path) 288 + paths, err := index.IndexFilePaths(shard.Path) 289 289 if err != nil { 290 290 debugLog.Printf("failed to remove shard %s: %v", shard.Path, err) 291 291 } ··· 299 299 300 300 func moveAll(dstDir string, shards []shard) { 301 301 for i, shard := range shards { 302 - paths, err := zoekt.IndexFilePaths(shard.Path) 302 + paths, err := index.IndexFilePaths(shard.Path) 303 303 if err != nil { 304 304 errorLog.Printf("failed to stat shard paths, deleting all shards for %s: %v", shard.RepoName, err) 305 305 removeAll(shards...) ··· 367 367 return false 368 368 } 369 369 370 - if err := zoekt.SetTombstone(shards[0].Path, repoID); err != nil { 370 + if err := index.SetTombstone(shards[0].Path, repoID); err != nil { 371 371 errorLog.Printf("error setting tombstone for %d in shard %s: %s. Removing shard\n", repoID, shards[0].Path, err) 372 372 _ = os.Remove(shards[0].Path) 373 373 } ··· 454 454 runMerge = exec.Command("zoekt-merge-index", "merge", fn).Run 455 455 } 456 456 457 - repos, _, err := zoekt.ReadMetadataPath(fn) 457 + repos, _, err := index.ReadMetadataPath(fn) 458 458 if err != nil { 459 459 return nil, fmt.Errorf("zoekt.ReadMetadataPath: %s", err) 460 460 } ··· 470 470 } 471 471 472 472 defer func() { 473 - paths, err := zoekt.IndexFilePaths(fn) 473 + paths, err := index.IndexFilePaths(fn) 474 474 if err != nil { 475 475 return 476 476 }

+13 -13

cmd/zoekt-sourcegraph-indexserver/cleanup_test.go

··· 14 14 "github.com/google/go-cmp/cmp/cmpopts" 15 15 16 16 "github.com/sourcegraph/zoekt" 17 - "github.com/sourcegraph/zoekt/build" 17 + "github.com/sourcegraph/zoekt/index" 18 18 ) 19 19 20 20 func TestCleanup(t *testing.T) { ··· 22 22 return shard{ 23 23 RepoID: fakeID(name), 24 24 RepoName: name, 25 - Path: zoekt.ShardName("", name, 15, n), 25 + Path: index.ShardName("", name, 15, n), 26 26 ModTime: mtime, 27 27 RepoTombstone: false, 28 28 } ··· 37 37 if filepath.Ext(path) != ".zoekt" { 38 38 continue 39 39 } 40 - repos, _, _ := zoekt.ReadMetadataPathAlive(path) 40 + repos, _, _ := index.ReadMetadataPathAlive(path) 41 41 fi, _ := os.Stat(path) 42 42 for _, repo := range repos { 43 43 shards = append(shards, shard{ ··· 173 173 for _, optFn := range optFns { 174 174 optFn(r) 175 175 } 176 - b, err := zoekt.NewIndexBuilder(r) 176 + b, err := index.NewIndexBuilder(r) 177 177 if err != nil { 178 178 t.Fatal(err) 179 179 } ··· 234 234 tmpDir := t.TempDir() 235 235 fn := createCompoundShard(t, tmpDir, []uint32{1, 2, 3, 4}) 236 236 237 - err := zoekt.SetTombstone(fn, 2) 237 + err := index.SetTombstone(fn, 2) 238 238 if err != nil { 239 239 t.Fatal(err) 240 240 } ··· 258 258 t.Fatalf("expected 1 shard, but instead got %d", len(shards)) 259 259 } 260 260 261 - repos, _, err := zoekt.ReadMetadataPath(shards[0]) 261 + repos, _, err := index.ReadMetadataPath(shards[0]) 262 262 if err != nil { 263 263 t.Fatal(err) 264 264 } ··· 286 286 dir := t.TempDir() 287 287 var repoID uint32 = 2 288 288 csOld := createCompoundShard(t, dir, []uint32{1, 2, 3, 4}, setLastCommitDate(time.Now().Add(-1*time.Hour))) 289 - if err := zoekt.SetTombstone(csOld, repoID); err != nil { 289 + if err := index.SetTombstone(csOld, repoID); err != nil { 290 290 t.Fatal(err) 291 291 } 292 292 293 293 now := time.Now() 294 294 csNew := createCompoundShard(t, dir, []uint32{5, 2, 6, 7}, setLastCommitDate(now)) 295 - if err := zoekt.SetTombstone(csNew, repoID); err != nil { 295 + if err := index.SetTombstone(csNew, repoID); err != nil { 296 296 t.Fatal(err) 297 297 } 298 298 ··· 380 380 381 381 setTombstone := func(shardPath string, repoID uint32) { 382 382 t.Helper() 383 - if err := zoekt.SetTombstone(shardPath, repoID); err != nil { 383 + if err := index.SetTombstone(shardPath, repoID); err != nil { 384 384 t.Fatal(err) 385 385 } 386 386 } ··· 473 473 optsFn(&repo) 474 474 } 475 475 476 - opts := build.Options{ 476 + opts := index.Options{ 477 477 IndexDir: dir, 478 478 RepositoryDescription: repo, 479 479 } 480 480 opts.SetDefaults() 481 - b, err := build.NewBuilder(opts) 481 + b, err := index.NewBuilder(opts) 482 482 if err != nil { 483 483 t.Fatalf("NewBuilder: %v", err) 484 484 } ··· 517 517 } 518 518 defer f.Close() 519 519 520 - indexFile, err := zoekt.NewIndexFile(f) 520 + indexFile, err := index.NewIndexFile(f) 521 521 if err != nil { 522 522 return fmt.Errorf("zoekt.NewIndexFile: %s ", err) 523 523 } 524 524 defer indexFile.Close() 525 525 526 - _, _, err = zoekt.Merge(filepath.Dir(fn), indexFile) 526 + _, _, err = index.Merge(filepath.Dir(fn), indexFile) 527 527 return err 528 528 }

+4 -4

cmd/zoekt-sourcegraph-indexserver/index.go

··· 18 18 19 19 sglog "github.com/sourcegraph/log" 20 20 "github.com/sourcegraph/zoekt" 21 - "github.com/sourcegraph/zoekt/build" 21 + "github.com/sourcegraph/zoekt/index" 22 22 "github.com/sourcegraph/zoekt/internal/ctags" 23 23 "github.com/sourcegraph/zoekt/internal/tenant" 24 24 ) ··· 99 99 ShardMerging bool 100 100 } 101 101 102 - // BuildOptions returns a build.Options represented by indexArgs. Note: it 102 + // BuildOptions returns a index.Options represented by indexArgs. Note: it 103 103 // doesn't set fields like repository/branch. 104 - func (o *indexArgs) BuildOptions() *build.Options { 104 + func (o *indexArgs) BuildOptions() *index.Options { 105 105 shardPrefix := "" 106 106 if tenant.EnforceTenant() { 107 107 shardPrefix = tenant.SrcPrefix(o.TenantID, o.RepoID) 108 108 } 109 109 110 - return &build.Options{ 110 + return &index.Options{ 111 111 // It is important that this RepositoryDescription exactly matches what 112 112 // the indexer we call will produce. This is to ensure that 113 113 // IncrementalSkipIndexing and IndexState can correctly calculate if

+14 -13

cmd/zoekt-sourcegraph-indexserver/main.go

··· 39 39 sglog "github.com/sourcegraph/log" 40 40 "github.com/sourcegraph/mountinfo" 41 41 "github.com/sourcegraph/zoekt" 42 - "github.com/sourcegraph/zoekt/build" 43 42 proto "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" 44 43 "github.com/sourcegraph/zoekt/grpc/internalerrs" 45 44 "github.com/sourcegraph/zoekt/grpc/messagesize" 45 + "github.com/sourcegraph/zoekt/index" 46 46 "github.com/sourcegraph/zoekt/internal/debugserver" 47 47 "github.com/sourcegraph/zoekt/internal/profiler" 48 48 "github.com/sourcegraph/zoekt/internal/tenant" 49 + 49 50 "go.uber.org/automaxprocs/maxprocs" 50 51 "golang.org/x/net/trace" 51 52 "golang.org/x/sys/unix" ··· 110 111 metricIndexIncrementalIndexState = promauto.NewCounterVec(prometheus.CounterOpts{ 111 112 Name: "index_incremental_index_state", 112 113 Help: "A count of the state on disk vs what we want to build. See zoekt/build.IndexState.", 113 - }, []string{"state"}) // state is build.IndexState 114 + }, []string{"state"}) // state is index.IndexState 114 115 115 116 metricNumIndexed = promauto.NewGauge(prometheus.GaugeOpts{ 116 117 Name: "index_num_indexed", ··· 595 596 metricIndexIncrementalIndexState.WithLabelValues(string(incrementalState)).Inc() 596 597 597 598 switch incrementalState { 598 - case build.IndexStateEqual: 599 + case index.IndexStateEqual: 599 600 debugLog.Printf("%s index already up to date. Shard=%s", args.String(), fn) 600 601 return indexStateNoop, nil 601 602 602 - case build.IndexStateMeta: 603 + case index.IndexStateMeta: 603 604 infoLog.Printf("updating index.meta %s", args.String()) 604 605 605 606 // TODO(stefan) handle mergeMeta for tenant id. ··· 609 610 return indexStateSuccessMeta, nil 610 611 } 611 612 612 - case build.IndexStateCorrupt: 613 + case index.IndexStateCorrupt: 613 614 infoLog.Printf("falling back to full update: corrupt index: %s", args.String()) 614 615 } 615 616 } ··· 723 724 return nil 724 725 } 725 726 726 - builder, err := build.NewBuilder(*bo) 727 + builder, err := index.NewBuilder(*bo) 727 728 if err != nil { 728 729 return err 729 730 } ··· 1070 1071 } 1071 1072 1072 1073 func printMetaData(fn string) error { 1073 - repo, indexMeta, err := zoekt.ReadMetadataPath(fn) 1074 + repo, indexMeta, err := index.ReadMetadataPath(fn) 1074 1075 if err != nil { 1075 1076 return err 1076 1077 } ··· 1093 1094 return err 1094 1095 } 1095 1096 1096 - iFile, err := zoekt.NewIndexFile(f) 1097 + iFile, err := index.NewIndexFile(f) 1097 1098 if err != nil { 1098 1099 return err 1099 1100 } 1100 1101 1101 - return zoekt.PrintNgramStats(iFile) 1102 + return index.PrintNgramStats(iFile) 1102 1103 } 1103 1104 1104 1105 func srcLogLevelIsDebug() bool { ··· 1272 1273 fs.StringVar(&rc.root, "sourcegraph_url", os.Getenv("SRC_FRONTEND_INTERNAL"), "http://sourcegraph-frontend-internal or http://localhost:3090. If a path to a directory, we fake the Sourcegraph API and index all repos rooted under path.") 1273 1274 fs.DurationVar(&rc.interval, "interval", time.Minute, "sync with sourcegraph this often") 1274 1275 fs.Int64Var(&rc.indexConcurrency, "index_concurrency", getEnvWithDefaultInt64("SRC_INDEX_CONCURRENCY", 1), "the number of repos to index concurrently") 1275 - fs.StringVar(&rc.index, "index", getEnvWithDefaultString("DATA_DIR", build.DefaultDir), "set index directory to use") 1276 + fs.StringVar(&rc.index, "index", getEnvWithDefaultString("DATA_DIR", index.DefaultDir), "set index directory to use") 1276 1277 fs.StringVar(&rc.listen, "listen", ":6072", "listen on this address.") 1277 - fs.StringVar(&rc.hostname, "hostname", zoekt.HostnameBestEffort(), "the name we advertise to Sourcegraph when asking for the list of repositories to index. Can also be set via the NODE_NAME environment variable.") 1278 + fs.StringVar(&rc.hostname, "hostname", index.HostnameBestEffort(), "the name we advertise to Sourcegraph when asking for the list of repositories to index. Can also be set via the NODE_NAME environment variable.") 1278 1279 fs.Float64Var(&rc.cpuFraction, "cpu_fraction", 1.0, "use this fraction of the cores for indexing.") 1279 1280 fs.DurationVar(&rc.backoffDuration, "backoff_duration", getEnvWithDefaultDuration("BACKOFF_DURATION", 10*time.Minute), "for the given duration we backoff from enqueue operations for a repository that's failed its previous indexing attempt. Consecutive failures increase the duration of the delay linearly up to the maxBackoffDuration. A negative value disables indexing backoff.") 1280 1281 fs.DurationVar(&rc.maxBackoffDuration, "max_backoff_duration", getEnvWithDefaultDuration("MAX_BACKOFF_DURATION", 120*time.Minute), "the maximum duration to backoff from enqueueing a repo for indexing. A negative value disables indexing backoff.") ··· 1633 1634 func main() { 1634 1635 liblog := sglog.Init(sglog.Resource{ 1635 1636 Name: "zoekt-indexserver", 1636 - Version: zoekt.Version, 1637 - InstanceID: zoekt.HostnameBestEffort(), 1637 + Version: index.Version, 1638 + InstanceID: index.HostnameBestEffort(), 1638 1639 }) 1639 1640 defer liblog.Sync() 1640 1641

+2 -3

cmd/zoekt-sourcegraph-indexserver/merge.go

··· 11 11 "github.com/grafana/regexp" 12 12 "github.com/prometheus/client_golang/prometheus" 13 13 "github.com/prometheus/client_golang/prometheus/promauto" 14 + "github.com/sourcegraph/zoekt/index" 14 15 "go.uber.org/atomic" 15 - 16 - "github.com/sourcegraph/zoekt" 17 16 ) 18 17 19 18 var metricShardMergingRunning = promauto.NewGauge(prometheus.GaugeOpts{ ··· 203 202 return true 204 203 } 205 204 206 - repos, _, err := zoekt.ReadMetadataPath(path) 205 + repos, _, err := index.ReadMetadataPath(path) 207 206 if err != nil { 208 207 debugLog.Printf("failed to load metadata for %s\n", fi.Name()) 209 208 return true

+3 -3

cmd/zoekt-sourcegraph-indexserver/merge_test.go

··· 11 11 "testing" 12 12 13 13 "github.com/sourcegraph/zoekt" 14 - "github.com/sourcegraph/zoekt/build" 14 + "github.com/sourcegraph/zoekt/index" 15 15 ) 16 16 17 17 func TestHasMultipleShards(t *testing.T) { ··· 48 48 dir := t.TempDir() 49 49 50 50 // Create a test shard. 51 - opts := build.Options{ 51 + opts := index.Options{ 52 52 IndexDir: dir, 53 53 RepositoryDescription: zoekt.Repository{Name: "test-repo"}, 54 54 } 55 55 opts.SetDefaults() 56 - b, err := build.NewBuilder(opts) 56 + b, err := index.NewBuilder(opts) 57 57 if err != nil { 58 58 t.Fatalf("NewBuilder: %v", err) 59 59 }

+3 -3

cmd/zoekt-sourcegraph-indexserver/meta.go

··· 7 7 "path/filepath" 8 8 9 9 "github.com/sourcegraph/zoekt" 10 - "github.com/sourcegraph/zoekt/build" 10 + "github.com/sourcegraph/zoekt/index" 11 11 ) 12 12 13 13 // mergeMeta updates the .meta files for the shards on disk for o. ··· 16 16 // failure. This means you might have an inconsistent state on disk if an 17 17 // error is returned. It is recommended to fallback to re-indexing in that 18 18 // case. 19 - func mergeMeta(o *build.Options) error { 19 + func mergeMeta(o *index.Options) error { 20 20 todo := map[string]string{} 21 21 for _, fn := range o.FindAllShards() { 22 - repos, md, err := zoekt.ReadMetadataPath(fn) 22 + repos, md, err := index.ReadMetadataPath(fn) 23 23 if err != nil { 24 24 return err 25 25 }

+10 -10

cmd/zoekt-sourcegraph-indexserver/meta_test.go

··· 7 7 8 8 "github.com/google/go-cmp/cmp" 9 9 "github.com/sourcegraph/zoekt" 10 - "github.com/sourcegraph/zoekt/build" 10 + "github.com/sourcegraph/zoekt/index" 11 11 ) 12 12 13 13 func TestMergeMeta(t *testing.T) { ··· 17 17 var repoFns []string 18 18 19 19 for _, name := range repoNames { 20 - opts := build.Options{ 20 + opts := index.Options{ 21 21 IndexDir: dir, 22 22 RepositoryDescription: zoekt.Repository{ 23 23 Name: name, ··· 27 27 }, 28 28 } 29 29 opts.SetDefaults() 30 - b, err := build.NewBuilder(opts) 30 + b, err := index.NewBuilder(opts) 31 31 if err != nil { 32 32 t.Fatalf("NewBuilder: %v", err) 33 33 } ··· 41 41 } 42 42 43 43 // update meta on repo3 then test it changed 44 - opts := &build.Options{ 44 + opts := &index.Options{ 45 45 IndexDir: dir, 46 46 RepositoryDescription: zoekt.Repository{ 47 47 Name: "repo3", ··· 54 54 if err := mergeMeta(opts); err != nil { 55 55 t.Fatal(err) 56 56 } 57 - repos, _, _ := zoekt.ReadMetadataPath(repoFns[3]) 57 + repos, _, _ := index.ReadMetadataPath(repoFns[3]) 58 58 if got, want := repos[0].RawConfig["public"], "0"; got != want { 59 59 t.Fatalf("failed to update metadata of repo3. Got public %q want %q", got, want) 60 60 } ··· 72 72 73 73 readPublic := func() []string { 74 74 var public []string 75 - repos, _, _ := zoekt.ReadMetadataPath(dstFn) 75 + repos, _, _ := index.ReadMetadataPath(dstFn) 76 76 for _, r := range repos { 77 77 public = append(public, r.RawConfig["public"]) 78 78 } ··· 84 84 } 85 85 86 86 // Update a repo1 in compound shard to be private 87 - opts = &build.Options{ 87 + opts = &index.Options{ 88 88 IndexDir: dir, 89 89 RepositoryDescription: zoekt.Repository{ 90 90 Name: "repo1", ··· 105 105 func merge(t *testing.T, dstDir string, names []string) (string, string, error) { 106 106 t.Helper() 107 107 108 - var files []zoekt.IndexFile 108 + var files []index.IndexFile 109 109 for _, fn := range names { 110 110 f, err := os.Open(fn) 111 111 if err != nil { ··· 113 113 } 114 114 defer f.Close() 115 115 116 - indexFile, err := zoekt.NewIndexFile(f) 116 + indexFile, err := index.NewIndexFile(f) 117 117 if err != nil { 118 118 return "", "", err 119 119 } ··· 122 122 files = append(files, indexFile) 123 123 } 124 124 125 - return zoekt.Merge(dstDir, files...) 125 + return index.Merge(dstDir, files...) 126 126 }

+3 -3

cmd/zoekt-test/main.go

··· 32 32 "time" 33 33 34 34 "github.com/sourcegraph/zoekt" 35 - "github.com/sourcegraph/zoekt/build" 35 + "github.com/sourcegraph/zoekt/index" 36 36 "github.com/sourcegraph/zoekt/internal/shards" 37 37 "github.com/sourcegraph/zoekt/query" 38 38 ) ··· 72 72 } 73 73 defer os.RemoveAll(indexDir) 74 74 75 - var opts build.Options 75 + var opts index.Options 76 76 opts.SetDefaults() 77 77 opts.IndexDir = indexDir 78 78 ··· 84 84 return fmt.Errorf("no contents") 85 85 } 86 86 87 - builder, err := build.NewBuilder(opts) 87 + builder, err := index.NewBuilder(opts) 88 88 if err != nil { 89 89 return err 90 90 }

+23 -23

cmd/zoekt-webserver/main.go

··· 41 41 42 42 grpcprom "github.com/grpc-ecosystem/go-grpc-middleware/providers/prometheus" 43 43 "github.com/sourcegraph/mountinfo" 44 - "github.com/sourcegraph/zoekt" 45 - "github.com/sourcegraph/zoekt/build" 46 - zoektgrpc "github.com/sourcegraph/zoekt/cmd/zoekt-webserver/grpc/server" 47 - "github.com/sourcegraph/zoekt/grpc/internalerrs" 48 - "github.com/sourcegraph/zoekt/grpc/messagesize" 49 - "github.com/sourcegraph/zoekt/grpc/propagator" 50 - proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" 51 44 "github.com/sourcegraph/zoekt/internal/debugserver" 52 - "github.com/sourcegraph/zoekt/internal/profiler" 53 45 "github.com/sourcegraph/zoekt/internal/shards" 54 - "github.com/sourcegraph/zoekt/internal/tenant" 55 - "github.com/sourcegraph/zoekt/internal/trace" 56 - "github.com/sourcegraph/zoekt/internal/tracer" 57 - "github.com/sourcegraph/zoekt/query" 58 - "github.com/sourcegraph/zoekt/web" 59 46 "go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc" 60 47 "golang.org/x/net/http2" 61 48 "golang.org/x/net/http2/h2c" ··· 66 53 "github.com/prometheus/client_golang/prometheus/promauto" 67 54 "github.com/shirou/gopsutil/v3/disk" 68 55 sglog "github.com/sourcegraph/log" 56 + "github.com/sourcegraph/zoekt" 57 + zoektgrpc "github.com/sourcegraph/zoekt/cmd/zoekt-webserver/grpc/server" 58 + "github.com/sourcegraph/zoekt/grpc/internalerrs" 59 + "github.com/sourcegraph/zoekt/grpc/messagesize" 60 + "github.com/sourcegraph/zoekt/grpc/propagator" 61 + proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1" 62 + "github.com/sourcegraph/zoekt/index" 63 + "github.com/sourcegraph/zoekt/internal/profiler" 64 + "github.com/sourcegraph/zoekt/internal/tenant" 65 + "github.com/sourcegraph/zoekt/internal/trace" 66 + "github.com/sourcegraph/zoekt/internal/tracer" 67 + "github.com/sourcegraph/zoekt/query" 68 + "github.com/sourcegraph/zoekt/web" 69 69 "github.com/uber/jaeger-client-go" 70 70 oteltrace "go.opentelemetry.io/otel/trace" 71 71 "go.uber.org/automaxprocs/maxprocs" ··· 139 139 logRefresh := flag.Duration("log_refresh", 24*time.Hour, "if using --log_dir, start writing a new file this often.") 140 140 141 141 listen := flag.String("listen", ":6070", "listen on this address.") 142 - index := flag.String("index", build.DefaultDir, "set index directory to use") 142 + indexDir := flag.String("index", index.DefaultDir, "set index directory to use") 143 143 html := flag.Bool("html", true, "enable HTML interface") 144 144 enableRPC := flag.Bool("rpc", false, "enable go/net RPC") 145 145 enableIndexserverProxy := flag.Bool("indexserver_proxy", false, "proxy requests with URLs matching the path /indexserver/ to <index>/indexserver.sock") ··· 158 158 flag.Parse() 159 159 160 160 if *version { 161 - fmt.Printf("zoekt-webserver version %q\n", zoekt.Version) 161 + fmt.Printf("zoekt-webserver version %q\n", index.Version) 162 162 os.Exit(0) 163 163 } 164 164 ··· 171 171 172 172 resource := sglog.Resource{ 173 173 Name: "zoekt-webserver", 174 - Version: zoekt.Version, 175 - InstanceID: zoekt.HostnameBestEffort(), 174 + Version: index.Version, 175 + InstanceID: index.HostnameBestEffort(), 176 176 } 177 177 178 178 liblog := sglog.Init(resource) ··· 193 193 // Tune GOMAXPROCS to match Linux container CPU quota. 194 194 _, _ = maxprocs.Set() 195 195 196 - if err := os.MkdirAll(*index, 0o755); err != nil { 196 + if err := os.MkdirAll(*indexDir, 0o755); err != nil { 197 197 log.Fatal(err) 198 198 } 199 199 200 - mustRegisterDiskMonitor(*index) 200 + mustRegisterDiskMonitor(*indexDir) 201 201 202 202 metricsLogger := sglog.Scoped("metricsRegistration") 203 203 204 204 mustRegisterMemoryMapMetrics(metricsLogger) 205 205 206 206 opts := mountinfo.CollectorOpts{Namespace: "zoekt_webserver"} 207 - c := mountinfo.NewCollector(metricsLogger, opts, map[string]string{"indexDir": *index}) 207 + c := mountinfo.NewCollector(metricsLogger, opts, map[string]string{"indexDir": *indexDir}) 208 208 209 209 prometheus.DefaultRegisterer.MustRegister(c) 210 210 211 211 // Do not block on loading shards so we can become partially available 212 212 // sooner. Otherwise on large instances zoekt can be unavailable on the 213 213 // order of minutes. 214 - searcher, err := shards.NewDirectorySearcherFast(*index) 214 + searcher, err := shards.NewDirectorySearcherFast(*indexDir) 215 215 if err != nil { 216 216 log.Fatal(err) 217 217 } ··· 224 224 s := &web.Server{ 225 225 Searcher: searcher, 226 226 Top: web.Top, 227 - Version: zoekt.Version, 227 + Version: index.Version, 228 228 } 229 229 230 230 if *templateDir != "" { ··· 260 260 debugserver.AddHandlers(serveMux, *enablePprof) 261 261 262 262 if *enableIndexserverProxy { 263 - socket := filepath.Join(*index, "indexserver.sock") 263 + socket := filepath.Join(*indexDir, "indexserver.sock") 264 264 sglog.Scoped("server").Info("adding reverse proxy", sglog.String("socket", socket)) 265 265 addProxyHandler(serveMux, socket) 266 266 }

+4 -3

cmd/zoekt/main.go

··· 29 29 30 30 "github.com/felixge/fgprof" 31 31 "github.com/sourcegraph/zoekt" 32 + "github.com/sourcegraph/zoekt/index" 32 33 "github.com/sourcegraph/zoekt/internal/shards" 33 34 "github.com/sourcegraph/zoekt/query" 34 35 ) ··· 64 65 return nil, err 65 66 } 66 67 67 - iFile, err := zoekt.NewIndexFile(f) 68 + iFile, err := index.NewIndexFile(f) 68 69 if err != nil { 69 70 return nil, err 70 71 } 71 72 72 - s, err := zoekt.NewSearcher(iFile) 73 + s, err := index.NewSearcher(iFile) 73 74 if err != nil { 74 75 iFile.Close() 75 76 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err) 76 77 } 77 78 78 79 if verbose { 79 - repo, index, err := zoekt.ReadMetadata(iFile) 80 + repo, index, err := index.ReadMetadata(iFile) 80 81 if err != nil { 81 82 iFile.Close() 82 83 return nil, fmt.Errorf("ReadMetadata(%s): %v", fn, err)

+37 -36

contentprovider.go index/contentprovider.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bytes" ··· 23 23 "unicode" 24 24 "unicode/utf8" 25 25 26 + "github.com/sourcegraph/zoekt" 26 27 "github.com/sourcegraph/zoekt/internal/ctags" 27 28 ) 28 29 ··· 32 33 // content with the same code. 33 34 type contentProvider struct { 34 35 id *indexData 35 - stats *Stats 36 + stats *zoekt.Stats 36 37 37 38 // mutable 38 39 err error ··· 143 144 // 144 145 // Note: the byte slices may be backed by mmapped data, so before being 145 146 // returned by the API it needs to be copied. 146 - func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []LineMatch { 147 + func (p *contentProvider) fillMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.LineMatch { 147 148 var filenameMatches []*candidateMatch 148 149 contentMatches := make([]*candidateMatch, 0, len(ms)) 149 150 ··· 161 162 return p.fillContentMatches(contentMatches, numContextLines, language, opts) 162 163 } 163 164 164 - // Otherwise, we return a single line containing the filematch match. 165 + // Otherwise, we return a single line containing the filematch index. 165 166 lineScore, _ := p.scoreLine(filenameMatches, language, -1 /* must pass -1 for filenames */, opts) 166 - res := LineMatch{ 167 + res := zoekt.LineMatch{ 167 168 Line: p.id.fileName(p.idx), 168 169 FileName: true, 169 170 Score: lineScore.score, ··· 171 172 } 172 173 173 174 for _, m := range ms { 174 - res.LineFragments = append(res.LineFragments, LineFragmentMatch{ 175 + res.LineFragments = append(res.LineFragments, zoekt.LineFragmentMatch{ 175 176 LineOffset: int(m.byteOffset), 176 177 MatchLength: int(m.byteMatchSz), 177 178 Offset: m.byteOffset, 178 179 }) 179 180 } 180 181 181 - return []LineMatch{res} 182 + return []zoekt.LineMatch{res} 182 183 183 184 } 184 185 ··· 190 191 // 191 192 // Note: the byte slices may be backed by mmapped data, so before being 192 193 // returned by the API it needs to be copied. 193 - func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []ChunkMatch { 194 + func (p *contentProvider) fillChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.ChunkMatch { 194 195 var filenameMatches []*candidateMatch 195 196 contentMatches := make([]*candidateMatch, 0, len(ms)) 196 197 ··· 207 208 return p.fillContentChunkMatches(contentMatches, numContextLines, language, opts) 208 209 } 209 210 210 - // Otherwise, we return a single chunk representing the filename match. 211 + // Otherwise, we return a single chunk representing the filename index. 211 212 lineScore, _ := p.scoreLine(filenameMatches, language, -1 /* must pass -1 for filenames */, opts) 212 213 fileName := p.id.fileName(p.idx) 213 - ranges := make([]Range, 0, len(ms)) 214 + ranges := make([]zoekt.Range, 0, len(ms)) 214 215 for _, m := range ms { 215 - ranges = append(ranges, Range{ 216 - Start: Location{ 216 + ranges = append(ranges, zoekt.Range{ 217 + Start: zoekt.Location{ 217 218 ByteOffset: m.byteOffset, 218 219 LineNumber: 1, 219 220 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset]) + 1), 220 221 }, 221 - End: Location{ 222 + End: zoekt.Location{ 222 223 ByteOffset: m.byteOffset + m.byteMatchSz, 223 224 LineNumber: 1, 224 225 Column: uint32(utf8.RuneCount(fileName[:m.byteOffset+m.byteMatchSz]) + 1), ··· 226 227 }) 227 228 } 228 229 229 - return []ChunkMatch{{ 230 + return []zoekt.ChunkMatch{{ 230 231 Content: fileName, 231 - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 232 + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 232 233 Ranges: ranges, 233 234 FileName: true, 234 235 Score: lineScore.score, ··· 236 237 }} 237 238 } 238 239 239 - func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []LineMatch { 240 - var result []LineMatch 240 + func (p *contentProvider) fillContentMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.LineMatch { 241 + var result []zoekt.LineMatch 241 242 for len(ms) > 0 { 242 243 m := ms[0] 243 244 num := p.newlines().atOffset(m.byteOffset) ··· 271 272 272 273 // Due to merging matches, we may have a match that 273 274 // crosses a line boundary. Prevent confusion by 274 - // taking lines until we pass the last match 275 + // taking lines until we pass the last index 275 276 for nextLineStart < len(data) && endMatch > uint32(nextLineStart) { 276 277 next := bytes.IndexByte(data[nextLineStart:], '\n') 277 278 if next == -1 { ··· 282 283 } 283 284 } 284 285 285 - finalMatch := LineMatch{ 286 + finalMatch := zoekt.LineMatch{ 286 287 LineStart: lineStart, 287 288 LineEnd: nextLineStart, 288 289 LineNumber: num, ··· 299 300 finalMatch.DebugScore = lineScore.debugScore 300 301 301 302 for i, m := range lineCands { 302 - fragment := LineFragmentMatch{ 303 + fragment := zoekt.LineFragmentMatch{ 303 304 Offset: m.byteOffset, 304 305 LineOffset: int(m.byteOffset) - lineStart, 305 306 MatchLength: int(m.byteMatchSz), ··· 316 317 return result 317 318 } 318 319 319 - func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *SearchOptions) []ChunkMatch { 320 + func (p *contentProvider) fillContentChunkMatches(ms []*candidateMatch, numContextLines int, language string, opts *zoekt.SearchOptions) []zoekt.ChunkMatch { 320 321 data := p.data(false) 321 322 322 323 // columnHelper prevents O(len(ms) * len(data)) lookups for all columns. ··· 332 333 333 334 newlines := p.newlines() 334 335 chunks := chunkCandidates(ms, newlines, numContextLines) 335 - chunkMatches := make([]ChunkMatch, 0, len(chunks)) 336 + chunkMatches := make([]zoekt.ChunkMatch, 0, len(chunks)) 336 337 for _, chunk := range chunks { 337 - ranges := make([]Range, 0, len(chunk.candidates)) 338 + ranges := make([]zoekt.Range, 0, len(chunk.candidates)) 338 339 for _, cm := range chunk.candidates { 339 340 startOffset := cm.byteOffset 340 341 endOffset := cm.byteOffset + cm.byteMatchSz 341 342 startLine, endLine := newlines.offsetRangeToLineRange(startOffset, endOffset) 342 343 343 - ranges = append(ranges, Range{ 344 - Start: Location{ 344 + ranges = append(ranges, zoekt.Range{ 345 + Start: zoekt.Location{ 345 346 ByteOffset: startOffset, 346 347 LineNumber: uint32(startLine), 347 348 Column: columnHelper.get(int(newlines.lineStart(startLine)), startOffset), 348 349 }, 349 - End: Location{ 350 + End: zoekt.Location{ 350 351 ByteOffset: endOffset, 351 352 LineNumber: uint32(endLine), 352 353 Column: columnHelper.get(int(newlines.lineStart(endLine)), endOffset), ··· 361 362 firstLineStart := newlines.lineStart(firstLineNumber) 362 363 363 364 chunkScore, symbolInfo := p.scoreChunk(chunk.candidates, language, opts) 364 - chunkMatches = append(chunkMatches, ChunkMatch{ 365 + chunkMatches = append(chunkMatches, zoekt.ChunkMatch{ 365 366 Content: newlines.getLines(data, firstLineNumber, int(chunk.lastLine)+numContextLines+1), 366 - ContentStart: Location{ 367 + ContentStart: zoekt.Location{ 367 368 ByteOffset: firstLineStart, 368 369 LineNumber: uint32(firstLineNumber), 369 370 Column: 1, ··· 599 600 return ok 600 601 } 601 602 602 - func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *Symbol, bool) { 603 + func (p *contentProvider) findSymbol(cm *candidateMatch) (DocumentSection, *zoekt.Symbol, bool) { 603 604 if cm.fileName { 604 605 return DocumentSection{}, nil, false 605 606 } ··· 871 872 return factor * scoreKindMatch 872 873 } 873 874 874 - type matchScoreSlice []LineMatch 875 + type matchScoreSlice []zoekt.LineMatch 875 876 876 877 func (m matchScoreSlice) Len() int { return len(m) } 877 878 func (m matchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 878 879 func (m matchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } 879 880 880 - type chunkMatchScoreSlice []ChunkMatch 881 + type chunkMatchScoreSlice []zoekt.ChunkMatch 881 882 882 883 func (m chunkMatchScoreSlice) Len() int { return len(m) } 883 884 func (m chunkMatchScoreSlice) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 884 885 func (m chunkMatchScoreSlice) Less(i, j int) bool { return m[i].Score > m[j].Score } 885 886 886 - type fileMatchesByScore []FileMatch 887 + type fileMatchesByScore []zoekt.FileMatch 887 888 888 889 func (m fileMatchesByScore) Len() int { return len(m) } 889 890 func (m fileMatchesByScore) Swap(i, j int) { m[i], m[j] = m[j], m[i] } 890 891 func (m fileMatchesByScore) Less(i, j int) bool { return m[i].Score > m[j].Score } 891 892 892 - func sortMatchesByScore(ms []LineMatch) { 893 + func sortMatchesByScore(ms []zoekt.LineMatch) { 893 894 sort.Sort(matchScoreSlice(ms)) 894 895 } 895 896 896 - func sortChunkMatchesByScore(ms []ChunkMatch) { 897 + func sortChunkMatchesByScore(ms []zoekt.ChunkMatch) { 897 898 sort.Sort(chunkMatchScoreSlice(ms)) 898 899 } 899 900 ··· 904 905 // 905 906 // We don't only use the scores, we will also boost some results to present 906 907 // files with novel extensions. 907 - func SortFiles(ms []FileMatch) { 908 + func SortFiles(ms []zoekt.FileMatch) { 908 909 sort.Sort(fileMatchesByScore(ms)) 909 910 910 911 // Boost a file extension not in the top 3 to the third filematch. 911 912 boostNovelExtension(ms, 2, 0.9) 912 913 } 913 914 914 - func boostNovelExtension(ms []FileMatch, boostOffset int, minScoreRatio float64) { 915 + func boostNovelExtension(ms []zoekt.FileMatch, boostOffset int, minScoreRatio float64) { 915 916 if len(ms) <= boostOffset+1 { 916 917 return 917 918 }

+1 -1

contentprovider_test.go index/contentprovider_test.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "bytes"

+29 -40

eval.go index/eval.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "context" ··· 25 25 26 26 enry_data "github.com/go-enry/go-enry/v2/data" 27 27 "github.com/grafana/regexp" 28 - 28 + "github.com/sourcegraph/zoekt" 29 29 "github.com/sourcegraph/zoekt/internal/tenant" 30 30 "github.com/sourcegraph/zoekt/query" 31 31 ) ··· 33 33 // simplifyMultiRepo takes a query and a predicate. It returns Const(true) if all 34 34 // repository names fulfill the predicate, Const(false) if none of them do, and q 35 35 // otherwise. 36 - func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(*Repository) bool) query.Q { 36 + func (d *indexData) simplifyMultiRepo(q query.Q, predicate func(*zoekt.Repository) bool) query.Q { 37 37 count := 0 38 38 alive := len(d.repoMetaData) 39 39 for i := range d.repoMetaData { ··· 56 56 eval := query.Map(in, func(q query.Q) query.Q { 57 57 switch r := q.(type) { 58 58 case *query.Repo: 59 - return d.simplifyMultiRepo(q, func(repo *Repository) bool { 59 + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { 60 60 return r.Regexp.MatchString(repo.Name) 61 61 }) 62 62 case *query.RepoRegexp: 63 - return d.simplifyMultiRepo(q, func(repo *Repository) bool { 63 + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { 64 64 return r.Regexp.MatchString(repo.Name) 65 65 }) 66 66 case *query.BranchesRepos: ··· 73 73 } 74 74 return &query.Const{Value: false} 75 75 case *query.RepoSet: 76 - return d.simplifyMultiRepo(q, func(repo *Repository) bool { 76 + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { 77 77 return r.Set[repo.Name] 78 78 }) 79 79 case query.RawConfig: 80 - return d.simplifyMultiRepo(q, func(repo *Repository) bool { return uint8(r)&encodeRawConfig(repo.RawConfig) == uint8(r) }) 80 + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { return uint8(r)&encodeRawConfig(repo.RawConfig) == uint8(r) }) 81 81 case *query.RepoIDs: 82 - return d.simplifyMultiRepo(q, func(repo *Repository) bool { 82 + return d.simplifyMultiRepo(q, func(repo *zoekt.Repository) bool { 83 83 return r.Repos.Contains(repo.ID) 84 84 }) 85 85 case *query.Language: ··· 123 123 return query.Simplify(eval) 124 124 } 125 125 126 - func (o *SearchOptions) SetDefaults() { 127 - if o.ShardMaxMatchCount == 0 { 128 - // We cap the total number of matches, so overly broad 129 - // searches don't crash the machine. 130 - o.ShardMaxMatchCount = 100000 131 - } 132 - if o.TotalMaxMatchCount == 0 { 133 - o.TotalMaxMatchCount = 10 * o.ShardMaxMatchCount 134 - } 135 - } 136 - 137 - func (d *indexData) Search(ctx context.Context, q query.Q, opts *SearchOptions) (sr *SearchResult, err error) { 126 + func (d *indexData) Search(ctx context.Context, q query.Q, opts *zoekt.SearchOptions) (sr *zoekt.SearchResult, err error) { 138 127 timer := newTimer() 139 128 140 129 copyOpts := *opts 141 130 opts = &copyOpts 142 131 opts.SetDefaults() 143 132 144 - var res SearchResult 133 + var res zoekt.SearchResult 145 134 if len(d.fileNameIndex) == 0 { 146 135 return &res, nil 147 136 } ··· 203 192 // document frequency per term 204 193 df := make(termDocumentFrequency) 205 194 206 - // term frequency per file match 195 + // term frequency per file index 207 196 var tfs []termFrequency 208 197 209 198 nextFileMatch: ··· 293 282 } 294 283 } 295 284 296 - fileMatch := FileMatch{ 285 + fileMatch := zoekt.FileMatch{ 297 286 Repository: md.Name, 298 287 RepositoryID: md.ID, 299 - RepositoryPriority: md.priority, 288 + RepositoryPriority: md.GetPriority(), 300 289 FileName: string(d.fileName(nextDoc)), 301 290 Checksum: d.getChecksum(nextDoc), 302 291 Language: d.languageMap[d.getLanguage(nextDoc)], ··· 397 386 return &res, nil 398 387 } 399 388 400 - func addRepo(res *SearchResult, repo *Repository) { 389 + func addRepo(res *zoekt.SearchResult, repo *zoekt.Repository) { 401 390 if res.RepoURLs == nil { 402 391 res.RepoURLs = map[string]string{} 403 392 } ··· 414 403 // returned, with filename matches first. 415 404 // 416 405 // If `merge` is set, overlapping and adjacent matches will be merged 417 - // into a single match. Otherwise, overlapping matches will be removed, 406 + // into a single index. Otherwise, overlapping matches will be removed, 418 407 // but adjacent matches will remain. 419 408 func (d *indexData) gatherMatches(nextDoc uint32, mt matchTree, known map[matchTree]bool) []*candidateMatch { 420 409 var cands []*candidateMatch ··· 548 537 return branches 549 538 } 550 539 551 - func (d *indexData) List(ctx context.Context, q query.Q, opts *ListOptions) (rl *RepoList, err error) { 552 - var include func(rle *RepoListEntry) bool 540 + func (d *indexData) List(ctx context.Context, q query.Q, opts *zoekt.ListOptions) (rl *zoekt.RepoList, err error) { 541 + var include func(rle *zoekt.RepoListEntry) bool 553 542 554 543 q = d.simplify(q) 555 544 if c, ok := q.(*query.Const); ok { 556 545 if !c.Value { 557 - return &RepoList{}, nil 546 + return &zoekt.RepoList{}, nil 558 547 } 559 - include = func(rle *RepoListEntry) bool { 548 + include = func(rle *zoekt.RepoListEntry) bool { 560 549 return true 561 550 } 562 551 } else { 563 - sr, err := d.Search(ctx, q, &SearchOptions{ 552 + sr, err := d.Search(ctx, q, &zoekt.SearchOptions{ 564 553 ShardRepoMaxMatchCount: 1, 565 554 }) 566 555 if err != nil { ··· 572 561 foundRepos[file.Repository] = struct{}{} 573 562 } 574 563 575 - include = func(rle *RepoListEntry) bool { 564 + include = func(rle *zoekt.RepoListEntry) bool { 576 565 _, ok := foundRepos[rle.Repository.Name] 577 566 return ok 578 567 } 579 568 } 580 569 581 - var l RepoList 570 + var l zoekt.RepoList 582 571 583 572 field, err := opts.GetField() 584 573 if err != nil { 585 574 return nil, err 586 575 } 587 576 switch field { 588 - case RepoListFieldRepos: 589 - l.Repos = make([]*RepoListEntry, 0, len(d.repoListEntry)) 590 - case RepoListFieldReposMap: 591 - l.ReposMap = make(ReposMap, len(d.repoListEntry)) 577 + case zoekt.RepoListFieldRepos: 578 + l.Repos = make([]*zoekt.RepoListEntry, 0, len(d.repoListEntry)) 579 + case zoekt.RepoListFieldReposMap: 580 + l.ReposMap = make(zoekt.ReposMap, len(d.repoListEntry)) 592 581 } 593 582 594 583 for i := range d.repoListEntry { ··· 614 603 } 615 604 616 605 switch field { 617 - case RepoListFieldRepos: 606 + case zoekt.RepoListFieldRepos: 618 607 l.Repos = append(l.Repos, rle) 619 - case RepoListFieldReposMap: 620 - l.ReposMap[rle.Repository.ID] = MinimalRepoListEntry{ 608 + case zoekt.RepoListFieldReposMap: 609 + l.ReposMap[rle.Repository.ID] = zoekt.MinimalRepoListEntry{ 621 610 HasSymbols: rle.Repository.HasSymbols, 622 611 Branches: rle.Repository.Branches, 623 612 IndexTimeUnix: rle.IndexMetadata.IndexTime.Unix(),

+10 -10

eval_test.go index/eval_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "context" ··· 26 26 "github.com/google/go-cmp/cmp" 27 27 "github.com/google/go-cmp/cmp/cmpopts" 28 28 "github.com/grafana/regexp" 29 - 29 + "github.com/sourcegraph/zoekt" 30 30 "github.com/sourcegraph/zoekt/query" 31 31 ) 32 32 ··· 154 154 155 155 ctx := context.Background() 156 156 q := &query.Const{Value: true} 157 - opts := &SearchOptions{ShardRepoMaxMatchCount: 1} 157 + opts := &zoekt.SearchOptions{ShardRepoMaxMatchCount: 1} 158 158 159 159 sr, err := cs.Search(ctx, q, opts) 160 160 if err != nil { ··· 174 174 }) 175 175 176 176 t.Run("stats", func(t *testing.T) { 177 - got, want := sr.Stats, Stats{ 177 + got, want := sr.Stats, zoekt.Stats{ 178 178 ContentBytesLoaded: 0, 179 179 FileCount: 2, 180 180 FilesConsidered: 2, ··· 182 182 ShardsScanned: 1, 183 183 MatchCount: 2, 184 184 } 185 - if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 185 + if diff := cmp.Diff(want, got, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 186 186 t.Errorf("mismatch (-want, +got): %s", diff) 187 187 } 188 188 }) ··· 191 191 func compoundReposShard(t *testing.T, names ...string) *indexData { 192 192 t.Helper() 193 193 194 - repos := make([]*Repository, 0, len(names)) 194 + repos := make([]*zoekt.Repository, 0, len(names)) 195 195 docs := make([][]Document, 0, len(names)) 196 196 for _, name := range names { 197 - repos = append(repos, &Repository{ID: hash(name), Name: name}) 197 + repos = append(repos, &zoekt.Repository{ID: hash(name), Name: name}) 198 198 ds := []Document{ 199 199 {Name: name + ".txt", Content: []byte(name + " content")}, 200 200 {Name: name + ".2.txt", Content: []byte(name + " content 2")}, ··· 375 375 376 376 func TestGatherBranches(t *testing.T) { 377 377 content := []byte("dummy") 378 - b := testIndexBuilder(t, &Repository{ 379 - Branches: []RepositoryBranch{ 378 + b := testIndexBuilder(t, &zoekt.Repository{ 379 + Branches: []zoekt.RepositoryBranch{ 380 380 {"foo", "v1"}, 381 381 {"foo-2", "v1"}, 382 382 {"main", "v1"}, ··· 396 396 &query.Branch{Pattern: "foo"}, 397 397 &query.Branch{Pattern: "quz"}, 398 398 }}, 399 - &SearchOptions{}, 399 + &zoekt.SearchOptions{}, 400 400 ) 401 401 if err != nil { 402 402 t.Fatal(err)

+8 -6

hititer.go index/hititer.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "encoding/binary" 19 19 "fmt" 20 + 21 + "github.com/sourcegraph/zoekt" 20 22 ) 21 23 22 24 // hitIterator finds potential search matches, measured in offsets of ··· 30 32 next(limit uint32) 31 33 32 34 // Return how many bytes were read. 33 - updateStats(s *Stats) 35 + updateStats(s *zoekt.Stats) 34 36 } 35 37 36 38 // distanceHitIterator looks for hits at a fixed distance apart. ··· 73 75 return i.i1.first() 74 76 } 75 77 76 - func (i *distanceHitIterator) updateStats(s *Stats) { 78 + func (i *distanceHitIterator) updateStats(s *zoekt.Stats) { 77 79 i.i1.updateStats(s) 78 80 i.i2.updateStats(s) 79 81 } ··· 159 161 return maxUInt32 160 162 } 161 163 162 - func (i *inMemoryIterator) updateStats(s *Stats) { 164 + func (i *inMemoryIterator) updateStats(s *zoekt.Stats) { 163 165 } 164 166 165 167 func (i *inMemoryIterator) next(limit uint32) { ··· 219 221 } 220 222 } 221 223 222 - func (i *compressedPostingIterator) updateStats(s *Stats) { 224 + func (i *compressedPostingIterator) updateStats(s *zoekt.Stats) { 223 225 s.IndexBytesLoaded += int64(i.indexBytesLoaded) 224 226 s.NgramLookups += i.ngramLookups 225 227 i.indexBytesLoaded = 0 ··· 237 239 return fmt.Sprintf("merge:%v", i.iters) 238 240 } 239 241 240 - func (i *mergingIterator) updateStats(s *Stats) { 242 + func (i *mergingIterator) updateStats(s *zoekt.Stats) { 241 243 s.NgramLookups += i.ngramLookups 242 244 i.ngramLookups = 0 243 245 for _, j := range i.iters {

+3 -2

hititer_test.go index/hititer_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "fmt" ··· 23 23 "testing/quick" 24 24 25 25 "github.com/google/go-cmp/cmp" 26 + "github.com/sourcegraph/zoekt" 26 27 ) 27 28 28 29 func TestCompressedPostingIterator_limit(t *testing.T) { ··· 94 95 it.next(limit) 95 96 _ = it.first() 96 97 } 97 - var s Stats 98 + var s zoekt.Stats 98 99 it.updateStats(&s) 99 100 b.SetBytes(s.IndexBytesLoaded) 100 101 }

+40

index/matchiter_test.go

··· 1 + // Licensed under the Apache License, Version 2.0 (the "License"); 2 + // you may not use this file except in compliance with the License. 3 + // You may obtain a copy of the License at 4 + // 5 + // http://www.apache.org/licenses/LICENSE-2.0 6 + // 7 + // Unless required by applicable law or agreed to in writing, software 8 + // distributed under the License is distributed on an "AS IS" BASIS, 9 + // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 10 + // See the License for the specific language governing permissions and 11 + // limitations under the License. 12 + 13 + package index 14 + 15 + import ( 16 + "reflect" 17 + "testing" 18 + ) 19 + 20 + func TestMatchSize(t *testing.T) { 21 + cases := []struct { 22 + v any 23 + size int 24 + }{{ 25 + v: candidateMatch{}, 26 + size: 80, 27 + }, { 28 + v: candidateChunk{}, 29 + size: 40, 30 + }} 31 + for _, c := range cases { 32 + got := reflect.TypeOf(c.v).Size() 33 + if int(got) != c.size { 34 + t.Errorf(`sizeof struct %T has changed from %d to %d. 35 + These are match structs that occur a lot in memory, so we optimize size. 36 + When changing, please ensure there isn't unnecessary padding via the 37 + tool fieldalignment then update this test.`, c.v, c.size, got) 38 + } 39 + } 40 + }

+178 -178

index_test.go index/index_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bytes" ··· 26 26 "github.com/google/go-cmp/cmp" 27 27 "github.com/google/go-cmp/cmp/cmpopts" 28 28 "github.com/grafana/regexp" 29 - 29 + "github.com/sourcegraph/zoekt" 30 30 "github.com/sourcegraph/zoekt/query" 31 31 ) 32 32 33 - func clearScores(r *SearchResult) { 33 + func clearScores(r *zoekt.SearchResult) { 34 34 for i := range r.Files { 35 35 r.Files[i].Score = 0.0 36 36 for j := range r.Files[i].LineMatches { ··· 45 45 } 46 46 } 47 47 48 - func testIndexBuilder(tb testing.TB, repo *Repository, docs ...Document) *IndexBuilder { 48 + func testIndexBuilder(tb testing.TB, repo *zoekt.Repository, docs ...Document) *IndexBuilder { 49 49 tb.Helper() 50 50 51 51 b, err := NewIndexBuilder(repo) ··· 62 62 return b 63 63 } 64 64 65 - func testIndexBuilderCompound(t *testing.T, repos []*Repository, docs [][]Document) *IndexBuilder { 65 + func testIndexBuilderCompound(t *testing.T, repos []*zoekt.Repository, docs [][]Document) *IndexBuilder { 66 66 t.Helper() 67 67 68 68 b := newIndexBuilder() ··· 169 169 b := testIndexBuilder(t, nil) 170 170 searcher := searcherForTest(t, b) 171 171 172 - var opts SearchOptions 172 + var opts zoekt.SearchOptions 173 173 if _, err := searcher.Search(context.Background(), &query.Substring{}, &opts); err != nil { 174 174 t.Fatalf("Search: %v", err) 175 175 } ··· 209 209 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}) 210 210 211 211 matches := sres.Files 212 - want := []FileMatch{{ 212 + want := []zoekt.FileMatch{{ 213 213 FileName: "filename", 214 - LineMatches: []LineMatch{{ 215 - LineFragments: []LineFragmentMatch{{ 214 + LineMatches: []zoekt.LineMatch{{ 215 + LineFragments: []zoekt.LineFragmentMatch{{ 216 216 Offset: 8, 217 217 LineOffset: 2, 218 218 MatchLength: 3, ··· 233 233 sres := searchForTest(t, b, &query.Substring{Pattern: "ne2"}, chunkOpts) 234 234 235 235 matches := sres.Files 236 - want := []FileMatch{{ 236 + want := []zoekt.FileMatch{{ 237 237 FileName: "filename", 238 - ChunkMatches: []ChunkMatch{{ 238 + ChunkMatches: []zoekt.ChunkMatch{{ 239 239 Content: []byte("line2\n"), 240 - ContentStart: Location{ 240 + ContentStart: zoekt.Location{ 241 241 ByteOffset: 6, 242 242 LineNumber: 2, 243 243 Column: 1, 244 244 }, 245 - Ranges: []Range{{ 246 - Start: Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 247 - End: Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 245 + Ranges: []zoekt.Range{{ 246 + Start: zoekt.Location{ByteOffset: 8, LineNumber: 2, Column: 3}, 247 + End: zoekt.Location{ByteOffset: 11, LineNumber: 2, Column: 6}, 248 248 }}, 249 249 }}, 250 250 }} ··· 287 287 }) 288 288 } 289 289 290 - var chunkOpts = SearchOptions{ChunkMatches: true} 290 + var chunkOpts = zoekt.SearchOptions{ChunkMatches: true} 291 291 292 - func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...SearchOptions) *SearchResult { 292 + func searchForTest(t *testing.T, b *IndexBuilder, q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { 293 293 searcher := searcherForTest(t, b) 294 - var opts SearchOptions 294 + var opts zoekt.SearchOptions 295 295 if len(o) > 0 { 296 296 opts = o[0] 297 297 } ··· 303 303 return res 304 304 } 305 305 306 - func searcherForTest(t testing.TB, b *IndexBuilder) Searcher { 306 + func searcherForTest(t testing.TB, b *IndexBuilder) zoekt.Searcher { 307 307 var buf bytes.Buffer 308 308 if err := b.Write(&buf); err != nil { 309 309 t.Fatal(err) ··· 375 375 func wordsAsSymbols(doc Document) Document { 376 376 re := regexp.MustCompile(`\b\w{2,}\b`) 377 377 var symbols []DocumentSection 378 - var symbolsMetadata []*Symbol 378 + var symbolsMetadata []*zoekt.Symbol 379 379 for _, match := range re.FindAllIndex(doc.Content, -1) { 380 380 symbols = append(symbols, DocumentSection{ 381 381 Start: uint32(match[0]), 382 382 End: uint32(match[1]), 383 383 }) 384 - symbolsMetadata = append(symbolsMetadata, &Symbol{Kind: "method"}) 384 + symbolsMetadata = append(symbolsMetadata, &zoekt.Symbol{Kind: "method"}) 385 385 } 386 386 doc.Symbols = symbols 387 387 doc.SymbolsMetaData = symbolsMetadata ··· 407 407 ) 408 408 409 409 t.Run("LineMatches", func(t *testing.T) { 410 - sres, err := searcher.Search(ctx, andQuery, &SearchOptions{}) 410 + sres, err := searcher.Search(ctx, andQuery, &zoekt.SearchOptions{}) 411 411 if err != nil { 412 412 t.Fatal(err) 413 413 } ··· 438 438 cases := []struct { 439 439 Name string 440 440 Q query.Q 441 - Want Stats 441 + Want zoekt.Stats 442 442 }{{ 443 443 Name: "and-query", 444 444 Q: andQuery, 445 - Want: Stats{ 445 + Want: zoekt.Stats{ 446 446 FilesLoaded: 1, 447 447 ContentBytesLoaded: 22, 448 448 IndexBytesLoaded: 10, ··· 460 460 Content: true, 461 461 CaseSensitive: true, 462 462 }, 463 - Want: Stats{ 463 + Want: zoekt.Stats{ 464 464 ContentBytesLoaded: 14, 465 465 IndexBytesLoaded: 1, 466 466 FileCount: 1, ··· 477 477 Pattern: "a y", 478 478 Content: true, 479 479 }, 480 - Want: Stats{ 480 + Want: zoekt.Stats{ 481 481 ContentBytesLoaded: 14, 482 482 IndexBytesLoaded: 1, 483 483 FileCount: 1, ··· 495 495 Content: true, 496 496 CaseSensitive: true, 497 497 }, 498 - Want: Stats{ 498 + Want: zoekt.Stats{ 499 499 ShardsSkippedFilter: 1, 500 500 NgramLookups: 1, // only had to lookup once 501 501 }, ··· 513 513 CaseSensitive: true, 514 514 }, 515 515 ), 516 - Want: Stats{ 516 + Want: zoekt.Stats{ 517 517 IndexBytesLoaded: 1, // we created an iterator for "a y" before pruning. 518 518 ShardsSkippedFilter: 1, 519 519 NgramLookups: 3, // we lookedup "foo" once (1), but lookedup and created "a y" (2). ··· 525 525 Content: true, 526 526 CaseSensitive: true, 527 527 }}, 528 - Want: Stats{ 528 + Want: zoekt.Stats{ 529 529 IndexBytesLoaded: 3, 530 530 FilesConsidered: 1, // important that we only check 1 file to ensure we are using the index 531 531 MatchCount: 0, // even though there is a match it doesn't align with a symbol ··· 540 540 Content: true, 541 541 CaseSensitive: true, 542 542 }}, 543 - Want: Stats{ 543 + Want: zoekt.Stats{ 544 544 ContentBytesLoaded: 35, 545 545 IndexBytesLoaded: 4, 546 546 FileCount: 2, ··· 558 558 Content: true, 559 559 CaseSensitive: true, 560 560 }}, 561 - Want: Stats{ 561 + Want: zoekt.Stats{ 562 562 ContentBytesLoaded: 33, // we still have to run regex since "app" matches two documents 563 563 IndexBytesLoaded: 10, 564 564 FilesConsidered: 2, // important that we don't check 3 to ensure we are using the index ··· 575 575 Content: true, 576 576 CaseSensitive: true, 577 577 }}, 578 - Want: Stats{ 578 + Want: zoekt.Stats{ 579 579 ContentBytesLoaded: 35, 580 580 IndexBytesLoaded: 2, 581 581 FileCount: 2, ··· 594 594 if err != nil { 595 595 t.Fatal(err) 596 596 } 597 - if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 597 + if diff := cmp.Diff(tc.Want, sres.Stats, cmpopts.IgnoreFields(zoekt.Stats{}, "MatchTreeConstruction", "MatchTreeSearch")); diff != "" { 598 598 t.Errorf("unexpected Stats (-want +got):\n%s", diff) 599 599 } 600 600 }) ··· 713 713 } 714 714 715 715 got := matches[0].LineMatches[0] 716 - want := LineMatch{ 716 + want := zoekt.LineMatch{ 717 717 Line: []byte("banana"), 718 - LineFragments: []LineFragmentMatch{{ 718 + LineFragments: []zoekt.LineFragmentMatch{{ 719 719 Offset: 1, 720 720 LineOffset: 1, 721 721 MatchLength: 4, ··· 740 740 } 741 741 742 742 got := matches[0].ChunkMatches[0] 743 - want := ChunkMatch{ 743 + want := zoekt.ChunkMatch{ 744 744 Content: []byte("banana"), 745 - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 746 - Ranges: []Range{{ 747 - Start: Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 748 - End: Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 745 + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 746 + Ranges: []zoekt.Range{{ 747 + Start: zoekt.Location{ByteOffset: 1, LineNumber: 1, Column: 2}, 748 + End: zoekt.Location{ByteOffset: 5, LineNumber: 1, Column: 6}, 749 749 }}, 750 750 FileName: true, 751 751 } ··· 764 764 } 765 765 766 766 got := matches[0].ChunkMatches[0] 767 - want := ChunkMatch{ 767 + want := zoekt.ChunkMatch{ 768 768 Content: []byte("banana"), 769 - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 770 - Ranges: []Range{{ 771 - Start: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 772 - End: Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 769 + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 770 + Ranges: []zoekt.Range{{ 771 + Start: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 772 + End: zoekt.Location{ByteOffset: 6, LineNumber: 1, Column: 7}, 773 773 }}, 774 774 FileName: true, 775 775 } ··· 1005 1005 1006 1006 t.Run("LineMatches", func(t *testing.T) { 1007 1007 q := &query.Substring{Pattern: "two"} 1008 - sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true}) 1008 + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true}) 1009 1009 if err != nil { 1010 1010 t.Fatal(err) 1011 1011 } 1012 1012 matches := sres.Files 1013 1013 if len(matches) != 1 { 1014 - t.Fatalf("want 1 file match, got %d", len(matches)) 1014 + t.Fatalf("want 1 file index, got %d", len(matches)) 1015 1015 } 1016 1016 1017 1017 if len(matches[0].LineMatches) != 2 { ··· 1025 1025 1026 1026 t.Run("ChunkMatches", func(t *testing.T) { 1027 1027 q := &query.Substring{Pattern: "five"} 1028 - sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1028 + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1029 1029 if err != nil { 1030 1030 t.Fatal(err) 1031 1031 } 1032 1032 1033 1033 matches := sres.Files 1034 1034 if len(matches) != 1 { 1035 - t.Fatalf("want 1 file match, got %d", len(matches)) 1035 + t.Fatalf("want 1 file index, got %d", len(matches)) 1036 1036 } 1037 1037 1038 1038 if len(matches[0].ChunkMatches) != 2 { ··· 1052 1052 }, 1053 1053 } 1054 1054 1055 - sres, err := searcher.Search(ctx, q, &SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1055 + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true, NumContextLines: 1}) 1056 1056 if err != nil { 1057 1057 t.Fatal(err) 1058 1058 } 1059 1059 1060 1060 matches := sres.Files 1061 1061 if len(matches) != 2 { 1062 - t.Fatalf("want 2 file match, got %d", len(matches)) 1062 + t.Fatalf("want 2 file index, got %d", len(matches)) 1063 1063 } 1064 1064 1065 1065 foundSymbolInfo := false ··· 1205 1205 } 1206 1206 1207 1207 func TestBranchMask(t *testing.T) { 1208 - b := testIndexBuilder(t, &Repository{ 1209 - Branches: []RepositoryBranch{ 1208 + b := testIndexBuilder(t, &zoekt.Repository{ 1209 + Branches: []zoekt.RepositoryBranch{ 1210 1210 {"master", "v-master"}, 1211 1211 {"stable", "v-stable"}, 1212 1212 {"bonzai", "v-bonzai"}, ··· 1258 1258 1259 1259 func TestBranchLimit(t *testing.T) { 1260 1260 for limit := 64; limit <= 65; limit++ { 1261 - r := &Repository{} 1261 + r := &zoekt.Repository{} 1262 1262 for i := 0; i < limit; i++ { 1263 1263 s := fmt.Sprintf("b%d", i) 1264 - r.Branches = append(r.Branches, RepositoryBranch{ 1264 + r.Branches = append(r.Branches, zoekt.RepositoryBranch{ 1265 1265 s, "v-" + s, 1266 1266 }) 1267 1267 } ··· 1276 1276 1277 1277 func TestBranchReport(t *testing.T) { 1278 1278 branches := []string{"stable", "master"} 1279 - b := testIndexBuilder(t, &Repository{ 1280 - Branches: []RepositoryBranch{ 1279 + b := testIndexBuilder(t, &zoekt.Repository{ 1280 + Branches: []zoekt.RepositoryBranch{ 1281 1281 {"stable", "vs"}, 1282 1282 {"master", "vm"}, 1283 1283 }, ··· 1314 1314 } 1315 1315 1316 1316 func TestBranchVersions(t *testing.T) { 1317 - b := testIndexBuilder(t, &Repository{ 1318 - Branches: []RepositoryBranch{ 1317 + b := testIndexBuilder(t, &zoekt.Repository{ 1318 + Branches: []zoekt.RepositoryBranch{ 1319 1319 {"stable", "v-stable"}, 1320 1320 {"master", "v-master"}, 1321 1321 }, ··· 1380 1380 } 1381 1381 1382 1382 got := sres.Files[0].LineMatches[0] 1383 - want := LineMatch{ 1384 - LineFragments: []LineFragmentMatch{{ 1383 + want := zoekt.LineMatch{ 1384 + LineFragments: []zoekt.LineFragmentMatch{{ 1385 1385 LineOffset: 3, 1386 1386 Offset: 3, 1387 1387 MatchLength: 11, ··· 1409 1409 } 1410 1410 1411 1411 got := sres.Files[0].ChunkMatches[0] 1412 - want := ChunkMatch{ 1412 + want := zoekt.ChunkMatch{ 1413 1413 Content: content, 1414 - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1415 - Ranges: []Range{{ 1416 - Start: Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1417 - End: Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1414 + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 1415 + Ranges: []zoekt.Range{{ 1416 + Start: zoekt.Location{ByteOffset: 3, LineNumber: 1, Column: 4}, 1417 + End: zoekt.Location{ByteOffset: 14, LineNumber: 1, Column: 15}, 1418 1418 }}, 1419 1419 } 1420 1420 ··· 1499 1499 content := []byte("bla the needle") 1500 1500 // ----------------01234567890123 1501 1501 1502 - b := testIndexBuilder(t, &Repository{Name: "bla"}, 1502 + b := testIndexBuilder(t, &zoekt.Repository{Name: "bla"}, 1503 1503 Document{Name: "f1", Content: content}) 1504 1504 1505 1505 t.Run("LineMatches", func(t *testing.T) { ··· 1629 1629 1630 1630 func TestRepoURL(t *testing.T) { 1631 1631 content := []byte("blablabla") 1632 - b := testIndexBuilder(t, &Repository{ 1632 + b := testIndexBuilder(t, &zoekt.Repository{ 1633 1633 Name: "name", 1634 1634 URL: "URL", 1635 1635 CommitURLTemplate: "commit", ··· 1662 1662 }) 1663 1663 1664 1664 if len(res.Files) != 1 { 1665 - t.Fatalf("got %v, want one match", res.Files) 1665 + t.Fatalf("got %v, want one index", res.Files) 1666 1666 } 1667 1667 }) 1668 1668 ··· 1676 1676 ) 1677 1677 1678 1678 if len(res.Files) != 1 { 1679 - t.Fatalf("got %v, want one match", res.Files) 1679 + t.Fatalf("got %v, want one index", res.Files) 1680 1680 } 1681 1681 }) 1682 1682 } ··· 1693 1693 }) 1694 1694 1695 1695 if len(res.Files) != 1 { 1696 - t.Fatalf("got %v, want one match", res.Files) 1696 + t.Fatalf("got %v, want one index", res.Files) 1697 1697 } 1698 1698 } 1699 1699 ··· 1800 1800 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1801 1801 } 1802 1802 if res.Files[0].FileName != "f2" { 1803 - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1803 + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1804 1804 } 1805 1805 }) 1806 1806 ··· 1815 1815 t.Fatalf("got %d files, want 3 files. Full data: %v", len(res.Files), res.Files) 1816 1816 } 1817 1817 if res.Files[0].FileName != "f2" { 1818 - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1818 + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1819 1819 } 1820 1820 }) 1821 1821 } ··· 1850 1850 t.Fatalf("got %#v, want 3 files", res.Files) 1851 1851 } 1852 1852 if res.Files[0].FileName != "f2" { 1853 - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1853 + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1854 1854 } 1855 1855 }) 1856 1856 ··· 1864 1864 t.Fatalf("got %#v, want 3 files", res.Files) 1865 1865 } 1866 1866 if res.Files[0].FileName != "f2" { 1867 - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1867 + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1868 1868 } 1869 1869 }) 1870 1870 } ··· 1900 1900 t.Fatalf("got %#v, want 3 files", res.Files) 1901 1901 } 1902 1902 if res.Files[0].FileName != "f2" { 1903 - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1903 + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1904 1904 } 1905 1905 }) 1906 1906 ··· 1914 1914 t.Fatalf("got %#v, want 3 files", res.Files) 1915 1915 } 1916 1916 if res.Files[0].FileName != "f2" { 1917 - t.Errorf("got %#v, want 'f2' as top match", res.Files[0]) 1917 + t.Errorf("got %#v, want 'f2' as top index", res.Files[0]) 1918 1918 } 1919 1919 }) 1920 1920 } ··· 1922 1922 func TestNegativeRepo(t *testing.T) { 1923 1923 content := []byte("bla the needle") 1924 1924 // ----------------01234567890123 1925 - b := testIndexBuilder(t, &Repository{ 1925 + b := testIndexBuilder(t, &zoekt.Repository{ 1926 1926 Name: "bla", 1927 1927 }, Document{Name: "f1", Content: content}) 1928 1928 ··· 1956 1956 // ----------------012345678901234- 1957 1957 1958 1958 t.Run("default and minimal fallback", func(t *testing.T) { 1959 - repo := &Repository{ 1959 + repo := &zoekt.Repository{ 1960 1960 Name: "reponame", 1961 - Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1961 + Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 1962 1962 } 1963 1963 b := testIndexBuilder(t, repo, 1964 1964 Document{Name: "f1", Content: content, Branches: []string{"main", "dev"}}, ··· 1968 1968 1969 1969 searcher := searcherForTest(t, b) 1970 1970 1971 - for _, opts := range []*ListOptions{ 1971 + for _, opts := range []*zoekt.ListOptions{ 1972 1972 nil, 1973 1973 {}, 1974 - {Field: RepoListFieldRepos}, 1975 - {Field: RepoListFieldReposMap}, 1974 + {Field: zoekt.RepoListFieldRepos}, 1975 + {Field: zoekt.RepoListFieldReposMap}, 1976 1976 } { 1977 1977 t.Run(fmt.Sprint(opts), func(t *testing.T) { 1978 1978 q := &query.Repo{Regexp: regexp.MustCompile("epo")} ··· 1982 1982 t.Fatalf("List(%v): %v", q, err) 1983 1983 } 1984 1984 1985 - want := &RepoList{ 1986 - Repos: []*RepoListEntry{{ 1985 + want := &zoekt.RepoList{ 1986 + Repos: []*zoekt.RepoListEntry{{ 1987 1987 Repository: *repo, 1988 - Stats: RepoStats{ 1988 + Stats: zoekt.RepoStats{ 1989 1989 Documents: 4, 1990 1990 ContentBytes: 68, // (15 bytes of content and 2 bytes of filename) x 4 1991 1991 Shards: 1, ··· 1995 1995 OtherBranchesNewLinesCount: 3, 1996 1996 }, 1997 1997 }}, 1998 - Stats: RepoStats{ 1998 + Stats: zoekt.RepoStats{ 1999 1999 Repos: 1, 2000 2000 Documents: 4, 2001 2001 ContentBytes: 68, ··· 2008 2008 } 2009 2009 ignored := []cmp.Option{ 2010 2010 cmpopts.EquateEmpty(), 2011 - cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 2012 - cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 2013 - cmpopts.IgnoreFields(Repository{}, "SubRepoMap"), 2014 - cmpopts.IgnoreFields(Repository{}, "priority"), 2011 + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 2012 + cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 2013 + cmpopts.IgnoreFields(zoekt.Repository{}, "SubRepoMap"), 2014 + cmpopts.IgnoreFields(zoekt.Repository{}, "priority"), 2015 2015 } 2016 2016 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2017 2017 t.Fatalf("mismatch (-want +got):\n%s", diff) ··· 2030 2030 }) 2031 2031 2032 2032 t.Run("minimal", func(t *testing.T) { 2033 - repo := &Repository{ 2033 + repo := &zoekt.Repository{ 2034 2034 ID: 1234, 2035 2035 Name: "reponame", 2036 - Branches: []RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 2036 + Branches: []zoekt.RepositoryBranch{{Name: "main"}, {Name: "dev"}}, 2037 2037 RawConfig: map[string]string{"repoid": "1234"}, 2038 2038 } 2039 2039 b := testIndexBuilder(t, repo, ··· 2045 2045 searcher := searcherForTest(t, b) 2046 2046 2047 2047 q := &query.Repo{Regexp: regexp.MustCompile("epo")} 2048 - res, err := searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 2048 + res, err := searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2049 2049 if err != nil { 2050 2050 t.Fatalf("List(%v): %v", q, err) 2051 2051 } 2052 2052 2053 - want := &RepoList{ 2054 - ReposMap: ReposMap{ 2053 + want := &zoekt.RepoList{ 2054 + ReposMap: zoekt.ReposMap{ 2055 2055 repo.ID: { 2056 2056 HasSymbols: repo.HasSymbols, 2057 2057 Branches: repo.Branches, 2058 2058 }, 2059 2059 }, 2060 - Stats: RepoStats{ 2060 + Stats: zoekt.RepoStats{ 2061 2061 Repos: 1, 2062 2062 Shards: 1, 2063 2063 Documents: 4, ··· 2070 2070 } 2071 2071 2072 2072 ignored := []cmp.Option{ 2073 - cmpopts.IgnoreFields(MinimalRepoListEntry{}, "IndexTimeUnix"), 2073 + cmpopts.IgnoreFields(zoekt.MinimalRepoListEntry{}, "IndexTimeUnix"), 2074 2074 } 2075 2075 if diff := cmp.Diff(want, res, ignored...); diff != "" { 2076 2076 t.Fatalf("mismatch (-want +got):\n%s", diff) 2077 2077 } 2078 2078 2079 2079 q = &query.Repo{Regexp: regexp.MustCompile("bla")} 2080 - res, err = searcher.List(context.Background(), q, &ListOptions{Field: RepoListFieldReposMap}) 2080 + res, err = searcher.List(context.Background(), q, &zoekt.ListOptions{Field: zoekt.RepoListFieldReposMap}) 2081 2081 if err != nil { 2082 2082 t.Fatalf("List(%v): %v", q, err) 2083 2083 } ··· 2090 2090 func TestListReposByContent(t *testing.T) { 2091 2091 content := []byte("bla the needle") 2092 2092 2093 - b := testIndexBuilder(t, &Repository{ 2093 + b := testIndexBuilder(t, &zoekt.Repository{ 2094 2094 Name: "reponame", 2095 2095 }, 2096 2096 Document{Name: "f1", Content: content}, ··· 2121 2121 func TestMetadata(t *testing.T) { 2122 2122 content := []byte("bla the needle") 2123 2123 2124 - b := testIndexBuilder(t, &Repository{ 2124 + b := testIndexBuilder(t, &zoekt.Repository{ 2125 2125 Name: "reponame", 2126 2126 }, Document{Name: "f1", Content: content}, 2127 2127 Document{Name: "f2", Content: content}) ··· 2225 2225 } 2226 2226 2227 2227 func TestSubRepo(t *testing.T) { 2228 - subRepos := map[string]*Repository{ 2228 + subRepos := map[string]*zoekt.Repository{ 2229 2229 "sub": { 2230 2230 Name: "sub-name", 2231 2231 LineFragmentTemplate: "sub-line", ··· 2234 2234 2235 2235 content := []byte("pqr\nalex") 2236 2236 2237 - b := testIndexBuilder(t, &Repository{ 2237 + b := testIndexBuilder(t, &zoekt.Repository{ 2238 2238 SubRepoMap: subRepos, 2239 2239 }, Document{ 2240 2240 Name: "sub/f1", ··· 2270 2270 2271 2271 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}) 2272 2272 if len(sres.Files) != 1 { 2273 - t.Fatalf("got %v, wanted 1 match", sres.Files) 2273 + t.Fatalf("got %v, wanted 1 index", sres.Files) 2274 2274 } 2275 2275 2276 2276 if got, want := sres.Files[0].FileName, "f1"; got != want { ··· 2286 2286 2287 2287 sres = searchForTest(t, b, &query.Substring{Pattern: "needle", Content: true}, chunkOpts) 2288 2288 if len(sres.Files) != 1 { 2289 - t.Fatalf("got %v, wanted 1 match", sres.Files) 2289 + t.Fatalf("got %v, wanted 1 index", sres.Files) 2290 2290 } 2291 2291 2292 2292 if got, want := sres.Files[0].FileName, "f1"; got != want { ··· 2304 2304 2305 2305 t.Run("LineMatches", func(t *testing.T) { 2306 2306 if res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}); len(res.Files) != 1 { 2307 - t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2307 + t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) 2308 2308 } 2309 2309 }) 2310 2310 2311 2311 t.Run("ChunkMatches", func(t *testing.T) { 2312 2312 res := searchForTest(t, b, &query.Substring{Pattern: needle, CaseSensitive: true}, chunkOpts) 2313 2313 if len(res.Files) != 1 { 2314 - t.Fatalf("case sensitive: got %v, wanted 1 match", res.Files) 2314 + t.Fatalf("case sensitive: got %v, wanted 1 index", res.Files) 2315 2315 } 2316 2316 }) 2317 2317 } ··· 2325 2325 2326 2326 t.Run("LineMatches", func(t *testing.T) { 2327 2327 if res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}); len(res.Files) != 0 { 2328 - t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2328 + t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) 2329 2329 } 2330 2330 2331 2331 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}) 2332 2332 if len(res.Files) != 1 { 2333 - t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2333 + t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) 2334 2334 } 2335 2335 2336 2336 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { ··· 2341 2341 t.Run("ChunkMatches", func(t *testing.T) { 2342 2342 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ", CaseSensitive: true}, chunkOpts) 2343 2343 if len(res.Files) != 0 { 2344 - t.Fatalf("case sensitive: got %v, wanted 0 match", res.Files) 2344 + t.Fatalf("case sensitive: got %v, wanted 0 index", res.Files) 2345 2345 } 2346 2346 2347 2347 res = searchForTest(t, b, &query.Substring{Pattern: "NÉÉDLÉ"}, chunkOpts) 2348 2348 if len(res.Files) != 1 { 2349 - t.Fatalf("case insensitive: got %v, wanted 1 match", res.Files) 2349 + t.Fatalf("case insensitive: got %v, wanted 1 index", res.Files) 2350 2350 } 2351 2351 2352 2352 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset ··· 2367 2367 t.Run("LineMatches", func(t *testing.T) { 2368 2368 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}) 2369 2369 if len(res.Files) != 1 { 2370 - t.Fatalf("got %v, wanted 1 match", res.Files) 2370 + t.Fatalf("got %v, wanted 1 index", res.Files) 2371 2371 } 2372 2372 2373 2373 if got, want := res.Files[0].LineMatches[0].LineFragments[0].Offset, uint32(strings.Index(string(content), needle)); got != want { ··· 2378 2378 t.Run("ChunkMatches", func(t *testing.T) { 2379 2379 res := searchForTest(t, b, &query.Substring{Pattern: "NÉÉÁÁDLÉ", Content: true}, chunkOpts) 2380 2380 if len(res.Files) != 1 { 2381 - t.Fatalf("got %v, wanted 1 match", res.Files) 2381 + t.Fatalf("got %v, wanted 1 index", res.Files) 2382 2382 } 2383 2383 2384 2384 got := res.Files[0].ChunkMatches[0].Ranges[0].Start.ByteOffset ··· 2406 2406 2407 2407 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}) 2408 2408 if len(res.Files) != 1 { 2409 - t.Fatalf("got %v, wanted 1 match", res.Files) 2409 + t.Fatalf("got %v, wanted 1 index", res.Files) 2410 2410 } 2411 2411 }) 2412 2412 ··· 2416 2416 2417 2417 res := searchForTest(t, b, &query.Substring{Pattern: needle, Content: true}, chunkOpts) 2418 2418 if len(res.Files) != 1 { 2419 - t.Fatalf("got %v, wanted 1 match", res.Files) 2419 + t.Fatalf("got %v, wanted 1 index", res.Files) 2420 2420 } 2421 2421 }) 2422 2422 } ··· 2437 2437 q := &query.Substring{Pattern: wat, Content: true} 2438 2438 res := searchForTest(t, b, q) 2439 2439 if len(res.Files) != 1 { 2440 - t.Fatalf("got %v, wanted 1 match", res.Files) 2440 + t.Fatalf("got %v, wanted 1 index", res.Files) 2441 2441 } 2442 2442 } 2443 2443 ··· 2476 2476 2477 2477 func TestEstimateDocCount(t *testing.T) { 2478 2478 content := []byte("bla needle bla") 2479 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2479 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2480 2480 Document{Name: "f1", Content: content}, 2481 2481 Document{Name: "f2", Content: content}, 2482 2482 ) ··· 2486 2486 query.NewAnd( 2487 2487 &query.Substring{Pattern: "needle"}, 2488 2488 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2489 - ), SearchOptions{ 2489 + ), zoekt.SearchOptions{ 2490 2490 EstimateDocCount: true, 2491 2491 }); sres.Stats.ShardFilesConsidered != 2 { 2492 2492 t.Errorf("got FilesConsidered = %d, want 2", sres.Stats.FilesConsidered) ··· 2495 2495 query.NewAnd( 2496 2496 &query.Substring{Pattern: "needle"}, 2497 2497 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2498 - ), SearchOptions{ 2498 + ), zoekt.SearchOptions{ 2499 2499 EstimateDocCount: true, 2500 2500 }); sres.Stats.ShardFilesConsidered != 0 { 2501 2501 t.Errorf("got FilesConsidered = %d, want 0", sres.Stats.FilesConsidered) ··· 2507 2507 query.NewAnd( 2508 2508 &query.Substring{Pattern: "needle"}, 2509 2509 &query.Repo{Regexp: regexp.MustCompile("reponame")}, 2510 - ), SearchOptions{ 2510 + ), zoekt.SearchOptions{ 2511 2511 EstimateDocCount: true, 2512 2512 ChunkMatches: true, 2513 2513 }); sres.Stats.ShardFilesConsidered != 2 { ··· 2517 2517 query.NewAnd( 2518 2518 &query.Substring{Pattern: "needle"}, 2519 2519 &query.Repo{Regexp: regexp.MustCompile("nomatch")}, 2520 - ), SearchOptions{ 2520 + ), zoekt.SearchOptions{ 2521 2521 EstimateDocCount: true, 2522 2522 ChunkMatches: true, 2523 2523 }); sres.Stats.ShardFilesConsidered != 0 { ··· 2615 2615 2616 2616 t.Run("LineMatches with BM25", func(t *testing.T) { 2617 2617 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2618 - res := searchForTest(t, b, q, SearchOptions{UseBM25Scoring: true}) 2618 + res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true}) 2619 2619 2620 2620 // 4096 (content) + 2 (overhead: newlines or doc sections) 2621 2621 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { ··· 2631 2631 2632 2632 t.Run("ChunkMatches with BM25", func(t *testing.T) { 2633 2633 q := &query.Substring{Pattern: "abc", CaseSensitive: true, Content: true} 2634 - res := searchForTest(t, b, q, SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) 2634 + res := searchForTest(t, b, q, zoekt.SearchOptions{UseBM25Scoring: true, ChunkMatches: true}) 2635 2635 2636 2636 // 4096 (content) + 2 (overhead: newlines or doc sections) 2637 2637 if got, want := res.Stats.ContentBytesLoaded, int64(4098); got != want { ··· 2708 2708 query.NewOr(query.NewAnd(&query.Repo{Regexp: regexp.MustCompile("name")}, 2709 2709 query.NewOr(&query.Branch{Pattern: "master"})))) 2710 2710 2711 - b := testIndexBuilder(t, &Repository{ 2711 + b := testIndexBuilder(t, &zoekt.Repository{ 2712 2712 Name: "name", 2713 - Branches: []RepositoryBranch{{"master", "master-version"}}, 2713 + Branches: []zoekt.RepositoryBranch{{"master", "master-version"}}, 2714 2714 }, Document{ 2715 2715 Name: "f2", 2716 2716 Content: []byte("orange\u2318apple"), ··· 2735 2735 2736 2736 func TestAndShort(t *testing.T) { 2737 2737 content := []byte("bla needle at orange bla") 2738 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2738 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2739 2739 Document{Name: "f1", Content: content}, 2740 2740 Document{Name: "f2", Content: []byte("xx at xx")}, 2741 2741 Document{Name: "f3", Content: []byte("yy orange xx")}, ··· 2761 2761 2762 2762 func TestNoCollectRegexpSubstring(t *testing.T) { 2763 2763 content := []byte("bla final bla\nfoo final, foo") 2764 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2764 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2765 2765 Document{Name: "f1", Content: content}, 2766 2766 ) 2767 2767 ··· 2775 2775 t.Fatalf("got %v, want 1 result", res.Files) 2776 2776 } 2777 2777 if f := res.Files[0]; len(f.LineMatches) != 1 { 2778 - t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2778 + t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2779 2779 } 2780 2780 }) 2781 2781 ··· 2785 2785 t.Fatalf("got %v, want 1 result", res.Files) 2786 2786 } 2787 2787 if f := res.Files[0]; len(f.ChunkMatches) != 1 { 2788 - t.Fatalf("got line matches %v, want 1 line match", printLineMatches(f.LineMatches)) 2788 + t.Fatalf("got line matches %v, want 1 line index", printLineMatches(f.LineMatches)) 2789 2789 } 2790 2790 }) 2791 2791 } 2792 2792 2793 - func printLineMatches(ms []LineMatch) string { 2793 + func printLineMatches(ms []zoekt.LineMatch) string { 2794 2794 var ss []string 2795 2795 for _, m := range ms { 2796 2796 ss = append(ss, fmt.Sprintf("%d:%q %v", m.LineNumber, m.Line, m.LineFragments)) ··· 2801 2801 2802 2802 func TestLang(t *testing.T) { 2803 2803 content := []byte("bla needle bla") 2804 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2804 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2805 2805 Document{Name: "f1", Content: content}, 2806 2806 Document{Name: "f2", Language: "java", Content: content}, 2807 2807 Document{Name: "f3", Language: "cpp", Content: content}, ··· 2835 2835 2836 2836 func TestLangShortcut(t *testing.T) { 2837 2837 content := []byte("bla needle bla") 2838 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2838 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2839 2839 Document{Name: "f2", Language: "java", Content: content}, 2840 2840 Document{Name: "f3", Language: "cpp", Content: content}, 2841 2841 ) ··· 2849 2849 t.Fatalf("got %v, want 0 results", res.Files) 2850 2850 } 2851 2851 if res.Stats.IndexBytesLoaded > 0 { 2852 - t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2852 + t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2853 2853 } 2854 2854 }) 2855 2855 ··· 2859 2859 t.Fatalf("got %v, want 0 results", res.Files) 2860 2860 } 2861 2861 if res.Stats.IndexBytesLoaded > 0 { 2862 - t.Errorf("got IndexBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2862 + t.Errorf("got matchBytesLoaded %d, want 0", res.Stats.IndexBytesLoaded) 2863 2863 } 2864 2864 }) 2865 2865 } 2866 2866 2867 2867 func TestNoTextMatchAtoms(t *testing.T) { 2868 2868 content := []byte("bla needle bla") 2869 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2869 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2870 2870 Document{Name: "f1", Content: content}, 2871 2871 Document{Name: "f2", Language: "java", Content: content}, 2872 2872 Document{Name: "f3", Language: "cpp", Content: content}, ··· 2889 2889 2890 2890 func TestNoPositiveAtoms(t *testing.T) { 2891 2891 content := []byte("bla needle bla") 2892 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2892 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2893 2893 Document{Name: "f1", Content: content}, 2894 2894 Document{Name: "f2", Content: content}, 2895 2895 ) ··· 2915 2915 content := []byte("start\nbla bla\nend") 2916 2916 // ----------------012345-67890123-456 2917 2917 2918 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2918 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2919 2919 Document{ 2920 2920 Name: "f1", 2921 2921 Content: content, ··· 2952 2952 content := []byte("start\nbla bla\nend") 2953 2953 // ----------------012345-67890123-456 2954 2954 2955 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2955 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2956 2956 Document{ 2957 2957 Name: "f1", 2958 2958 Content: content, ··· 2989 2989 content := []byte("bla\nsymblabla\nbla") 2990 2990 // ----------------0123-4567890123-456 2991 2991 2992 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 2992 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 2993 2993 Document{ 2994 2994 Name: "f1", 2995 2995 Content: content, ··· 3026 3026 content := []byte("bla\nsym\nbla\nsym\nasymb") 3027 3027 // ----------------0123-4567-890123456-78901 3028 3028 3029 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3029 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3030 3030 Document{ 3031 3031 Name: "f1", 3032 3032 Content: content, ··· 3063 3063 content := []byte("blah\nbla\nbl") 3064 3064 // ----------------01234-5678-90 3065 3065 3066 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3066 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3067 3067 Document{ 3068 3068 Name: "f1", 3069 3069 Content: content, ··· 3100 3100 content := []byte("abcdef") 3101 3101 // ----------------012345 3102 3102 3103 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3103 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3104 3104 Document{ 3105 3105 Name: "f1", 3106 3106 Content: content, ··· 3155 3155 }, 3156 3156 } 3157 3157 3158 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, docs...) 3158 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, docs...) 3159 3159 q := &query.Symbol{ 3160 3160 Expr: &query.Regexp{Regexp: mustParseRE(".*")}, 3161 3161 } ··· 3200 3200 3201 3201 func TestHitIterTerminate(t *testing.T) { 3202 3202 // contrived input: trigram frequencies forces selecting abc + 3203 - // def for the distance iteration. There is no match, so this 3203 + // def for the distance iteration. There is no index, so this 3204 3204 // will advance the compressedPostingIterator to beyond the 3205 3205 // end. 3206 3206 content := []byte("abc bcdbcd cdecde abcabc def efg") ··· 3395 3395 } 3396 3396 3397 3397 func TestLineAnd(t *testing.T) { 3398 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3398 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3399 3399 Document{Name: "f1", Content: []byte("apple\nbanana\napple banana chocolate apple pudding banana\ngrape")}, 3400 3400 Document{Name: "f2", Content: []byte("apple orange\nbanana")}, 3401 3401 Document{Name: "f3", Content: []byte("banana grape")}, ··· 3431 3431 } 3432 3432 3433 3433 func TestLineAndFileName(t *testing.T) { 3434 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3434 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3435 3435 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3436 3436 Document{Name: "f2", Content: []byte("apple banana\norange")}, 3437 3437 Document{Name: "apple banana", Content: []byte("banana grape")}, ··· 3467 3467 } 3468 3468 3469 3469 func TestMultiLineRegex(t *testing.T) { 3470 - b := testIndexBuilder(t, &Repository{Name: "reponame"}, 3470 + b := testIndexBuilder(t, &zoekt.Repository{Name: "reponame"}, 3471 3471 Document{Name: "f1", Content: []byte("apple banana\ngrape")}, 3472 3472 Document{Name: "f2", Content: []byte("apple orange")}, 3473 3473 Document{Name: "f3", Content: []byte("grape apple")}, ··· 3511 3511 } 3512 3512 3513 3513 func TestSearchTypeFileName(t *testing.T) { 3514 - b := testIndexBuilder(t, &Repository{ 3514 + b := testIndexBuilder(t, &zoekt.Repository{ 3515 3515 Name: "reponame", 3516 3516 }, 3517 3517 Document{Name: "f1", Content: []byte("bla the needle")}, ··· 3520 3520 ) 3521 3521 3522 3522 t.Run("LineMatches", func(t *testing.T) { 3523 - wantSingleMatch := func(res *SearchResult, want string) { 3523 + wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3524 3524 t.Helper() 3525 3525 fmatches := res.Files 3526 3526 if len(fmatches) != 1 { ··· 3562 3562 }) 3563 3563 3564 3564 t.Run("ChunkMatches", func(t *testing.T) { 3565 - wantSingleMatch := func(res *SearchResult, want string) { 3565 + wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3566 3566 t.Helper() 3567 3567 fmatches := res.Files 3568 3568 if len(fmatches) != 1 { ··· 3609 3609 } 3610 3610 3611 3611 func TestSearchTypeLanguage(t *testing.T) { 3612 - b := testIndexBuilder(t, &Repository{ 3612 + b := testIndexBuilder(t, &zoekt.Repository{ 3613 3613 Name: "reponame", 3614 3614 }, 3615 3615 Document{Name: "apex.cls", Content: []byte("public class Car extends Vehicle {")}, ··· 3621 3621 t.Log(b.languageMap) 3622 3622 3623 3623 t.Run("LineMatches", func(t *testing.T) { 3624 - wantSingleMatch := func(res *SearchResult, want string) { 3624 + wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3625 3625 t.Helper() 3626 3626 fmatches := res.Files 3627 3627 if len(fmatches) != 1 { ··· 3668 3668 }) 3669 3669 3670 3670 t.Run("ChunkMatches", func(t *testing.T) { 3671 - wantSingleMatch := func(res *SearchResult, want string) { 3671 + wantSingleMatch := func(res *zoekt.SearchResult, want string) { 3672 3672 t.Helper() 3673 3673 fmatches := res.Files 3674 3674 if len(fmatches) != 1 { ··· 3716 3716 func TestStats(t *testing.T) { 3717 3717 ignored := []cmp.Option{ 3718 3718 cmpopts.EquateEmpty(), 3719 - cmpopts.IgnoreFields(RepoListEntry{}, "Repository"), 3720 - cmpopts.IgnoreFields(RepoListEntry{}, "IndexMetadata"), 3721 - cmpopts.IgnoreFields(RepoStats{}, "IndexBytes"), 3719 + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "Repository"), 3720 + cmpopts.IgnoreFields(zoekt.RepoListEntry{}, "IndexMetadata"), 3721 + cmpopts.IgnoreFields(zoekt.RepoStats{}, "IndexBytes"), 3722 3722 } 3723 3723 3724 - repoListEntries := func(b *IndexBuilder) []RepoListEntry { 3724 + repoListEntries := func(b *IndexBuilder) []zoekt.RepoListEntry { 3725 3725 searcher := searcherForTest(t, b) 3726 3726 indexdata := searcher.(*indexData) 3727 3727 return indexdata.repoListEntry ··· 3730 3730 t.Run("one empty repo", func(t *testing.T) { 3731 3731 b := testIndexBuilder(t, nil) 3732 3732 got := repoListEntries(b) 3733 - want := []RepoListEntry{ 3733 + want := []zoekt.RepoListEntry{ 3734 3734 { 3735 - Stats: RepoStats{ 3735 + Stats: zoekt.RepoStats{ 3736 3736 Repos: 0, 3737 3737 Shards: 1, 3738 3738 Documents: 0, ··· 3756 3756 Document{Name: "doc 1", Content: []byte("content 1")}, 3757 3757 ) 3758 3758 got := repoListEntries(b) 3759 - want := []RepoListEntry{ 3759 + want := []zoekt.RepoListEntry{ 3760 3760 { 3761 - Stats: RepoStats{ 3761 + Stats: zoekt.RepoStats{ 3762 3762 Repos: 0, 3763 3763 Shards: 1, 3764 3764 Documents: 2, ··· 3778 3778 3779 3779 t.Run("one compound shard", func(t *testing.T) { 3780 3780 b := testIndexBuilderCompound(t, 3781 - []*Repository{ 3781 + []*zoekt.Repository{ 3782 3782 {Name: "repo 0"}, 3783 3783 {Name: "repo 1"}, 3784 3784 }, ··· 3794 3794 }, 3795 3795 ) 3796 3796 got := repoListEntries(b) 3797 - want := []RepoListEntry{ 3797 + want := []zoekt.RepoListEntry{ 3798 3798 { 3799 - Stats: RepoStats{ 3799 + Stats: zoekt.RepoStats{ 3800 3800 Repos: 0, 3801 3801 Shards: 1, 3802 3802 Documents: 2, ··· 3808 3808 }, 3809 3809 }, 3810 3810 { 3811 - Stats: RepoStats{ 3811 + Stats: zoekt.RepoStats{ 3812 3812 Repos: 0, 3813 3813 Shards: 1, 3814 3814 Documents: 2, ··· 3828 3828 3829 3829 t.Run("compound shard with empty repos", func(t *testing.T) { 3830 3830 b := testIndexBuilderCompound(t, 3831 - []*Repository{ 3831 + []*zoekt.Repository{ 3832 3832 {Name: "repo 0"}, 3833 3833 {Name: "repo 1"}, 3834 3834 {Name: "repo 2"}, ··· 3845 3845 ) 3846 3846 got := repoListEntries(b) 3847 3847 3848 - entryEmpty := RepoListEntry{Stats: RepoStats{ 3848 + entryEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ 3849 3849 Shards: 1, 3850 3850 Documents: 0, 3851 3851 ContentBytes: 0, 3852 3852 }} 3853 - entryNonEmpty := RepoListEntry{Stats: RepoStats{ 3853 + entryNonEmpty := zoekt.RepoListEntry{Stats: zoekt.RepoStats{ 3854 3854 Shards: 1, 3855 3855 Documents: 1, 3856 3856 ContentBytes: 14, 3857 3857 }} 3858 3858 3859 - want := []RepoListEntry{ 3859 + want := []zoekt.RepoListEntry{ 3860 3860 entryNonEmpty, 3861 3861 entryEmpty, 3862 3862 entryNonEmpty, ··· 3898 3898 } 3899 3899 3900 3900 got := sres.Files[0].LineMatches[0] 3901 - want := LineMatch{ 3902 - LineFragments: []LineFragmentMatch{{ 3901 + want := zoekt.LineMatch{ 3902 + LineFragments: []zoekt.LineFragmentMatch{{ 3903 3903 LineOffset: 7, 3904 3904 Offset: 7, 3905 3905 MatchLength: 3, ··· 3932 3932 } 3933 3933 3934 3934 got := sres.Files[0].ChunkMatches[0] 3935 - want := ChunkMatch{ 3935 + want := zoekt.ChunkMatch{ 3936 3936 Content: content, 3937 - ContentStart: Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3938 - Ranges: []Range{{ 3939 - Start: Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3940 - End: Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3937 + ContentStart: zoekt.Location{ByteOffset: 0, LineNumber: 1, Column: 1}, 3938 + Ranges: []zoekt.Range{{ 3939 + Start: zoekt.Location{ByteOffset: 7, LineNumber: 1, Column: 8}, 3940 + End: zoekt.Location{ByteOffset: 10, LineNumber: 1, Column: 11}, 3941 3941 }}, 3942 3942 } 3943 3943 ··· 3971 3971 b.ResetTimer() 3972 3972 3973 3973 for i := 0; i < b.N; i++ { 3974 - sres, err := searcher.Search(ctx, q, &SearchOptions{ChunkMatches: true, NumContextLines: 1}) 3974 + sres, err := searcher.Search(ctx, q, &zoekt.SearchOptions{ChunkMatches: true, NumContextLines: 1}) 3975 3975 if err != nil { 3976 3976 b.Fatal(err) 3977 3977 } 3978 3978 3979 3979 matches := sres.Files 3980 3980 if len(matches) == 0 { 3981 - b.Fatalf("want file match, got none") 3981 + b.Fatalf("want file index, got none") 3982 3982 } 3983 3983 } 3984 3984 })

+13 -33

indexbuilder.go index/indexbuilder.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bytes" ··· 29 29 "time" 30 30 "unicode/utf8" 31 31 32 + "github.com/sourcegraph/zoekt" 32 33 "github.com/sourcegraph/zoekt/internal/languages" 33 34 ) 34 35 ··· 194 195 namePostings *postingsBuilder 195 196 196 197 // root repositories 197 - repoList []Repository 198 + repoList []zoekt.Repository 198 199 199 200 // name to index. 200 201 subRepoIndices []map[string]uint32 ··· 213 214 ID string 214 215 } 215 216 216 - func (d *Repository) verify() error { 217 - for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} { 217 + func verify(repo *zoekt.Repository) error { 218 + for _, t := range []string{repo.FileURLTemplate, repo.LineFragmentTemplate, repo.CommitURLTemplate} { 218 219 if _, err := ParseTemplate(t); err != nil { 219 220 return err 220 221 } ··· 266 267 267 268 // NewIndexBuilder creates a fresh IndexBuilder. The passed in 268 269 // Repository contains repo metadata, and may be set to nil. 269 - func NewIndexBuilder(r *Repository) (*IndexBuilder, error) { 270 + func NewIndexBuilder(r *zoekt.Repository) (*IndexBuilder, error) { 270 271 b := newIndexBuilder() 271 272 272 273 if r == nil { 273 - r = &Repository{} 274 + r = &zoekt.Repository{} 274 275 } 275 276 if err := b.setRepository(r); err != nil { 276 277 return nil, err ··· 292 293 } 293 294 } 294 295 295 - func (b *IndexBuilder) setRepository(desc *Repository) error { 296 - if err := desc.verify(); err != nil { 296 + func (b *IndexBuilder) setRepository(desc *zoekt.Repository) error { 297 + if err := verify(desc); err != nil { 297 298 return err 298 299 } 299 300 ··· 304 305 repo := *desc 305 306 306 307 // copy subrepomap without root 307 - repo.SubRepoMap = map[string]*Repository{} 308 + repo.SubRepoMap = map[string]*zoekt.Repository{} 308 309 for k, v := range desc.SubRepoMap { 309 310 if k != "" { 310 311 repo.SubRepoMap[k] = v ··· 316 317 return b.populateSubRepoIndices() 317 318 } 318 319 319 - type DocumentSection struct { 320 - Start, End uint32 321 - } 322 - 323 - // Document holds a document (file) to index. 324 - type Document struct { 325 - Name string 326 - Content []byte 327 - Branches []string 328 - SubRepositoryPath string 329 - Language string 330 - 331 - // If set, something is wrong with the file contents, and this 332 - // is the reason it wasn't indexed. 333 - SkipReason string 334 - 335 - // Document sections for symbols. Offsets should use bytes. 336 - Symbols []DocumentSection 337 - SymbolsMetaData []*Symbol 338 - } 339 - 340 320 type symbolSlice struct { 341 321 symbols []DocumentSection 342 - metaData []*Symbol 322 + metaData []*zoekt.Symbol 343 323 } 344 324 345 325 func (s symbolSlice) Len() int { return len(s.symbols) } ··· 370 350 return nil 371 351 } 372 352 373 - func mkSubRepoIndices(repo Repository) map[string]uint32 { 353 + func mkSubRepoIndices(repo zoekt.Repository) map[string]uint32 { 374 354 paths := []string{""} 375 355 for k := range repo.SubRepoMap { 376 356 paths = append(paths, k) ··· 401 381 return b.symKindIndex[t] 402 382 } 403 383 404 - func (b *IndexBuilder) addSymbols(symbols []*Symbol) { 384 + func (b *IndexBuilder) addSymbols(symbols []*zoekt.Symbol) { 405 385 for _, sym := range symbols { 406 386 b.symMetaData = append(b.symMetaData, 407 387 // This field was removed due to redundancy. To avoid

+1 -1

indexbuilder_test.go index/indexbuilder_test.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "strings"

+13 -12

indexdata.go index/indexdata.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "encoding/binary" ··· 24 24 "slices" 25 25 "unicode/utf8" 26 26 27 + "github.com/sourcegraph/zoekt" 27 28 "github.com/sourcegraph/zoekt/query" 28 29 ) 29 30 ··· 76 77 // name => mask (power of 2) 77 78 branchIDs []map[string]uint 78 79 79 - metaData IndexMetadata 80 - repoMetaData []Repository 80 + metaData zoekt.IndexMetadata 81 + repoMetaData []zoekt.Repository 81 82 82 83 subRepos []uint32 83 84 subRepoPaths [][]string ··· 91 92 // inverse of LanguageMap in metaData 92 93 languageMap map[uint16]string 93 94 94 - repoListEntry []RepoListEntry 95 + repoListEntry []zoekt.RepoListEntry 95 96 96 97 // repository indexes for all the files 97 98 repos []uint16 ··· 139 140 } 140 141 141 142 // data returns the symbol at index i 142 - func (d *symbolData) data(i uint32) *Symbol { 143 + func (d *symbolData) data(i uint32) *zoekt.Symbol { 143 144 size := uint32(4 * 4) // 4 uint32s 144 145 offset := i * size 145 146 if offset >= uint32(len(d.symMetaData)) { ··· 147 148 } 148 149 149 150 metadata := d.symMetaData[offset : offset+size] 150 - sym := &Symbol{} 151 + sym := &zoekt.Symbol{} 151 152 key := uint32SliceAt(metadata, 1) 152 153 sym.Kind = string(d.kind(key)) 153 154 key = uint32SliceAt(metadata, 2) ··· 172 173 } 173 174 174 175 // calculates stats for files in the range [start, end). 175 - func (d *indexData) calculateStatsForFileRange(start, end uint32) RepoStats { 176 + func (d *indexData) calculateStatsForFileRange(start, end uint32) zoekt.RepoStats { 176 177 if start >= end { 177 178 // An empty shard for an empty repository. 178 - return RepoStats{ 179 + return zoekt.RepoStats{ 179 180 Shards: 1, 180 181 } 181 182 } ··· 190 191 // here). Right now I don't like that these numbers are not true, especially 191 192 // after aggregation. For now I will move forward with this until we can 192 193 // chat more. 193 - return RepoStats{ 194 + return zoekt.RepoStats{ 194 195 ContentBytes: int64(bytesContent) + int64(bytesFN), 195 196 Documents: int(end - start), 196 197 // CR keegan for stefan: our shard count is going to go out of whack, ··· 206 207 } 207 208 208 209 func (d *indexData) calculateStats() error { 209 - d.repoListEntry = make([]RepoListEntry, 0, len(d.repoMetaData)) 210 + d.repoListEntry = make([]zoekt.RepoListEntry, 0, len(d.repoMetaData)) 210 211 var start, end uint32 211 212 212 213 for repoID, md := range d.repoMetaData { ··· 218 219 return fmt.Errorf("shard documents out of order with respect to repositories: expected document %d to be part of repo %d", start, repoID) 219 220 } 220 221 221 - d.repoListEntry = append(d.repoListEntry, RepoListEntry{ 222 + d.repoListEntry = append(d.repoListEntry, zoekt.RepoListEntry{ 222 223 Repository: md, 223 224 IndexMetadata: d.metaData, 224 225 Stats: d.calculateStatsForFileRange(start, end), ··· 429 430 return &ngramIterationResults{ 430 431 matchIterator: &noMatchTree{ 431 432 Why: "freq=0", 432 - Stats: Stats{ 433 + Stats: zoekt.Stats{ 433 434 NgramLookups: ngramLookups, 434 435 }, 435 436 },

+1 -1

indexdata_test.go index/indexdata_test.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "math/rand"

+1 -1

indexfile_other.go index/indexfile_other.go

··· 15 15 //go:build !linux && !darwin 16 16 // +build !linux,!darwin 17 17 18 - package zoekt 18 + package index 19 19 20 20 import ( 21 21 "fmt"

+1 -1

indexfile_unix.go index/indexfile_unix.go

··· 14 14 15 15 //go:build linux || darwin 16 16 17 - package zoekt 17 + package index 18 18 19 19 import ( 20 20 "fmt"

+4 -4

internal/archive/e2e_test.go

··· 17 17 "time" 18 18 19 19 "github.com/sourcegraph/zoekt" 20 - "github.com/sourcegraph/zoekt/build" 20 + "github.com/sourcegraph/zoekt/index" 21 21 "github.com/sourcegraph/zoekt/internal/shards" 22 22 "github.com/sourcegraph/zoekt/query" 23 23 "github.com/stretchr/testify/require" ··· 160 160 for _, test := range tests { 161 161 largeFiles, wantNumFiles := test.largeFiles, test.wantNumFiles 162 162 163 - bopts := build.Options{ 163 + bopts := index.Options{ 164 164 SizeMax: fileSize - 1, 165 165 IndexDir: indexDir, 166 166 LargeFiles: largeFiles, ··· 233 233 234 234 // Index 235 235 indexDir := t.TempDir() 236 - bopts := build.Options{ 236 + bopts := index.Options{ 237 237 IndexDir: indexDir, 238 238 } 239 239 opts := Options{ ··· 253 253 indexFiles, err := f.Readdirnames(1) 254 254 require.Len(t, indexFiles, 1) 255 255 256 - repos, _, err := zoekt.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) 256 + repos, _, err := index.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) 257 257 require.NoError(t, err) 258 258 require.Len(t, repos, 1) 259 259 require.True(t, repos[0].LatestCommitDate.Equal(modTime))

+5 -5

internal/archive/index.go

··· 10 10 "sync" 11 11 12 12 "github.com/sourcegraph/zoekt" 13 - "github.com/sourcegraph/zoekt/build" 13 + "github.com/sourcegraph/zoekt/index" 14 14 ) 15 15 16 16 // Options specify the archive specific indexing options. ··· 74 74 } 75 75 76 76 // Index archive specified in opts using bopts. 77 - func Index(opts Options, bopts build.Options) error { 77 + func Index(opts Options, bopts index.Options) error { 78 78 opts.SetDefaults() 79 79 80 80 if opts.Name == "" && opts.RepoURL == "" { ··· 114 114 defer a.Close() 115 115 116 116 bopts.RepositoryDescription.Source = opts.Archive 117 - var builder *build.Builder 117 + var builder *index.Builder 118 118 119 119 once := sync.Once{} 120 120 var onceErr error ··· 124 124 once.Do(func() { 125 125 // We use the ModTime of the first file as a proxy for the latest commit date. 126 126 bopts.RepositoryDescription.LatestCommitDate = f.ModTime 127 - builder, onceErr = build.NewBuilder(bopts) 127 + builder, onceErr = index.NewBuilder(bopts) 128 128 }) 129 129 if onceErr != nil { 130 130 return onceErr ··· 140 140 return nil 141 141 } 142 142 143 - return builder.Add(zoekt.Document{ 143 + return builder.Add(index.Document{ 144 144 Name: name, 145 145 Content: contents, 146 146 Branches: brs,

+2 -3

internal/debugserver/debug.go

··· 9 9 "github.com/prometheus/client_golang/prometheus" 10 10 "github.com/prometheus/client_golang/prometheus/promauto" 11 11 "github.com/prometheus/client_golang/prometheus/promhttp" 12 + "github.com/sourcegraph/zoekt/index" 12 13 "golang.org/x/net/trace" 13 - 14 - "github.com/sourcegraph/zoekt" 15 14 ) 16 15 17 16 var registerOnce sync.Once ··· 87 86 func register() { 88 87 promauto.NewGaugeVec(prometheus.GaugeOpts{ 89 88 Name: "zoekt_version", 90 - }, []string{"version"}).WithLabelValues(zoekt.Version).Set(1) 89 + }, []string{"version"}).WithLabelValues(index.Version).Set(1) 91 90 }

+14 -2

internal/e2e/e2e_rank_test.go

··· 16 16 17 17 "github.com/google/go-cmp/cmp" 18 18 "github.com/sourcegraph/zoekt" 19 - "github.com/sourcegraph/zoekt/build" 19 + "github.com/sourcegraph/zoekt/index" 20 20 "github.com/sourcegraph/zoekt/internal/archive" 21 21 "github.com/sourcegraph/zoekt/internal/shards" 22 22 "github.com/sourcegraph/zoekt/query" ··· 247 247 // languageMap[lang] = ctags.ScipCTags 248 248 // } 249 249 250 - err := archive.Index(opts, build.Options{ 250 + err := archive.Index(opts, index.Options{ 251 251 IndexDir: indexDir, 252 252 CTagsMustSucceed: true, 253 253 RepositoryDescription: zoekt.Repository{ ··· 370 370 tb.Skip("universal-ctags is missing") 371 371 } 372 372 } 373 + 374 + func checkScipCTags() string { 375 + if ctags := os.Getenv("SCIP_CTAGS_COMMAND"); ctags != "" { 376 + return ctags 377 + } 378 + 379 + if ctags, err := exec.LookPath("scip-ctags"); err == nil { 380 + return ctags 381 + } 382 + 383 + return "" 384 + }

+2 -2

internal/gitindex/ignore_test.go

··· 10 10 "testing" 11 11 12 12 "github.com/sourcegraph/zoekt" 13 - "github.com/sourcegraph/zoekt/build" 13 + "github.com/sourcegraph/zoekt/index" 14 14 "github.com/sourcegraph/zoekt/internal/shards" 15 15 "github.com/sourcegraph/zoekt/query" 16 16 ) ··· 56 56 57 57 indexDir := t.TempDir() 58 58 59 - buildOpts := build.Options{ 59 + buildOpts := index.Options{ 60 60 IndexDir: indexDir, 61 61 RepositoryDescription: zoekt.Repository{ 62 62 Name: "repo",

+16 -17

internal/gitindex/index.go

··· 33 33 "strings" 34 34 35 35 "github.com/go-git/go-billy/v5/osfs" 36 + "github.com/go-git/go-git/v5/config" 37 + "github.com/go-git/go-git/v5/plumbing" 36 38 "github.com/go-git/go-git/v5/plumbing/cache" 39 + "github.com/go-git/go-git/v5/plumbing/object" 37 40 "github.com/go-git/go-git/v5/storage/filesystem" 38 41 "github.com/sourcegraph/zoekt" 39 - "github.com/sourcegraph/zoekt/build" 40 42 "github.com/sourcegraph/zoekt/ignore" 41 - 42 - "github.com/go-git/go-git/v5/config" 43 - "github.com/go-git/go-git/v5/plumbing" 44 - "github.com/go-git/go-git/v5/plumbing/object" 43 + "github.com/sourcegraph/zoekt/index" 45 44 46 45 git "github.com/go-git/go-git/v5" 47 46 ) ··· 313 312 RepoCacheDir string 314 313 315 314 // Indexing options. 316 - BuildOptions build.Options 315 + BuildOptions index.Options 317 316 318 317 // Prefix of the branch to index, e.g. `remotes/origin`. 319 318 BranchPrefix string ··· 512 511 } 513 512 } 514 513 515 - builder, err := build.NewBuilder(opts.BuildOptions) 514 + builder, err := index.NewBuilder(opts.BuildOptions) 516 515 if err != nil { 517 516 return false, fmt.Errorf("build.NewBuilder: %w", err) 518 517 } ··· 611 610 func (r repoPathRanks) rank(path string, content []byte) float64 { 612 611 if rank, ok := r.Paths[path]; ok { 613 612 return rank 614 - } else if build.IsLowPriority(path, content) { 613 + } else if index.IsLowPriority(path, content) { 615 614 return 0.0 616 615 } else { 617 616 return r.MeanRank ··· 689 688 // If it isn't consistent, that we can't proceed with a delta build (and the caller should fall back to a 690 689 // normal one). 691 690 692 - if !build.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) { 691 + if !index.BranchNamesEqual(existingRepository.Branches, options.BuildOptions.RepositoryDescription.Branches) { 693 692 var existingBranchNames []string 694 693 for _, b := range existingRepository.Branches { 695 694 existingBranchNames = append(existingBranchNames, b.Name) ··· 707 706 } 708 707 709 708 // Check if the build options hash does not match the repository metadata's hash 710 - // If it does not match then one or more index options has changed and will require a normal build instead of a delta build 709 + // If it does not index then one or more index options has changed and will require a normal build instead of a delta build 711 710 if options.BuildOptions.GetHash() != existingRepository.IndexOptions { 712 711 return nil, nil, nil, fmt.Errorf("one or more index options previously stored for repository %s (ID: %d) does not match the index options for this requested build; These index option updates are incompatible with delta build. new index options: %+v", existingRepository.Name, existingRepository.ID, options.BuildOptions.HashOptions()) 713 712 } ··· 894 893 895 894 func createDocument(key fileKey, 896 895 repos map[fileKey]BlobLocation, 897 - opts build.Options, 898 - ) (zoekt.Document, error) { 896 + opts index.Options, 897 + ) (index.Document, error) { 899 898 repo := repos[key] 900 899 blob, err := repo.GitRepo.BlobObject(key.ID) 901 900 branches := repos[key].Branches ··· 906 905 } 907 906 908 907 if err != nil { 909 - return zoekt.Document{}, err 908 + return index.Document{}, err 910 909 } 911 910 912 911 keyFullPath := key.FullPath() ··· 916 915 917 916 contents, err := blobContents(blob) 918 917 if err != nil { 919 - return zoekt.Document{}, err 918 + return index.Document{}, err 920 919 } 921 920 922 - return zoekt.Document{ 921 + return index.Document{ 923 922 SubRepositoryPath: key.SubRepoPath, 924 923 Name: keyFullPath, 925 924 Content: contents, ··· 927 926 }, nil 928 927 } 929 928 930 - func skippedLargeDoc(key fileKey, branches []string, opts build.Options) zoekt.Document { 931 - return zoekt.Document{ 929 + func skippedLargeDoc(key fileKey, branches []string, opts index.Options) index.Document { 930 + return index.Document{ 932 931 SkipReason: fmt.Sprintf("file size exceeds maximum size %d", opts.SizeMax), 933 932 Name: key.FullPath(), 934 933 Branches: branches,

+98 -98

internal/gitindex/index_test.go

··· 33 33 "github.com/google/go-cmp/cmp" 34 34 "github.com/google/go-cmp/cmp/cmpopts" 35 35 "github.com/sourcegraph/zoekt" 36 - "github.com/sourcegraph/zoekt/build" 37 36 "github.com/sourcegraph/zoekt/ignore" 37 + "github.com/sourcegraph/zoekt/index" 38 38 "github.com/sourcegraph/zoekt/internal/shards" 39 39 "github.com/sourcegraph/zoekt/query" 40 40 ) ··· 54 54 } 55 55 opts := Options{ 56 56 RepoDir: filepath.Join(dir, "repo", ".git"), 57 - BuildOptions: build.Options{ 57 + BuildOptions: index.Options{ 58 58 RepositoryDescription: desc, 59 59 IndexDir: dir, 60 60 }, ··· 73 73 opts := Options{ 74 74 RepoDir: "does/not/exist", 75 75 Branches: []string{"main"}, 76 - BuildOptions: build.Options{ 76 + BuildOptions: index.Options{ 77 77 RepositoryDescription: desc, 78 78 IndexDir: dir, 79 79 }, ··· 92 92 executeCommand(t, dir, exec.Command("git", "init", "-b", "main", "repo")) 93 93 94 94 repoDir := filepath.Join(dir, "repo") 95 - executeCommand(t, repoDir, exec.Command("git", "config", "user.name", "Thomas")) 96 - executeCommand(t, repoDir, exec.Command("git", "config", "user.email", "thomas@google.com")) 95 + executeCommand(t, repoDir, exec.Command("git", "config", "--local", "user.name", "Thomas")) 96 + executeCommand(t, repoDir, exec.Command("git", "config", "--local", "user.email", "thomas@google.com")) 97 97 98 98 if err := os.WriteFile(filepath.Join(repoDir, "file1.go"), []byte("package main\n\nfunc main() {}\n"), 0644); err != nil { 99 99 t.Fatalf("WriteFile: %v", err) ··· 106 106 opts := Options{ 107 107 RepoDir: filepath.Join(dir, testDir), 108 108 Branches: []string{"main"}, 109 - BuildOptions: build.Options{ 109 + BuildOptions: index.Options{ 110 110 RepositoryDescription: zoekt.Repository{Name: "repo"}, 111 111 IndexDir: dir, 112 112 }, ··· 143 143 } 144 144 145 145 func TestIndexDeltaBasic(t *testing.T) { 146 - type branchToDocumentMap map[string][]zoekt.Document 146 + type branchToDocumentMap map[string][]index.Document 147 147 148 148 type step struct { 149 149 name string ··· 152 152 optFn func(t *testing.T, options *Options) 153 153 154 154 expectedFallbackToNormalBuild bool 155 - expectedDocuments []zoekt.Document 155 + expectedDocuments []index.Document 156 156 } 157 157 158 - helloWorld := zoekt.Document{Name: "hello_world.txt", Content: []byte("hello")} 158 + helloWorld := index.Document{Name: "hello_world.txt", Content: []byte("hello")} 159 159 160 - fruitV1 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("strawberry")} 161 - fruitV1InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content} 162 - fruitV1WithNewName := zoekt.Document{Name: "new_fruit.txt", Content: fruitV1.Content} 160 + fruitV1 := index.Document{Name: "best_fruit.txt", Content: []byte("strawberry")} 161 + fruitV1InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV1.Content} 162 + fruitV1WithNewName := index.Document{Name: "new_fruit.txt", Content: fruitV1.Content} 163 163 164 - fruitV2 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("grapes")} 165 - fruitV2InFolder := zoekt.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content} 164 + fruitV2 := index.Document{Name: "best_fruit.txt", Content: []byte("grapes")} 165 + fruitV2InFolder := index.Document{Name: "the_best/best_fruit.txt", Content: fruitV2.Content} 166 166 167 - fruitV3 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("oranges")} 168 - fruitV4 := zoekt.Document{Name: "best_fruit.txt", Content: []byte("apples")} 167 + fruitV3 := index.Document{Name: "best_fruit.txt", Content: []byte("oranges")} 168 + fruitV4 := index.Document{Name: "best_fruit.txt", Content: []byte("apples")} 169 169 170 - foo := zoekt.Document{Name: "foo.txt", Content: []byte("bar")} 170 + foo := index.Document{Name: "foo.txt", Content: []byte("bar")} 171 171 172 - emptySourcegraphIgnore := zoekt.Document{Name: ignore.IgnoreFile} 173 - sourcegraphIgnoreWithContent := zoekt.Document{Name: ignore.IgnoreFile, Content: []byte("good_content.txt")} 172 + emptySourcegraphIgnore := index.Document{Name: ignore.IgnoreFile} 173 + sourcegraphIgnoreWithContent := index.Document{Name: ignore.IgnoreFile, Content: []byte("good_content.txt")} 174 174 175 175 for _, test := range []struct { 176 176 name string ··· 184 184 { 185 185 name: "setup", 186 186 addedDocuments: branchToDocumentMap{ 187 - "main": []zoekt.Document{helloWorld, fruitV1}, 187 + "main": []index.Document{helloWorld, fruitV1}, 188 188 }, 189 189 190 - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, 190 + expectedDocuments: []index.Document{helloWorld, fruitV1}, 191 191 }, 192 192 { 193 193 name: "add newer version of fruits", 194 194 addedDocuments: branchToDocumentMap{ 195 - "main": []zoekt.Document{fruitV2}, 195 + "main": []index.Document{fruitV2}, 196 196 }, 197 197 optFn: func(t *testing.T, o *Options) { 198 198 o.BuildOptions.IsDelta = true 199 199 }, 200 200 201 - expectedDocuments: []zoekt.Document{helloWorld, fruitV2}, 201 + expectedDocuments: []index.Document{helloWorld, fruitV2}, 202 202 }, 203 203 }, 204 204 }, ··· 209 209 { 210 210 name: "setup", 211 211 addedDocuments: branchToDocumentMap{ 212 - "main": []zoekt.Document{foo, fruitV1InFolder}, 212 + "main": []index.Document{foo, fruitV1InFolder}, 213 213 }, 214 214 215 - expectedDocuments: []zoekt.Document{foo, fruitV1InFolder}, 215 + expectedDocuments: []index.Document{foo, fruitV1InFolder}, 216 216 }, 217 217 { 218 218 name: "add newer version of fruits inside folder", 219 219 addedDocuments: branchToDocumentMap{ 220 - "main": []zoekt.Document{fruitV2InFolder}, 220 + "main": []index.Document{fruitV2InFolder}, 221 221 }, 222 222 optFn: func(t *testing.T, o *Options) { 223 223 o.BuildOptions.IsDelta = true 224 224 }, 225 225 226 - expectedDocuments: []zoekt.Document{foo, fruitV2InFolder}, 226 + expectedDocuments: []index.Document{foo, fruitV2InFolder}, 227 227 }, 228 228 }, 229 229 }, ··· 234 234 { 235 235 name: "setup", 236 236 addedDocuments: branchToDocumentMap{ 237 - "main": []zoekt.Document{helloWorld, fruitV1}, 237 + "main": []index.Document{helloWorld, fruitV1}, 238 238 }, 239 239 240 - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, 240 + expectedDocuments: []index.Document{helloWorld, fruitV1}, 241 241 }, 242 242 { 243 243 name: "add new file - foo", 244 244 addedDocuments: branchToDocumentMap{ 245 - "main": []zoekt.Document{foo}, 245 + "main": []index.Document{foo}, 246 246 }, 247 247 optFn: func(t *testing.T, o *Options) { 248 248 o.BuildOptions.IsDelta = true 249 249 }, 250 250 251 - expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, 251 + expectedDocuments: []index.Document{helloWorld, fruitV1, foo}, 252 252 }, 253 253 }, 254 254 }, ··· 259 259 { 260 260 name: "setup", 261 261 addedDocuments: branchToDocumentMap{ 262 - "main": []zoekt.Document{helloWorld, fruitV1, foo}, 262 + "main": []index.Document{helloWorld, fruitV1, foo}, 263 263 }, 264 264 265 - expectedDocuments: []zoekt.Document{helloWorld, fruitV1, foo}, 265 + expectedDocuments: []index.Document{helloWorld, fruitV1, foo}, 266 266 }, 267 267 { 268 268 name: "delete foo file", 269 269 addedDocuments: nil, 270 270 deletedDocuments: branchToDocumentMap{ 271 - "main": []zoekt.Document{foo}, 271 + "main": []index.Document{foo}, 272 272 }, 273 273 274 274 optFn: func(t *testing.T, o *Options) { 275 275 o.BuildOptions.IsDelta = true 276 276 }, 277 277 278 - expectedDocuments: []zoekt.Document{helloWorld, fruitV1}, 278 + expectedDocuments: []index.Document{helloWorld, fruitV1}, 279 279 }, 280 280 }, 281 281 }, ··· 286 286 { 287 287 name: "setup", 288 288 addedDocuments: branchToDocumentMap{ 289 - "main": []zoekt.Document{fruitV1}, 290 - "release": []zoekt.Document{fruitV2}, 291 - "dev": []zoekt.Document{fruitV3}, 289 + "main": []index.Document{fruitV1}, 290 + "release": []index.Document{fruitV2}, 291 + "dev": []index.Document{fruitV3}, 292 292 }, 293 293 294 - expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, 294 + expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3}, 295 295 }, 296 296 { 297 297 name: "replace fruits v3 with v4 on 'dev', delete fruits on 'main'", 298 298 addedDocuments: branchToDocumentMap{ 299 - "dev": []zoekt.Document{fruitV4}, 299 + "dev": []index.Document{fruitV4}, 300 300 }, 301 301 deletedDocuments: branchToDocumentMap{ 302 - "main": []zoekt.Document{fruitV1}, 302 + "main": []index.Document{fruitV1}, 303 303 }, 304 304 305 305 optFn: func(t *testing.T, o *Options) { 306 306 o.BuildOptions.IsDelta = true 307 307 }, 308 308 309 - expectedDocuments: []zoekt.Document{fruitV2, fruitV4}, 309 + expectedDocuments: []index.Document{fruitV2, fruitV4}, 310 310 }, 311 311 }, 312 312 }, ··· 317 317 { 318 318 name: "setup", 319 319 addedDocuments: branchToDocumentMap{ 320 - "main": []zoekt.Document{fruitV1}, 321 - "release": []zoekt.Document{fruitV2}, 320 + "main": []index.Document{fruitV1}, 321 + "release": []index.Document{fruitV2}, 322 322 }, 323 - expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, 323 + expectedDocuments: []index.Document{fruitV1, fruitV2}, 324 324 }, 325 325 { 326 326 name: "rename fruits file on 'main' + ensure that unmodified fruits file on 'release' is still searchable", 327 327 addedDocuments: branchToDocumentMap{ 328 - "main": []zoekt.Document{fruitV1WithNewName}, 328 + "main": []index.Document{fruitV1WithNewName}, 329 329 }, 330 330 deletedDocuments: branchToDocumentMap{ 331 - "main": []zoekt.Document{fruitV1}, 331 + "main": []index.Document{fruitV1}, 332 332 }, 333 333 334 334 optFn: func(t *testing.T, o *Options) { 335 335 o.BuildOptions.IsDelta = true 336 336 }, 337 337 338 - expectedDocuments: []zoekt.Document{fruitV1WithNewName, fruitV2}, 338 + expectedDocuments: []index.Document{fruitV1WithNewName, fruitV2}, 339 339 }, 340 340 }, 341 341 }, ··· 346 346 { 347 347 name: "setup", 348 348 addedDocuments: branchToDocumentMap{ 349 - "main": []zoekt.Document{fruitV1}, 350 - "dev": []zoekt.Document{fruitV2}, 349 + "main": []index.Document{fruitV1}, 350 + "dev": []index.Document{fruitV2}, 351 351 }, 352 - expectedDocuments: []zoekt.Document{fruitV1, fruitV2}, 352 + expectedDocuments: []index.Document{fruitV1, fruitV2}, 353 353 }, 354 354 { 355 355 name: "switch main to dev's older version of fruits + bump dev's fruits to new version", 356 356 addedDocuments: branchToDocumentMap{ 357 - "main": []zoekt.Document{fruitV2}, 358 - "dev": []zoekt.Document{fruitV3}, 357 + "main": []index.Document{fruitV2}, 358 + "dev": []index.Document{fruitV3}, 359 359 }, 360 360 361 361 optFn: func(t *testing.T, o *Options) { 362 362 o.BuildOptions.IsDelta = true 363 363 }, 364 364 365 - expectedDocuments: []zoekt.Document{fruitV2, fruitV3}, 365 + expectedDocuments: []index.Document{fruitV2, fruitV3}, 366 366 }, 367 367 }, 368 368 }, ··· 373 373 { 374 374 name: "setup", 375 375 addedDocuments: branchToDocumentMap{ 376 - "main": []zoekt.Document{fruitV1, foo}, 377 - "dev": []zoekt.Document{helloWorld}, 376 + "main": []index.Document{fruitV1, foo}, 377 + "dev": []index.Document{helloWorld}, 378 378 }, 379 - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, 379 + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, 380 380 }, 381 381 { 382 382 name: "first no-op (normal build -> delta build)", ··· 384 384 o.BuildOptions.IsDelta = true 385 385 }, 386 386 387 - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, 387 + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, 388 388 }, 389 389 { 390 390 name: "second no-op (delta build -> delta build)", ··· 392 392 o.BuildOptions.IsDelta = true 393 393 }, 394 394 395 - expectedDocuments: []zoekt.Document{fruitV1, foo, helloWorld}, 395 + expectedDocuments: []index.Document{fruitV1, foo, helloWorld}, 396 396 }, 397 397 }, 398 398 }, ··· 403 403 { 404 404 name: "attempt delta build on a repository that hasn't been indexed yet", 405 405 addedDocuments: branchToDocumentMap{ 406 - "main": []zoekt.Document{helloWorld}, 406 + "main": []index.Document{helloWorld}, 407 407 }, 408 408 optFn: func(t *testing.T, o *Options) { 409 409 o.BuildOptions.IsDelta = true 410 410 }, 411 411 412 412 expectedFallbackToNormalBuild: true, 413 - expectedDocuments: []zoekt.Document{helloWorld}, 413 + expectedDocuments: []index.Document{helloWorld}, 414 414 }, 415 415 }, 416 416 }, ··· 421 421 { 422 422 name: "setup", 423 423 addedDocuments: branchToDocumentMap{ 424 - "main": []zoekt.Document{fruitV1}, 425 - "release": []zoekt.Document{fruitV2}, 426 - "dev": []zoekt.Document{fruitV3}, 424 + "main": []index.Document{fruitV1}, 425 + "release": []index.Document{fruitV2}, 426 + "dev": []index.Document{fruitV3}, 427 427 }, 428 428 429 - expectedDocuments: []zoekt.Document{fruitV1, fruitV2, fruitV3}, 429 + expectedDocuments: []index.Document{fruitV1, fruitV2, fruitV3}, 430 430 }, 431 431 { 432 432 name: "try delta build after dropping 'main' branch from index ", 433 433 addedDocuments: branchToDocumentMap{ 434 - "release": []zoekt.Document{fruitV4}, 434 + "release": []index.Document{fruitV4}, 435 435 }, 436 436 optFn: func(t *testing.T, o *Options) { 437 437 o.Branches = []string{"HEAD", "release", "dev"} // a bit of a hack to override it this way, but it gets the job done ··· 439 439 }, 440 440 441 441 expectedFallbackToNormalBuild: true, 442 - expectedDocuments: []zoekt.Document{fruitV3, fruitV4}, 442 + expectedDocuments: []index.Document{fruitV3, fruitV4}, 443 443 }, 444 444 }, 445 445 }, ··· 450 450 { 451 451 name: "setup", 452 452 addedDocuments: branchToDocumentMap{ 453 - "main": []zoekt.Document{fruitV1}, 453 + "main": []index.Document{fruitV1}, 454 454 }, 455 455 456 - expectedDocuments: []zoekt.Document{fruitV1}, 456 + expectedDocuments: []index.Document{fruitV1}, 457 457 }, 458 458 { 459 459 name: "try delta build after updating Disable CTags index option", 460 460 addedDocuments: branchToDocumentMap{ 461 - "main": []zoekt.Document{fruitV2}, 461 + "main": []index.Document{fruitV2}, 462 462 }, 463 463 optFn: func(t *testing.T, o *Options) { 464 464 o.BuildOptions.IsDelta = true ··· 466 466 }, 467 467 468 468 expectedFallbackToNormalBuild: true, 469 - expectedDocuments: []zoekt.Document{fruitV2}, 469 + expectedDocuments: []index.Document{fruitV2}, 470 470 }, 471 471 { 472 472 name: "try delta build after reverting Disable CTags index option", 473 473 addedDocuments: branchToDocumentMap{ 474 - "main": []zoekt.Document{fruitV3}, 474 + "main": []index.Document{fruitV3}, 475 475 }, 476 476 optFn: func(t *testing.T, o *Options) { 477 477 o.BuildOptions.IsDelta = true ··· 479 479 }, 480 480 481 481 expectedFallbackToNormalBuild: true, 482 - expectedDocuments: []zoekt.Document{fruitV3}, 482 + expectedDocuments: []index.Document{fruitV3}, 483 483 }, 484 484 }, 485 485 }, ··· 490 490 { 491 491 name: "setup", 492 492 addedDocuments: branchToDocumentMap{ 493 - "main": []zoekt.Document{fruitV1}, 493 + "main": []index.Document{fruitV1}, 494 494 }, 495 495 496 - expectedDocuments: []zoekt.Document{fruitV1}, 496 + expectedDocuments: []index.Document{fruitV1}, 497 497 }, 498 498 { 499 499 name: "try delta build after updating Disable CTags index option", 500 500 addedDocuments: branchToDocumentMap{ 501 - "main": []zoekt.Document{fruitV2}, 501 + "main": []index.Document{fruitV2}, 502 502 }, 503 503 optFn: func(t *testing.T, o *Options) { 504 504 o.BuildOptions.IsDelta = true ··· 506 506 }, 507 507 508 508 expectedFallbackToNormalBuild: true, 509 - expectedDocuments: []zoekt.Document{fruitV2}, 509 + expectedDocuments: []index.Document{fruitV2}, 510 510 }, 511 511 { 512 512 name: "try another delta build while CTags is still disabled", 513 513 addedDocuments: branchToDocumentMap{ 514 - "main": []zoekt.Document{fruitV3}, 514 + "main": []index.Document{fruitV3}, 515 515 }, 516 516 optFn: func(t *testing.T, o *Options) { 517 517 o.BuildOptions.IsDelta = true 518 518 o.BuildOptions.DisableCTags = true 519 519 }, 520 520 521 - expectedDocuments: []zoekt.Document{fruitV3}, 521 + expectedDocuments: []index.Document{fruitV3}, 522 522 }, 523 523 }, 524 524 }, ··· 529 529 { 530 530 name: "setup", 531 531 addedDocuments: branchToDocumentMap{ 532 - "main": []zoekt.Document{emptySourcegraphIgnore}, 532 + "main": []index.Document{emptySourcegraphIgnore}, 533 533 }, 534 534 535 - expectedDocuments: []zoekt.Document{emptySourcegraphIgnore}, 535 + expectedDocuments: []index.Document{emptySourcegraphIgnore}, 536 536 }, 537 537 { 538 538 name: "attempt delta build after modifying ignore file", 539 539 addedDocuments: branchToDocumentMap{ 540 - "main": []zoekt.Document{sourcegraphIgnoreWithContent}, 540 + "main": []index.Document{sourcegraphIgnoreWithContent}, 541 541 }, 542 542 optFn: func(t *testing.T, o *Options) { 543 543 o.BuildOptions.IsDelta = true 544 544 }, 545 545 546 546 expectedFallbackToNormalBuild: true, 547 - expectedDocuments: []zoekt.Document{sourcegraphIgnoreWithContent}, 547 + expectedDocuments: []index.Document{sourcegraphIgnoreWithContent}, 548 548 }, 549 549 }, 550 550 }, ··· 555 555 { 556 556 name: "setup: first shard", 557 557 addedDocuments: branchToDocumentMap{ 558 - "main": []zoekt.Document{foo}, 558 + "main": []index.Document{foo}, 559 559 }, 560 560 561 - expectedDocuments: []zoekt.Document{foo}, 561 + expectedDocuments: []index.Document{foo}, 562 562 }, 563 563 { 564 564 name: "setup: second shard (delta)", 565 565 addedDocuments: branchToDocumentMap{ 566 - "main": []zoekt.Document{fruitV1}, 566 + "main": []index.Document{fruitV1}, 567 567 }, 568 568 optFn: func(t *testing.T, o *Options) { 569 569 o.BuildOptions.IsDelta = true 570 570 }, 571 571 572 - expectedDocuments: []zoekt.Document{foo, fruitV1}, 572 + expectedDocuments: []index.Document{foo, fruitV1}, 573 573 }, 574 574 { 575 575 name: "setup: third shard (delta)", 576 576 addedDocuments: branchToDocumentMap{ 577 - "main": []zoekt.Document{helloWorld}, 577 + "main": []index.Document{helloWorld}, 578 578 }, 579 579 optFn: func(t *testing.T, o *Options) { 580 580 o.BuildOptions.IsDelta = true 581 581 }, 582 582 583 - expectedDocuments: []zoekt.Document{foo, fruitV1, helloWorld}, 583 + expectedDocuments: []index.Document{foo, fruitV1, helloWorld}, 584 584 }, 585 585 { 586 586 name: "attempt another delta build after we already blew past the shard threshold", 587 587 addedDocuments: branchToDocumentMap{ 588 - "main": []zoekt.Document{fruitV2InFolder}, 588 + "main": []index.Document{fruitV2InFolder}, 589 589 }, 590 590 optFn: func(t *testing.T, o *Options) { 591 591 o.DeltaShardNumberFallbackThreshold = 2 ··· 593 593 }, 594 594 595 595 expectedFallbackToNormalBuild: true, 596 - expectedDocuments: []zoekt.Document{foo, fruitV1, helloWorld, fruitV2InFolder}, 596 + expectedDocuments: []index.Document{foo, fruitV1, helloWorld, fruitV2InFolder}, 597 597 }, 598 598 }, 599 599 }, ··· 664 664 } 665 665 666 666 // setup: prepare indexOptions with given overrides 667 - buildOptions := build.Options{ 667 + buildOptions := index.Options{ 668 668 IndexDir: indexDir, 669 669 RepositoryDescription: zoekt.Repository{ 670 670 Name: "repository", ··· 736 736 t.Fatalf("Search: %s", err) 737 737 } 738 738 739 - var receivedDocuments []zoekt.Document 739 + var receivedDocuments []index.Document 740 740 for _, f := range result.Files { 741 - receivedDocuments = append(receivedDocuments, zoekt.Document{ 741 + receivedDocuments = append(receivedDocuments, index.Document{ 742 742 Name: f.FileName, 743 743 Content: f.Content, 744 744 }) 745 745 } 746 746 747 - for _, docs := range [][]zoekt.Document{step.expectedDocuments, receivedDocuments} { 747 + for _, docs := range [][]index.Document{step.expectedDocuments, receivedDocuments} { 748 748 sort.Slice(docs, func(i, j int) bool { 749 749 a, b := docs[i], docs[j] 750 750 ··· 763 763 } 764 764 765 765 compareOptions := []cmp.Option{ 766 - cmpopts.IgnoreFields(zoekt.Document{}, "Branches"), 766 + cmpopts.IgnoreFields(index.Document{}, "Branches"), 767 767 cmpopts.EquateEmpty(), 768 768 } 769 769 ··· 923 923 assertOutput := func(templateText string, want string) { 924 924 t.Helper() 925 925 926 - tt, err := zoekt.ParseTemplate(templateText) 926 + tt, err := index.ParseTemplate(templateText) 927 927 if err != nil { 928 928 t.Fatal(err) 929 929 } ··· 968 968 Submodules: false, 969 969 BranchPrefix: "refs/heads/", 970 970 Branches: []string{"main"}, 971 - BuildOptions: build.Options{ 971 + BuildOptions: index.Options{ 972 972 RepositoryDescription: zoekt.Repository{ 973 973 Name: "test-repo", 974 974 URL: "https://github.com/example/test-repo",

+8 -8

internal/gitindex/tree_test.go

··· 30 30 "github.com/google/go-cmp/cmp" 31 31 "github.com/grafana/regexp" 32 32 "github.com/sourcegraph/zoekt" 33 - "github.com/sourcegraph/zoekt/build" 34 33 "github.com/sourcegraph/zoekt/ignore" 34 + "github.com/sourcegraph/zoekt/index" 35 35 "github.com/sourcegraph/zoekt/internal/shards" 36 36 "github.com/sourcegraph/zoekt/query" 37 37 ) ··· 202 202 203 203 indexDir := t.TempDir() 204 204 205 - buildOpts := build.Options{ 205 + buildOpts := index.Options{ 206 206 IndexDir: indexDir, 207 207 } 208 208 opts := Options{ ··· 306 306 307 307 indexDir := t.TempDir() 308 308 309 - buildOpts := build.Options{ 309 + buildOpts := index.Options{ 310 310 IndexDir: indexDir, 311 311 } 312 312 opts := Options{ ··· 363 363 364 364 indexDir := t.TempDir() 365 365 366 - buildOpts := build.Options{ 366 + buildOpts := index.Options{ 367 367 IndexDir: indexDir, 368 368 } 369 369 ··· 429 429 430 430 indexDir := t.TempDir() 431 431 432 - buildOpts := build.Options{ 432 + buildOpts := index.Options{ 433 433 IndexDir: indexDir, 434 434 RepositoryDescription: zoekt.Repository{ 435 435 Name: "repo", ··· 475 475 476 476 indexDir := t.TempDir() 477 477 478 - buildOpts := build.Options{ 478 + buildOpts := index.Options{ 479 479 IndexDir: indexDir, 480 480 RepositoryDescription: zoekt.Repository{ 481 481 Name: "gerrit.googlesource.com/adir", ··· 507 507 508 508 indexDir := t.TempDir() 509 509 510 - buildOpts := build.Options{ 510 + buildOpts := index.Options{ 511 511 IndexDir: indexDir, 512 512 RepositoryDescription: zoekt.Repository{ 513 513 Name: "repo", ··· 560 560 t.Fatalf("createMultibranchRepo: %v", err) 561 561 } 562 562 563 - buildOpts := build.Options{ 563 + buildOpts := index.Options{ 564 564 IndexDir: indexDir, 565 565 RepositoryDescription: zoekt.Repository{ 566 566 Name: "repo",

+2 -2

internal/profiler/profiler.go

··· 5 5 "os" 6 6 7 7 "cloud.google.com/go/profiler" 8 - "github.com/sourcegraph/zoekt" 8 + "github.com/sourcegraph/zoekt/index" 9 9 ) 10 10 11 11 // Init starts the supported profilers IFF the environment variable is set. ··· 13 13 if os.Getenv("GOOGLE_CLOUD_PROFILER_ENABLED") != "" { 14 14 err := profiler.Start(profiler.Config{ 15 15 Service: svcName, 16 - ServiceVersion: zoekt.Version, 16 + ServiceVersion: index.Version, 17 17 MutexProfiling: true, 18 18 AllocForceGC: true, 19 19 })

+3 -3

internal/shards/aggregate.go

··· 7 7 8 8 "github.com/prometheus/client_golang/prometheus" 9 9 "github.com/prometheus/client_golang/prometheus/promauto" 10 - 11 10 "github.com/sourcegraph/zoekt" 11 + "github.com/sourcegraph/zoekt/index" 12 12 ) 13 13 14 14 var metricFinalAggregateSize = promauto.NewHistogramVec(prometheus.HistogramOpts{ ··· 46 46 if len(r.Files) > 0 { 47 47 c.aggregate.Files = append(c.aggregate.Files, r.Files...) 48 48 49 - c.aggregate.Files = zoekt.SortAndTruncateFiles(c.aggregate.Files, c.opts) 49 + c.aggregate.Files = index.SortAndTruncateFiles(c.aggregate.Files, c.opts) 50 50 51 51 for k, v := range r.RepoURLs { 52 52 c.aggregate.RepoURLs[k] = v ··· 150 150 151 151 // limitSender wraps a sender and calls cancel once the truncator has finished 152 152 // truncating. 153 - func limitSender(cancel context.CancelFunc, sender zoekt.Sender, truncator zoekt.DisplayTruncator) zoekt.Sender { 153 + func limitSender(cancel context.CancelFunc, sender zoekt.Sender, truncator index.DisplayTruncator) zoekt.Sender { 154 154 return zoekt.SenderFunc(func(result *zoekt.SearchResult) { 155 155 var hasMore bool 156 156 result.Files, hasMore = truncator(result.Files)

+6 -5

internal/shards/eval_test.go

··· 6 6 "testing" 7 7 8 8 "github.com/sourcegraph/zoekt" 9 + "github.com/sourcegraph/zoekt/index" 9 10 "github.com/sourcegraph/zoekt/query" 10 11 ) 11 12 12 13 func TestSearchTypeRepo(t *testing.T) { 13 14 ss := newShardedSearcher(2) 14 15 nextShardNum := 1 15 - addShard := func(docs ...zoekt.Document) { 16 + addShard := func(docs ...index.Document) { 16 17 b := testIndexBuilder(t, &zoekt.Repository{ID: 1, Name: "reponame"}, docs...) 17 18 shard := searcherForTest(t, b) 18 19 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) 19 20 nextShardNum++ 20 21 } 21 22 addShard( 22 - zoekt.Document{Name: "f1", Content: []byte("bla the needle")}, 23 - zoekt.Document{Name: "f2", Content: []byte("another file another needle")}) 23 + index.Document{Name: "f1", Content: []byte("bla the needle")}, 24 + index.Document{Name: "f2", Content: []byte("another file another needle")}) 24 25 addShard( 25 - zoekt.Document{Name: "f3", Content: []byte("another shard")}) 26 + index.Document{Name: "f3", Content: []byte("another shard")}) 26 27 27 28 searcher := &typeRepoSearcher{ss} 28 29 search := func(q query.Q, o ...zoekt.SearchOptions) *zoekt.SearchResult { ··· 96 97 t.Fatalf("got %v, want 0 matches", len(res.Files)) 97 98 } 98 99 99 - // no match by path 100 + // no index by path 100 101 res = search(query.NewAnd( 101 102 &query.Type{ 102 103 Type: query.TypeRepo,

+1 -1

internal/shards/sched.go

··· 340 340 // - batch timedout 341 341 // - released 342 342 // 343 - // We have separate gauges and counters for exclusive processes which match 343 + // We have separate gauges and counters for exclusive processes which index 344 344 // what we track for normal processes: 345 345 // 346 346 // - exclusive queued

+7 -6

internal/shards/shards.go

··· 28 28 "sync" 29 29 "time" 30 30 31 + "github.com/sourcegraph/zoekt/index" 31 32 "golang.org/x/sync/semaphore" 32 33 33 34 "github.com/prometheus/client_golang/prometheus" ··· 623 624 // For streaming, the wrapping has to happen in the inverted order. 624 625 sender = copyFileSender(sender) 625 626 626 - if truncator, hasLimits := zoekt.NewDisplayTruncator(opts); hasLimits { 627 + if truncator, hasLimits := index.NewDisplayTruncator(opts); hasLimits { 627 628 var cancel context.CancelFunc 628 629 ctx, cancel = context.WithCancel(ctx) 629 630 defer cancel() ··· 806 807 807 808 // sendByRepository splits a zoekt.SearchResult by repository and calls 808 809 // sender.Send for each batch. Ranking in Sourcegraph expects zoekt.SearchResult 809 - // to contain results with the same zoekt.SearchResult.Priority only. 810 + // to contain results with the same zoekt.SearchResult.priority only. 810 811 // 811 812 // We split by repository instead of by priority because it is easier to set 812 813 // RepoURLs and LineFragments in zoekt.SearchResult. 813 814 func sendByRepository(result *zoekt.SearchResult, opts *zoekt.SearchOptions, sender zoekt.Sender) { 814 815 if len(result.RepoURLs) <= 1 || len(result.Files) == 0 { 815 - zoekt.SortFiles(result.Files) 816 + index.SortFiles(result.Files) 816 817 sender.Send(result) 817 818 return 818 819 } 819 820 820 821 send := func(repoName string, a, b int, stats zoekt.Stats) { 821 - zoekt.SortFiles(result.Files[a:b]) 822 + index.SortFiles(result.Files[a:b]) 822 823 sender.Send(&zoekt.SearchResult{ 823 824 Stats: stats, 824 825 Progress: zoekt.Progress{ ··· 1203 1204 return nil, err 1204 1205 } 1205 1206 1206 - iFile, err := zoekt.NewIndexFile(f) 1207 + iFile, err := index.NewIndexFile(f) 1207 1208 if err != nil { 1208 1209 return nil, err 1209 1210 } 1210 - s, err := zoekt.NewSearcher(iFile) 1211 + s, err := index.NewSearcher(iFile) 1211 1212 if err != nil { 1212 1213 iFile.Close() 1213 1214 return nil, fmt.Errorf("NewSearcher(%s): %v", fn, err)

+41 -40

internal/shards/shards_test.go

··· 36 36 "github.com/google/go-cmp/cmp" 37 37 "github.com/google/go-cmp/cmp/cmpopts" 38 38 "github.com/grafana/regexp" 39 + "github.com/sourcegraph/zoekt/index" 39 40 40 41 "github.com/sourcegraph/zoekt" 41 42 "github.com/sourcegraph/zoekt/query" ··· 199 200 ss := newShardedSearcher(1) 200 201 201 202 var nextShardNum int 202 - addShard := func(repo string, priority float64, docs ...zoekt.Document) { 203 + addShard := func(repo string, priority float64, docs ...index.Document) { 203 204 r := &zoekt.Repository{ID: hash(repo), Name: repo} 204 205 r.RawConfig = map[string]string{ 205 206 "public": "1", ··· 213 214 nextShardNum++ 214 215 } 215 216 216 - addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 217 - addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 218 - addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 219 - addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 217 + addShard("weekend-project", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) 218 + addShard("moderately-popular", 500, index.Document{Name: "f3", Content: []byte("foo bar")}) 219 + addShard("weekend-project-2", 20, index.Document{Name: "f2", Content: []byte("foo bas")}) 220 + addShard("super-star", 5000, index.Document{Name: "f1", Content: []byte("foo bar bas")}) 220 221 221 222 want := []string{ 222 223 "super-star", ··· 241 242 ss := newShardedSearcher(1) 242 243 243 244 var nextShardNum int 244 - addShard := func(repo string, rank uint16, docs ...zoekt.Document) { 245 + addShard := func(repo string, rank uint16, docs ...index.Document) { 245 246 r := &zoekt.Repository{ID: hash(repo), Name: repo} 246 247 r.RawConfig = map[string]string{ 247 248 "public": "1", ··· 255 256 nextShardNum++ 256 257 } 257 258 258 - addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 259 - addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 260 - addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 261 - addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 262 - zoekt.Document{Name: "f5", Content: []byte("fooooo")}) 259 + addShard("old-project", 1, index.Document{Name: "f1", Content: []byte("foobar")}) 260 + addShard("recent", 2, index.Document{Name: "f2", Content: []byte("foobaz")}) 261 + addShard("old-project-2", 1, index.Document{Name: "f3", Content: []byte("foo bar")}) 262 + addShard("new", 3, index.Document{Name: "f4", Content: []byte("foo baz")}, 263 + index.Document{Name: "f5", Content: []byte("fooooo")}) 263 264 264 265 // Run a stream search and gather the results 265 266 var results []*zoekt.SearchResult ··· 409 410 } 410 411 411 412 func TestUnloadIndex(t *testing.T) { 412 - b := testIndexBuilder(t, nil, zoekt.Document{ 413 + b := testIndexBuilder(t, nil, index.Document{ 413 414 Name: "filename", 414 415 Content: []byte("needle needle needle"), 415 416 }) ··· 420 421 } 421 422 indexBytes := buf.Bytes() 422 423 indexFile := &memSeeker{indexBytes} 423 - searcher, err := zoekt.NewSearcher(indexFile) 424 + searcher, err := index.NewSearcher(indexFile) 424 425 if err != nil { 425 426 t.Fatalf("NewSearcher: %v", err) 426 427 } ··· 471 472 }, 472 473 } 473 474 474 - doc := zoekt.Document{ 475 + doc := index.Document{ 475 476 Name: "foo.go", 476 477 Content: []byte("bar\nbaz"), 477 478 Branches: []string{"main", "dev"}, ··· 608 609 } 609 610 } 610 611 611 - func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...zoekt.Document) *zoekt.IndexBuilder { 612 - b, err := zoekt.NewIndexBuilder(repo) 612 + func testIndexBuilder(t testing.TB, repo *zoekt.Repository, docs ...index.Document) *index.IndexBuilder { 613 + b, err := index.NewIndexBuilder(repo) 613 614 if err != nil { 614 615 t.Fatalf("NewIndexBuilder: %v", err) 615 616 } ··· 622 623 return b 623 624 } 624 625 625 - func searcherForTest(t testing.TB, b *zoekt.IndexBuilder) zoekt.Searcher { 626 + func searcherForTest(t testing.TB, b *index.IndexBuilder) zoekt.Searcher { 626 627 var buf bytes.Buffer 627 628 if err := b.Write(&buf); err != nil { 628 629 t.Fatal(err) 629 630 } 630 631 f := &memSeeker{buf.Bytes()} 631 632 632 - searcher, err := zoekt.NewSearcher(f) 633 + searcher, err := index.NewSearcher(f) 633 634 if err != nil { 634 635 t.Fatalf("NewSearcher: %v", err) 635 636 } ··· 650 651 func testSearcherForRepo(b testing.TB, r *zoekt.Repository, numFiles int) zoekt.Searcher { 651 652 builder := testIndexBuilder(b, r) 652 653 653 - if err := builder.Add(zoekt.Document{ 654 + if err := builder.Add(index.Document{ 654 655 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, 0), 655 656 Content: []byte("needle needle needle haystack"), 656 657 }); err != nil { ··· 658 659 } 659 660 660 661 for i := 1; i < numFiles; i++ { 661 - if err := builder.Add(zoekt.Document{ 662 + if err := builder.Add(index.Document{ 662 663 Name: fmt.Sprintf("%s/filename-%d.go", r.Name, i), 663 664 Content: []byte("haystack haystack haystack"), 664 665 }); err != nil { ··· 754 755 ss := newShardedSearcher(1) 755 756 756 757 var nextShardNum int 757 - addShard := func(repo string, rawConfig map[string]string, docs ...zoekt.Document) { 758 + addShard := func(repo string, rawConfig map[string]string, docs ...index.Document) { 758 759 r := &zoekt.Repository{Name: repo} 759 760 r.RawConfig = rawConfig 760 761 b := testIndexBuilder(t, r, docs...) ··· 762 763 ss.replace(map[string]zoekt.Searcher{fmt.Sprintf("key-%d", nextShardNum): shard}) 763 764 nextShardNum++ 764 765 } 765 - addShard("public", map[string]string{"public": "1"}, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 766 - addShard("private_archived", map[string]string{"archived": "1"}, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 767 - addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 766 + addShard("public", map[string]string{"public": "1"}, index.Document{Name: "f1", Content: []byte("foo bar bas")}) 767 + addShard("private_archived", map[string]string{"archived": "1"}, index.Document{Name: "f2", Content: []byte("foo bas")}) 768 + addShard("public_fork", map[string]string{"public": "1", "fork": "1"}, index.Document{Name: "f3", Content: []byte("foo bar")}) 768 769 769 770 cases := []struct { 770 771 pattern string ··· 979 980 func TestFileBasedSearch(t *testing.T) { 980 981 cases := []struct { 981 982 name string 982 - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 983 + testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 983 984 }{ 984 985 {"Search", testShardedSearch}, 985 986 {"StreamSearch", testShardedStreamSearch}, ··· 990 991 c2 := []byte("In Dutch, ananas means pineapple") 991 992 // -----------0123456789012345678901234567890123456789 992 993 b := testIndexBuilder(t, nil, 993 - zoekt.Document{Name: "f1", Content: c1}, 994 - zoekt.Document{Name: "f2", Content: c2}, 994 + index.Document{Name: "f1", Content: c1}, 995 + index.Document{Name: "f2", Content: c2}, 995 996 ) 996 997 997 998 for _, tt := range cases { ··· 1019 1020 func TestWordBoundaryRanking(t *testing.T) { 1020 1021 cases := []struct { 1021 1022 name string 1022 - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1023 + testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1023 1024 }{ 1024 1025 {"Search", testShardedSearch}, 1025 1026 {"StreamSearch", testShardedStreamSearch}, 1026 1027 } 1027 1028 1028 1029 b := testIndexBuilder(t, nil, 1029 - zoekt.Document{Name: "f1", Content: []byte("xbytex xbytex")}, 1030 - zoekt.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, 1030 + index.Document{Name: "f1", Content: []byte("xbytex xbytex")}, 1031 + index.Document{Name: "f2", Content: []byte("xbytex\nbytex\nbyte bla")}, 1031 1032 // -----------------------------------------0123456 789012 34567890 1032 - zoekt.Document{Name: "f3", Content: []byte("xbytex ybytex")}) 1033 + index.Document{Name: "f3", Content: []byte("xbytex ybytex")}) 1033 1034 1034 1035 for _, tt := range cases { 1035 1036 for _, useDocumentRanks := range []bool{false, true} { ··· 1059 1060 func TestAtomCountScore(t *testing.T) { 1060 1061 cases := []struct { 1061 1062 name string 1062 - testShardedSearch func(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1063 + testShardedSearch func(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch 1063 1064 }{ 1064 1065 {"Search", testShardedSearch}, 1065 1066 {"StreamSearch", testShardedStreamSearch}, ··· 1072 1073 {Name: "needle", Version: "v2"}, 1073 1074 }, 1074 1075 }, 1075 - zoekt.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, 1076 - zoekt.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, 1077 - zoekt.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) 1076 + index.Document{Name: "f1", Content: []byte("needle the bla"), Branches: []string{"branches"}}, 1077 + index.Document{Name: "needle-file-branch", Content: []byte("needle content"), Branches: []string{"needle"}}, 1078 + index.Document{Name: "needle-file", Content: []byte("needle content"), Branches: []string{"branches"}}) 1078 1079 1079 1080 for _, tt := range cases { 1080 1081 for _, useDocumentRanks := range []bool{false, true} { ··· 1101 1102 func TestUseBM25Scoring(t *testing.T) { 1102 1103 b := testIndexBuilder(t, 1103 1104 &zoekt.Repository{}, 1104 - zoekt.Document{Name: "f1", Content: []byte("one two two three")}, 1105 - zoekt.Document{Name: "f2", Content: []byte("one two one two")}, 1106 - zoekt.Document{Name: "f3", Content: []byte("one three three three")}) 1105 + index.Document{Name: "f1", Content: []byte("one two two three")}, 1106 + index.Document{Name: "f2", Content: []byte("one two one two")}, 1107 + index.Document{Name: "f3", Content: []byte("one three three three")}) 1107 1108 1108 1109 ss := newShardedSearcher(1) 1109 1110 searcher := searcherForTest(t, b) ··· 1133 1134 } 1134 1135 } 1135 1136 1136 - func testShardedStreamSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1137 + func testShardedStreamSearch(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1137 1138 ss := newShardedSearcher(1) 1138 1139 searcher := searcherForTest(t, ib) 1139 1140 ss.replace(map[string]zoekt.Searcher{"r1": searcher}) ··· 1153 1154 return files 1154 1155 } 1155 1156 1156 - func testShardedSearch(t *testing.T, q query.Q, ib *zoekt.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1157 + func testShardedSearch(t *testing.T, q query.Q, ib *index.IndexBuilder, useDocumentRanks bool) []zoekt.FileMatch { 1157 1158 ss := newShardedSearcher(1) 1158 1159 searcher := searcherForTest(t, ib) 1159 1160 ss.replace(map[string]zoekt.Searcher{"r1": searcher})

+2 -2

internal/shards/watcher.go

··· 26 26 "time" 27 27 28 28 "github.com/fsnotify/fsnotify" 29 - "github.com/sourcegraph/zoekt" 29 + "github.com/sourcegraph/zoekt/index" 30 30 ) 31 31 32 32 type shardLoader interface { ··· 130 130 131 131 // In the case of downgrades, avoid reading 132 132 // newer index formats. 133 - if version > zoekt.IndexFormatVersion && version > zoekt.NextIndexFormatVersion { 133 + if version > index.IndexFormatVersion && version > index.NextIndexFormatVersion { 134 134 continue 135 135 } 136 136

+2 -2

internal/shards/watcher_test.go

··· 21 21 "testing" 22 22 "time" 23 23 24 - "github.com/sourcegraph/zoekt" 24 + "github.com/sourcegraph/zoekt/index" 25 25 ) 26 26 27 27 type loggingLoader struct { ··· 182 182 // t.Fatalf("got %v, want 'empty'", err) 183 183 // } 184 184 185 - want := zoekt.NextIndexFormatVersion 185 + want := index.NextIndexFormatVersion 186 186 shardLatest := filepath.Join(dir, fmt.Sprintf("foo_v%d.00000.zoekt", want)) 187 187 188 188 for delta := -1; delta <= 1; delta++ {

+15 -11

limit.go index/limit.go

··· 1 - package zoekt 1 + package index 2 + 3 + import ( 4 + "log" 2 5 3 - import "log" 6 + "github.com/sourcegraph/zoekt" 7 + ) 4 8 5 9 // SortAndTruncateFiles is a convenience around SortFiles and 6 10 // DisplayTruncator. Given an aggregated files it will sort and then truncate 7 11 // based on the search options. 8 - func SortAndTruncateFiles(files []FileMatch, opts *SearchOptions) []FileMatch { 12 + func SortAndTruncateFiles(files []zoekt.FileMatch, opts *zoekt.SearchOptions) []zoekt.FileMatch { 9 13 SortFiles(files) 10 14 truncator, _ := NewDisplayTruncator(opts) 11 15 files, _ = truncator(files) ··· 16 20 // display limits by truncating and mutating before. hasMore is true until the 17 21 // limits are exhausted. Once hasMore is false each subsequent call will 18 22 // return an empty after and hasMore false. 19 - type DisplayTruncator func(before []FileMatch) (after []FileMatch, hasMore bool) 23 + type DisplayTruncator func(before []zoekt.FileMatch) (after []zoekt.FileMatch, hasMore bool) 20 24 21 25 // NewDisplayTruncator will return a DisplayTruncator which enforces the limits in 22 26 // opts. If there are no limits to enforce, hasLimits is false and there is no 23 27 // need to call DisplayTruncator. 24 - func NewDisplayTruncator(opts *SearchOptions) (_ DisplayTruncator, hasLimits bool) { 28 + func NewDisplayTruncator(opts *zoekt.SearchOptions) (_ DisplayTruncator, hasLimits bool) { 25 29 docLimit := opts.MaxDocDisplayCount 26 30 docLimited := docLimit > 0 27 31 ··· 31 35 done := false 32 36 33 37 if !docLimited && !matchLimited { 34 - return func(fm []FileMatch) ([]FileMatch, bool) { 38 + return func(fm []zoekt.FileMatch) ([]zoekt.FileMatch, bool) { 35 39 return fm, true 36 40 }, false 37 41 } 38 42 39 - return func(fm []FileMatch) ([]FileMatch, bool) { 43 + return func(fm []zoekt.FileMatch) ([]zoekt.FileMatch, bool) { 40 44 if done { 41 45 return nil, false 42 46 } ··· 60 64 }, true 61 65 } 62 66 63 - func limitMatches(files []FileMatch, limit int, chunkMatches bool) ([]FileMatch, int) { 64 - var limiter func(file *FileMatch, limit int) int 67 + func limitMatches(files []zoekt.FileMatch, limit int, chunkMatches bool) ([]zoekt.FileMatch, int) { 68 + var limiter func(file *zoekt.FileMatch, limit int) int 65 69 if chunkMatches { 66 70 limiter = limitChunkMatches 67 71 } else { ··· 78 82 79 83 // Limit the number of ChunkMatches in the given FileMatch, returning the 80 84 // remaining limit, if any. 81 - func limitChunkMatches(file *FileMatch, limit int) int { 85 + func limitChunkMatches(file *zoekt.FileMatch, limit int) int { 82 86 for i := range file.ChunkMatches { 83 87 cm := &file.ChunkMatches[i] 84 88 if len(cm.Ranges) > limit { ··· 127 131 128 132 // Limit the number of LineMatches in the given FileMatch, returning the 129 133 // remaining limit, if any. 130 - func limitLineMatches(file *FileMatch, limit int) int { 134 + func limitLineMatches(file *zoekt.FileMatch, limit int) int { 131 135 for i := range file.LineMatches { 132 136 lm := &file.LineMatches[i] 133 137 if len(lm.LineFragments) > limit {

+15 -14

limit_test.go index/limit_test.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "bytes" ··· 6 6 "testing" 7 7 8 8 "github.com/google/go-cmp/cmp" 9 + "github.com/sourcegraph/zoekt" 9 10 ) 10 11 11 12 func TestLimitMatches(t *testing.T) { ··· 62 63 for _, tc := range cases { 63 64 t.Run("ChunkMatches", func(t *testing.T) { 64 65 // Generate a ChunkMatch suitable for testing `LimitChunkMatches`. 65 - generateChunkMatch := func(numRanges, lineNumber int) (ChunkMatch, int) { 66 - cm := ChunkMatch{SymbolInfo: make([]*Symbol, numRanges)} 66 + generateChunkMatch := func(numRanges, lineNumber int) (zoekt.ChunkMatch, int) { 67 + cm := zoekt.ChunkMatch{SymbolInfo: make([]*zoekt.Symbol, numRanges)} 67 68 68 69 // To simplify testing, we generate Content and the associated 69 70 // Ranges with fixed logic: each ChunkMatch has 1 line of ··· 74 75 // 1 line of context. 75 76 cm.Content = append(cm.Content, []byte("context\n")...) 76 77 for i := 0; i < numRanges; i += 1 { 77 - cm.Ranges = append(cm.Ranges, Range{ 78 + cm.Ranges = append(cm.Ranges, zoekt.Range{ 78 79 // We only provide LineNumber as that's all that's 79 80 // relevant. 80 - Start: Location{LineNumber: uint32(lineNumber + (2 * i) + 1)}, 81 - End: Location{LineNumber: uint32(lineNumber + (2 * i) + 2)}, 81 + Start: zoekt.Location{LineNumber: uint32(lineNumber + (2 * i) + 1)}, 82 + End: zoekt.Location{LineNumber: uint32(lineNumber + (2 * i) + 2)}, 82 83 }) 83 84 cm.Content = append(cm.Content, []byte(fmt.Sprintf("range%dStart\nrange%dEnd\n", i, i))...) 84 85 } ··· 91 92 return cm, lineNumber + (2 * numRanges) + 4 92 93 } 93 94 94 - res := SearchResult{} 95 + res := zoekt.SearchResult{} 95 96 for _, file := range tc.in { 96 - fm := FileMatch{} 97 + fm := zoekt.FileMatch{} 97 98 lineNumber := 0 98 99 for _, numRanges := range file { 99 - var cm ChunkMatch 100 + var cm zoekt.ChunkMatch 100 101 cm, lineNumber = generateChunkMatch(numRanges, lineNumber) 101 102 fm.ChunkMatches = append(fm.ChunkMatches, cm) 102 103 } 103 104 res.Files = append(res.Files, fm) 104 105 } 105 106 106 - res.Files = SortAndTruncateFiles(res.Files, &SearchOptions{ 107 + res.Files = SortAndTruncateFiles(res.Files, &zoekt.SearchOptions{ 107 108 MaxMatchDisplayCount: tc.limit, 108 109 ChunkMatches: true, 109 110 }) ··· 133 134 }) 134 135 135 136 t.Run("LineMatches", func(t *testing.T) { 136 - res := SearchResult{} 137 + res := zoekt.SearchResult{} 137 138 for _, file := range tc.in { 138 - fm := FileMatch{} 139 + fm := zoekt.FileMatch{} 139 140 for _, numFragments := range file { 140 - fm.LineMatches = append(fm.LineMatches, LineMatch{LineFragments: make([]LineFragmentMatch, numFragments)}) 141 + fm.LineMatches = append(fm.LineMatches, zoekt.LineMatch{LineFragments: make([]zoekt.LineFragmentMatch, numFragments)}) 141 142 } 142 143 res.Files = append(res.Files, fm) 143 144 } 144 145 145 - res.Files = SortAndTruncateFiles(res.Files, &SearchOptions{ 146 + res.Files = SortAndTruncateFiles(res.Files, &zoekt.SearchOptions{ 146 147 MaxMatchDisplayCount: tc.limit, 147 148 ChunkMatches: false, 148 149 })

+8 -6

matchiter.go index/matchiter.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bytes" 19 19 "fmt" 20 + 21 + "github.com/sourcegraph/zoekt" 20 22 ) 21 23 22 24 // candidateMatch is a candidate match for a substring. ··· 73 75 // updateStats is called twice. After matchtree construction and after 74 76 // searching is done. Implementations must take care to not report 75 77 // statistics twice. 76 - updateStats(*Stats) 78 + updateStats(*zoekt.Stats) 77 79 } 78 80 79 81 // noMatchTree is both matchIterator and matchTree that matches nothing. ··· 81 83 Why string 82 84 83 85 // Stats captures the work done to create the noMatchTree. 84 - Stats Stats 86 + Stats zoekt.Stats 85 87 } 86 88 87 89 func (t *noMatchTree) String() string { ··· 102 104 return matchesNone 103 105 } 104 106 105 - func (t *noMatchTree) updateStats(s *Stats) { 107 + func (t *noMatchTree) updateStats(s *zoekt.Stats) { 106 108 s.Add(t.Stats) 107 - t.Stats = Stats{} 109 + t.Stats = zoekt.Stats{} 108 110 } 109 111 110 112 func (m *candidateMatch) String() string { ··· 166 168 i.fileIdx = nextDoc 167 169 } 168 170 169 - func (i *ngramDocIterator) updateStats(s *Stats) { 171 + func (i *ngramDocIterator) updateStats(s *zoekt.Stats) { 170 172 i.iter.updateStats(s) 171 173 s.NgramMatches += i.matchCount 172 174 s.NgramLookups += i.ngramLookups

+4 -4

matchtree.go index/matchtree.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bytes" ··· 23 23 "unicode/utf8" 24 24 25 25 "github.com/grafana/regexp" 26 - 26 + "github.com/sourcegraph/zoekt" 27 27 "github.com/sourcegraph/zoekt/internal/syntaxutil" 28 28 "github.com/sourcegraph/zoekt/query" 29 29 ) ··· 604 604 605 605 // updateMatchTreeStats calls updateStats on all atoms in mt which have that 606 606 // function defined. 607 - func updateMatchTreeStats(mt matchTree, stats *Stats) { 607 + func updateMatchTreeStats(mt matchTree, stats *zoekt.Stats) { 608 608 visitMatchTree(mt, func(mt matchTree) { 609 - if atom, ok := mt.(interface{ updateStats(*Stats) }); ok { 609 + if atom, ok := mt.(interface{ updateStats(*zoekt.Stats) }); ok { 610 610 atom.updateStats(stats) 611 611 } 612 612 })

+6 -5

matchtree_test.go index/matchtree_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "reflect" ··· 21 21 22 22 "github.com/RoaringBitmap/roaring" 23 23 "github.com/grafana/regexp" 24 + "github.com/sourcegraph/zoekt" 24 25 25 26 "github.com/sourcegraph/zoekt/query" 26 27 ) ··· 287 288 288 289 func TestRepoSet(t *testing.T) { 289 290 d := &indexData{ 290 - repoMetaData: []Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, 291 + repoMetaData: []zoekt.Repository{{Name: "r0"}, {Name: "r1"}, {Name: "r2"}, {Name: "r3"}}, 291 292 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, 292 293 repos: []uint16{0, 0, 1, 2, 3, 3}, 293 294 } ··· 310 311 311 312 func TestRepo(t *testing.T) { 312 313 d := &indexData{ 313 - repoMetaData: []Repository{{Name: "foo"}, {Name: "bar"}}, 314 + repoMetaData: []zoekt.Repository{{Name: "foo"}, {Name: "bar"}}, 314 315 fileBranchMasks: []uint64{1, 1, 1, 1, 1}, 315 316 repos: []uint16{0, 0, 1, 0, 1}, 316 317 } ··· 333 334 334 335 func TestBranchesRepos(t *testing.T) { 335 336 d := &indexData{ 336 - repoMetaData: []Repository{ 337 + repoMetaData: []zoekt.Repository{ 337 338 {ID: hash("foo"), Name: "foo"}, 338 339 {ID: hash("bar"), Name: "bar"}, 339 340 }, ··· 366 367 367 368 func TestRepoIDs(t *testing.T) { 368 369 d := &indexData{ 369 - repoMetaData: []Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, 370 + repoMetaData: []zoekt.Repository{{Name: "r0", ID: 0}, {Name: "r1", ID: 1}, {Name: "r2", ID: 2}, {Name: "r3", ID: 3}}, 370 371 fileBranchMasks: []uint64{1, 1, 1, 1, 1, 1}, 371 372 repos: []uint16{0, 0, 1, 2, 3, 3}, 372 373 }

+4 -3

merge.go index/merge.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "crypto/sha1" ··· 10 10 "runtime" 11 11 "sort" 12 12 13 + "github.com/sourcegraph/zoekt" 13 14 "github.com/sourcegraph/zoekt/internal/tenant" 14 15 ) 15 16 ··· 95 96 } 96 97 97 98 sort.Slice(ds, func(i, j int) bool { 98 - return ds[i].repoMetaData[0].priority > ds[j].repoMetaData[0].priority 99 + return ds[i].repoMetaData[0].GetPriority() > ds[j].repoMetaData[0].GetPriority() 99 100 }) 100 101 101 102 ib := newIndexBuilder() ··· 237 238 return err 238 239 } 239 240 240 - doc.SymbolsMetaData = make([]*Symbol, len(doc.Symbols)) 241 + doc.SymbolsMetaData = make([]*zoekt.Symbol, len(doc.Symbols)) 241 242 for i := range doc.SymbolsMetaData { 242 243 doc.SymbolsMetaData[i] = d.symbols.data(d.fileEndSymbol[docID] + uint32(i)) 243 244 }

+5 -4

merge_test.go index/merge_test.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "os" ··· 6 6 "testing" 7 7 8 8 "github.com/google/go-cmp/cmp" 9 + "github.com/sourcegraph/zoekt" 9 10 ) 10 11 11 12 // We compare 2 simple shards before and after the transformation ··· 13 14 // identical. 14 15 func TestExplode(t *testing.T) { 15 16 simpleShards := []string{ 16 - "./testdata/shards/repo_v16.00000.zoekt", 17 - "./testdata/shards/repo2_v16.00000.zoekt", 17 + ".././testdata/shards/repo_v16.00000.zoekt", 18 + ".././testdata/shards/repo2_v16.00000.zoekt", 18 19 } 19 20 20 21 // repo name -> IndexMetadata 21 - m := make(map[string]*IndexMetadata, 2) 22 + m := make(map[string]*zoekt.IndexMetadata, 2) 22 23 23 24 // merge 24 25 var files []IndexFile

+12 -11

read.go index/read.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "encoding/binary" ··· 25 25 "sort" 26 26 27 27 "github.com/rs/xid" 28 + "github.com/sourcegraph/zoekt" 28 29 ) 29 30 30 31 // IndexFile is a file suitable for concurrent read access. For performance ··· 242 243 243 244 // canReadVersion returns checks if zoekt can read in md. If it can't a 244 245 // non-nil error is returned. 245 - func canReadVersion(md *IndexMetadata) bool { 246 + func canReadVersion(md *zoekt.IndexMetadata) bool { 246 247 // Backwards compatible with v16 247 248 return md.IndexFormatVersion == IndexFormatVersion || md.IndexFormatVersion == NextIndexFormatVersion 248 249 } ··· 262 263 } 263 264 264 265 d.metaData = *md 265 - d.repoMetaData = make([]Repository, 0, len(repos)) 266 + d.repoMetaData = make([]zoekt.Repository, 0, len(repos)) 266 267 for _, r := range repos { 267 268 d.repoMetaData = append(d.repoMetaData, *r) 268 269 } ··· 412 413 return &d, nil 413 414 } 414 415 415 - func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSection) ([]*Repository, *IndexMetadata, error) { 416 - var md IndexMetadata 416 + func (r *reader) parseMetadata(metaData simpleSection, repoMetaData simpleSection) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { 417 + var md zoekt.IndexMetadata 417 418 if err := r.readJSON(&md, metaData); err != nil { 418 419 return nil, nil, err 419 420 } ··· 433 434 } 434 435 } 435 436 436 - var repos []*Repository 437 + var repos []*zoekt.Repository 437 438 if md.IndexFormatVersion >= 17 { 438 439 if err := json.Unmarshal(blob, &repos); err != nil { 439 440 return nil, &md, err 440 441 } 441 442 } else { 442 - repos = make([]*Repository, 1) 443 + repos = make([]*zoekt.Repository, 1) 443 444 if err := json.Unmarshal(blob, &repos[0]); err != nil { 444 445 return nil, &md, err 445 446 } ··· 568 569 // results coming from this searcher are valid only for the lifetime 569 570 // of the Searcher itself, ie. []byte members should be copied into 570 571 // fresh buffers if the result is to survive closing the shard. 571 - func NewSearcher(r IndexFile) (Searcher, error) { 572 + func NewSearcher(r IndexFile) (zoekt.Searcher, error) { 572 573 rd := &reader{r: r} 573 574 574 575 var toc indexTOC ··· 585 586 586 587 // ReadMetadata returns the metadata of index shard without reading 587 588 // the index data. The IndexFile is not closed. 588 - func ReadMetadata(inf IndexFile) ([]*Repository, *IndexMetadata, error) { 589 + func ReadMetadata(inf IndexFile) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { 589 590 rd := &reader{r: inf} 590 591 var toc indexTOC 591 592 err := rd.readTOCSections(&toc, []string{"metaData", "repoMetaData"}) ··· 597 598 598 599 // ReadMetadataPathAlive is like ReadMetadataPath except that it only returns 599 600 // alive repositories. 600 - func ReadMetadataPathAlive(p string) ([]*Repository, *IndexMetadata, error) { 601 + func ReadMetadataPathAlive(p string) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { 601 602 repos, id, err := ReadMetadataPath(p) 602 603 if err != nil { 603 604 return nil, nil, err ··· 614 615 // ReadMetadataPath returns the metadata of index shard at p without reading 615 616 // the index data. ReadMetadataPath is a helper for ReadMetadata which opens 616 617 // the IndexFile at p. 617 - func ReadMetadataPath(p string) ([]*Repository, *IndexMetadata, error) { 618 + func ReadMetadataPath(p string) ([]*zoekt.Repository, *zoekt.IndexMetadata, error) { 618 619 f, err := os.Open(p) 619 620 if err != nil { 620 621 return nil, nil, err

+6 -9

read_test.go index/read_test.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bytes" 19 19 "context" 20 20 "encoding/json" 21 - "flag" 22 21 "fmt" 23 22 "io/fs" 24 23 "os" ··· 30 29 "testing" 31 30 32 31 "github.com/google/go-cmp/cmp" 33 - 32 + "github.com/sourcegraph/zoekt" 34 33 "github.com/sourcegraph/zoekt/query" 35 34 ) 36 - 37 - var update = flag.Bool("update", false, "update golden files") 38 35 39 36 func TestReadWrite(t *testing.T) { 40 37 b, err := NewIndexBuilder(nil) ··· 199 196 } 200 197 } 201 198 202 - func loadShard(fn string) (Searcher, error) { 199 + func loadShard(fn string) (zoekt.Searcher, error) { 203 200 f, err := os.Open(fn) 204 201 if err != nil { 205 202 return nil, err ··· 222 219 type out struct { 223 220 FormatVersion int 224 221 FeatureVersion int 225 - FileMatches [][]FileMatch 222 + FileMatches [][]zoekt.FileMatch 226 223 } 227 224 228 225 qs := []query.Q{ ··· 259 256 FeatureVersion: index.metaData.IndexFeatureVersion, 260 257 } 261 258 for _, q := range qs { 262 - res, err := shard.Search(context.Background(), q, &SearchOptions{}) 259 + res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{}) 263 260 if err != nil { 264 261 t.Fatalf("failed search %s on %s during updating: %v", q, name, err) 265 262 } ··· 291 288 } 292 289 293 290 for j, q := range qs { 294 - res, err := shard.Search(context.Background(), q, &SearchOptions{}) 291 + res, err := shard.Search(context.Background(), q, &zoekt.SearchOptions{}) 295 292 if err != nil { 296 293 t.Fatalf("failed search %s on %s: %v", q, name, err) 297 294 }

+15 -14

score.go index/score.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bytes" ··· 20 20 "math" 21 21 "strings" 22 22 23 + "github.com/sourcegraph/zoekt" 23 24 "github.com/sourcegraph/zoekt/internal/ctags" 24 25 ) 25 26 ··· 36 37 // scoreChunk calculates the score for each line in the chunk based on its candidate matches, and returns the score of 37 38 // the best-scoring line, along with its line number. 38 39 // Invariant: there should be at least one input candidate, len(ms) > 0. 39 - func (p *contentProvider) scoreChunk(ms []*candidateMatch, language string, opts *SearchOptions) (chunkScore, []*Symbol) { 40 + func (p *contentProvider) scoreChunk(ms []*candidateMatch, language string, opts *zoekt.SearchOptions) (chunkScore, []*zoekt.Symbol) { 40 41 nl := p.newlines() 41 42 42 43 var bestScore lineScore 43 44 bestLine := 0 44 - var symbolInfo []*Symbol 45 + var symbolInfo []*zoekt.Symbol 45 46 46 47 start := 0 47 48 currentLine := -1 ··· 92 93 // - All candidate matches are assumed to come from the same line in the content. 93 94 // - If this line represents a filename, then lineNumber must be -1. 94 95 // - There should be at least one input candidate, len(ms) > 0. 95 - func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineNumber int, opts *SearchOptions) (lineScore, []*Symbol) { 96 + func (p *contentProvider) scoreLine(ms []*candidateMatch, language string, lineNumber int, opts *zoekt.SearchOptions) (lineScore, []*zoekt.Symbol) { 96 97 if opts.UseBM25Scoring { 97 98 score, symbolInfo := p.scoreLineBM25(ms, lineNumber) 98 99 ls := lineScore{score: score} ··· 112 113 } 113 114 114 115 filename := p.data(true) 115 - var symbolInfo []*Symbol 116 + var symbolInfo []*zoekt.Symbol 116 117 117 118 var bestScore lineScore 118 119 for i, m := range ms { ··· 164 165 // information. 165 166 if m.symbol { 166 167 if symbolInfo == nil { 167 - symbolInfo = make([]*Symbol, len(ms)) 168 + symbolInfo = make([]*zoekt.Symbol, len(ms)) 168 169 } 169 170 // findSymbols does not hydrate in Sym. So we need to store it. 170 171 si.Sym = string(sym) ··· 200 201 // Notes: 201 202 // - This BM25 calculation skips inverse document frequency (idf) to keep the implementation simple. 202 203 // - It uses the same calculateTermFrequency method as BM25 file scoring, which boosts filename and symbol matches. 203 - func (p *contentProvider) scoreLineBM25(ms []*candidateMatch, lineNumber int) (float64, []*Symbol) { 204 + func (p *contentProvider) scoreLineBM25(ms []*candidateMatch, lineNumber int) (float64, []*zoekt.Symbol) { 204 205 // If this is a filename, then don't compute BM25. The score would not be comparable to line scores. 205 206 if lineNumber < 0 { 206 207 return 0, nil ··· 221 222 score += ((k + 1.0) * float64(f)) / (k*(1.0-b+b*L) + float64(f)) 222 223 } 223 224 224 - // Check if any match comes from a symbol match tree, and if so hydrate in symbol information 225 - var symbolInfo []*Symbol 225 + // Check if any index comes from a symbol match tree, and if so hydrate in symbol information 226 + var symbolInfo []*zoekt.Symbol 226 227 for _, m := range ms { 227 228 if m.symbol { 228 229 if sec, si, ok := p.findSymbol(m); ok && si != nil { ··· 245 246 // - Symbol matches also count more than content matches, to reward matches on symbol definitions. 246 247 func (p *contentProvider) calculateTermFrequency(cands []*candidateMatch, df termDocumentFrequency) map[string]int { 247 248 // Treat each candidate match as a term and compute the frequencies. For now, ignore case sensitivity and 248 - // ignore whether the match is a word boundary. 249 + // ignore whether the index is a word boundary. 249 250 termFreqs := map[string]int{} 250 251 for _, m := range cands { 251 252 term := string(m.substrLowered) ··· 264 265 265 266 // scoreFile computes a score for the file match using various scoring signals, like 266 267 // whether there's an exact match on a symbol, the number of query clauses that matched, etc. 267 - func (d *indexData) scoreFile(fileMatch *FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *SearchOptions) { 268 + func (d *indexData) scoreFile(fileMatch *zoekt.FileMatch, doc uint32, mt matchTree, known map[matchTree]bool, opts *zoekt.SearchOptions) { 268 269 atomMatchCount := 0 269 270 visitMatchAtoms(mt, known, func(mt matchTree) { 270 271 atomMatchCount++ 271 272 }) 272 273 273 274 addScore := func(what string, computed float64) { 274 - fileMatch.addScore(what, computed, -1, opts.DebugScore) 275 + fileMatch.AddScore(what, computed, -1, opts.DebugScore) 275 276 } 276 277 277 278 // atom-count boosts files with matches from more than 1 atom. The 278 279 // maximum boost is scoreFactorAtomMatch. 279 280 if atomMatchCount > 0 { 280 - fileMatch.addScore("atom", (1.0-1.0/float64(atomMatchCount))*scoreFactorAtomMatch, float64(atomMatchCount), opts.DebugScore) 281 + fileMatch.AddScore("atom", (1.0-1.0/float64(atomMatchCount))*scoreFactorAtomMatch, float64(atomMatchCount), opts.DebugScore) 281 282 } 282 283 283 284 maxFileScore := 0.0 ··· 342 343 // Unlike standard file scoring, this scoring strategy ignores all other signals including document ranks. This keeps 343 344 // things simple for now, since BM25 is not normalized and can be tricky to combine with other scoring signals. It also 344 345 // ignores the individual LineMatch and ChunkMatch scores, instead calculating a score over all matches in the file. 345 - func (d *indexData) scoreFilesUsingBM25(fileMatches []FileMatch, tfs []termFrequency, df termDocumentFrequency, opts *SearchOptions) { 346 + func (d *indexData) scoreFilesUsingBM25(fileMatches []zoekt.FileMatch, tfs []termFrequency, df termDocumentFrequency, opts *zoekt.SearchOptions) { 346 347 // Use standard parameter defaults used in Lucene (https://lucene.apache.org/core/10_1_0/core/org/apache/lucene/search/similarities/BM25Similarity.html) 347 348 k, b := 1.2, 0.75 348 349

+1 -1

section.go index/section.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "encoding/binary"

+1 -1

toc.go index/toc.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 // IndexFormatVersion is a version number. It is increased every time the 18 18 // on-disk index format is changed.

+5 -6

tombstones.go index/tombstones.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "encoding/json" 5 5 "fmt" 6 6 "os" 7 7 "path/filepath" 8 + 9 + "github.com/sourcegraph/zoekt" 8 10 ) 9 11 10 - var mockRepos []*Repository 12 + var mockRepos []*zoekt.Repository 11 13 12 14 // SetTombstone idempotently sets a tombstone for repoName in .meta. 13 15 func SetTombstone(shardPath string, repoID uint32) error { ··· 20 22 } 21 23 22 24 func setTombstone(shardPath string, repoID uint32, tombstone bool) error { 23 - var repos []*Repository 25 + var repos []*zoekt.Repository 24 26 var err error 25 27 26 28 if mockRepos != nil { ··· 90 92 91 93 return f.Name(), finalPath, nil 92 94 } 93 - 94 - // umask holds the Umask of the current process 95 - var umask os.FileMode

+7 -5

tombstones_test.go index/tombstones_test.go

··· 1 - package zoekt 1 + package index 2 2 3 3 import ( 4 4 "encoding/json" 5 5 "os" 6 6 "path/filepath" 7 7 "testing" 8 + 9 + "github.com/sourcegraph/zoekt" 8 10 ) 9 11 10 12 func TestSetTombstone(t *testing.T) { ··· 24 26 isAlive := func(alive []bool) { 25 27 t.Helper() 26 28 blob := readMeta(ghostShard) 27 - ghostRepos := []*Repository{} 29 + ghostRepos := []*zoekt.Repository{} 28 30 if err := json.Unmarshal(blob, &ghostRepos); err != nil { 29 31 t.Fatal(err) 30 32 } ··· 51 53 isAlive([]bool{false, true, true}) 52 54 } 53 55 54 - func mkRepos(repoNames ...string) []*Repository { 55 - ret := make([]*Repository, 0, len(repoNames)) 56 + func mkRepos(repoNames ...string) []*zoekt.Repository { 57 + ret := make([]*zoekt.Repository, 0, len(repoNames)) 56 58 for i, n := range repoNames { 57 - ret = append(ret, &Repository{ID: uint32(i + 1), Name: n}) 59 + ret = append(ret, &zoekt.Repository{ID: uint32(i + 1), Name: n}) 58 60 } 59 61 return ret 60 62 }

+1 -1

tombstones_unix.go index/tombstones_unix.go

··· 1 1 //go:build !windows && !wasm 2 2 3 - package zoekt 3 + package index 4 4 5 5 import ( 6 6 "os"

+1 -1

tombstones_windows.go index/tombstones_windows.go

··· 1 - package zoekt 1 + package index 2 2 3 3 func init() { 4 4 // no setting of file permissions on Windows

+1 -1

web/api.go

··· 54 54 ResultID string 55 55 Language string 56 56 // If this was a duplicate result, this will contain the file 57 - // of the first match. 57 + // of the first index. 58 58 DuplicateID string 59 59 60 60 Branches []string

+31 -30

web/e2e_test.go

··· 30 30 "time" 31 31 32 32 "github.com/google/go-cmp/cmp" 33 + "github.com/sourcegraph/zoekt/index" 33 34 34 35 "github.com/sourcegraph/zoekt" 35 36 "github.com/sourcegraph/zoekt/query" ··· 54 55 return "memSeeker" 55 56 } 56 57 57 - func searcherForTest(t *testing.T, b *zoekt.IndexBuilder) zoekt.Streamer { 58 + func searcherForTest(t *testing.T, b *index.IndexBuilder) zoekt.Streamer { 58 59 var buf bytes.Buffer 59 60 if err := b.Write(&buf); err != nil { 60 61 t.Fatal(err) 61 62 } 62 63 f := &memSeeker{buf.Bytes()} 63 64 64 - searcher, err := zoekt.NewSearcher(f) 65 + searcher, err := index.NewSearcher(f) 65 66 if err != nil { 66 67 t.Fatalf("NewSearcher: %v", err) 67 68 } ··· 83 84 } 84 85 85 86 func TestBasic(t *testing.T) { 86 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 87 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 87 88 Name: "name", 88 89 URL: "repo-url", 89 90 CommitURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/commit/" .Version}}`, ··· 94 95 if err != nil { 95 96 t.Fatalf("NewIndexBuilder: %v", err) 96 97 } 97 - if err := b.Add(zoekt.Document{ 98 + if err := b.Add(index.Document{ 98 99 // use a name which requires correct escaping. https://github.com/sourcegraph/zoekt/issues/807 99 100 Name: "foo/bar+baz", 100 101 Content: []byte("to carry water in the no later bla"), ··· 149 150 } 150 151 151 152 func TestPrint(t *testing.T) { 152 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 153 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 153 154 Name: "name", 154 155 URL: "repo-url", 155 156 CommitURLTemplate: "{{.Version}}", ··· 160 161 if err != nil { 161 162 t.Fatalf("NewIndexBuilder: %v", err) 162 163 } 163 - if err := b.Add(zoekt.Document{ 164 + if err := b.Add(index.Document{ 164 165 Name: "f2", 165 166 Content: []byte("to carry water in the no later bla"), 166 167 Branches: []string{"master"}, ··· 168 169 t.Fatalf("Add: %v", err) 169 170 } 170 171 171 - if err := b.Add(zoekt.Document{ 172 + if err := b.Add(index.Document{ 172 173 Name: "dir/f2", 173 174 Content: []byte("blabla"), 174 175 Branches: []string{"master"}, ··· 202 203 } 203 204 204 205 func TestPrintDefault(t *testing.T) { 205 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 206 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 206 207 Name: "name", 207 208 URL: "repo-url", 208 209 Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, ··· 210 211 if err != nil { 211 212 t.Fatalf("NewIndexBuilder: %v", err) 212 213 } 213 - if err := b.Add(zoekt.Document{ 214 + if err := b.Add(index.Document{ 214 215 Name: "f2", 215 216 Content: []byte("to carry water in the no later bla"), 216 217 Branches: []string{"master"}, ··· 272 273 } 273 274 274 275 func TestFormatJson(t *testing.T) { 275 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 276 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 276 277 Name: "name", 277 278 URL: "repo-url", 278 279 Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, ··· 280 281 if err != nil { 281 282 t.Fatalf("NewIndexBuilder: %v", err) 282 283 } 283 - if err := b.Add(zoekt.Document{ 284 + if err := b.Add(index.Document{ 284 285 Name: "f2", 285 286 Content: []byte("to carry water in the no later bla"), 286 287 Branches: []string{"master"}, ··· 327 328 } 328 329 329 330 func TestContextLines(t *testing.T) { 330 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 331 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 331 332 Name: "name", 332 333 URL: "repo-url", 333 334 Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, ··· 335 336 if err != nil { 336 337 t.Fatalf("NewIndexBuilder: %v", err) 337 338 } 338 - if err := b.Add(zoekt.Document{ 339 + if err := b.Add(index.Document{ 339 340 Name: "f2", 340 341 Content: []byte("one line\nsecond snippet\nthird thing\nfourth\nfifth block\nsixth example\nseventh"), 341 342 Branches: []string{"master"}, 342 343 }); err != nil { 343 344 t.Fatalf("Add: %v", err) 344 345 } 345 - if err := b.Add(zoekt.Document{ 346 + if err := b.Add(index.Document{ 346 347 Name: "f3", 347 348 Content: []byte("\n\n\n\nto carry water in the no later bla\n\n\n\n"), 348 349 Branches: []string{"master"}, 349 350 }); err != nil { 350 351 t.Fatalf("Add: %v", err) 351 352 } 352 - if err := b.Add(zoekt.Document{ 353 + if err := b.Add(index.Document{ 353 354 Name: "f4", 354 355 Content: []byte("un \n \n\ttrois\n \n\nsix\n "), 355 356 Branches: []string{"master"}, 356 357 }); err != nil { 357 358 t.Fatalf("Add: %v", err) 358 359 } 359 - if err := b.Add(zoekt.Document{ 360 + if err := b.Add(index.Document{ 360 361 Name: "f5", 361 362 Content: []byte("\ngreen\npastures\n\nhere"), 362 363 Branches: []string{"master"}, ··· 440 441 }, 441 442 }, 442 443 "/search?q=one&format=json&ctx=2": { 443 - "match at start returns After but no Before", 444 + "index at start returns After but no Before", 444 445 FileMatch{ 445 446 FileName: "f2", 446 447 Repo: "name", ··· 461 462 }, 462 463 }, 463 464 "/search?q=seventh&format=json&ctx=2": { 464 - "match at end returns Before but no After", 465 + "index at end returns Before but no After", 465 466 FileMatch{ 466 467 FileName: "f2", 467 468 Repo: "name", ··· 482 483 }, 483 484 }, 484 485 "/search?q=seventh&format=json&ctx=10": { 485 - "match with large context at end returns whole document", 486 + "index with large context at end returns whole document", 486 487 FileMatch{ 487 488 FileName: "f2", 488 489 Repo: "name", ··· 503 504 }, 504 505 }, 505 506 "/search?q=one&format=json&ctx=10": { 506 - "match with large context at start returns whole document", 507 + "index with large context at start returns whole document", 507 508 FileMatch{ 508 509 FileName: "f2", 509 510 Repo: "name", ··· 651 652 } 652 653 653 654 func TestContextLinesMustBeValid(t *testing.T) { 654 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 655 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 655 656 Name: "name", 656 657 URL: "repo-url", 657 658 Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, ··· 659 660 if err != nil { 660 661 t.Fatalf("NewIndexBuilder: %v", err) 661 662 } 662 - if err := b.Add(zoekt.Document{ 663 + if err := b.Add(index.Document{ 663 664 Name: "f2", 664 665 Content: []byte("to carry water in the no later bla"), 665 666 Branches: []string{"master"}, ··· 746 747 } 747 748 748 749 func TestHostCustomization(t *testing.T) { 749 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 750 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 750 751 Name: "name", 751 752 }) 752 753 if err != nil { 753 754 t.Fatalf("NewIndexBuilder: %v", err) 754 755 } 755 - if err := b.Add(zoekt.Document{ 756 + if err := b.Add(index.Document{ 756 757 Name: "file", 757 758 Content: []byte("bla"), 758 759 }); err != nil { ··· 798 799 } 799 800 800 801 func TestDupResult(t *testing.T) { 801 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 802 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 802 803 Name: "name", 803 804 }) 804 805 if err != nil { ··· 806 807 } 807 808 808 809 for i := 0; i < 2; i++ { 809 - if err := b.Add(zoekt.Document{ 810 + if err := b.Add(index.Document{ 810 811 Name: fmt.Sprintf("file%d", i), 811 812 Content: []byte("bla"), 812 813 }); err != nil { ··· 848 849 } 849 850 850 851 func TestTruncateLine(t *testing.T) { 851 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 852 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 852 853 Name: "name", 853 854 }) 854 855 if err != nil { ··· 856 857 } 857 858 858 859 largePadding := bytes.Repeat([]byte{'a'}, 100*1000) // 100kb 859 - if err := b.Add(zoekt.Document{ 860 + if err := b.Add(index.Document{ 860 861 Name: "file", 861 862 Content: append(append(largePadding, []byte("helloworld")...), largePadding...), 862 863 }); err != nil { ··· 904 905 } 905 906 906 907 func TestHealthz(t *testing.T) { 907 - b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 908 + b, err := index.NewIndexBuilder(&zoekt.Repository{ 908 909 Name: "name", 909 910 }) 910 911 if err != nil { ··· 912 913 } 913 914 914 915 for i := 0; i < 2; i++ { 915 - if err := b.Add(zoekt.Document{ 916 + if err := b.Add(index.Document{ 916 917 Name: fmt.Sprintf("file%d", i), 917 918 Content: []byte("bla"), 918 919 }); err != nil {

+4 -2

web/server.go

··· 32 32 "time" 33 33 34 34 "github.com/grafana/regexp" 35 - "github.com/sourcegraph/zoekt" 35 + "github.com/sourcegraph/zoekt/index" 36 36 zjson "github.com/sourcegraph/zoekt/internal/json" 37 + 38 + "github.com/sourcegraph/zoekt" 37 39 "github.com/sourcegraph/zoekt/internal/tenant/systemtenant" 38 40 "github.com/sourcegraph/zoekt/query" 39 41 ) ··· 152 154 return t 153 155 } 154 156 155 - t, err := zoekt.ParseTemplate(str) 157 + t, err := index.ParseTemplate(str) 156 158 if err != nil { 157 159 log.Printf("text template parse error: %v", err) 158 160 t = texttemplate.Must(texttemplate.New("empty").Parse(""))

+4 -2

write.go index/write.go

··· 12 12 // See the License for the specific language governing permissions and 13 13 // limitations under the License. 14 14 15 - package zoekt 15 + package index 16 16 17 17 import ( 18 18 "bufio" ··· 23 23 "io" 24 24 "sort" 25 25 "time" 26 + 27 + "github.com/sourcegraph/zoekt" 26 28 ) 27 29 28 30 func (w *writer) writeTOC(toc *indexTOC) { ··· 174 176 indexTime = time.Now().UTC() 175 177 } 176 178 177 - if err := b.writeJSON(&IndexMetadata{ 179 + if err := b.writeJSON(&zoekt.IndexMetadata{ 178 180 IndexFormatVersion: b.indexFormatVersion, 179 181 IndexTime: indexTime, 180 182 IndexFeatureVersion: b.featureVersion,

Configure Feed

Configure Feed