fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

indexserver: introduce DocumentRankVersion (#460)

This field allows us to indicate when the ranks have changed, which
forces us to re-index. We introduce DocumentRankVersion as a builder
option so that it can influence that behavior. By making a
corresponding change to Sourcegraph to set this field, we will
automatically update indexes as document ranks change for a repository.

Note: We remove the document ranking feature flag from the index server.
Ranking is now completely controlled by the frontend.

Note: I got started on a larger refactor here to make this cleaner.
The idea is to make DocumentRanksPath a URL which is controlled by
Sourcegraph. We could then, for example, treat a change in the URL as
the signal to re-index. For now this is a much smaller change which
will unblock us sooner.

Test Plan: Ran a local server and tested that when the ranks are missing,
nothing happens. When the ranks change or are added, we re-index.
Additionally, when the ranks do not change, the server does nothing.

go install ./cmd/zoekt-git-index
go run ./cmd/zoekt-sourcegraph-indexserver \
-sourcegraph_url ~/src/github.com/sourcegraph/ \
-listen 127.0.0.1:6072

# We expect this to trigger an index
echo -e "README.md\t0.1" > ~/src/github.com/sourcegraph/zoekt/SG_DOCUMENT_RANKS
pkill -SIGUSR1 zoekt-sourcegra

# We expect this to do nothing since the ranks have not changed
pkill -SIGUSR1 zoekt-sourcegra

# We expect this to trigger a re-index
echo -e "README.md\t0.2" > ~/src/github.com/sourcegraph/zoekt/SG_DOCUMENT_RANKS
pkill -SIGUSR1 zoekt-sourcegra

+37 -9
+20 -5
build/builder.go
··· 104 104 // ranks will be computed on-the-fly. 105 105 DocumentRanksPath string 106 106 107 + // DocumentRanksVersion is a string which when changed will cause us to 108 + // reindex a shard. This field is used so that when the contents of 109 + // DocumentRanksPath changes, we can reindex. 110 + DocumentRanksVersion string 111 + 107 112 // changedOrRemovedFiles is a list of file paths that have been changed or removed 108 113 // since the last indexing job for this repository. These files will be tombstoned 109 114 // in the older shards for this repository. ··· 117 122 ctagsPath string 118 123 cTagsMustSucceed bool 119 124 largeFiles []string 125 + 126 + // documentRankVersion is an experimental field which will change when the 127 + // DocumentRanksPath content changes. If empty we ignore it. 128 + documentRankVersion string 120 129 } 121 130 122 131 func (o *Options) HashOptions() HashOptions { 123 132 return HashOptions{ 124 - sizeMax: o.SizeMax, 125 - disableCTags: o.DisableCTags, 126 - ctagsPath: o.CTagsPath, 127 - cTagsMustSucceed: o.CTagsMustSucceed, 128 - largeFiles: o.LargeFiles, 133 + sizeMax: o.SizeMax, 134 + disableCTags: o.DisableCTags, 135 + ctagsPath: o.CTagsPath, 136 + cTagsMustSucceed: o.CTagsMustSucceed, 137 + largeFiles: o.LargeFiles, 138 + documentRankVersion: o.DocumentRanksVersion, 129 139 } 130 140 } 131 141 ··· 138 148 hasher.Write([]byte(fmt.Sprintf("%d", h.sizeMax))) 139 149 hasher.Write([]byte(fmt.Sprintf("%q", h.largeFiles))) 140 150 hasher.Write([]byte(fmt.Sprintf("%t", h.disableCTags))) 151 + 152 + if h.documentRankVersion != "" { 153 + hasher.Write([]byte{0}) 154 + io.WriteString(hasher, h.documentRankVersion) 155 + } 141 156 142 157 return fmt.Sprintf("%x", hasher.Sum(nil)) 143 158 }
+2
cmd/zoekt-git-index/main.go
··· 43 43 isDelta := flag.Bool("delta", false, "whether we should use delta build") 44 44 deltaShardNumberFallbackThreshold := flag.Uint64("delta_threshold", 0, "upper limit on the number of preexisting shards that can exist before attempting a delta build (0 to disable fallback behavior)") 45 45 offlineRanking := flag.String("offline_ranking", "", "the name of the file that contains the ranking info.") 46 + offlineRankingVersion := flag.String("offline_ranking_version", "", "a version string identifying the contents in offline_ranking.") 46 47 flag.Parse() 47 48 48 49 // Tune GOMAXPROCS to match Linux container CPU quota. ··· 70 71 opts := cmd.OptionsFromFlags() 71 72 opts.IsDelta = *isDelta 72 73 opts.DocumentRanksPath = *offlineRanking 74 + opts.DocumentRanksVersion = *offlineRankingVersion 73 75 74 76 var branches []string 75 77 if *branchesStr != "" {
+11 -2
cmd/zoekt-sourcegraph-indexserver/index.go
··· 54 54 // Priority indicates ranking in results, higher first. 55 55 Priority float64 56 56 57 + // DocumentRanksVersion when non-empty will lead to indexing using offline 58 + // ranking. When the string changes this will also cause us to re-index with 59 + // new ranks. 60 + DocumentRanksVersion string 61 + 57 62 // Public is true if the repository is public. 58 63 Public bool 59 64 ··· 116 121 CTagsMustSucceed: o.Symbols, 117 122 DisableCTags: !o.Symbols, 118 123 IsDelta: o.UseDelta, 124 + 125 + DocumentRanksVersion: o.DocumentRanksVersion, 119 126 } 120 127 } 121 128 ··· 322 329 "-submodules=false", 323 330 } 324 331 325 - if rankingEnabled { 332 + if o.DocumentRanksVersion != "" { 326 333 // We store the document ranks as JSON in gitDir and tell zoekt-git-index where 327 334 // to find the file. 328 335 documentsRankFile := filepath.Join(gitDir, "documents.rank") ··· 354 361 sglog.Uint32("id", o.RepoID), 355 362 ) 356 363 } else { 357 - args = append(args, "-offline_ranking", documentsRankFile) 364 + args = append(args, 365 + "-offline_ranking", documentsRankFile, 366 + "-offline_ranking_version", o.DocumentRanksVersion) 358 367 } 359 368 } 360 369
-2
cmd/zoekt-sourcegraph-indexserver/main.go
··· 512 512 return ticker 513 513 } 514 514 515 - var rankingEnabled, _ = strconv.ParseBool(os.Getenv("ENABLE_EXPERIMENTAL_RANKING")) 516 - 517 515 // Index starts an index job for repo name at commit. 518 516 func (s *Server) Index(args *indexArgs) (state indexState, err error) { 519 517 tr := trace.New("index", args.Name)
+4
cmd/zoekt-sourcegraph-indexserver/sg.go
··· 617 617 Priority: float("SG_PRIORITY"), 618 618 } 619 619 620 + if stat, err := os.Stat(filepath.Join(dir, "SG_DOCUMENT_RANKS")); err == nil { 621 + opts.DocumentRanksVersion = stat.ModTime().String() 622 + } 623 + 620 624 branches, err := sf.getBranches(name) 621 625 if err != nil { 622 626 return opts, err