fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

Ranking: increase contribution of repo rank (#546)

The file score includes a "repo rank" signal, which is based on the
repository's number of stars. Previously, we were aggressively normalizing the
number of stars, which made the repo ranks small and close together. This PR
changes the normalization to spread the ranks more evenly over the full range,
which increases the repo rank's contribution to the score.

+104 -13
+3 -3
api.go
··· 19 19 "encoding/json" 20 20 "errors" 21 21 "fmt" 22 - "math" 23 22 "reflect" 24 23 "strconv" 25 24 "time" ··· 618 617 // based on priority. Setting it on read instead of during indexing 619 618 // allows us to avoid a complete reindex. 620 619 if r.Rank == 0 && r.priority > 0 { 621 - l := math.Log(float64(r.priority)) 622 - repo.Rank = uint16((1.0 - 1.0/math.Pow(1+l, 0.6)) * 10000) 620 + // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 621 + // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 622 + r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 623 623 } 624 624 } 625 625 return nil
+91
build/e2e_test.go
··· 1241 1241 }) 1242 1242 } 1243 1243 } 1244 + 1245 + func TestRepoRanks(t *testing.T) { 1246 + if os.Getenv("CI") == "" && checkCTags() == "" { 1247 + t.Skip("ctags not available") 1248 + } 1249 + dir := t.TempDir() 1250 + 1251 + opts := Options{ 1252 + IndexDir: dir, 1253 + RepositoryDescription: zoekt.Repository{ 1254 + Name: "repo", 1255 + }, 1256 + DocumentRanksVersion: "ranking", 1257 + } 1258 + 1259 + searchQuery := &query.Substring{Content: true, Pattern: "Inner"} 1260 + exampleJava, err := os.ReadFile("./testdata/example.java") 1261 + if err != nil { 1262 + t.Fatal(err) 1263 + } 1264 + 1265 + cases := []struct { 1266 + name string 1267 + repoRank uint16 1268 + wantScore float64 1269 + }{ 1270 + { 1271 + name: "no shard rank", 1272 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) 1273 + wantScore: 7012.00, 1274 + }, 1275 + { 1276 + name: "medium shard rank", 1277 + repoRank: 30000, 1278 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 9.16 (repo rank) 1279 + wantScore: 7021.16, 1280 + }, 1281 + { 1282 + name: "high shard rank", 1283 + repoRank: 60000, 1284 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 18.31 (repo rank) 1285 + wantScore: 7030.31, 1286 + }, 1287 + } 1288 + 1289 + for _, c := range cases { 1290 + t.Run(c.name, func(t *testing.T) { 1291 + opts.RepositoryDescription = zoekt.Repository{ 1292 + Name: "repo", 1293 + Rank: c.repoRank, 1294 + } 1295 + 1296 + b, err := NewBuilder(opts) 1297 + if err != nil { 1298 + t.Fatalf("NewBuilder: %v", err) 1299 + } 1300 + 1301 + err = b.Add(zoekt.Document{Name: "example.java", Content: exampleJava}) 1302 + if err != nil { 1303 + t.Fatal(err) 1304 + } 1305 + 1306 + if err := b.Finish(); err != nil { 1307 + t.Fatalf("Finish: %v", err) 1308 + } 1309 + 1310 + ss, err := shards.NewDirectorySearcher(dir) 1311 + if err != nil { 1312 + t.Fatalf("NewDirectorySearcher(%s): 
%v", dir, err) 1313 + } 1314 + defer ss.Close() 1315 + 1316 + srs, err := ss.Search(context.Background(), searchQuery, &zoekt.SearchOptions{ 1317 + UseDocumentRanks: true, 1318 + DebugScore: true, 1319 + }) 1320 + 1321 + if err != nil { 1322 + t.Fatal(err) 1323 + } 1324 + 1325 + if got, want := len(srs.Files), 1; got != want { 1326 + t.Fatalf("file matches: want %d, got %d", want, got) 1327 + } 1328 + 1329 + if got := srs.Files[0].Score; math.Abs(got-c.wantScore) >= 0.01 { 1330 + t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore) 1331 + } 1332 + }) 1333 + } 1334 + }
+1 -1
contentprovider.go
··· 468 468 // equal weight with the query-dependent signals. 469 469 scoreFileRankFactor = 9000.0 470 470 scoreFileOrderFactor = 10.0 471 - scoreShardRankFactor = 20.0 471 + scoreRepoRankFactor = 20.0 472 472 473 473 // Used for ordering line and chunk matches within a file. 474 474 scoreLineOrderFactor = 1.0
+1 -1
eval.go
··· 385 385 } 386 386 387 387 fileMatch.addScore("doc-order", scoreFileOrderFactor*(1.0-float64(nextDoc)/float64(len(d.boundaries))), opts.DebugScore) 388 - fileMatch.addScore("shard-order", scoreShardRankFactor*float64(md.Rank)/maxUInt16, opts.DebugScore) 388 + fileMatch.addScore("repo-rank", scoreRepoRankFactor*float64(md.Rank)/maxUInt16, opts.DebugScore) 389 389 390 390 fileMatch.Branches = d.gatherBranches(nextDoc, mt, known) 391 391 sortMatchesByScore(fileMatch.LineMatches)
+8 -8
shards/shards_test.go
··· 202 202 nextShardNum++ 203 203 } 204 204 205 - addShard("weekend-project", 0.25, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 206 - addShard("moderately-popular", 0.5, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 207 - addShard("weekend-project-2", 0.25, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 208 - addShard("super-star", 0.9, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 205 + addShard("weekend-project", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 206 + addShard("moderately-popular", 500, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 207 + addShard("weekend-project-2", 20, zoekt.Document{Name: "f2", Content: []byte("foo bas")}) 208 + addShard("super-star", 5000, zoekt.Document{Name: "f1", Content: []byte("foo bar bas")}) 209 209 210 210 want := []string{ 211 211 "super-star", ··· 244 244 nextShardNum++ 245 245 } 246 246 247 - addShard("weekend-project", 0.25, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 248 - addShard("moderately-popular", 0.4, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 249 - addShard("weekend-project-2", 0.25, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 250 - addShard("super-star", 0.9, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 247 + addShard("weekend-project", 20, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 248 + addShard("moderately-popular", 500, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 249 + addShard("weekend-project-2", 20, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 250 + addShard("super-star", 5000, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 251 251 zoekt.Document{Name: "f5", Content: []byte("fooooo")}) 252 252 253 253 // Run a stream search and gather the results