fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

scoring: use repo freshness as tiebreaker (#832)

We ignore priority and instead use the latest commit date as repo rank.
This has a big impact for Sourcegraph because it means we switch from
star count to repo freshness as tiebreaker.

As a minor tweak, we also separate query-based scores from tiebreakers.
To achieve this we reserve the last 7 digits of a score for tiebreakers:
- 5 digits for repo rank (values in [0, maxUInt16], i.e. at most 65535)
- 2 digits for file order (values in [0, 10]).

Example:

Before:
score: 8775.35 <- atom(2):200, fragment:8550.00, repo-rank: 19, doc-order:6.35

After:
score: 8750_00019_06.35 <- atom(2):200, fragment:8550.00, repo-rank: 19, doc-order:6.35

+238 -144
+26 -3
api.go
··· 635 635 r.ID = uint32(id) 636 636 } 637 637 638 - if v, ok := repo.RawConfig["priority"]; ok { 638 + // Sourcegraph indexserver doesn't set repo.Rank, so we set it here. Setting it 639 + // on read instead of during indexing allows us to avoid a complete reindex. 640 + // 641 + // Prefer "latest_commit_date" over "priority" for ranking. We keep priority for 642 + // backwards compatibility. 643 + if _, ok := repo.RawConfig["latest_commit_date"]; ok { 644 + // We use the number of months since 1970 as a simple measure of repo freshness. 645 + // It is monotonically increasing and stable across re-indexes and restarts. 646 + r.Rank = monthsSince1970(repo.LatestCommitDate) 647 + } else if v, ok := repo.RawConfig["priority"]; ok { 639 648 r.priority, err = strconv.ParseFloat(v, 64) 640 649 if err != nil { 641 650 r.priority = 0 ··· 645 654 // based on priority. Setting it on read instead of during indexing 646 655 // allows us to avoid a complete reindex. 647 656 if r.Rank == 0 && r.priority > 0 { 648 - // Normalize the repo score within [0, 1), with the midpoint at 5,000. This means popular 649 - // repos (roughly ones with over 5,000 stars) see diminishing returns from more stars. 657 + // Normalize the repo score within [0, maxUint16), with the midpoint at 5,000. 658 + // This means popular repos (roughly ones with over 5,000 stars) see diminishing 659 + // returns from more stars. 650 660 r.Rank = uint16(r.priority / (5000.0 + r.priority) * maxUInt16) 651 661 } 652 662 } 663 + 653 664 return nil 665 + } 666 + 667 + // monthsSince1970 returns the number of months since 1970. It returns values in 668 + // the range [0, maxUInt16]. The upper bound is reached in the year 7431, the 669 + // lower bound for all dates before 1970. 
670 + func monthsSince1970(t time.Time) uint16 { 671 + base := time.Unix(0, 0) 672 + if t.Before(base) { 673 + return 0 674 + } 675 + months := int(t.Year()-1970)*12 + int(t.Month()-1) 676 + return uint16(min(months, maxUInt16)) 654 677 } 655 678 656 679 // MergeMutable will merge x into r. mutated will be true if it made any
+26
api_test.go
··· 368 368 } 369 369 }) 370 370 } 371 + 372 + func TestMonthsSince1970(t *testing.T) { 373 + tests := []struct { 374 + name string 375 + input time.Time 376 + expected uint16 377 + }{ 378 + {"Before 1970", time.Date(1950, 12, 31, 0, 0, 0, 0, time.UTC), 0}, 379 + {"Unix 0", time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC), 0}, 380 + {"Feb 1970", time.Date(1970, 2, 1, 0, 0, 0, 0, time.UTC), 1}, 381 + {"Year 1989", time.Date(1989, 12, 13, 0, 0, 0, 0, time.UTC), 239}, 382 + {"Sep 2024", time.Date(2024, 9, 20, 0, 0, 0, 0, time.UTC), 656}, 383 + {"Oct 2024", time.Date(2024, 10, 20, 0, 0, 0, 0, time.UTC), 657}, 384 + {"Apr 7431", time.Date(7431, 4, 1, 0, 0, 0, 0, time.UTC), 65535}, 385 + {"9999", time.Date(9999, 0, 0, 0, 0, 0, 0, time.UTC), 65535}, 386 + } 387 + 388 + for _, tt := range tests { 389 + t.Run(tt.name, func(t *testing.T) { 390 + result := monthsSince1970(tt.input) 391 + if result != tt.expected { 392 + t.Errorf("expected %d, got %d", tt.expected, result) 393 + } 394 + }) 395 + } 396 + }
+105 -97
build/scoring_test.go
··· 40 40 fileName: "a/b/c/config.go", 41 41 query: &query.Substring{FileName: true, Pattern: "config"}, 42 42 language: "Go", 43 - // 5500 (partial base at boundary) + 500 (word) + 10 (file order) 44 - wantScore: 6010, 43 + // 5500 (partial base at boundary) + 500 (word) 44 + wantScore: 6000, 45 45 }, 46 46 { 47 47 fileName: "a/b/c/config.go", 48 48 query: &query.Substring{FileName: true, Pattern: "config.go"}, 49 49 language: "Go", 50 - // 7000 (full base match) + 500 (word) + 10 (file order) 51 - wantScore: 7510, 50 + // 7000 (full base match) + 500 (word) 51 + wantScore: 7500, 52 52 }, 53 53 { 54 54 fileName: "a/config/c/d.go", 55 55 query: &query.Substring{FileName: true, Pattern: "config"}, 56 56 language: "Go", 57 - // 500 (word) + 10 (file order) 58 - wantScore: 510, 57 + // 500 (word) 58 + wantScore: 500, 59 59 }, 60 60 } 61 61 ··· 128 128 content: exampleJava, 129 129 query: &query.Substring{Content: true, Pattern: "nerClass"}, 130 130 language: "Java", 131 - // 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word) + 10 (file order) 132 - wantScore: 6560, 131 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 50 (partial word) 132 + wantScore: 6550, 133 133 }, 134 134 { 135 135 fileName: "example.java", 136 136 content: exampleJava, 137 137 query: &query.Substring{Content: true, Pattern: "StaticClass"}, 138 138 language: "Java", 139 - // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word) + 10 (file order) 140 - wantScore: 7010, 139 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word) 140 + wantScore: 7000, 141 141 }, 142 142 { 143 143 fileName: "example.java", 144 144 content: exampleJava, 145 145 query: &query.Substring{Content: true, Pattern: "innerEnum"}, 146 146 language: "Java", 147 - // 7000 (symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 148 - wantScore: 8410, 147 + // 7000 (symbol) + 900 (Java enum) + 500 (word) 148 + wantScore: 8400, 149 149 }, 150 150 { 151 151 fileName: 
"example.java", 152 152 content: exampleJava, 153 153 query: &query.Substring{Content: true, Pattern: "innerInterface"}, 154 154 language: "Java", 155 - // 7000 (symbol) + 800 (Java interface) + 500 (word) + 10 (file order) 156 - wantScore: 8310, 155 + // 7000 (symbol) + 800 (Java interface) + 500 (word) 156 + wantScore: 8300, 157 157 }, 158 158 { 159 159 fileName: "example.java", 160 160 content: exampleJava, 161 161 query: &query.Substring{Content: true, Pattern: "innerMethod"}, 162 162 language: "Java", 163 - // 7000 (symbol) + 700 (Java method) + 500 (word) + 10 (file order) 164 - wantScore: 8210, 163 + // 7000 (symbol) + 700 (Java method) + 500 (word) 164 + wantScore: 8200, 165 165 }, 166 166 { 167 167 fileName: "example.java", 168 168 content: exampleJava, 169 169 query: &query.Substring{Content: true, Pattern: "field"}, 170 170 language: "Java", 171 - // 7000 (symbol) + 600 (Java field) + 500 (word) + 10 (file order) 172 - wantScore: 8110, 171 + // 7000 (symbol) + 600 (Java field) + 500 (word) 172 + wantScore: 8100, 173 173 }, 174 174 { 175 175 fileName: "example.java", 176 176 content: exampleJava, 177 177 query: &query.Substring{Content: true, Pattern: "B"}, 178 178 language: "Java", 179 - // 7000 (symbol) + 500 (Java enum constant) + 500 (word) + 10 (file order) 180 - wantScore: 8010, 179 + // 7000 (symbol) + 500 (Java enum constant) + 500 (word) 180 + wantScore: 8000, 181 181 }, 182 182 // 2 Atoms (1x content and 1x filename) 183 183 { ··· 185 185 content: exampleJava, 186 186 query: &query.Substring{Pattern: "example"}, // matches filename and a Java field 187 187 language: "Java", 188 - // 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom) + 10 (file order) 189 - wantScore: 6810, 188 + // 5500 (edge symbol) + 600 (Java field) + 500 (word) + 200 (atom) 189 + wantScore: 6800, 190 190 }, 191 191 // 3 Atoms (2x content, 1x filename) 192 192 { ··· 197 197 &query.Substring{Content: true, Pattern: "runInnerInterface"}, // matches a Java method 
198 198 }}, 199 199 language: "Java", 200 - // 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom) + 10 (file order) 201 - wantScore: 8476.667, 200 + // 7000 (symbol) + 700 (Java method) + 500 (word) + 266.67 (atom) 201 + wantScore: 8466, 202 202 }, 203 203 // 4 Atoms (4x content) 204 204 { ··· 211 211 &query.Substring{Content: true, Pattern: "app"}, 212 212 }}, 213 213 language: "Java", 214 - // 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom) + 10 (file order) 215 - wantScore: 8710, 214 + // 7000 (symbol) + 900 (Java enum) + 500 (word) + 300 (atom) 215 + wantScore: 8700, 216 216 }, 217 217 { 218 218 fileName: "example.java", 219 219 content: exampleJava, 220 220 query: &query.Substring{Content: true, Pattern: "unInnerInterface("}, 221 221 language: "Java", 222 - // 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word) + 10 (file order) 223 - wantScore: 4760, 222 + // 4000 (overlap Symbol) + 700 (Java method) + 50 (partial word) 223 + wantScore: 4750, 224 224 }, 225 225 { 226 226 fileName: "example.java", 227 227 content: exampleJava, 228 228 query: &query.Substring{Content: true, Pattern: "InnerEnum"}, 229 229 language: "Java", 230 - // 7000 (Symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 231 - wantScore: 8410, 230 + // 7000 (Symbol) + 900 (Java enum) + 500 (word) 231 + wantScore: 8400, 232 232 }, 233 233 { 234 234 fileName: "example.java", 235 235 content: exampleJava, 236 236 query: &query.Substring{Content: true, Pattern: "enum InnerEnum"}, 237 237 language: "Java", 238 - // 5500 (edge Symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 239 - wantScore: 6910, 238 + // 5500 (edge Symbol) + 900 (Java enum) + 500 (word) 239 + wantScore: 6900, 240 240 }, 241 241 { 242 242 fileName: "example.java", 243 243 content: exampleJava, 244 244 query: &query.Substring{Content: true, Pattern: "public enum InnerEnum {"}, 245 245 language: "Java", 246 - // 4000 (overlap Symbol) + 900 (Java enum) + 500 (word) + 10 (file order) 247 - 
wantScore: 5410, 246 + // 4000 (overlap Symbol) + 900 (Java enum) + 500 (word) 247 + wantScore: 5400, 248 248 }, 249 249 } 250 250 ··· 265 265 content: exampleKotlin, 266 266 query: &query.Substring{Content: true, Pattern: "oxyPreloader"}, 267 267 language: "Kotlin", 268 - // 5500 (partial symbol at boundary) + 1000 (Kotlin class) + 50 (partial word) + 10 (file order) 269 - wantScore: 6560, 268 + // 5500 (partial symbol at boundary) + 1000 (Kotlin class) + 50 (partial word) 269 + wantScore: 6550, 270 270 }, 271 271 { 272 272 fileName: "example.kt", 273 273 content: exampleKotlin, 274 274 query: &query.Substring{Content: true, Pattern: "ViewMetadata"}, 275 275 language: "Kotlin", 276 - // 7000 (symbol) + 900 (Kotlin interface) + 500 (word) + 10 (file order) 277 - wantScore: 8410, 276 + // 7000 (symbol) + 900 (Kotlin interface) + 500 (word) 277 + wantScore: 8400, 278 278 }, 279 279 { 280 280 fileName: "example.kt", 281 281 content: exampleKotlin, 282 282 query: &query.Substring{Content: true, Pattern: "onScrolled"}, 283 283 language: "Kotlin", 284 - // 7000 (symbol) + 800 (Kotlin method) + 500 (word) + 10 (file order) 285 - wantScore: 8310, 284 + // 7000 (symbol) + 800 (Kotlin method) + 500 (word) 285 + wantScore: 8300, 286 286 }, 287 287 { 288 288 fileName: "example.kt", 289 289 content: exampleKotlin, 290 290 query: &query.Substring{Content: true, Pattern: "PreloadErrorHandler"}, 291 291 language: "Kotlin", 292 - // 7000 (symbol) + 700 (Kotlin typealias) + 500 (word) + 10 (file order) 293 - wantScore: 8210, 292 + // 7000 (symbol) + 700 (Kotlin typealias) + 500 (word) 293 + wantScore: 8200, 294 294 }, 295 295 { 296 296 fileName: "example.kt", 297 297 content: exampleKotlin, 298 298 query: &query.Substring{Content: true, Pattern: "FLING_THRESHOLD_PX"}, 299 299 language: "Kotlin", 300 - // 7000 (symbol) + 600 (Kotlin constant) + 500 (word) + 10 (file order) 301 - wantScore: 8110, 300 + // 7000 (symbol) + 600 (Kotlin constant) + 500 (word) 301 + wantScore: 8100, 302 
302 }, 303 303 { 304 304 fileName: "example.kt", 305 305 content: exampleKotlin, 306 306 query: &query.Substring{Content: true, Pattern: "scrollState"}, 307 307 language: "Kotlin", 308 - // 7000 (symbol) + 500 (Kotlin variable) + 500 (word) + 10 (file order) 309 - wantScore: 8010, 308 + // 7000 (symbol) + 500 (Kotlin variable) + 500 (word) 309 + wantScore: 8000, 310 310 }, 311 311 } 312 312 ··· 330 330 content: exampleCpp, 331 331 query: &query.Substring{Content: true, Pattern: "FooClass"}, 332 332 language: "C++", 333 - // 7000 (Symbol) + 1000 (C++ class) + 500 (full word) + 10 (file order) 334 - wantScore: 8510, 333 + // 7000 (Symbol) + 1000 (C++ class) + 500 (full word) 334 + wantScore: 8500, 335 335 }, 336 336 { 337 337 fileName: "example.cc", 338 338 content: exampleCpp, 339 339 query: &query.Substring{Content: true, Pattern: "NestedEnum"}, 340 340 language: "C++", 341 - // 7000 (Symbol) + 900 (C++ enum) + 500 (full word) + 10 (file order) 342 - wantScore: 8410, 341 + // 7000 (Symbol) + 900 (C++ enum) + 500 (full word) 342 + wantScore: 8400, 343 343 }, 344 344 { 345 345 fileName: "example.cc", 346 346 content: exampleCpp, 347 347 query: &query.Substring{Content: true, Pattern: "main"}, 348 348 language: "C++", 349 - // 7000 (Symbol) + 800 (C++ function) + 500 (full word) + 10 (file order) 350 - wantScore: 8310, 349 + // 7000 (Symbol) + 800 (C++ function) + 500 (full word) 350 + wantScore: 8300, 351 351 }, 352 352 { 353 353 fileName: "example.cc", 354 354 content: exampleCpp, 355 355 query: &query.Substring{Content: true, Pattern: "FooStruct"}, 356 356 language: "C++", 357 - // 7000 (Symbol) + 700 (C++ struct) + 500 (full word) + 10 (file order) 358 - wantScore: 8210, 357 + // 7000 (Symbol) + 700 (C++ struct) + 500 (full word) 358 + wantScore: 8200, 359 359 }, 360 360 { 361 361 fileName: "example.cc", 362 362 content: exampleCpp, 363 363 query: &query.Substring{Content: true, Pattern: "TheUnion"}, 364 364 language: "C++", 365 - // 7000 (Symbol) + 600 (C++ 
union) + 500 (full word) + 10 (file order) 366 - wantScore: 8110, 365 + // 7000 (Symbol) + 600 (C++ union) + 500 (full word) 366 + wantScore: 8100, 367 367 }, 368 368 } 369 369 ··· 387 387 content: examplePython, 388 388 query: &query.Substring{Content: true, Pattern: "C1"}, 389 389 language: "Python", 390 - // 7000 (symbol) + 1000 (Python class) + 500 (word) + 10 (file order) 391 - wantScore: 8510, 390 + // 7000 (symbol) + 1000 (Python class) + 500 (word) 391 + wantScore: 8500, 392 392 }, 393 393 { 394 394 fileName: "example.py", 395 395 content: examplePython, 396 396 query: &query.Substring{Content: true, Pattern: "g"}, 397 397 language: "Python", 398 - // 7000 (symbol) + 800 (Python function) + 500 (word) + 10 (file order) 399 - wantScore: 8310, 398 + // 7000 (symbol) + 800 (Python function) + 500 (word) 399 + wantScore: 8300, 400 400 }, 401 401 } 402 402 ··· 412 412 content: examplePython, 413 413 query: &query.Substring{Content: true, Pattern: "__init__"}, 414 414 language: "Python", 415 - // 7000 (symbol) + 800 (Python method) + 50 (partial word) + 10 (file order) 416 - wantScore: 7860, 415 + // 7000 (symbol) + 800 (Python method) + 50 (partial word) 416 + wantScore: 7850, 417 417 } 418 418 419 419 checkScoring(t, scipOnlyCase, false, ctags.ScipCTags) ··· 431 431 content: exampleRuby, 432 432 query: &query.Substring{Content: true, Pattern: "Parental"}, 433 433 language: "Ruby", 434 - // 7000 (symbol) + 1000 (Ruby class) + 500 (word) + 10 (file order) 435 - wantScore: 8510, 434 + // 7000 (symbol) + 1000 (Ruby class) + 500 (word) 435 + wantScore: 8500, 436 436 }, 437 437 { 438 438 fileName: "example.rb", 439 439 content: exampleRuby, 440 440 query: &query.Substring{Content: true, Pattern: "parental_func"}, 441 441 language: "Ruby", 442 - // 7000 (symbol) + 900 (Ruby method) + 500 (word) + 10 (file order) 443 - wantScore: 8410, 442 + // 7000 (symbol) + 900 (Ruby method) + 500 (word) 443 + wantScore: 8400, 444 444 }, 445 445 { 446 446 fileName: "example.rb", 447 
447 content: exampleRuby, 448 448 query: &query.Substring{Content: true, Pattern: "MyModule"}, 449 449 language: "Ruby", 450 - // 7000 (symbol) + 500 (Ruby module) + 500 (word) + 10 (file order) 451 - wantScore: 8210, 450 + // 7000 (symbol) + 500 (Ruby module) + 500 (word) 451 + wantScore: 8200, 452 452 }, 453 453 } 454 454 ··· 471 471 content: exampleScala, 472 472 query: &query.Substring{Content: true, Pattern: "SymbolIndexBucket"}, 473 473 language: "Scala", 474 - // 7000 (symbol) + 1000 (Scala class) + 500 (word) + 10 (file order) 475 - wantScore: 8510, 474 + // 7000 (symbol) + 1000 (Scala class) + 500 (word) 475 + wantScore: 8500, 476 476 }, 477 477 { 478 478 fileName: "example.scala", 479 479 content: exampleScala, 480 480 query: &query.Substring{Content: true, Pattern: "stdLibPatches"}, 481 481 language: "Scala", 482 - // 7000 (symbol) + 800 (Scala object) + 500 (word) + 10 (file order) 483 - wantScore: 8310, 482 + // 7000 (symbol) + 800 (Scala object) + 500 (word) 483 + wantScore: 8300, 484 484 }, 485 485 { 486 486 fileName: "example.scala", 487 487 content: exampleScala, 488 488 query: &query.Substring{Content: true, Pattern: "close"}, 489 489 language: "Scala", 490 - // 7000 (symbol) + 700 (Scala method) + 500 (word) + 10 (file order) 491 - wantScore: 8210, 490 + // 7000 (symbol) + 700 (Scala method) + 500 (word) 491 + wantScore: 8200, 492 492 }, 493 493 { 494 494 fileName: "example.scala", 495 495 content: exampleScala, 496 496 query: &query.Substring{Content: true, Pattern: "javaSymbol"}, 497 497 language: "Scala", 498 - // 7000 (symbol) + 500 (Scala method) + 500 (word) + 10 (file order) 499 - wantScore: 8010, 498 + // 7000 (symbol) + 500 (Scala method) + 500 (word) 499 + wantScore: 8000, 500 500 }, 501 501 } 502 502 ··· 516 516 `), 517 517 query: &query.Substring{Content: true, Pattern: "aInterface"}, 518 518 language: "Go", 519 - // 7000 (full base match) + 1000 (Go interface) + 500 (word) + 10 (file order) 520 - wantScore: 8510, 519 + // 7000 (full 
base match) + 1000 (Go interface) + 500 (word) 520 + wantScore: 8500, 521 521 }, 522 522 { 523 523 fileName: "src/net/http/client.go", ··· 527 527 `), 528 528 query: &query.Substring{Content: true, Pattern: "aStruct"}, 529 529 language: "Go", 530 - // 7000 (full base match) + 900 (Go struct) + 500 (word) + 10 (file order) 531 - wantScore: 8410, 530 + // 7000 (full base match) + 900 (Go struct) + 500 (word) 531 + wantScore: 8400, 532 532 }, 533 533 { 534 534 fileName: "src/net/http/client.go", ··· 538 538 `), 539 539 query: &query.Substring{Content: true, Pattern: "aFunc"}, 540 540 language: "Go", 541 - // 7000 (full base match) + 800 (Go function) + 500 (word) + 10 (file order) 542 - wantScore: 8310, 541 + // 7000 (full base match) + 800 (Go function) + 500 (word) 542 + wantScore: 8300, 543 543 }, 544 544 { 545 545 fileName: "src/net/http/client.go", ··· 554 554 &query.Symbol{Expr: &query.Substring{Pattern: "Get", Content: true}}, 555 555 }}, 556 556 language: "Go", 557 - // 7000 (full base match) + 800 (Go func) + 50 (Exported Go) + 500 (word) + 200 (atom) + 10 (file order) 558 - wantScore: 8560, 557 + // 7000 (full base match) + 800 (Go func) + 50 (Exported Go) + 500 (word) + 200 (atom) 558 + wantScore: 8550, 559 559 }, 560 560 } 561 561 ··· 636 636 t.Fatalf("file matches: want %d, got %d", want, got) 637 637 } 638 638 639 - if got := srs.Files[0].Score; math.Abs(got-c.wantScore) > epsilon { 639 + if got := withoutTiebreaker(srs.Files[0].Score, useBM25); math.Abs(got-c.wantScore) > epsilon { 640 640 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].ChunkMatches[0].DebugScore) 641 641 } 642 642 ··· 646 646 }) 647 647 } 648 648 649 + // helper to remove the tiebreaker from the score for easier comparison 650 + func withoutTiebreaker(fullScore float64, useBM25 bool) float64 { 651 + if useBM25 { 652 + return fullScore 653 + } 654 + return math.Trunc(fullScore / zoekt.ScoreOffset) 655 + } 656 + 649 657 
func TestDocumentRanks(t *testing.T) { 650 658 requireCTags(t) 651 659 dir := t.TempDir() ··· 672 680 }{ 673 681 { 674 682 name: "score with no document ranks", 675 - // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) 676 - wantScore: 7010.00, 683 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) 684 + wantScore: 7000.00, 677 685 }, 678 686 { 679 687 name: "score with document ranks", 680 688 documentRank: 0.8, 681 - // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 225 (file rank) + 10 (file order) 682 - wantScore: 7235.00, 689 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 225 (file rank) 690 + wantScore: 7225.00, 683 691 }, 684 692 { 685 693 name: "score with custom document ranks weight", 686 694 documentRank: 0.8, 687 695 documentRanksWeight: 1000.0, 688 - // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 25.00 (file rank) + 10 (file order) 689 - wantScore: 7035.00, 696 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 25.00 (file rank) 697 + wantScore: 7025.00, 690 698 }, 691 699 } 692 700 ··· 725 733 t.Fatalf("file matches: want %d, got %d", want, got) 726 734 } 727 735 728 - if got := srs.Files[0].Score; got != c.wantScore { 736 + if got := withoutTiebreaker(srs.Files[0].Score, false); got != c.wantScore { 729 737 t.Fatalf("score: want %f, got %f\ndebug: %s\ndebugscore: %s", c.wantScore, got, srs.Files[0].Debug, srs.Files[0].LineMatches[0].DebugScore) 730 738 } 731 739 }) ··· 758 766 { 759 767 name: "no shard rank", 760 768 // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) 761 - wantScore: 7010.00, 769 + wantScore: 7000_00000_10.00, 762 770 }, 763 771 { 764 772 name: "medium shard rank", 765 773 repoRank: 30000, 766 - // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 9.16 (repo 
rank) 767 - wantScore: 7019.16, 774 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 30000 (repo rank) + 10 (file order) 775 + wantScore: 7000_30000_10.00, 768 776 }, 769 777 { 770 778 name: "high shard rank", 771 779 repoRank: 60000, 772 - // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 10 (file order) + 18.31 (repo rank) 773 - wantScore: 7028.31, 780 + // 5500 (partial symbol at boundary) + 1000 (Java class) + 500 (word match) + 60000 (repo rank) + 10 (file order) 781 + wantScore: 7000_60000_10.00, 774 782 }, 775 783 } 776 784
+2
cmd/zoekt-sourcegraph-indexserver/index.go
··· 121 121 "public": marshalBool(o.Public), 122 122 "fork": marshalBool(o.Fork), 123 123 "archived": marshalBool(o.Archived), 124 + // Calculate repo rank based on the latest commit date. 125 + "latest_commit_date": "1", 124 126 }, 125 127 }, 126 128 IndexDir: o.IndexDir,
+7 -2
cmd/zoekt-sourcegraph-indexserver/index_test.go
··· 14 14 "time" 15 15 16 16 "github.com/sourcegraph/log/logtest" 17 - proto "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" 18 - "github.com/sourcegraph/zoekt/ctags" 19 17 "google.golang.org/grpc" 20 18 "google.golang.org/protobuf/testing/protocmp" 21 19 "google.golang.org/protobuf/types/known/timestamppb" 20 + 21 + proto "github.com/sourcegraph/zoekt/cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1" 22 + "github.com/sourcegraph/zoekt/ctags" 22 23 23 24 "github.com/google/go-cmp/cmp" 24 25 "github.com/google/go-cmp/cmp/cmpopts" ··· 493 494 "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", 494 495 "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", 495 496 "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", 497 + "git -C $TMPDIR/test%2Frepo.git config zoekt.latest_commit_date 1", 496 498 "git -C $TMPDIR/test%2Frepo.git config zoekt.name test/repo", 497 499 "git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0", 498 500 "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", ··· 515 517 "git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef", 516 518 "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", 517 519 "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", 520 + "git -C $TMPDIR/test%2Frepo.git config zoekt.latest_commit_date 1", 518 521 "git -C $TMPDIR/test%2Frepo.git config zoekt.name test/repo", 519 522 "git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0", 520 523 "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0", ··· 546 549 "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/dev feebdaed", 547 550 "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", 548 551 "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", 552 + "git -C $TMPDIR/test%2Frepo.git config zoekt.latest_commit_date 1", 549 553 "git -C $TMPDIR/test%2Frepo.git config zoekt.name test/repo", 550 554 "git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0", 551 555 "git 
-C $TMPDIR/test%2Frepo.git config zoekt.public 0", ··· 593 597 "git -C $TMPDIR/test%2Frepo.git update-ref refs/heads/release 12345678", 594 598 "git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0", 595 599 "git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0", 600 + "git -C $TMPDIR/test%2Frepo.git config zoekt.latest_commit_date 1", 596 601 "git -C $TMPDIR/test%2Frepo.git config zoekt.name test/repo", 597 602 "git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0", 598 603 "git -C $TMPDIR/test%2Frepo.git config zoekt.public 0",
+8 -3
contentprovider.go
··· 530 530 531 531 // File-only scoring signals. For now these are also bounded ~9000 to give them 532 532 // equal weight with the query-dependent signals. 533 - scoreFileRankFactor = 9000.0 534 - scoreFileOrderFactor = 10.0 535 - scoreRepoRankFactor = 20.0 533 + scoreFileRankFactor = 9000.0 536 534 537 535 // Used for ordering line and chunk matches within a file. 538 536 scoreLineOrderFactor = 1.0 537 + 538 + // Used for tiebreakers. The scores are not combined with the main score, but 539 + // are used to break ties between matches with the same score. The factors are 540 + // chosen to separate the tiebreakers from the main score and from each other. 541 + // If you make changes here, make sure to update indexData.scoreFile too. 542 + scoreRepoRankFactor = 100.0 543 + scoreFileOrderFactor = 10.0 539 544 ) 540 545 541 546 // findMaxOverlappingSection returns the index of the section in secs that
+5
internal/e2e/e2e_rank_test.go
··· 251 251 err := archive.Index(opts, build.Options{ 252 252 IndexDir: indexDir, 253 253 CTagsMustSucceed: true, 254 + RepositoryDescription: zoekt.Repository{ 255 + // Use the latest commit date to calculate the repo rank when loading the shard. 256 + // This is the same setting we use in production. 257 + RawConfig: map[string]string{"latest_commit_date": "1"}, 258 + }, 254 259 }) 255 260 if err != nil { 256 261 return fmt.Errorf("failed to index %s: %w", opts.Archive, err)
+13 -13
internal/e2e/testdata/WaitGroup.txt
··· 1 1 queryString: WaitGroup 2 2 query: case_substr:"WaitGroup" 3 - targetRank: 2 4 - 5 - github.com/golang/go/src/sync/waitgroup.go 6 - 23:type WaitGroup struct { 7 - 91:func (wg *WaitGroup) Wait() { 8 - 13:// A WaitGroup waits for a collection of goroutines to finish. 9 - hidden 13 more line matches 3 + targetRank: 1 10 4 11 5 **github.com/sourcegraph/conc/waitgroup.go** 12 6 22:type WaitGroup struct { ··· 14 8 38:func (h *WaitGroup) Wait() { 15 9 hidden 10 more line matches 16 10 11 + github.com/golang/go/src/sync/waitgroup.go 12 + 23:type WaitGroup struct { 13 + 91:func (wg *WaitGroup) Wait() { 14 + 13:// A WaitGroup waits for a collection of goroutines to finish. 15 + hidden 13 more line matches 16 + 17 17 github.com/golang/go/test/fixedbugs/issue19467.dir/mysync.go 18 18 9:type WaitGroup struct { 19 19 13:func (wg *WaitGroup) Add(x int) { ··· 24 24 16:func NewStoppableWaitGroup() *StoppableWaitGroup { 25 25 7:// A StoppableWaitGroup waits for a collection of goroutines to finish. 26 26 hidden 3 more line matches 27 + 28 + github.com/sourcegraph/conc/waitgroup_test.go 29 + 13:func ExampleWaitGroup() { 30 + 42:func TestWaitGroup(t *testing.T) { 31 + 29:func ExampleWaitGroup_WaitAndRecover() { 32 + hidden 12 more line matches 27 33 28 34 github.com/golang/go/src/sync/example_test.go 29 35 20:func ExampleWaitGroup() { 30 36 19:// using a WaitGroup to block until all the fetches are complete. 31 37 21: var wg sync.WaitGroup 32 38 hidden 1 more line matches 33 - 34 - github.com/sourcegraph/conc/waitgroup_test.go 35 - 13:func ExampleWaitGroup() { 36 - 42:func TestWaitGroup(t *testing.T) { 37 - 29:func ExampleWaitGroup_WaitAndRecover() { 38 - hidden 12 more line matches 39 39 40 40 hidden 227 more file matches
+2 -2
internal/e2e/testdata/rank_stats.txt
··· 1 1 queries: 16 2 - recall@1: 8 (50%) 2 + recall@1: 9 (56%) 3 3 recall@5: 11 (69%) 4 - mrr: 0.600787 4 + mrr: 0.632037
+23 -3
score.go
··· 21 21 "strings" 22 22 ) 23 23 24 - const maxUInt16 = 0xffff 24 + const ( 25 + maxUInt16 = 0xffff 26 + ScoreOffset = 10_000_000 27 + ) 25 28 26 29 // addScore increments the score of the FileMatch by the computed score. If 27 30 // debugScore is true, it also adds a debug string to the FileMatch. If raw is ··· 99 102 } 100 103 } 101 104 105 + // Add tiebreakers 106 + // 107 + // ScoreOffset shifts the score 7 digits to the left. 108 + fileMatch.Score = math.Trunc(fileMatch.Score) * ScoreOffset 109 + 102 110 md := d.repoMetaData[d.repos[doc]] 111 + 112 + // md.Rank lies in the range [0, 65535]. Hence, we have to allocate 5 digits for 113 + // the rank. The scoreRepoRankFactor shifts the rank score 2 digits to the left, 114 + // reserving digits 3-7 for the repo rank. 115 + addScore("repo-rank", scoreRepoRankFactor*float64(md.Rank)) 116 + 117 + // digits 1-2 and the decimals are reserved for the doc order. Doc order 118 + // (without the scaling factor) lies in the range [0, 1]. The upper bound is 119 + // achieved for matches in the first document of a shard. 103 120 addScore("doc-order", scoreFileOrderFactor*(1.0-float64(doc)/float64(len(d.boundaries)))) 104 - addScore("repo-rank", scoreRepoRankFactor*float64(md.Rank)/maxUInt16) 105 121 106 122 if opts.DebugScore { 107 - fileMatch.Debug = fmt.Sprintf("score: %.2f <- %s", fileMatch.Score, strings.TrimSuffix(fileMatch.Debug, ", ")) 123 + // To make the debug output easier to read, we split the score into the query 124 + // dependent score and the tiebreaker 125 + score := math.Trunc(fileMatch.Score / ScoreOffset) 126 + tiebreaker := fileMatch.Score - score*ScoreOffset 127 + fileMatch.Debug = fmt.Sprintf("score: %d (%.2f) <- %s", int(score), tiebreaker, strings.TrimSuffix(fileMatch.Debug, ", ")) 108 128 } 109 129 } 110 130
+7 -7
shards/shards_test.go
··· 229 229 ss := newShardedSearcher(1) 230 230 231 231 var nextShardNum int 232 - addShard := func(repo string, priority float64, docs ...zoekt.Document) { 232 + addShard := func(repo string, rank uint16, docs ...zoekt.Document) { 233 233 r := &zoekt.Repository{ID: hash(repo), Name: repo} 234 234 r.RawConfig = map[string]string{ 235 - "public": "1", 236 - "priority": strconv.FormatFloat(priority, 'f', 2, 64), 235 + "public": "1", 237 236 } 237 + r.Rank = rank 238 238 b := testIndexBuilder(t, r, docs...) 239 239 shard := searcherForTest(t, b) 240 240 ss.replace(map[string]zoekt.Searcher{ ··· 243 243 nextShardNum++ 244 244 } 245 245 246 - addShard("weekend-project", 20, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 247 - addShard("moderately-popular", 500, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 248 - addShard("weekend-project-2", 20, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 249 - addShard("super-star", 5000, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 246 + addShard("old-project", 1, zoekt.Document{Name: "f1", Content: []byte("foobar")}) 247 + addShard("recent", 2, zoekt.Document{Name: "f2", Content: []byte("foobaz")}) 248 + addShard("old-project-2", 1, zoekt.Document{Name: "f3", Content: []byte("foo bar")}) 249 + addShard("new", 3, zoekt.Document{Name: "f4", Content: []byte("foo baz")}, 250 250 zoekt.Document{Name: "f5", Content: []byte("fooooo")}) 251 251 252 252 // Run a stream search and gather the results
+4 -4
testdata/golden/TestReadSearch/ctagsrepo_v16.00000.golden
··· 29 29 } 30 30 ], 31 31 "Checksum": "n9fUYqacPXg=", 32 - "Score": 6810 32 + "Score": 68000000010 33 33 } 34 34 ], 35 35 [ ··· 59 59 } 60 60 ], 61 61 "Checksum": "n9fUYqacPXg=", 62 - "Score": 510 62 + "Score": 5000000010 63 63 } 64 64 ], 65 65 [ ··· 94 94 } 95 95 ], 96 96 "Checksum": "n9fUYqacPXg=", 97 - "Score": 8010 97 + "Score": 80000000010 98 98 } 99 99 ], 100 100 [ ··· 129 129 } 130 130 ], 131 131 "Checksum": "n9fUYqacPXg=", 132 - "Score": 6060 132 + "Score": 60500000010 133 133 } 134 134 ] 135 135 ]
+4 -4
testdata/golden/TestReadSearch/ctagsrepo_v17.00000.golden
··· 29 29 } 30 30 ], 31 31 "Checksum": "n9fUYqacPXg=", 32 - "Score": 6810 32 + "Score": 68000000010 33 33 } 34 34 ], 35 35 [ ··· 59 59 } 60 60 ], 61 61 "Checksum": "n9fUYqacPXg=", 62 - "Score": 510 62 + "Score": 5000000010 63 63 } 64 64 ], 65 65 [ ··· 94 94 } 95 95 ], 96 96 "Checksum": "n9fUYqacPXg=", 97 - "Score": 8010 97 + "Score": 80000000010 98 98 } 99 99 ], 100 100 [ ··· 129 129 } 130 130 ], 131 131 "Checksum": "n9fUYqacPXg=", 132 - "Score": 6060 132 + "Score": 60500000010 133 133 } 134 134 ] 135 135 ]
+2 -2
testdata/golden/TestReadSearch/repo17_v17.00000.golden
··· 29 29 } 30 30 ], 31 31 "Checksum": "n9fUYqacPXg=", 32 - "Score": 510 32 + "Score": 5000000010 33 33 } 34 34 ], 35 35 [ ··· 59 59 } 60 60 ], 61 61 "Checksum": "n9fUYqacPXg=", 62 - "Score": 510 62 + "Score": 5000000010 63 63 } 64 64 ], 65 65 null,
+2 -2
testdata/golden/TestReadSearch/repo2_v16.00000.golden
··· 29 29 } 30 30 ], 31 31 "Checksum": "Ju1TnQKZ6mE=", 32 - "Score": 6810 32 + "Score": 68000000010 33 33 } 34 34 ], 35 35 [ ··· 59 59 } 60 60 ], 61 61 "Checksum": "Ju1TnQKZ6mE=", 62 - "Score": 510 62 + "Score": 5000000010 63 63 } 64 64 ], 65 65 null,
+2 -2
testdata/golden/TestReadSearch/repo_v16.00000.golden
··· 29 29 } 30 30 ], 31 31 "Checksum": "n9fUYqacPXg=", 32 - "Score": 510 32 + "Score": 5000000010 33 33 } 34 34 ], 35 35 [ ··· 59 59 } 60 60 ], 61 61 "Checksum": "n9fUYqacPXg=", 62 - "Score": 510 62 + "Score": 5000000010 63 63 } 64 64 ], 65 65 null,