Select the types of activity you want to include in your feed.
Rename UseKeywordScoring to mention BM25 (#778)
Calling this option `UseKeywordScoring` is confusing, since it is not used for the
`keyword` pattern type in Sourcegraph. This commit renames it to `UseBM25Scoring`
so that the name describes the BM25-based scoring it enables.
···946946 // will be used. This option is temporary and is only exposed for testing/tuning purposes.
947947 DocumentRanksWeight float64
948948949949- // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
950950- // Currently, this treats each match in a file as a term and computes an approximation to BM25.
951951- // When enabled, all other scoring signals are ignored, including document ranks.
952952- UseKeywordScoring bool
949949+ // EXPERIMENTAL. If true, use text-search style scoring instead of the default scoring formula.
950950+ // The scoring algorithm treats each match in a file as a term and computes an approximation to
951951+ // BM25. When enabled, all other scoring signals are ignored, including document ranks.
952952+ UseBM25Scoring bool
953953954954 // Trace turns on opentracing for this request if true and if the Jaeger address was provided as
955955 // a command-line flag
···10151015 addBool("Whole", s.Whole)
10161016 addBool("ChunkMatches", s.ChunkMatches)
10171017 addBool("UseDocumentRanks", s.UseDocumentRanks)
10181018- addBool("UseKeywordScoring", s.UseKeywordScoring)
10181018+ addBool("UseBM25Scoring", s.UseBM25Scoring)
10191019 addBool("Trace", s.Trace)
10201020 addBool("DebugScore", s.DebugScore)
10211021
···107107 // If set, the search results will contain debug information for scoring.
108108 bool debug_score = 14;
109109110110- // EXPERIMENTAL. If true, use keyword-style scoring instead of the default scoring formula.
110110+ // EXPERIMENTAL. If true, use text search scoring instead of the default scoring formula.
111111 // Currently, this treats each match in a file as a term and computes an approximation to BM25.
112112 // When enabled, all other scoring signals are ignored, including document ranks.
113113- bool use_keyword_scoring = 15;
113113+ bool use_bm25_scoring = 15;
114114}
115115116116message ListRequest {
+4-4
score.go
···3939 m.Score += computed
4040}
41414242-func (m *FileMatch) addKeywordScore(score float64, sumTf float64, L float64, debugScore bool) {
4242+func (m *FileMatch) addBM25Score(score float64, sumTf float64, L float64, debugScore bool) {
4343 if debugScore {
4444- m.Debug += fmt.Sprintf("keyword-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L)
4444+ m.Debug += fmt.Sprintf("bm25-score:%.2f (sum-tf: %.2f, length-ratio: %.2f)", score, sumTf, L)
4545 }
4646 m.Score += score
4747}
···116116}
117117118118// scoreFileUsingBM25 computes a score for the file match using an approximation to BM25, the most common scoring
119119-// algorithm for keyword search: https://en.wikipedia.org/wiki/Okapi_BM25. It implements all parts of the formula
119119+// algorithm for text search: https://en.wikipedia.org/wiki/Okapi_BM25. It implements all parts of the formula
120120// except inverse document frequency (idf), since we don't have access to global term frequency statistics.
121121//
122122// Filename matches count twice as much as content matches. This mimics a common text search strategy where you
···160160 score += ((k + 1.0) * tf) / (k*(1.0-b+b*L) + tf)
161161 }
162162163163- fileMatch.addKeywordScore(score, sumTf, L, opts.DebugScore)
163163+ fileMatch.addBM25Score(score, sumTf, L, opts.DebugScore)
164164}