fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

web: escape + as %2B in file path templates (#843)

When constructing a shard we specify templates for constructing a URL.
Finally those URLs end up going via html/template which has pretty
strict escaping rules. This commit makes two changes:

URL construction via text/template. We still get the safety benefits
later on when finally rendering the output, but given we are
constructing URLs it makes more sense to use text/template.

Special escaping of + in URLs. I couldn't convince html/template to not
break URls containing + in it. So instead we use + escaped to %2B. I
tested gerrit, github and sourcegraph with %2B in filenames and they all
worked.

To do the above I introduced a template function called URLJoinPath
which is a wrapper around url.JoinPath with the additional behaviour
around + escaping.

Test Plan: Did lots of updates in tests. See diff.

+191 -31
+30 -14
gitindex/index.go
··· 90 90 u.User = nil 91 91 } 92 92 93 + // helper to generate u.JoinPath as a template 94 + varVersion := ".Version" 95 + varPath := ".Path" 96 + urlJoinPath := func(elem ...string) string { 97 + elem = append([]string{u.String()}, elem...) 98 + var parts []string 99 + for _, e := range elem { 100 + if e == varVersion || e == varPath { 101 + parts = append(parts, e) 102 + } else { 103 + parts = append(parts, strconv.Quote(e)) 104 + } 105 + } 106 + return fmt.Sprintf("{{URLJoinPath %s}}", strings.Join(parts, " ")) 107 + } 108 + 93 109 repo.URL = u.String() 94 110 switch typ { 95 111 case "gitiles": 96 112 // eg. https://gerrit.googlesource.com/gitiles/+/master/tools/run_dev.sh#20 97 - repo.CommitURLTemplate = u.String() + "/+/{{.Version}}" 98 - repo.FileURLTemplate = u.String() + "/+/{{.Version}}/{{.Path}}" 113 + repo.CommitURLTemplate = urlJoinPath("+", varVersion) 114 + repo.FileURLTemplate = urlJoinPath("+", varVersion, varPath) 99 115 repo.LineFragmentTemplate = "#{{.LineNumber}}" 100 116 case "github": 101 117 // eg. https://github.com/hanwen/go-fuse/blob/notify/genversion.sh#L10 102 - repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}" 103 - repo.FileURLTemplate = u.String() + "/blob/{{.Version}}/{{.Path}}" 118 + repo.CommitURLTemplate = urlJoinPath("commit", varVersion) 119 + repo.FileURLTemplate = urlJoinPath("blob", varVersion, varPath) 104 120 repo.LineFragmentTemplate = "#L{{.LineNumber}}" 105 121 case "cgit": 106 122 // http://git.savannah.gnu.org/cgit/lilypond.git/tree/elisp/lilypond-mode.el?h=dev/philh&id=b2ca0fefe3018477aaca23b6f672c7199ba5238e#n100 107 - repo.CommitURLTemplate = u.String() + "/commit/?id={{.Version}}" 108 - repo.FileURLTemplate = u.String() + "/tree/{{.Path}}/?id={{.Version}}" 123 + repo.CommitURLTemplate = urlJoinPath("commit") + "/?id={{.Version}}" 124 + repo.FileURLTemplate = urlJoinPath("tree", varPath) + "/?id={{.Version}}" 109 125 repo.LineFragmentTemplate = "#n{{.LineNumber}}" 110 126 case "gitweb": 111 127 // https://gerrit.libreoffice.org/gitweb?p=online.git;a=blob;f=Makefile.am;h=cfcfd7c36fbae10e269653dc57a9b68c92d4c10b;hb=848145503bf7b98ce4a4aa0a858a0d71dd0dbb26#l10 ··· 115 131 case "source.bazel.build": 116 132 // https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9 117 133 // https://source.bazel.build/bazel/+/57bc201346e61c62a921c1cbf32ad24f185c10c9:tools/cpp/BUILD.empty;l=10 118 - repo.CommitURLTemplate = u.String() + "/+/{{.Version}}" 119 - repo.FileURLTemplate = u.String() + "/+/{{.Version}}:{{.Path}}" 134 + repo.CommitURLTemplate = u.String() + "/%2B/{{.Version}}" 135 + repo.FileURLTemplate = u.String() + "/%2B/{{.Version}}:{{.Path}}" 120 136 repo.LineFragmentTemplate = ";l={{.LineNumber}}" 121 137 case "bitbucket-server": 122 138 // https://<bitbucketserver-host>/projects/<project>/repos/<repo>/commits/5be7ca73b898bf17a08e607918accfdeafe1e0bc 123 139 // https://<bitbucketserver-host>/projects/<project>/repos/<repo>/browse/<file>?at=5be7ca73b898bf17a08e607918accfdeafe1e0bc 124 - repo.CommitURLTemplate = u.String() + "/commits/{{.Version}}" 125 - repo.FileURLTemplate = u.String() + "/{{.Path}}?at={{.Version}}" 140 + repo.CommitURLTemplate = urlJoinPath("commits", varVersion) 141 + repo.FileURLTemplate = urlJoinPath(varPath) + "?at={{.Version}}" 126 142 repo.LineFragmentTemplate = "#{{.LineNumber}}" 127 143 case "gitlab": 128 144 // https://gitlab.com/gitlab-org/omnibus-gitlab/-/commit/b152c864303dae0e55377a1e2c53c9592380ffed 129 145 // https://gitlab.com/gitlab-org/omnibus-gitlab/-/blob/aad04155b3f6fc50ede88aedaee7fc624d481149/files/gitlab-config-template/gitlab.rb.template 130 - repo.CommitURLTemplate = u.String() + "/-/commit/{{.Version}}" 131 - repo.FileURLTemplate = u.String() + "/-/blob/{{.Version}}/{{.Path}}" 146 + repo.CommitURLTemplate = urlJoinPath("-/commit", varVersion) 147 + repo.FileURLTemplate = urlJoinPath("-/blob", varVersion, varPath) 132 148 repo.LineFragmentTemplate = "#L{{.LineNumber}}" 133 149 case "gitea": 134 - repo.CommitURLTemplate = u.String() + "/commit/{{.Version}}" 150 + repo.CommitURLTemplate = urlJoinPath("commit", varVersion) 135 151 // NOTE The `display=source` query parameter is required to disable file rendering. 136 152 // Since line numbers are disabled in rendered files, you wouldn't be able to jump to 137 153 // a line without `display=source`. This is supported since gitea 1.17.0. 138 154 // When /src/{{.Version}} is used it will redirect to /src/commit/{{.Version}}, 139 155 // but the query parameters are obmitted. 140 - repo.FileURLTemplate = u.String() + "/src/commit/{{.Version}}/{{.Path}}?display=source" 156 + repo.FileURLTemplate = urlJoinPath("src/commit", varVersion, varPath) + "?display=source" 141 157 repo.LineFragmentTemplate = "#L{{.LineNumber}}" 142 158 default: 143 159 return fmt.Errorf("URL scheme type %q unknown", typ)
+93 -2
gitindex/index_test.go
··· 18 18 "bytes" 19 19 "context" 20 20 "fmt" 21 + "net/url" 21 22 "os" 22 23 "os/exec" 23 24 "path/filepath" 24 25 "runtime" 25 26 "sort" 27 + "strings" 26 28 "testing" 27 29 28 30 "github.com/go-git/go-git/v5" ··· 770 772 } 771 773 } 772 774 773 - func TestSetTemplate(t *testing.T) { 775 + func TestSetTemplates_e2e(t *testing.T) { 774 776 repositoryDir := t.TempDir() 775 777 776 778 // setup: initialize the repository and all of its branches ··· 781 783 t.Fatalf("setTemplatesFromConfig: %v", err) 782 784 } 783 785 784 - if got, want := desc.FileURLTemplate, "https://github.com/sourcegraph/zoekt/blob/{{.Version}}/{{.Path}}"; got != want { 786 + if got, want := desc.FileURLTemplate, `{{URLJoinPath "https://github.com/sourcegraph/zoekt" "blob" .Version .Path}}`; got != want { 785 787 t.Errorf("got %q, want %q", got, want) 788 + } 789 + } 790 + 791 + func TestSetTemplates(t *testing.T) { 792 + base := "https://example.com/repo/name" 793 + version := "VERSION" 794 + path := "dir/name.txt" 795 + lineNumber := 10 796 + cases := []struct { 797 + typ string 798 + commit string 799 + file string 800 + line string 801 + }{{ 802 + typ: "gitiles", 803 + commit: "https://example.com/repo/name/%2B/VERSION", 804 + file: "https://example.com/repo/name/%2B/VERSION/dir/name.txt", 805 + line: "#10", 806 + }, { 807 + typ: "github", 808 + commit: "https://example.com/repo/name/commit/VERSION", 809 + file: "https://example.com/repo/name/blob/VERSION/dir/name.txt", 810 + line: "#L10", 811 + }, { 812 + typ: "cgit", 813 + commit: "https://example.com/repo/name/commit/?id=VERSION", 814 + file: "https://example.com/repo/name/tree/dir/name.txt/?id=VERSION", 815 + line: "#n10", 816 + }, { 817 + typ: "gitweb", 818 + commit: "https://example.com/repo/name;a=commit;h=VERSION", 819 + file: "https://example.com/repo/name;a=blob;f=dir/name.txt;hb=VERSION", 820 + line: "#l10", 821 + }, { 822 + typ: "source.bazel.build", 823 + commit: "https://example.com/repo/name/%2B/VERSION", 824 + file: "https://example.com/repo/name/%2B/VERSION:dir/name.txt", 825 + line: ";l=10", 826 + }, { 827 + typ: "bitbucket-server", 828 + commit: "https://example.com/repo/name/commits/VERSION", 829 + file: "https://example.com/repo/name/dir/name.txt?at=VERSION", 830 + line: "#10", 831 + }, { 832 + typ: "gitlab", 833 + commit: "https://example.com/repo/name/-/commit/VERSION", 834 + file: "https://example.com/repo/name/-/blob/VERSION/dir/name.txt", 835 + line: "#L10", 836 + }, { 837 + typ: "gitea", 838 + commit: "https://example.com/repo/name/commit/VERSION", 839 + file: "https://example.com/repo/name/src/commit/VERSION/dir/name.txt?display=source", 840 + line: "#L10", 841 + }} 842 + 843 + for _, tc := range cases { 844 + t.Run(tc.typ, func(t *testing.T) { 845 + assertOutput := func(templateText string, want string) { 846 + t.Helper() 847 + 848 + tt, err := zoekt.ParseTemplate(templateText) 849 + if err != nil { 850 + t.Fatal(err) 851 + } 852 + 853 + var sb strings.Builder 854 + err = tt.Execute(&sb, map[string]any{ 855 + "Version": version, 856 + "Path": path, 857 + "LineNumber": lineNumber, 858 + }) 859 + if err != nil { 860 + t.Fatal(err) 861 + } 862 + if got := sb.String(); got != want { 863 + t.Fatalf("want: %q\ngot: %q", want, got) 864 + } 865 + } 866 + 867 + var repo zoekt.Repository 868 + u, _ := url.Parse(base) 869 + err := setTemplates(&repo, u, tc.typ) 870 + if err != nil { 871 + t.Fatal(err) 872 + } 873 + assertOutput(repo.CommitURLTemplate, tc.commit) 874 + assertOutput(repo.FileURLTemplate, tc.file) 875 + assertOutput(repo.LineFragmentTemplate, tc.line) 876 + }) 786 877 } 787 878 } 788 879
+34 -2
indexbuilder.go
··· 19 19 "encoding/binary" 20 20 "fmt" 21 21 "hash/crc64" 22 - "html/template" 23 22 "log" 23 + "net/url" 24 24 "os" 25 25 "path/filepath" 26 26 "sort" 27 + "strings" 28 + "text/template" 27 29 "time" 28 30 "unicode/utf8" 29 31 ··· 216 218 217 219 func (d *Repository) verify() error { 218 220 for _, t := range []string{d.FileURLTemplate, d.LineFragmentTemplate, d.CommitURLTemplate} { 219 - if _, err := template.New("").Parse(t); err != nil { 221 + if _, err := ParseTemplate(t); err != nil { 220 222 return err 221 223 } 222 224 } 223 225 return nil 226 + } 227 + 228 + func urlJoinPath(base string, elem ...string) string { 229 + // golangs html/template always escapes "+" appearing in an HTML attribute 230 + // [1]. We may even want to treat more characters, differently but this 231 + // atleast makes it possible to visit URLs like [2]. 232 + // 233 + // We only do this to elem since base will normally be a hardcoded string. 234 + // 235 + // [1]: https://sourcegraph.com/github.com/golang/go@go1.23.2/-/blob/src/html/template/html.go?L71-80 236 + // [2]: https://github.com/apple/swift-system/blob/main/Sources/System/Util+StringArray.swift 237 + elem = append([]string{}, elem...) // copy to mutate 238 + for i := range elem { 239 + elem[i] = strings.ReplaceAll(elem[i], "+", "%2B") 240 + } 241 + u, err := url.JoinPath(base, elem...) 242 + if err != nil { 243 + return "#!error: " + err.Error() 244 + } 245 + return u 246 + } 247 + 248 + // ParseTemplate will parse the templates for FileURLTemplate, 249 + // LineFragmentTemplate and CommitURLTemplate. 250 + // 251 + // It makes available the extra function UrlJoinPath. 252 + func ParseTemplate(text string) (*template.Template, error) { 253 + return template.New("").Funcs(template.FuncMap{ 254 + "URLJoinPath": urlJoinPath, 255 + }).Parse(text) 224 256 } 225 257 226 258 // ContentSize returns the number of content bytes so far ingested.
+7 -6
web/e2e_test.go
··· 86 86 b, err := zoekt.NewIndexBuilder(&zoekt.Repository{ 87 87 Name: "name", 88 88 URL: "repo-url", 89 - CommitURLTemplate: "{{.Version}}", 90 - FileURLTemplate: "file-url", 91 - LineFragmentTemplate: "#line", 89 + CommitURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/commit/" .Version}}`, 90 + FileURLTemplate: `{{ URLJoinPath "https://github.com/org/repo/blob/" .Version .Path}}`, 91 + LineFragmentTemplate: "#L{{.LineNumber}}", 92 92 Branches: []zoekt.RepositoryBranch{{Name: "master", Version: "1234"}}, 93 93 }) 94 94 if err != nil { 95 95 t.Fatalf("NewIndexBuilder: %v", err) 96 96 } 97 97 if err := b.Add(zoekt.Document{ 98 - Name: "f2", 98 + // use a name which requires correct escaping. https://github.com/sourcegraph/zoekt/issues/807 99 + Name: "foo/bar+baz", 99 100 Content: []byte("to carry water in the no later bla"), 100 101 // --------------0123456789012345678901234567890123 101 102 // --------------0 1 2 3 ··· 123 124 for req, needles := range map[string][]string{ 124 125 "/": {"from 1 repositories"}, 125 126 "/search?q=water": { 126 - "href=\"file-url#line", 127 + `href="https://github.com/org/repo/blob/1234/foo/bar%2Bbaz"`, 127 128 "carry <b>water</b>", 128 129 }, 129 130 "/search?q=r:": { ··· 131 132 "Found 1 repositories", 132 133 nowStr, 133 134 "repo-url\">name", 134 - "1 files (36B)", 135 + "1 files (45B)", 135 136 }, 136 137 "/search?q=magic": { 137 138 `value=magic`,
+24 -4
web/server.go
··· 28 28 "strconv" 29 29 "strings" 30 30 "sync" 31 + texttemplate "text/template" 31 32 "time" 32 33 33 34 "github.com/grafana/regexp" ··· 116 117 117 118 startTime time.Time 118 119 119 - templateMu sync.Mutex 120 - templateCache map[string]*template.Template 120 + templateMu sync.Mutex 121 + templateCache map[string]*template.Template 122 + textTemplateCache map[string]*texttemplate.Template 121 123 122 124 lastStatsMu sync.Mutex 123 125 lastStats *zoekt.RepoStats ··· 134 136 135 137 t, err := template.New("cache").Parse(str) 136 138 if err != nil { 137 - log.Printf("template parse error: %v", err) 139 + log.Printf("html template parse error: %v", err) 138 140 t = template.Must(template.New("empty").Parse("")) 139 141 } 140 142 s.templateCache[str] = t 141 143 return t 142 144 } 143 145 146 + func (s *Server) getTextTemplate(str string) *texttemplate.Template { 147 + s.templateMu.Lock() 148 + defer s.templateMu.Unlock() 149 + t := s.textTemplateCache[str] 150 + if t != nil { 151 + return t 152 + } 153 + 154 + t, err := zoekt.ParseTemplate(str) 155 + if err != nil { 156 + log.Printf("text template parse error: %v", err) 157 + t = texttemplate.Must(texttemplate.New("empty").Parse("")) 158 + } 159 + s.textTemplateCache[str] = t 160 + return t 161 + } 162 + 144 163 func NewMux(s *Server) (*http.ServeMux, error) { 145 164 s.print = s.Top.Lookup("print") 146 165 if s.print == nil { ··· 162 181 } 163 182 164 183 s.templateCache = map[string]*template.Template{} 184 + s.textTemplateCache = map[string]*texttemplate.Template{} 165 185 s.startTime = time.Now() 166 186 167 187 mux := http.NewServeMux() ··· 510 530 } 511 531 512 532 for _, r := range repos.Repos { 513 - t := s.getTemplate(r.Repository.CommitURLTemplate) 533 + t := s.getTextTemplate(r.Repository.CommitURLTemplate) 514 534 515 535 repo := Repository{ 516 536 Name: r.Repository.Name,
+3 -3
web/snippets.go
··· 16 16 17 17 import ( 18 18 "bytes" 19 - "html/template" 20 19 "log" 21 20 "net/url" 22 21 "strconv" 23 22 "strings" 23 + "text/template" 24 24 25 25 "github.com/sourcegraph/zoekt" 26 26 ) ··· 33 33 if !localPrint { 34 34 for repo, str := range result.RepoURLs { 35 35 if str != "" { 36 - templateMap[repo] = s.getTemplate(str) 36 + templateMap[repo] = s.getTextTemplate(str) 37 37 } 38 38 } 39 39 for repo, str := range result.LineFragments { 40 40 if str != "" { 41 - fragmentMap[repo] = s.getTemplate(str) 41 + fragmentMap[repo] = s.getTextTemplate(str) 42 42 } 43 43 } 44 44 }