fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

archive: use ModTime as proxy for LatestCommitDate (#836)

This is motivated by #832

We use the archive index in our e2e tests. To test our latest ranking improvements, the archive indexer needs to set the latest commit date.

Test plan:
- new unit test
- I checked that the tar files downloaded from GitHub have the correct modification time.

+93 -10
+6 -2
internal/archive/archive.go
··· 11 11 "net/url" 12 12 "os" 13 13 "strings" 14 + "time" 14 15 ) 15 16 16 17 type Archive interface { ··· 20 21 21 22 type File struct { 22 23 io.ReadCloser 23 - Name string 24 - Size int64 24 + Name string 25 + Size int64 26 + ModTime time.Time 25 27 } 26 28 27 29 type tarArchive struct { ··· 45 47 ReadCloser: io.NopCloser(a.tr), 46 48 Name: hdr.Name, 47 49 Size: hdr.Size, 50 + ModTime: hdr.ModTime, 48 51 }, nil 49 52 } 50 53 } ··· 71 74 ReadCloser: r, 72 75 Name: f.Name, 73 76 Size: int64(f.UncompressedSize64), 77 + ModTime: f.Modified, 74 78 }, nil 75 79 } 76 80
+74 -4
internal/archive/e2e_test.go
··· 11 11 "io" 12 12 "log" 13 13 "os" 14 + "path/filepath" 14 15 "strings" 15 16 "testing" 17 + "time" 18 + 19 + "github.com/stretchr/testify/require" 16 20 17 21 "github.com/sourcegraph/zoekt" 18 22 "github.com/sourcegraph/zoekt/build" ··· 28 32 os.Exit(m.Run()) 29 33 } 30 34 35 + var modTime = time.Date(2024, 9, 26, 0, 0, 0, 0, time.UTC) 36 + 31 37 func writeArchive(w io.Writer, format string, files map[string]string) (err error) { 32 38 if format == "zip" { 33 39 zw := zip.NewWriter(w) 34 40 for name, body := range files { 35 - f, err := zw.Create(name) 41 + header := &zip.FileHeader{ 42 + Name: name, 43 + Method: zip.Deflate, 44 + Modified: modTime, 45 + } 46 + f, err := zw.CreateHeader(header) 36 47 if err != nil { 37 48 return err 38 49 } ··· 63 74 64 75 for name, body := range files { 65 76 hdr := &tar.Header{ 66 - Name: name, 67 - Mode: 0o600, 68 - Size: int64(len(body)), 77 + Name: name, 78 + Mode: 0o600, 79 + Size: int64(len(body)), 80 + ModTime: modTime, 69 81 } 70 82 if err := tw.WriteHeader(hdr); err != nil { 71 83 return err ··· 189 201 } 190 202 } 191 203 } 204 + 205 + // TestLatestCommitDate tests that the latest commit date is set correctly if 206 + // the mod time of the files has been set during the archive creation. 
207 + func TestLatestCommitDate(t *testing.T) { 208 + for _, format := range []string{"tar", "tgz", "zip"} { 209 + t.Run(format, func(t *testing.T) { 210 + testLatestCommitDate(t, format) 211 + }) 212 + } 213 + } 214 + 215 + func testLatestCommitDate(t *testing.T, format string) { 216 + // Create an archive 217 + archive, err := os.CreateTemp("", "TestLatestCommitDate") 218 + require.NoError(t, err) 219 + defer os.Remove(archive.Name()) 220 + 221 + fileSize := 10 222 + files := map[string]string{} 223 + for i := 0; i < 4; i++ { 224 + s := fmt.Sprintf("%d", i) 225 + files["F"+s] = strings.Repeat("a", fileSize) 226 + files["!F"+s] = strings.Repeat("a", fileSize) 227 + } 228 + 229 + err = writeArchive(archive, format, files) 230 + if err != nil { 231 + t.Fatalf("unable to create archive %v", err) 232 + } 233 + archive.Close() 234 + 235 + // Index 236 + indexDir := t.TempDir() 237 + bopts := build.Options{ 238 + IndexDir: indexDir, 239 + } 240 + opts := Options{ 241 + Archive: archive.Name(), 242 + Name: "repo", 243 + Branch: "master", 244 + Commit: "cccccccccccccccccccccccccccccccccccccccc", 245 + } 246 + 247 + err = Index(opts, bopts) 248 + require.NoError(t, err) 249 + 250 + // Read the metadata of the index we just created and check the latest commit date. 251 + f, err := os.Open(indexDir) 252 + require.NoError(t, err) 253 + 254 + indexFiles, err := f.Readdirnames(1) 255 + require.Len(t, indexFiles, 1) 256 + 257 + repos, _, err := zoekt.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) 258 + require.NoError(t, err) 259 + require.Len(t, repos, 1) 260 + require.True(t, repos[0].LatestCommitDate.Equal(modTime)) 261 + }
+13 -4
internal/archive/index.go
··· 7 7 "io" 8 8 "net/url" 9 9 "strings" 10 + "sync" 10 11 11 12 "github.com/sourcegraph/zoekt" 12 13 "github.com/sourcegraph/zoekt/build" ··· 113 114 defer a.Close() 114 115 115 116 bopts.RepositoryDescription.Source = opts.Archive 116 - builder, err := build.NewBuilder(bopts) 117 - if err != nil { 118 - return err 119 - } 117 + var builder *build.Builder 120 118 119 + once := sync.Once{} 120 + var onceErr error 121 121 add := func(f *File) error { 122 122 defer f.Close() 123 + 124 + once.Do(func() { 125 + // We use the ModTime of the first file as a proxy for the latest commit date. 126 + bopts.RepositoryDescription.LatestCommitDate = f.ModTime 127 + builder, onceErr = build.NewBuilder(bopts) 128 + }) 129 + if onceErr != nil { 130 + return onceErr 131 + } 123 132 124 133 contents, err := io.ReadAll(f) 125 134 if err != nil {