// Fork of https://github.com/sourcegraph/zoekt
1package archive
2
3import (
4 "archive/tar"
5 "archive/zip"
6 "bytes"
7 "compress/gzip"
8 "fmt"
9 "io"
10 "net/http"
11 "net/url"
12 "os"
13 "strings"
14 "time"
15)
16
17type Archive interface {
18 Next() (*File, error)
19 Close() error
20}
21
// File is a single file entry read from an archive.
type File struct {
	// ReadCloser provides the contents of the file.
	io.ReadCloser

	// Name is the name of the entry as recorded in the archive.
	Name string
	// Size is the size of the file contents in bytes, as recorded in the
	// archive.
	Size int64
	// ModTime is the modification time recorded in the archive.
	ModTime time.Time
}
28
// tarArchive implements Archive on top of a tar stream.
type tarArchive struct {
	// Closer closes the source the tar stream is read from.
	io.Closer

	// tr decodes the tar stream.
	tr *tar.Reader
}
33
34func (a *tarArchive) Next() (*File, error) {
35 for {
36 hdr, err := a.tr.Next()
37 if err != nil {
38 return nil, err
39 }
40
41 // We only care about files
42 if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA {
43 continue
44 }
45
46 return &File{
47 ReadCloser: io.NopCloser(a.tr),
48 Name: hdr.Name,
49 Size: hdr.Size,
50 ModTime: hdr.ModTime,
51 }, nil
52 }
53}
54
// zipArchive implements Archive for zip files.
type zipArchive struct {
	// Closer closes the source the zip data is read from.
	io.Closer

	// files holds the entries that Next has not returned yet.
	files []*zip.File
}
59
60func (a *zipArchive) Next() (*File, error) {
61 if len(a.files) == 0 {
62 return nil, io.EOF
63 }
64
65 f := a.files[0]
66 a.files = a.files[1:]
67
68 r, err := f.Open()
69 if err != nil {
70 return nil, err
71 }
72
73 return &File{
74 ReadCloser: r,
75 Name: f.Name,
76 Size: int64(f.UncompressedSize64),
77 ModTime: f.Modified,
78 }, nil
79}
80
81func newZipArchive(r io.Reader, closer io.Closer) (*zipArchive, error) {
82 f, ok := r.(interface {
83 io.ReaderAt
84 Stat() (os.FileInfo, error)
85 })
86 if !ok {
87 return nil, fmt.Errorf("streaming zip files not supported")
88 }
89
90 fi, err := f.Stat()
91 if err != nil {
92 return nil, err
93 }
94
95 zr, err := zip.NewReader(f, fi.Size())
96 if err != nil {
97 return nil, err
98 }
99
100 // Filter out non files
101 files := zr.File[:0]
102 for _, f := range zr.File {
103 if f.Mode().IsRegular() {
104 files = append(files, f)
105 }
106 }
107
108 return &zipArchive{
109 Closer: closer,
110 files: files,
111 }, nil
112}
113
// detectContentType sniffs the MIME type of r from its first 512 bytes (fewer
// if r is shorter) using http.DetectContentType.
//
// It returns the detected type together with a reader that yields the complete
// contents of r, including the bytes consumed for sniffing:
//   - if r is an io.Seeker and rewinding succeeds, r itself is returned, so
//     callers can still type-assert on the original value;
//   - otherwise the sniffed bytes are stitched back in front of the rest of r.
//
// A read error is returned as is. Because io.ReadFull reports io.EOF when no
// bytes at all could be read, an empty r yields io.EOF.
func detectContentType(r io.Reader) (string, io.Reader, error) {
	var buf [512]byte
	n, err := io.ReadFull(r, buf[:])
	if err != nil && err != io.ErrUnexpectedEOF {
		return "", nil, err
	}

	sniffed := buf[:n]
	ct := http.DetectContentType(sniffed)

	// If we are a seeker, we can just undo our read.
	if s, ok := r.(io.Seeker); ok {
		if _, err := s.Seek(int64(-n), io.SeekCurrent); err == nil {
			return ct, r, nil
		}
		// Some readers implement io.Seeker without being able to seek (for
		// example an *os.File opened on a pipe or FIFO). We still hold the
		// sniffed bytes, so fall through instead of failing.
	}

	// Otherwise return a new reader which merges in the read bytes.
	return ct, io.MultiReader(bytes.NewReader(sniffed), r), nil
}
132
// OpenReader returns a reader for the archive at the URL u.
//
// u is interpreted as follows:
//   - a value starting with "http://" or "https://" is fetched with http.Get.
//     A status outside the 2xx range is reported as a *url.Error carrying the
//     status line and up to 1024 bytes of the response body;
//   - "-" reads from standard input; closing the returned reader is a no-op;
//   - anything else is opened as a local file path with os.Open.
//
// The caller must close the returned reader. On error the returned reader is
// always nil.
func OpenReader(u string) (io.ReadCloser, error) {
	switch {
	case strings.HasPrefix(u, "https://"), strings.HasPrefix(u, "http://"):
		resp, err := http.Get(u)
		if err != nil {
			return nil, err
		}
		if resp.StatusCode >= 200 && resp.StatusCode < 300 {
			return resp.Body, nil
		}

		// Surface a bounded prefix of the body; servers often explain the
		// failure there.
		b, err := io.ReadAll(io.LimitReader(resp.Body, 1024))
		_ = resp.Body.Close() // best effort: the request has already failed
		if err != nil {
			return nil, err
		}
		return nil, &url.Error{
			Op:  "Get",
			URL: u,
			Err: fmt.Errorf("%s: %s", resp.Status, string(b)),
		}

	case u == "-":
		return io.NopCloser(os.Stdin), nil
	}

	f, err := os.Open(u)
	if err != nil {
		// Return an untyped nil: passing os.Open's nil *os.File through would
		// produce a non-nil io.ReadCloser wrapping a nil pointer.
		return nil, err
	}
	return f, nil
}
159
160// openArchive opens the tar at the URL or filepath u. Also supported is tgz
161// files over http.
162func openArchive(u string) (ar Archive, err error) {
163 readCloser, err := OpenReader(u)
164 if err != nil {
165 return nil, err
166 }
167 defer func() {
168 if err != nil {
169 _ = readCloser.Close()
170 }
171 }()
172
173 ct, r, err := detectContentType(readCloser)
174 if err != nil {
175 return nil, err
176 }
177 switch ct {
178 case "application/x-gzip":
179 r, err = gzip.NewReader(r)
180 if err != nil {
181 return nil, err
182 }
183
184 case "application/zip":
185 return newZipArchive(r, readCloser)
186 }
187
188 return &tarArchive{
189 Closer: readCloser,
190 tr: tar.NewReader(r),
191 }, nil
192}