// Fork of https://github.com/sourcegraph/zoekt
1package archive
2
3import (
4 "archive/tar"
5 "archive/zip"
6 "bytes"
7 "compress/gzip"
8 "fmt"
9 "io"
10 "net/http"
11 "net/url"
12 "os"
13 "strings"
14)
15
16type Archive interface {
17 Next() (*File, error)
18 Close() error
19}
20
// File is a single regular file yielded by an Archive.
type File struct {
	// ReadCloser streams the file's contents.
	io.ReadCloser

	// Name is the entry's name as recorded in the archive.
	Name string

	// Size is the uncompressed size of the contents in bytes.
	Size int64
}
26
// tarArchive is an Archive backed by a tar stream.
type tarArchive struct {
	// Closer closes the underlying source the tar stream is read from.
	io.Closer

	// tr decodes the tar stream; entries are consumed sequentially.
	tr *tar.Reader
}
31
32func (a *tarArchive) Next() (*File, error) {
33 for {
34 hdr, err := a.tr.Next()
35 if err != nil {
36 return nil, err
37 }
38
39 // We only care about files
40 if hdr.Typeflag != tar.TypeReg && hdr.Typeflag != tar.TypeRegA {
41 continue
42 }
43
44 return &File{
45 ReadCloser: io.NopCloser(a.tr),
46 Name: hdr.Name,
47 Size: hdr.Size,
48 }, nil
49 }
50}
51
// zipArchive is an Archive backed by a zip file.
type zipArchive struct {
	// Closer closes the underlying source the zip entries are read from.
	io.Closer

	// files holds the entries not yet returned by Next, in archive order.
	files []*zip.File
}
56
57func (a *zipArchive) Next() (*File, error) {
58 if len(a.files) == 0 {
59 return nil, io.EOF
60 }
61
62 f := a.files[0]
63 a.files = a.files[1:]
64
65 r, err := f.Open()
66 if err != nil {
67 return nil, err
68 }
69
70 return &File{
71 ReadCloser: r,
72 Name: f.Name,
73 Size: int64(f.UncompressedSize64),
74 }, nil
75}
76
77func newZipArchive(r io.Reader, closer io.Closer) (*zipArchive, error) {
78 f, ok := r.(interface {
79 io.ReaderAt
80 Stat() (os.FileInfo, error)
81 })
82 if !ok {
83 return nil, fmt.Errorf("streaming zip files not supported")
84 }
85
86 fi, err := f.Stat()
87 if err != nil {
88 return nil, err
89 }
90
91 zr, err := zip.NewReader(f, fi.Size())
92 if err != nil {
93 return nil, err
94 }
95
96 // Filter out non files
97 files := zr.File[:0]
98 for _, f := range zr.File {
99 if f.Mode().IsRegular() {
100 files = append(files, f)
101 }
102 }
103
104 return &zipArchive{
105 Closer: closer,
106 files: files,
107 }, nil
108}
109
// detectContentType sniffs the MIME type of r from its first bytes (at most
// 512) using http.DetectContentType. It returns that type together with a
// reader that yields the complete stream, sniffed bytes included.
//
// When r is an io.Seeker and rewinding succeeds, r itself is returned so that
// any further interfaces it implements stay visible to the caller. When r
// cannot seek, or the rewind fails (an *os.File opened on a pipe or FIFO has a
// Seek method but rejects the call), the sniffed bytes are stitched back in
// front of r with io.MultiReader instead.
//
// An error is returned only if the initial read fails; this includes io.EOF
// for an empty stream.
func detectContentType(r io.Reader) (string, io.Reader, error) {
	var buf [512]byte
	n, err := io.ReadFull(r, buf[:])
	// A short read just means the stream is smaller than the sniff window.
	if err != nil && err != io.ErrUnexpectedEOF {
		return "", nil, err
	}

	sniffed := buf[:n]
	ct := http.DetectContentType(sniffed)

	// If we are a seeker, try to simply undo our read.
	if s, ok := r.(io.Seeker); ok {
		if _, serr := s.Seek(int64(-n), io.SeekCurrent); serr == nil {
			return ct, r, nil
		}
		// Seek is implemented but unusable on this source. The bytes are
		// already consumed, so fall through and replay them from memory.
		// NOTE(review): assumes a failed Seek leaves the read position
		// untouched, as it does for *os.File.
	}

	// Otherwise return a new reader which merges the read bytes back in.
	return ct, io.MultiReader(bytes.NewReader(sniffed), r), nil
}
128
// OpenReader returns a reader for the archive named by u, which may be:
//
//   - an "http://" or "https://" URL, fetched with http.Get. A response with a
//     non-2xx status yields a *url.Error carrying the status line and up to
//     1024 bytes of the response body.
//   - "-", meaning standard input. Closing the returned reader does not close
//     os.Stdin.
//   - anything else, which is treated as a local file path and opened with
//     os.Open.
//
// On failure the returned reader is nil. On success the caller is responsible
// for closing it.
func OpenReader(u string) (io.ReadCloser, error) {
	isHTTP := strings.HasPrefix(u, "https://") || strings.HasPrefix(u, "http://")

	switch {
	case u == "-":
		return io.NopCloser(os.Stdin), nil

	case !isHTTP:
		f, err := os.Open(u)
		if err != nil {
			// Return a literal nil: passing the nil *os.File through would
			// produce a non-nil io.ReadCloser wrapping a nil pointer.
			return nil, err
		}
		return f, nil
	}

	resp, err := http.Get(u)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode >= 200 && resp.StatusCode < 300 {
		return resp.Body, nil
	}

	// Include a bounded prefix of the body in the error for diagnosis.
	snippet, err := io.ReadAll(io.LimitReader(resp.Body, 1024))
	_ = resp.Body.Close() // best effort; the request has already failed
	if err != nil {
		return nil, err
	}
	return nil, &url.Error{
		Op:  "Get",
		URL: u,
		Err: fmt.Errorf("%s: %s", resp.Status, string(snippet)),
	}
}
155
156// openArchive opens the tar at the URL or filepath u. Also supported is tgz
157// files over http.
158func openArchive(u string) (ar Archive, err error) {
159 readCloser, err := OpenReader(u)
160 if err != nil {
161 return nil, err
162 }
163 defer func() {
164 if err != nil {
165 _ = readCloser.Close()
166 }
167 }()
168
169 ct, r, err := detectContentType(readCloser)
170 if err != nil {
171 return nil, err
172 }
173 switch ct {
174 case "application/x-gzip":
175 r, err = gzip.NewReader(r)
176 if err != nil {
177 return nil, err
178 }
179
180 case "application/zip":
181 return newZipArchive(r, readCloser)
182 }
183
184 return &tarArchive{
185 Closer: readCloser,
186 tr: tar.NewReader(r),
187 }, nil
188}