fork of https://github.com/sourcegraph/zoekt
1// Copyright 2016 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package zoekt
16
17import (
18 "bytes"
19 "encoding/binary"
20 "encoding/gob"
21 "fmt"
22 "io"
23 "log"
24)
25
// Reference log.Println so the "log" import counts as used.
var _ = log.Println
27
// writer wraps an io.Writer and keeps track of the write error and the
// running byte offset.
type writer struct {
	// err is the error returned by the underlying writer. Once it is
	// non-nil, Write returns it without writing anything further.
	err error
	// w is the destination that Write forwards to.
	w io.Writer
	// off is advanced by the number of bytes each Write call reports.
	off uint32
}
34
35func (w *writer) Write(b []byte) (int, error) {
36 if w.err != nil {
37 return 0, w.err
38 }
39
40 var n int
41 n, w.err = w.w.Write(b)
42 w.off += uint32(n)
43 return n, w.err
44}
45
46func (w *writer) Off() uint32 { return w.off }
47
48func (w *writer) B(b byte) {
49 s := []byte{b}
50 w.Write(s)
51}
52
53func (w *writer) U32(n uint32) {
54 var enc [4]byte
55 binary.BigEndian.PutUint32(enc[:], n)
56 w.Write(enc[:])
57}
58
59func (w *writer) U64(n uint64) {
60 var enc [8]byte
61 binary.BigEndian.PutUint64(enc[:], n)
62 w.Write(enc[:])
63}
64
65func (w *writer) Varint(n uint32) {
66 var enc [8]byte
67 m := binary.PutUvarint(enc[:], uint64(n))
68 w.Write(enc[:m])
69}
70
71func (w *writer) String(s string) {
72 b := []byte(s)
73 w.Varint(uint32(len(b)))
74 w.Write(b)
75}
76
// encodeRanks serializes ranks to w. If every inner slice is empty (or
// ranks itself is empty) nothing is written and nil is returned.
// Otherwise a single header byte naming the encoding is written,
// followed by the gob encoding of ranks. The first error from w or from
// the gob encoder is returned.
func encodeRanks(w io.Writer, ranks [][]float64) error {
	nonEmpty := false
	for i := 0; i < len(ranks) && !nonEmpty; i++ {
		nonEmpty = len(ranks[i]) > 0
	}
	if !nonEmpty {
		return nil
	}

	// The leading byte announces the encoding, so the encoding can be
	// changed later without losing backward compatibility.
	// 0 = gob-encoding.
	if _, err := w.Write([]byte{0}); err != nil {
		return err
	}

	enc := gob.NewEncoder(w)
	return enc.Encode(ranks)
}
99
// decodeRanks decodes blob into ranks. An empty blob is not an error and
// leaves ranks untouched. Otherwise the first byte of blob names the
// encoding of the remaining bytes: 0 means gob, and any other value
// yields an "unknown encoding" error. Errors from the gob decoder are
// returned unchanged.
func decodeRanks(blob []byte, ranks *[][]float64) error {
	if len(blob) == 0 {
		return nil
	}

	switch encoding := blob[0]; encoding {
	case 0: // gob-encoding
		return gob.NewDecoder(bytes.NewReader(blob[1:])).Decode(ranks)
	default:
		// No trailing newline: error strings are routinely wrapped or
		// logged by callers, and a newline would break up that output.
		return fmt.Errorf("unknown encoding for ranks: %d", encoding)
	}
}
118
119func (s *simpleSection) start(w *writer) {
120 s.off = w.Off()
121}
122
123func (s *simpleSection) end(w *writer) {
124 s.sz = w.Off() - s.off
125}
126
127// section is a range of bytes in the index file.
128type section interface {
129 read(*reader) error
130 write(*writer)
131 kind() sectionKind // simple or complex, used in serialization
132}
133
// sectionKind tags which flavour of section a value is.
type sectionKind int

// The values are written out explicitly rather than derived from
// declaration order, since the section interface documents kind as
// being used in serialization.
const (
	sectionKindSimple       = sectionKind(0)
	sectionKindCompound     = sectionKind(1)
	sectionKindCompoundLazy = sectionKind(2)
)
141
// simpleSection is a simple range of bytes, described by where it
// starts and how long it is.
type simpleSection struct {
	off, sz uint32 // start offset and size of the range
}
147
148func (s *simpleSection) kind() sectionKind {
149 return sectionKindSimple
150}
151
152func (s *simpleSection) read(r *reader) error {
153 var err error
154 s.off, err = r.U32()
155 if err != nil {
156 return err
157 }
158 s.sz, err = r.U32()
159 if err != nil {
160 return err
161 }
162 return nil
163}
164
165func (s *simpleSection) write(w *writer) {
166 w.U32(s.off)
167 w.U32(s.sz)
168}
169
170// compoundSection is a range of bytes containg a list of variable
171// sized items.
172type compoundSection struct {
173 data simpleSection
174
175 offsets []uint32
176 index simpleSection
177}
178
179func (s *compoundSection) kind() sectionKind {
180 return sectionKindCompound
181}
182
183func (s *compoundSection) start(w *writer) {
184 s.data.start(w)
185}
186
187func (s *compoundSection) end(w *writer) {
188 s.data.end(w)
189 s.index.start(w)
190 for _, o := range s.offsets {
191 w.U32(o)
192 }
193 s.index.end(w)
194}
195
196func (s *compoundSection) addItem(w *writer, item []byte) {
197 s.offsets = append(s.offsets, w.Off())
198 w.Write(item)
199}
200
201func (s *compoundSection) write(w *writer) {
202 s.data.write(w)
203 s.index.write(w)
204}
205
206func (s *compoundSection) read(r *reader) error {
207 if err := s.data.read(r); err != nil {
208 return err
209 }
210 if err := s.index.read(r); err != nil {
211 return err
212 }
213 var err error
214 s.offsets, err = readSectionU32(r.r, s.index)
215 return err
216}
217
218// relativeIndex returns the relative offsets of the items (first
219// element is 0), plus a final marking the end of the last item.
220func (s *compoundSection) relativeIndex() []uint32 {
221 ri := make([]uint32, 0, len(s.offsets)+1)
222 for _, o := range s.offsets {
223 ri = append(ri, o-s.offsets[0])
224 }
225 if len(s.offsets) > 0 {
226 ri = append(ri, s.data.sz)
227 }
228 return ri
229}
230
231type lazyCompoundSection struct {
232 compoundSection
233}
234
235func (s *lazyCompoundSection) kind() sectionKind {
236 return sectionKindCompoundLazy
237}
238
239func (s *lazyCompoundSection) read(r *reader) error {
240 // We do the same thing compoundSection.read does, except we don't read the
241 // offsets.
242 if err := s.data.read(r); err != nil {
243 return err
244 }
245 return s.index.read(r)
246}