write.go at dc41c6e3cc84a1c8c830bd7365b71cc2621e6a61 · boltless.me/zoekt

fork of https://github.com/sourcegraph/zoekt
zoekt / write.go
at dc41c6e3cc84a1c8c830bd7365b71cc2621e6a61 5.7 kB View raw
Keegan Carruthers-Smith list: add indextime to MinimalRepoListEntry (#596) 3y ago
  1// Copyright 2016 Google Inc. All rights reserved.
  2//
  3// Licensed under the Apache License, Version 2.0 (the "License");
  4// you may not use this file except in compliance with the License.
  5// You may obtain a copy of the License at
  6//
  7//    http://www.apache.org/licenses/LICENSE-2.0
  8//
  9// Unless required by applicable law or agreed to in writing, software
 10// distributed under the License is distributed on an "AS IS" BASIS,
 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12// See the License for the specific language governing permissions and
 13// limitations under the License.
 14
 15package zoekt
 16
 17import (
 18	"bufio"
 19	"bytes"
 20	"encoding/binary"
 21	"encoding/json"
 22	"fmt"
 23	"io"
 24	"sort"
 25	"time"
 26)
 27
 28func (w *writer) writeTOC(toc *indexTOC) {
 29	// Tagged sections are indicated with a 0 section count.
 30	// Tagged sections allow easier forwards and backwards
 31	// compatibility when evolving zoekt index files with new
 32	// sections.
 33	//
 34	// A tagged section is:
 35	// Varint TagLen, Tag String, Varint Kind, Section
 36	//
 37	// Section kind is indicated because simpleSections and
 38	// compoundSections have different lengths.
 39	w.U32(0)
 40	secs := toc.sectionsTaggedList()
 41	for _, s := range secs {
 42		w.String(s.tag)
 43		w.Varint(uint32(s.sec.kind()))
 44		s.sec.write(w)
 45	}
 46}
 47
 48func (s *compoundSection) writeStrings(w *writer, strs []*searchableString) {
 49	s.start(w)
 50	for _, f := range strs {
 51		s.addItem(w, f.data)
 52	}
 53	s.end(w)
 54}
 55
 56func (s *compoundSection) writeMap(w *writer, m map[string]uint32) {
 57	keys := make([]*searchableString, 0, len(m))
 58	for k := range m {
 59		keys = append(keys, &searchableString{
 60			data: []byte(k),
 61		})
 62	}
 63	sort.Slice(keys, func(i, j int) bool {
 64		return m[string(keys[i].data)] < m[string(keys[j].data)]
 65	})
 66	s.writeStrings(w, keys)
 67}
 68
 69func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection,
 70	charOffsets *simpleSection, postings *compoundSection, endRunes *simpleSection) {
 71	keys := make(ngramSlice, 0, len(s.postings))
 72	for k := range s.postings {
 73		keys = append(keys, k)
 74	}
 75	sort.Sort(keys)
 76
 77	ngramText.start(w)
 78	for _, k := range keys {
 79		var buf [8]byte
 80		binary.BigEndian.PutUint64(buf[:], uint64(k))
 81		w.Write(buf[:])
 82	}
 83	ngramText.end(w)
 84
 85	postings.start(w)
 86	for _, k := range keys {
 87		postings.addItem(w, s.postings[k])
 88	}
 89	postings.end(w)
 90
 91	charOffsets.start(w)
 92	w.Write(toSizedDeltas(s.runeOffsets))
 93	charOffsets.end(w)
 94
 95	endRunes.start(w)
 96	w.Write(toSizedDeltas(s.endRunes))
 97	endRunes.end(w)
 98}
 99
100func (b *IndexBuilder) Write(out io.Writer) error {
101	next := b.indexFormatVersion == NextIndexFormatVersion
102
103	buffered := bufio.NewWriterSize(out, 1<<20)
104	defer buffered.Flush()
105
106	w := &writer{w: buffered}
107	toc := indexTOC{}
108
109	toc.fileContents.writeStrings(w, b.contentStrings)
110	toc.newlines.start(w)
111	for _, f := range b.contentStrings {
112		toc.newlines.addItem(w, toSizedDeltas(newLinesIndices(f.data)))
113	}
114	toc.newlines.end(w)
115
116	toc.fileEndSymbol.start(w)
117	for _, m := range b.fileEndSymbol {
118		w.U32(m)
119	}
120	toc.fileEndSymbol.end(w)
121
122	toc.symbolMap.writeMap(w, b.symIndex)
123	toc.symbolKindMap.writeMap(w, b.symKindIndex)
124	toc.symbolMetaData.start(w)
125	for _, m := range b.symMetaData {
126		w.U32(m)
127	}
128	toc.symbolMetaData.end(w)
129
130	toc.branchMasks.start(w)
131	for _, m := range b.branchMasks {
132		w.U64(m)
133	}
134	toc.branchMasks.end(w)
135
136	toc.fileSections.start(w)
137	for _, s := range b.docSections {
138		toc.fileSections.addItem(w, marshalDocSections(s))
139	}
140	toc.fileSections.end(w)
141
142	writePostings(w, b.contentPostings, &toc.ngramText, &toc.runeOffsets, &toc.postings, &toc.fileEndRunes)
143
144	// names.
145	toc.fileNames.writeStrings(w, b.nameStrings)
146
147	writePostings(w, b.namePostings, &toc.nameNgramText, &toc.nameRuneOffsets, &toc.namePostings, &toc.nameEndRunes)
148
149	toc.subRepos.start(w)
150	w.Write(toSizedDeltas(b.subRepos))
151	toc.subRepos.end(w)
152
153	toc.contentChecksums.start(w)
154	w.Write(b.checksums)
155	toc.contentChecksums.end(w)
156
157	toc.languages.start(w)
158	w.Write(b.languages)
159	toc.languages.end(w)
160
161	toc.runeDocSections.start(w)
162	w.Write(marshalDocSections(b.runeDocSections))
163	toc.runeDocSections.end(w)
164
165	if next {
166		toc.repos.start(w)
167		w.Write(toSizedDeltas16(b.repos))
168		toc.repos.end(w)
169	}
170
171	indexTime := b.IndexTime
172	if indexTime.IsZero() {
173		indexTime = time.Now().UTC()
174	}
175
176	if err := b.writeJSON(&IndexMetadata{
177		IndexFormatVersion:    b.indexFormatVersion,
178		IndexTime:             indexTime,
179		IndexFeatureVersion:   b.featureVersion,
180		IndexMinReaderVersion: WriteMinFeatureVersion,
181		PlainASCII:            b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII,
182		LanguageMap:           b.languageMap,
183		ZoektVersion:          Version,
184		ID:                    b.ID,
185	}, &toc.metaData, w); err != nil {
186		return err
187	}
188
189	if next {
190		if err := b.writeJSON(b.repoList, &toc.repoMetaData, w); err != nil {
191			return err
192		}
193	} else {
194		if len(b.repoList) != 1 {
195			return fmt.Errorf("have %d repos, but only support 1 in index format version %d", len(b.repoList), b.indexFormatVersion)
196		}
197		if err := b.writeJSON(b.repoList[0], &toc.repoMetaData, w); err != nil {
198			return err
199		}
200	}
201
202	toc.ranks.start(w)
203	if err := encodeRanks(w, b.ranks); err != nil {
204		return err
205	}
206	toc.ranks.end(w)
207
208	var tocSection simpleSection
209
210	tocSection.start(w)
211	w.writeTOC(&toc)
212	tocSection.end(w)
213	tocSection.write(w)
214	return w.err
215}
216
217func (b *IndexBuilder) writeJSON(data interface{}, sec *simpleSection, w *writer) error {
218	blob, err := json.Marshal(data)
219	if err != nil {
220		return err
221	}
222	sec.start(w)
223	w.Write(blob)
224	sec.end(w)
225	return nil
226}
227
// newLinesIndices returns the byte offsets of every '\n' in in, in
// ascending order. The result is empty (but non-nil) when in contains no
// newline.
func newLinesIndices(in []byte) []uint32 {
	offsets := make([]uint32, 0, bytes.Count(in, []byte{'\n'}))
	for pos := 0; pos < len(in); {
		rel := bytes.IndexByte(in[pos:], '\n')
		if rel < 0 {
			break
		}
		offsets = append(offsets, uint32(pos+rel))
		// Resume scanning just past the newline we found.
		pos += rel + 1
	}
	return offsets
}
Configure Feed

Configure Feed