fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package zoekt 16 17import ( 18 "bufio" 19 "bytes" 20 "encoding/binary" 21 "encoding/json" 22 "fmt" 23 "io" 24 "sort" 25 "time" 26) 27 28func (w *writer) writeTOC(toc *indexTOC) { 29 // Tagged sections are indicated with a 0 section count. 30 // Tagged sections allow easier forwards and backwards 31 // compatibility when evolving zoekt index files with new 32 // sections. 33 // 34 // A tagged section is: 35 // Varint TagLen, Tag String, Varint Kind, Section 36 // 37 // Section kind is indicated because simpleSections and 38 // compoundSections have different lengths. 39 w.U32(0) 40 secs := toc.sectionsTaggedList() 41 for _, s := range secs { 42 w.String(s.tag) 43 w.Varint(uint32(s.sec.kind())) 44 s.sec.write(w) 45 } 46} 47 48func (s *compoundSection) writeStrings(w *writer, strs []*searchableString) { 49 s.start(w) 50 for _, f := range strs { 51 s.addItem(w, f.data) 52 } 53 s.end(w) 54} 55 56func (s *compoundSection) writeMap(w *writer, m map[string]uint32) { 57 keys := make([]*searchableString, 0, len(m)) 58 for k := range m { 59 keys = append(keys, &searchableString{ 60 data: []byte(k), 61 }) 62 } 63 sort.Slice(keys, func(i, j int) bool { 64 return m[string(keys[i].data)] < m[string(keys[j].data)] 65 }) 66 s.writeStrings(w, keys) 67} 68 69func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection, 70 charOffsets *simpleSection, postings *compoundSection, endRunes *simpleSection) { 71 keys := make(ngramSlice, 0, len(s.postings)) 72 for k := range s.postings { 73 keys = append(keys, k) 74 } 75 sort.Sort(keys) 76 77 ngramText.start(w) 78 for _, k := range keys { 79 var buf [8]byte 80 binary.BigEndian.PutUint64(buf[:], uint64(k)) 81 w.Write(buf[:]) 82 } 83 ngramText.end(w) 84 85 postings.start(w) 86 for _, k := range keys { 87 postings.addItem(w, s.postings[k]) 88 } 89 postings.end(w) 90 91 charOffsets.start(w) 92 w.Write(toSizedDeltas(s.runeOffsets)) 93 charOffsets.end(w) 94 95 endRunes.start(w) 96 w.Write(toSizedDeltas(s.endRunes)) 97 endRunes.end(w) 98} 99 100func (b *IndexBuilder) Write(out io.Writer) error { 101 next := b.indexFormatVersion == NextIndexFormatVersion 102 103 buffered := bufio.NewWriterSize(out, 1<<20) 104 defer buffered.Flush() 105 106 w := &writer{w: buffered} 107 toc := indexTOC{} 108 109 toc.fileContents.writeStrings(w, b.contentStrings) 110 toc.newlines.start(w) 111 for _, f := range b.contentStrings { 112 toc.newlines.addItem(w, toSizedDeltas(newLinesIndices(f.data))) 113 } 114 toc.newlines.end(w) 115 116 toc.fileEndSymbol.start(w) 117 for _, m := range b.fileEndSymbol { 118 w.U32(m) 119 } 120 toc.fileEndSymbol.end(w) 121 122 toc.symbolMap.writeMap(w, b.symIndex) 123 toc.symbolKindMap.writeMap(w, b.symKindIndex) 124 toc.symbolMetaData.start(w) 125 for _, m := range b.symMetaData { 126 w.U32(m) 127 } 128 toc.symbolMetaData.end(w) 129 130 toc.branchMasks.start(w) 131 for _, m := range b.branchMasks { 132 w.U64(m) 133 } 134 toc.branchMasks.end(w) 135 136 toc.fileSections.start(w) 137 for _, s := range b.docSections { 138 toc.fileSections.addItem(w, marshalDocSections(s)) 139 } 140 toc.fileSections.end(w) 141 142 writePostings(w, b.contentPostings, &toc.ngramText, &toc.runeOffsets, &toc.postings, &toc.fileEndRunes) 143 144 // names. 145 toc.fileNames.writeStrings(w, b.nameStrings) 146 147 writePostings(w, b.namePostings, &toc.nameNgramText, &toc.nameRuneOffsets, &toc.namePostings, &toc.nameEndRunes) 148 149 toc.subRepos.start(w) 150 w.Write(toSizedDeltas(b.subRepos)) 151 toc.subRepos.end(w) 152 153 toc.contentChecksums.start(w) 154 w.Write(b.checksums) 155 toc.contentChecksums.end(w) 156 157 toc.languages.start(w) 158 w.Write(b.languages) 159 toc.languages.end(w) 160 161 toc.runeDocSections.start(w) 162 w.Write(marshalDocSections(b.runeDocSections)) 163 toc.runeDocSections.end(w) 164 165 if next { 166 toc.repos.start(w) 167 w.Write(toSizedDeltas16(b.repos)) 168 toc.repos.end(w) 169 } 170 171 indexTime := b.IndexTime 172 if indexTime.IsZero() { 173 indexTime = time.Now().UTC() 174 } 175 176 if err := b.writeJSON(&IndexMetadata{ 177 IndexFormatVersion: b.indexFormatVersion, 178 IndexTime: indexTime, 179 IndexFeatureVersion: b.featureVersion, 180 IndexMinReaderVersion: WriteMinFeatureVersion, 181 PlainASCII: b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII, 182 LanguageMap: b.languageMap, 183 ZoektVersion: Version, 184 ID: b.ID, 185 }, &toc.metaData, w); err != nil { 186 return err 187 } 188 189 if next { 190 if err := b.writeJSON(b.repoList, &toc.repoMetaData, w); err != nil { 191 return err 192 } 193 } else { 194 if len(b.repoList) != 1 { 195 return fmt.Errorf("have %d repos, but only support 1 in index format version %d", len(b.repoList), b.indexFormatVersion) 196 } 197 if err := b.writeJSON(b.repoList[0], &toc.repoMetaData, w); err != nil { 198 return err 199 } 200 } 201 202 toc.ranks.start(w) 203 if err := encodeRanks(w, b.ranks); err != nil { 204 return err 205 } 206 toc.ranks.end(w) 207 208 var tocSection simpleSection 209 210 tocSection.start(w) 211 w.writeTOC(&toc) 212 tocSection.end(w) 213 tocSection.write(w) 214 return w.err 215} 216 217func (b *IndexBuilder) writeJSON(data interface{}, sec *simpleSection, w *writer) error { 218 blob, err := json.Marshal(data) 219 if err != nil { 220 return err 221 } 222 sec.start(w) 223 w.Write(blob) 224 sec.end(w) 225 return nil 226} 227 228func newLinesIndices(in []byte) []uint32 { 229 out := make([]uint32, 0, bytes.Count(in, []byte{'\n'})) 230 for i, c := range in { 231 if c == '\n' { 232 out = append(out, uint32(i)) 233 } 234 } 235 return out 236}