fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "bufio" 19 "bytes" 20 "encoding/binary" 21 "encoding/json" 22 "fmt" 23 "io" 24 "sort" 25 "time" 26 27 "github.com/sourcegraph/zoekt" 28) 29 30func (w *writer) writeTOC(toc *indexTOC) { 31 // Tagged sections are indicated with a 0 section count. 32 // Tagged sections allow easier forwards and backwards 33 // compatibility when evolving zoekt index files with new 34 // sections. 35 // 36 // A tagged section is: 37 // Varint TagLen, Tag String, Varint Kind, Section 38 // 39 // Section kind is indicated because simpleSections and 40 // compoundSections have different lengths. 41 w.U32(0) 42 secs := toc.sectionsTaggedList() 43 for _, s := range secs { 44 w.String(s.tag) 45 w.Varint(uint32(s.sec.kind())) 46 s.sec.write(w) 47 } 48} 49 50func (s *compoundSection) writeStrings(w *writer, strs []*searchableString) { 51 s.start(w) 52 for _, f := range strs { 53 s.addItem(w, f.data) 54 } 55 s.end(w) 56} 57 58func (s *compoundSection) writeMap(w *writer, m map[string]uint32) { 59 keys := make([]*searchableString, 0, len(m)) 60 for k := range m { 61 keys = append(keys, &searchableString{ 62 data: []byte(k), 63 }) 64 } 65 sort.Slice(keys, func(i, j int) bool { 66 return m[string(keys[i].data)] < m[string(keys[j].data)] 67 }) 68 s.writeStrings(w, keys) 69} 70 71func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection, 72 charOffsets *simpleSection, postings *compoundSection, endRunes *simpleSection, 73) { 74 keys := make(ngramSlice, 0, len(s.postings)) 75 for k := range s.postings { 76 keys = append(keys, k) 77 } 78 sort.Sort(keys) 79 80 ngramText.start(w) 81 for _, k := range keys { 82 var buf [8]byte 83 binary.BigEndian.PutUint64(buf[:], uint64(k)) 84 w.Write(buf[:]) 85 } 86 ngramText.end(w) 87 88 postings.start(w) 89 for _, k := range keys { 90 postings.addItem(w, s.postings[k]) 91 } 92 postings.end(w) 93 94 charOffsets.start(w) 95 w.Write(toSizedDeltas(s.runeOffsets)) 96 charOffsets.end(w) 97 98 endRunes.start(w) 99 w.Write(toSizedDeltas(s.endRunes)) 100 endRunes.end(w) 101} 102 103func (b *ShardBuilder) Write(out io.Writer) error { 104 next := b.indexFormatVersion == NextIndexFormatVersion 105 106 buffered := bufio.NewWriterSize(out, 1<<20) 107 defer buffered.Flush() 108 109 w := &writer{w: buffered} 110 toc := indexTOC{} 111 112 toc.fileContents.writeStrings(w, b.contentStrings) 113 toc.newlines.start(w) 114 for _, f := range b.contentStrings { 115 toc.newlines.addItem(w, toSizedDeltas(newLinesIndices(f.data))) 116 } 117 toc.newlines.end(w) 118 119 toc.fileEndSymbol.start(w) 120 for _, m := range b.fileEndSymbol { 121 w.U32(m) 122 } 123 toc.fileEndSymbol.end(w) 124 125 toc.symbolMap.writeMap(w, b.symIndex) 126 toc.symbolKindMap.writeMap(w, b.symKindIndex) 127 toc.symbolMetaData.start(w) 128 for _, m := range b.symMetaData { 129 w.U32(m) 130 } 131 toc.symbolMetaData.end(w) 132 133 toc.branchMasks.start(w) 134 for _, m := range b.branchMasks { 135 w.U64(m) 136 } 137 toc.branchMasks.end(w) 138 139 toc.fileSections.start(w) 140 for _, s := range b.docSections { 141 toc.fileSections.addItem(w, marshalDocSections(s)) 142 } 143 toc.fileSections.end(w) 144 145 writePostings(w, b.contentPostings, &toc.ngramText, &toc.runeOffsets, &toc.postings, &toc.fileEndRunes) 146 147 // names. 148 toc.fileNames.writeStrings(w, b.nameStrings) 149 150 writePostings(w, b.namePostings, &toc.nameNgramText, &toc.nameRuneOffsets, &toc.namePostings, &toc.nameEndRunes) 151 152 toc.subRepos.start(w) 153 w.Write(toSizedDeltas(b.subRepos)) 154 toc.subRepos.end(w) 155 156 toc.contentChecksums.start(w) 157 w.Write(b.checksums) 158 toc.contentChecksums.end(w) 159 160 toc.languages.start(w) 161 w.Write(b.languages) 162 toc.languages.end(w) 163 164 toc.runeDocSections.start(w) 165 w.Write(marshalDocSections(b.runeDocSections)) 166 toc.runeDocSections.end(w) 167 168 if next { 169 toc.repos.start(w) 170 w.Write(toSizedDeltas16(b.repos)) 171 toc.repos.end(w) 172 } 173 174 indexTime := b.IndexTime 175 if indexTime.IsZero() { 176 indexTime = time.Now().UTC() 177 } 178 179 if err := b.writeJSON(&zoekt.IndexMetadata{ 180 IndexFormatVersion: b.indexFormatVersion, 181 IndexTime: indexTime, 182 IndexFeatureVersion: b.featureVersion, 183 IndexMinReaderVersion: WriteMinFeatureVersion, 184 PlainASCII: b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII, 185 LanguageMap: b.languageMap, 186 ZoektVersion: Version, 187 ID: b.ID, 188 }, &toc.metaData, w); err != nil { 189 return err 190 } 191 192 if next { 193 if err := b.writeJSON(b.repoList, &toc.repoMetaData, w); err != nil { 194 return err 195 } 196 } else { 197 if len(b.repoList) != 1 { 198 return fmt.Errorf("have %d repos, but only support 1 in index format version %d", len(b.repoList), b.indexFormatVersion) 199 } 200 if err := b.writeJSON(b.repoList[0], &toc.repoMetaData, w); err != nil { 201 return err 202 } 203 } 204 205 var tocSection simpleSection 206 207 tocSection.start(w) 208 w.writeTOC(&toc) 209 tocSection.end(w) 210 tocSection.write(w) 211 return w.err 212} 213 214func (b *ShardBuilder) writeJSON(data interface{}, sec *simpleSection, w *writer) error { 215 blob, err := json.Marshal(data) 216 if err != nil { 217 return err 218 } 219 sec.start(w) 220 w.Write(blob) 221 sec.end(w) 222 return nil 223} 224 225func newLinesIndices(in []byte) []uint32 { 226 out := make([]uint32, 0, bytes.Count(in, []byte{'\n'})) 227 for i, c := range in { 228 if c == '\n' { 229 out = append(out, uint32(i)) 230 } 231 } 232 return out 233}