fork of https://github.com/sourcegraph/zoekt
0

Configure Feed

Select the types of activity you want to include in your feed.

1// Copyright 2016 Google Inc. All rights reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package index 16 17import ( 18 "bufio" 19 "bytes" 20 "encoding/binary" 21 "encoding/json" 22 "fmt" 23 "io" 24 "sort" 25 "time" 26 27 "github.com/sourcegraph/zoekt" 28 29 "github.com/RoaringBitmap/roaring" 30) 31 32func (w *writer) writeTOC(toc *indexTOC) { 33 // Tagged sections are indicated with a 0 section count. 34 // Tagged sections allow easier forwards and backwards 35 // compatibility when evolving zoekt index files with new 36 // sections. 37 // 38 // A tagged section is: 39 // Varint TagLen, Tag String, Varint Kind, Section 40 // 41 // Section kind is indicated because simpleSections and 42 // compoundSections have different lengths. 43 w.U32(0) 44 secs := toc.sectionsTaggedList() 45 for _, s := range secs { 46 w.String(s.tag) 47 w.Varint(uint32(s.sec.kind())) 48 s.sec.write(w) 49 } 50} 51 52func (s *compoundSection) writeStrings(w *writer, strs []*searchableString) { 53 s.start(w) 54 for _, f := range strs { 55 s.addItem(w, f.data) 56 } 57 s.end(w) 58} 59 60func (s *compoundSection) writeMap(w *writer, m map[string]uint32) { 61 keys := make([]*searchableString, 0, len(m)) 62 for k := range m { 63 keys = append(keys, &searchableString{ 64 data: []byte(k), 65 }) 66 } 67 sort.Slice(keys, func(i, j int) bool { 68 return m[string(keys[i].data)] < m[string(keys[j].data)] 69 }) 70 s.writeStrings(w, keys) 71} 72 73func writeUint32Bitmap(w *writer, dat []uint32) { 74 rb := roaring.BitmapOf(dat...) 75 rb.RunOptimize() 76 rb.WriteTo(w) 77} 78 79func writePostings(w *writer, s *postingsBuilder, ngramText *simpleSection, 80 charOffsets *simpleSection, postings *compoundSection, endRunes *simpleSection, 81) { 82 keys := make(ngramSlice, 0, len(s.postings)) 83 for k := range s.postings { 84 keys = append(keys, k) 85 } 86 sort.Sort(keys) 87 88 ngramText.start(w) 89 for _, k := range keys { 90 var buf [8]byte 91 binary.BigEndian.PutUint64(buf[:], uint64(k)) 92 w.Write(buf[:]) 93 } 94 ngramText.end(w) 95 96 postings.start(w) 97 for _, k := range keys { 98 postings.addItem(w, s.postings[k]) 99 } 100 postings.end(w) 101 102 charOffsets.start(w) 103 w.Write(toSizedDeltas(s.runeOffsets)) 104 charOffsets.end(w) 105 106 endRunes.start(w) 107 w.Write(toSizedDeltas(s.endRunes)) 108 endRunes.end(w) 109} 110 111func (b *ShardBuilder) Write(out io.Writer) error { 112 next := b.indexFormatVersion == NextIndexFormatVersion 113 114 buffered := bufio.NewWriterSize(out, 1<<20) 115 defer buffered.Flush() 116 117 w := &writer{w: buffered} 118 toc := indexTOC{} 119 120 toc.fileContents.writeStrings(w, b.contentStrings) 121 toc.newlines.start(w) 122 for _, f := range b.contentStrings { 123 toc.newlines.addItem(w, toSizedDeltas(newLinesIndices(f.data))) 124 } 125 toc.newlines.end(w) 126 127 toc.fileEndSymbol.start(w) 128 for _, m := range b.fileEndSymbol { 129 w.U32(m) 130 } 131 toc.fileEndSymbol.end(w) 132 133 toc.symbolMap.writeMap(w, b.symIndex) 134 toc.symbolKindMap.writeMap(w, b.symKindIndex) 135 toc.symbolMetaData.start(w) 136 for _, m := range b.symMetaData { 137 w.U32(m) 138 } 139 toc.symbolMetaData.end(w) 140 141 toc.branchMasks.start(w) 142 for _, m := range b.branchMasks { 143 w.U64(m) 144 } 145 toc.branchMasks.end(w) 146 147 toc.fileSections.start(w) 148 for _, s := range b.docSections { 149 toc.fileSections.addItem(w, marshalDocSections(s)) 150 } 151 toc.fileSections.end(w) 152 153 writePostings(w, b.contentPostings, &toc.ngramText, &toc.runeOffsets, &toc.postings, &toc.fileEndRunes) 154 155 // names. 156 toc.fileNames.writeStrings(w, b.nameStrings) 157 158 writePostings(w, b.namePostings, &toc.nameNgramText, &toc.nameRuneOffsets, &toc.namePostings, &toc.nameEndRunes) 159 160 toc.subRepos.start(w) 161 w.Write(toSizedDeltas(b.subRepos)) 162 toc.subRepos.end(w) 163 164 toc.contentChecksums.start(w) 165 w.Write(b.checksums) 166 toc.contentChecksums.end(w) 167 168 toc.languages.start(w) 169 w.Write(b.languages) 170 toc.languages.end(w) 171 172 toc.categories.start(w) 173 w.Write(b.categories) 174 toc.categories.end(w) 175 176 toc.runeDocSections.start(w) 177 w.Write(marshalDocSections(b.runeDocSections)) 178 toc.runeDocSections.end(w) 179 180 if next { 181 toc.repos.start(w) 182 w.Write(toSizedDeltas16(b.repos)) 183 toc.repos.end(w) 184 } 185 186 if repoIDs, ok := b.repoIDs(); ok && next { 187 toc.reposIDsBitmap.start(w) 188 writeUint32Bitmap(w, repoIDs) 189 toc.reposIDsBitmap.end(w) 190 } 191 192 indexTime := b.IndexTime 193 if indexTime.IsZero() { 194 indexTime = time.Now().UTC() 195 } 196 197 if err := b.writeJSON(&zoekt.IndexMetadata{ 198 IndexFormatVersion: b.indexFormatVersion, 199 IndexTime: indexTime, 200 IndexFeatureVersion: b.featureVersion, 201 IndexMinReaderVersion: WriteMinFeatureVersion, 202 PlainASCII: b.contentPostings.isPlainASCII && b.namePostings.isPlainASCII, 203 LanguageMap: b.languageMap, 204 ZoektVersion: Version, 205 ID: b.ID, 206 }, &toc.metaData, w); err != nil { 207 return err 208 } 209 210 if next { 211 if err := b.writeJSON(b.repoList, &toc.repoMetaData, w); err != nil { 212 return err 213 } 214 } else { 215 if len(b.repoList) != 1 { 216 return fmt.Errorf("have %d repos, but only support 1 in index format version %d", len(b.repoList), b.indexFormatVersion) 217 } 218 if err := b.writeJSON(b.repoList[0], &toc.repoMetaData, w); err != nil { 219 return err 220 } 221 } 222 223 var tocSection simpleSection 224 225 tocSection.start(w) 226 w.writeTOC(&toc) 227 tocSection.end(w) 228 tocSection.write(w) 229 return w.err 230} 231 232func (b *ShardBuilder) writeJSON(data interface{}, sec *simpleSection, w *writer) error { 233 blob, err := json.Marshal(data) 234 if err != nil { 235 return err 236 } 237 sec.start(w) 238 w.Write(blob) 239 sec.end(w) 240 return nil 241} 242 243func newLinesIndices(in []byte) []uint32 { 244 out := make([]uint32, 0, bytes.Count(in, []byte{'\n'})) 245 for i, c := range in { 246 if c == '\n' { 247 out = append(out, uint32(i)) 248 } 249 } 250 return out 251}