func saveInts(values []int, out DataOutput) error { length := len(values) assert(length > 0) if length == 1 { return out.WriteVInt(int32(values[0])) } var allEqual = true var sentinel = values[0] for _, v := range values[1:] { if v != sentinel { allEqual = false break } } if allEqual { err := out.WriteVInt(0) if err == nil { err = out.WriteVInt(int32(values[0])) } return err } var max int64 = 0 for _, v := range values { max |= int64(v) } var bitsRequired = packed.BitsRequired(max) err := out.WriteVInt(int32(bitsRequired)) if err != nil { return err } w := packed.WriterNoHeader(out, packed.PackedFormat(packed.PACKED), length, bitsRequired, 1) for _, v := range values { if err = w.Add(int64(v)); err != nil { return err } } return w.Finish() }
func (w *StoredFieldsIndexWriter) writeBlock() error { assert(w.blockChunks > 0) err := w.fieldsIndexOut.WriteVInt(int32(w.blockChunks)) if err != nil { return err } // The trick here is that we only store the difference from the // average start pointer or doc base, this helps save bits per // value. And in order to prevent a few chunks that would be far // from the average to raise the number of bits per value for all // of them, we only encode blocks of 1024 chunks at once. // See LUCENE-4512 // doc bases var avgChunkDocs int if w.blockChunks == 1 { avgChunkDocs = 0 } else { avgChunkDocs = int(math.Floor(float64(w.blockDocs-w.docBaseDeltas[w.blockChunks-1])/float64(w.blockChunks-1) + 0.5)) } err = w.fieldsIndexOut.WriteVInt(int32(w.totalDocs - w.blockDocs)) // doc base if err == nil { err = w.fieldsIndexOut.WriteVInt(int32(avgChunkDocs)) } if err != nil { return err } var docBase int = 0 var maxDelta int64 = 0 for i := 0; i < w.blockChunks; i++ { delta := docBase - avgChunkDocs*i maxDelta |= moveSignToLowOrderBit(int64(delta)) docBase += w.docBaseDeltas[i] } bitsPerDocbase := packed.BitsRequired(maxDelta) err = w.fieldsIndexOut.WriteVInt(int32(bitsPerDocbase)) if err != nil { return err } writer := packed.WriterNoHeader(w.fieldsIndexOut, packed.PackedFormat(packed.PACKED), w.blockChunks, bitsPerDocbase, 1) docBase = 0 for i := 0; i < w.blockChunks; i++ { delta := docBase - avgChunkDocs*i assert(packed.BitsRequired(moveSignToLowOrderBit(int64(delta))) <= writer.BitsPerValue()) err = writer.Add(moveSignToLowOrderBit(int64(delta))) if err != nil { return err } docBase += w.docBaseDeltas[i] } err = writer.Finish() if err != nil { return err } // start pointers w.fieldsIndexOut.WriteVLong(w.firstStartPointer) var avgChunkSize int64 if w.blockChunks == 1 { avgChunkSize = 0 } else { avgChunkSize = (w.maxStartPointer - w.firstStartPointer) / int64(w.blockChunks-1) } err = w.fieldsIndexOut.WriteVLong(avgChunkSize) if err != nil { return err } var startPointer int64 = 0 
maxDelta = 0 for i := 0; i < w.blockChunks; i++ { startPointer += w.startPointerDeltas[i] delta := startPointer - avgChunkSize*int64(i) maxDelta |= moveSignToLowOrderBit(delta) } bitsPerStartPointer := packed.BitsRequired(maxDelta) err = w.fieldsIndexOut.WriteVInt(int32(bitsPerStartPointer)) if err != nil { return err } writer = packed.WriterNoHeader(w.fieldsIndexOut, packed.PackedFormat(packed.PACKED), w.blockChunks, bitsPerStartPointer, 1) startPointer = 0 for i := 0; i < w.blockChunks; i++ { startPointer += w.startPointerDeltas[i] delta := startPointer - avgChunkSize*int64(i) assert(packed.BitsRequired(moveSignToLowOrderBit(delta)) <= writer.BitsPerValue()) err = writer.Add(moveSignToLowOrderBit(delta)) if err != nil { return err } } return writer.Finish() }
// AddNumericField writes the norm values of field to the meta/data
// outputs. iter is a factory producing a fresh iterator over the
// per-document values; it is invoked once to count and classify the
// values, and again in the branch that re-reads them for encoding, so
// it must be restartable.
//
// The encoding is chosen from the observed values:
//   - CONST_COMPRESSED when every document shares a single value
//     (only the constant is stored, in the metadata),
//   - UNCOMPRESSED raw bytes when 8 bits are needed and all values
//     fit in 0..255,
//   - TABLE_COMPRESSED otherwise: a decode table followed by packed
//     per-document ordinals (PACKED_SINGLE_BLOCK, bpv in {1,2,4,8}).
//
// Fields with more than 256 distinct values are not handled yet and
// hit the trailing panic.
func (nc *NormsConsumer) AddNumericField(field *FieldInfo, iter func() func() (interface{}, bool)) (err error) {
	if err = nc.meta.WriteVInt(field.Number); err != nil {
		return
	}
	minValue, maxValue := int64(math.MaxInt64), int64(math.MinInt64)
	// TODO: more efficient?
	// First pass: track min/max and collect up to 256 distinct values;
	// uniqueValues is dropped (set to nil) once that limit is exceeded.
	uniqueValues := newNormMap()
	count := int64(0)
	next := iter()
	for {
		nv, ok := next()
		if !ok {
			break
		}
		assert2(nv != nil, "illegal norms data for field %v, got null for value: %v", field.Name, count)
		v := nv.(int64)
		if v < minValue {
			minValue = v
		}
		if v > maxValue {
			maxValue = v
		}
		if uniqueValues != nil && uniqueValues.add(v) && uniqueValues.size > 256 {
			uniqueValues = nil
		}
		count++
	}
	// Every document must contribute exactly one value.
	assert2(count == int64(nc.maxDoc), "illegal norms data for field %v, expected %v values, got %v", field.Name, nc.maxDoc, count)
	if uniqueValues != nil && uniqueValues.size == 1 {
		// 0 bpv
		// Single distinct value: store only the constant in metadata.
		if err = nc.meta.WriteByte(CONST_COMPRESSED); err != nil {
			return
		}
		if err = nc.meta.WriteLong(minValue); err != nil {
			return
		}
	} else if uniqueValues != nil {
		// small number of unique values; this is the typical case:
		// we only use bpv=1,2,4,8
		format := packed.PackedFormat(packed.PACKED_SINGLE_BLOCK)
		// Round the required width up to the next supported size.
		bitsPerValue := packed.BitsRequired(int64(uniqueValues.size) - 1)
		if bitsPerValue == 3 {
			bitsPerValue = 4
		} else if bitsPerValue > 4 {
			bitsPerValue = 8
		}
		if bitsPerValue == 8 && minValue >= 0 && maxValue <= 255 {
			// A table would not save anything over raw bytes: write
			// the metadata header, then each value as one byte.
			if err = store.Stream(nc.meta).WriteByte(UNCOMPRESSED). // uncompressed []byte
										WriteLong(nc.data.FilePointer()).
										Close(); err != nil {
				return err
			}
			next = iter()
			for {
				nv, ok := next()
				if !ok {
					break
				}
				// NOTE(review): nil was asserted impossible in the
				// first pass; this branch treats it as 0 defensively.
				n := byte(0)
				if nv != nil {
					n = byte(nv.(int64))
				}
				if err = nc.data.WriteByte(byte(n)); err != nil {
					return err
				}
			}
		} else {
			// Table-compressed: metadata header, then (into the data
			// output) version, decode table, packed format id and
			// width, and finally the packed per-document ordinals.
			if err = store.Stream(nc.meta).WriteByte(TABLE_COMPRESSED). // table-compressed
											WriteLong(nc.data.FilePointer()).
											Close(); err != nil {
				return err
			}
			if err = nc.data.WriteVInt(packed.VERSION_CURRENT); err != nil {
				return err
			}
			decode := uniqueValues.decodeTable()
			// upgrade to power of two sized array
			size := 1 << uint(bitsPerValue)
			if err = nc.data.WriteVInt(int32(size)); err != nil {
				return err
			}
			for _, v := range decode {
				if err = nc.data.WriteLong(v); err != nil {
					return err
				}
			}
			// Zero-pad the table up to the power-of-two size.
			for i := len(decode); i < size; i++ {
				if err = nc.data.WriteLong(0); err != nil {
					return err
				}
			}
			if err = store.Stream(nc.data).WriteVInt(int32(format.Id())).
				WriteVInt(int32(bitsPerValue)).
				Close(); err != nil {
				return err
			}
			// Second pass over the values: emit each one's ordinal.
			writer := packed.WriterNoHeader(nc.data, format, nc.maxDoc, bitsPerValue, packed.DEFAULT_BUFFER_SIZE)
			next = iter()
			for {
				nv, ok := next()
				if !ok {
					break
				}
				if err = writer.Add(int64(uniqueValues.ord(nv.(int64)))); err != nil {
					return err
				}
			}
			if err = writer.Finish(); err != nil {
				return err
			}
		}
	} else {
		// More than 256 distinct values: no encoding implemented.
		panic("not implemented yet")
	}
	return nil
}