Example #1
func saveInts(values []int, out DataOutput) error {
	length := len(values)
	assert(length > 0)
	if length == 1 {
		return out.WriteVInt(int32(values[0]))
	}

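	// If every value is identical, signal it with a bit width of 0 followed
	// by the shared value.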
	var allEqual = true
	var sentinel = values[0]
	for _, v := range values[1:] {
		if v != sentinel {
			allEqual = false
			break
		}
	}
	if allEqual {
		err := out.WriteVInt(0)
		if err == nil {
			err = out.WriteVInt(int32(values[0]))
		}
		return err
	}

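	// Otherwise OR all values together; the position of the highest set bit
	// gives the number of bits needed to pack any single value.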
	var max int64 = 0
	for _, v := range values {
		max |= int64(v)
	}
	var bitsRequired = packed.BitsRequired(max)
	err := out.WriteVInt(int32(bitsRequired))
	if err != nil {
		return err
	}

	w := packed.WriterNoHeader(out, packed.PackedFormat(packed.PACKED), length, bitsRequired, 1)
	for _, v := range values {
		if err = w.Add(int64(v)); err != nil {
			return err
		}
	}
	return w.Finish()
}
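
// Not part of the original file: a minimal illustration of the bit-width
// decision saveInts relies on. packed.BitsRequired is assumed to behave like
// bits.Len64 on the OR-folded maximum for the non-negative values used here.
// Needs "math/bits" in the imports.
func exampleBitsRequired(values []int) int {
	var acc uint64
	for _, v := range values {
		acc |= uint64(v) // e.g. 3|7|12 == 15, which needs 4 bits
	}
	return bits.Len64(acc)
}
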
func (w *StoredFieldsIndexWriter) writeBlock() error {
	assert(w.blockChunks > 0)
	err := w.fieldsIndexOut.WriteVInt(int32(w.blockChunks))
	if err != nil {
		return err
	}

	// The trick here is that we only store the difference from the
	// average start pointer or doc base, which saves bits per value.
	// And to prevent a few chunks that sit far from the average from
	// raising the number of bits per value for all of them, we only
	// encode blocks of 1024 chunks at once.
	// See LUCENE-4512.
	// (A standalone sketch of this delta-from-average encoding follows
	// this listing.)

	// doc bases
	var avgChunkDocs int
	if w.blockChunks == 1 {
		avgChunkDocs = 0
	} else {
		avgChunkDocs = int(math.Floor(float64(w.blockDocs-w.docBaseDeltas[w.blockChunks-1])/float64(w.blockChunks-1) + 0.5))
	}
	err = w.fieldsIndexOut.WriteVInt(int32(w.totalDocs - w.blockDocs)) // doc base
	if err == nil {
		err = w.fieldsIndexOut.WriteVInt(int32(avgChunkDocs))
	}
	if err != nil {
		return err
	}
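	// First pass over the chunks: find the widest delta from i*avgChunkDocs
	// (with the sign folded into the low-order bit) so we know how many
	// bits each doc-base delta needs.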
	var docBase int = 0
	var maxDelta int64 = 0
	for i := 0; i < w.blockChunks; i++ {
		delta := docBase - avgChunkDocs*i
		maxDelta |= moveSignToLowOrderBit(int64(delta))
		docBase += w.docBaseDeltas[i]
	}

	bitsPerDocbase := packed.BitsRequired(maxDelta)
	err = w.fieldsIndexOut.WriteVInt(int32(bitsPerDocbase))
	if err != nil {
		return err
	}
	writer := packed.WriterNoHeader(w.fieldsIndexOut,
		packed.PackedFormat(packed.PACKED), w.blockChunks, bitsPerDocbase, 1)
	docBase = 0
	for i := 0; i < w.blockChunks; i++ {
		delta := docBase - avgChunkDocs*i
		assert(packed.BitsRequired(moveSignToLowOrderBit(int64(delta))) <= writer.BitsPerValue())
		err = writer.Add(moveSignToLowOrderBit(int64(delta)))
		if err != nil {
			return err
		}
		docBase += w.docBaseDeltas[i]
	}
	err = writer.Finish()
	if err != nil {
		return err
	}

	// start pointers
	if err = w.fieldsIndexOut.WriteVLong(w.firstStartPointer); err != nil {
		return err
	}
	var avgChunkSize int64
	if w.blockChunks == 1 {
		avgChunkSize = 0
	} else {
		avgChunkSize = (w.maxStartPointer - w.firstStartPointer) / int64(w.blockChunks-1)
	}
	err = w.fieldsIndexOut.WriteVLong(avgChunkSize)
	if err != nil {
		return err
	}
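	// Same two-pass scheme for the start pointers: first find the widest
	// delta from i*avgChunkSize, then pack every delta at that width.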
	var startPointer int64 = 0
	maxDelta = 0
	for i := 0; i < w.blockChunks; i++ {
		startPointer += w.startPointerDeltas[i]
		delta := startPointer - avgChunkSize*int64(i)
		maxDelta |= moveSignToLowOrderBit(delta)
	}

	bitsPerStartPointer := packed.BitsRequired(maxDelta)
	err = w.fieldsIndexOut.WriteVInt(int32(bitsPerStartPointer))
	if err != nil {
		return err
	}
	writer = packed.WriterNoHeader(w.fieldsIndexOut,
		packed.PackedFormat(packed.PACKED), w.blockChunks, bitsPerStartPointer, 1)
	startPointer = 0
	for i := 0; i < w.blockChunks; i++ {
		startPointer += w.startPointerDeltas[i]
		delta := startPointer - avgChunkSize*int64(i)
		assert(packed.BitsRequired(moveSignToLowOrderBit(delta)) <= writer.BitsPerValue())
		err = writer.Add(moveSignToLowOrderBit(delta))
		if err != nil {
			return err
		}
	}
	return writer.Finish()
}
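
The delta-from-average trick described in the comment inside writeBlock can be shown on its own. The sketch below uses only the standard library; zigZag is an assumed stand-in for moveSignToLowOrderBit, and the per-chunk document counts are made up:

package main

import (
	"fmt"
	"math/bits"
)

// zigZag folds the sign into the low-order bit so that small negative and
// small positive deltas both stay small (assumed to match what
// moveSignToLowOrderBit does in the listing above).
func zigZag(n int64) int64 { return (n >> 63) ^ (n << 1) }

func main() {
	// Hypothetical per-chunk document counts for one block.
	docBaseDeltas := []int{128, 130, 127, 131}
	blockDocs := 0
	for _, d := range docBaseDeltas {
		blockDocs += d
	}
	n := len(docBaseDeltas)

	// Average docs per chunk, ignoring the last chunk, rounded to nearest
	// (mirrors the avgChunkDocs computation in writeBlock).
	avg := int(float64(blockDocs-docBaseDeltas[n-1])/float64(n-1) + 0.5)

	// Each chunk only stores its distance from i*avg; OR-ing the
	// zig-zagged deltas together exposes the widest one.
	var maxDelta int64
	docBase := 0
	for i := 0; i < n; i++ {
		delta := int64(docBase - avg*i)
		maxDelta |= zigZag(delta)
		docBase += docBaseDeltas[i]
	}
	fmt.Printf("avg=%d, bits per doc-base delta=%d\n", avg, bits.Len64(uint64(maxDelta)))
}

With these numbers every delta fits in 3 bits, while the raw per-chunk document counts (127-131) would take 8 bits each.
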
Example #3
func (nc *NormsConsumer) AddNumericField(field *FieldInfo,
	iter func() func() (interface{}, bool)) (err error) {

	if err = nc.meta.WriteVInt(field.Number); err != nil {
		return
	}
	minValue, maxValue := int64(math.MaxInt64), int64(math.MinInt64)
	// TODO: more efficient?
	uniqueValues := newNormMap()

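	// Single pass over the norms: track min/max and collect distinct
	// values, giving up on table compression once more than 256 are seen.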
	count := int64(0)
	next := iter()
	for {
		nv, ok := next()
		if !ok {
			break
		}
		assert2(nv != nil, "illegal norms data for field %v, got null for value: %v", field.Name, count)
		v := nv.(int64)

		if v < minValue {
			minValue = v
		}
		if v > maxValue {
			maxValue = v
		}

		if uniqueValues != nil && uniqueValues.add(v) && uniqueValues.size > 256 {
			uniqueValues = nil
		}

		count++
	}
	assert2(count == int64(nc.maxDoc),
		"illegal norms data for field %v, expected %v values, got %v",
		field.Name, nc.maxDoc, count)

	if uniqueValues != nil && uniqueValues.size == 1 {
		// 0 bpv
		if err = nc.meta.WriteByte(CONST_COMPRESSED); err != nil {
			return
		}
		if err = nc.meta.WriteLong(minValue); err != nil {
			return
		}
	} else if uniqueValues != nil {
		// small number of unique values; this is the typical case:
		// we only use bpv=1,2,4,8 (see the bit-width sketch after this
		// listing)
		format := packed.PackedFormat(packed.PACKED_SINGLE_BLOCK)
		bitsPerValue := packed.BitsRequired(int64(uniqueValues.size) - 1)
		if bitsPerValue == 3 {
			bitsPerValue = 4
		} else if bitsPerValue > 4 {
			bitsPerValue = 8
		}

		if bitsPerValue == 8 && minValue >= 0 && maxValue <= 255 {
			if err = store.Stream(nc.meta).WriteByte(UNCOMPRESSED). // uncompressed []byte
										WriteLong(nc.data.FilePointer()).
										Close(); err != nil {
				return err
			}
			next = iter()
			for {
				nv, ok := next()
				if !ok {
					break
				}
				n := byte(0)
				if nv != nil {
					n = byte(nv.(int64))
				}
				if err = nc.data.WriteByte(n); err != nil {
					return err
				}
			}
		} else {
			if err = store.Stream(nc.meta).WriteByte(TABLE_COMPRESSED). // table-compressed
											WriteLong(nc.data.FilePointer()).
											Close(); err != nil {
				return err
			}
			if err = nc.data.WriteVInt(packed.VERSION_CURRENT); err != nil {
				return err
			}

			decode := uniqueValues.decodeTable()
			// upgrade to power of two sized array
			size := 1 << uint(bitsPerValue)
			if err = nc.data.WriteVInt(int32(size)); err != nil {
				return err
			}
			for _, v := range decode {
				if err = nc.data.WriteLong(v); err != nil {
					return err
				}
			}
			for i := len(decode); i < size; i++ {
				if err = nc.data.WriteLong(0); err != nil {
					return err
				}
			}

			if err = store.Stream(nc.data).WriteVInt(int32(format.Id())).
				WriteVInt(int32(bitsPerValue)).
				Close(); err != nil {
				return err
			}

			writer := packed.WriterNoHeader(nc.data, format, nc.maxDoc, bitsPerValue, packed.DEFAULT_BUFFER_SIZE)
			next = iter()
			for {
				nv, ok := next()
				if !ok {
					break
				}
				if err = writer.Add(int64(uniqueValues.ord(nv.(int64)))); err != nil {
					return err
				}
			}
			if err = writer.Finish(); err != nil {
				return err
			}
		}
	} else {
		panic("not implemented yet")
	}
	return nil
}
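
In the table-compressed branch above, the bit width is derived from the number of distinct norm values and then rounded up to one of the widths this code allows for PACKED_SINGLE_BLOCK (1, 2, 4 or 8); the 8-bit case with values in 0..255 is diverted to the uncompressed-bytes path instead. Below is a standalone sketch of that rounding, using only the standard library and a made-up helper name:

package main

import (
	"fmt"
	"math/bits"
)

// normBitsPerValue mirrors the rounding in AddNumericField: ordinals
// 0..n-1 need bits.Len64(n-1) bits, bumped to the next width the
// table-compressed branch uses (1, 2, 4 or 8). The single-value case
// (n == 1) never reaches this branch; it is stored as CONST_COMPRESSED.
func normBitsPerValue(distinctValues int) int {
	bpv := bits.Len64(uint64(distinctValues - 1))
	if bpv == 3 {
		bpv = 4
	} else if bpv > 4 {
		bpv = 8
	}
	return bpv
}

func main() {
	for _, n := range []int{2, 3, 5, 17, 200} {
		fmt.Printf("%3d distinct norm values -> %d bits per value\n", n, normBitsPerValue(n))
	}
}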