Example #1
0
func newLucene49NormsConsumer(state *SegmentWriteState,
	dataCodec, dataExtension, metaCodec, metaExtension string) (nc *NormsConsumer, err error) {

	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(1))
	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(2))
	assert(packed.PackedFormat(packed.PACKED_SINGLE_BLOCK).IsSupported(4))

	nc = &NormsConsumer{maxDoc: state.SegmentInfo.DocCount()}
	var success = false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(nc)
		}
	}()

	dataName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, dataExtension)
	if nc.data, err = state.Directory.CreateOutput(dataName, state.Context); err != nil {
		return nil, err
	}
	if err = codec.WriteHeader(nc.data, dataCodec, VERSION_CURRENT); err != nil {
		return nil, err
	}
	metaName := util.SegmentFileName(state.SegmentInfo.Name, state.SegmentSuffix, metaExtension)
	if nc.meta, err = state.Directory.CreateOutput(metaName, state.Context); err != nil {
		return nil, err
	}
	if err = codec.WriteHeader(nc.meta, metaCodec, VERSION_CURRENT); err != nil {
		return nil, err
	}
	success = true
	return nc, nil
}
func NewForUtil(in DataInput) (fu ForUtil, err error) {
	self := ForUtil{}
	packedIntsVersion, err := in.ReadVInt()
	if err != nil {
		return self, err
	}
	packed.CheckVersion(packedIntsVersion)
	self.encodedSizes = make([]int32, 33)
	self.encoders = make([]packed.PackedIntsEncoder, 33)
	self.decoders = make([]packed.PackedIntsDecoder, 33)
	self.iterations = make([]int32, 33)

	for bpv := 1; bpv <= 32; bpv++ {
		code, err := in.ReadVInt()
		if err != nil {
			return self, err
		}
		formatId := uint32(code) >> 5
		bitsPerValue := (uint32(code) & 31) + 1

		format := packed.PackedFormat(formatId)
		// assert format.isSupported(bitsPerValue)
		self.encodedSizes[bpv] = encodedSize(format, packedIntsVersion, bitsPerValue)
		self.encoders[bpv] = packed.GetPackedIntsEncoder(format, packedIntsVersion, bitsPerValue)
		self.decoders[bpv] = packed.GetPackedIntsDecoder(format, packedIntsVersion, bitsPerValue)
		self.iterations[bpv] = computeIterations(self.decoders[bpv])
	}
	return self, nil
}
Example #3
0
func (np *NormsProducer) loadNorms(field *FieldInfo) (NumericDocValues, error) {
	entry, ok := np.norms[int(field.Number)]
	assert(ok)
	switch entry.format {
	case CONST_COMPRESSED:
		return func(int) int64 { return entry.offset }, nil
	case UNCOMPRESSED:
		panic("not implemented yet")
	case DELTA_COMPRESSED:
		panic("not implemented yet")
	case TABLE_COMPRESSED:
		var err error
		if err = np.data.Seek(entry.offset); err == nil {
			var packedVersion int32
			if packedVersion, err = np.data.ReadVInt(); err == nil {
				var size int
				if size, err = int32ToInt(np.data.ReadVInt()); err == nil {
					if size > 256 {
						return nil, errors.New(fmt.Sprintf(
							"TABLE_COMPRESSED cannot have more than 256 distinct values, input=%v",
							np.data))
					}
					decode := make([]int64, size)
					for i, _ := range decode {
						if decode[i], err = np.data.ReadLong(); err != nil {
							break
						}
					}
					if err == nil {
						var formatId int
						if formatId, err = int32ToInt(np.data.ReadVInt()); err == nil {
							var bitsPerValue int32
							if bitsPerValue, err = np.data.ReadVInt(); err == nil {
								var ordsReader packed.PackedIntsReader
								if ordsReader, err = packed.ReaderNoHeader(np.data,
									packed.PackedFormat(formatId), packedVersion,
									int32(np.maxDoc), uint32(bitsPerValue)); err == nil {

									atomic.AddInt64(&np.ramBytesUsed, util.SizeOf(decode)+ordsReader.RamBytesUsed())
									return func(docId int) int64 {
										return decode[int(ordsReader.Get(docId))]
									}, nil
								}
							}
						}
					}
				}
			}
		}
		if err != nil {
			return nil, err
		}
	default:
		panic("assert fail")
	}
	panic("should not be here")
}
func computeMaxDataSize() int {
	maxDataSize := 0
	// for each version
	for version := packed.PACKED_VERSION_START; version <= packed.PACKED_VERSION_CURRENT; version++ {
		// for each packed format
		for format := packed.PACKED; format <= packed.PACKED_SINGLE_BLOCK; format++ {
			// for each bit-per-value
			for bpv := uint32(1); bpv <= 32; bpv++ {
				if !packed.PackedFormat(format).IsSupported(bpv) {
					continue
				}
				decoder := packed.GetPackedIntsDecoder(packed.PackedFormat(format), int32(version), bpv)
				iterations := int(computeIterations(decoder))
				if n := iterations * decoder.ByteValueCount(); n > maxDataSize {
					maxDataSize = n
				}
			}
		}
	}
	return maxDataSize
}
Example #5
0
func saveInts(values []int, out DataOutput) error {
	length := len(values)
	assert(length > 0)
	if length == 1 {
		return out.WriteVInt(int32(values[0]))
	}

	var allEqual = true
	var sentinel = values[0]
	for _, v := range values[1:] {
		if v != sentinel {
			allEqual = false
			break
		}
	}
	if allEqual {
		err := out.WriteVInt(0)
		if err == nil {
			err = out.WriteVInt(int32(values[0]))
		}
		return err
	}

	var max int64 = 0
	for _, v := range values {
		max |= int64(v)
	}
	var bitsRequired = packed.BitsRequired(max)
	err := out.WriteVInt(int32(bitsRequired))
	if err != nil {
		return err
	}

	w := packed.WriterNoHeader(out, packed.PackedFormat(packed.PACKED), length, bitsRequired, 1)
	for _, v := range values {
		if err = w.Add(int64(v)); err != nil {
			return err
		}
	}
	return w.Finish()
}
func (w *StoredFieldsIndexWriter) writeBlock() error {
	assert(w.blockChunks > 0)
	err := w.fieldsIndexOut.WriteVInt(int32(w.blockChunks))
	if err != nil {
		return err
	}

	// The trick here is that we only store the difference from the
	// average start pointer or doc base, this helps save bits per
	// value. And in order to prevent a few chunks that would be far
	// from the average to raise the number of bits per value for all
	// of them, we only encode blocks of 1024 chunks at once.
	// See LUCENE-4512

	// doc bases
	var avgChunkDocs int
	if w.blockChunks == 1 {
		avgChunkDocs = 0
	} else {
		avgChunkDocs = int(math.Floor(float64(w.blockDocs-w.docBaseDeltas[w.blockChunks-1])/float64(w.blockChunks-1) + 0.5))
	}
	err = w.fieldsIndexOut.WriteVInt(int32(w.totalDocs - w.blockDocs)) // doc base
	if err == nil {
		err = w.fieldsIndexOut.WriteVInt(int32(avgChunkDocs))
	}
	if err != nil {
		return err
	}
	var docBase int = 0
	var maxDelta int64 = 0
	for i := 0; i < w.blockChunks; i++ {
		delta := docBase - avgChunkDocs*i
		maxDelta |= moveSignToLowOrderBit(int64(delta))
		docBase += w.docBaseDeltas[i]
	}

	bitsPerDocbase := packed.BitsRequired(maxDelta)
	err = w.fieldsIndexOut.WriteVInt(int32(bitsPerDocbase))
	if err != nil {
		return err
	}
	writer := packed.WriterNoHeader(w.fieldsIndexOut,
		packed.PackedFormat(packed.PACKED), w.blockChunks, bitsPerDocbase, 1)
	docBase = 0
	for i := 0; i < w.blockChunks; i++ {
		delta := docBase - avgChunkDocs*i
		assert(packed.BitsRequired(moveSignToLowOrderBit(int64(delta))) <= writer.BitsPerValue())
		err = writer.Add(moveSignToLowOrderBit(int64(delta)))
		if err != nil {
			return err
		}
		docBase += w.docBaseDeltas[i]
	}
	err = writer.Finish()
	if err != nil {
		return err
	}

	// start pointers
	w.fieldsIndexOut.WriteVLong(w.firstStartPointer)
	var avgChunkSize int64
	if w.blockChunks == 1 {
		avgChunkSize = 0
	} else {
		avgChunkSize = (w.maxStartPointer - w.firstStartPointer) / int64(w.blockChunks-1)
	}
	err = w.fieldsIndexOut.WriteVLong(avgChunkSize)
	if err != nil {
		return err
	}
	var startPointer int64 = 0
	maxDelta = 0
	for i := 0; i < w.blockChunks; i++ {
		startPointer += w.startPointerDeltas[i]
		delta := startPointer - avgChunkSize*int64(i)
		maxDelta |= moveSignToLowOrderBit(delta)
	}

	bitsPerStartPointer := packed.BitsRequired(maxDelta)
	err = w.fieldsIndexOut.WriteVInt(int32(bitsPerStartPointer))
	if err != nil {
		return err
	}
	writer = packed.WriterNoHeader(w.fieldsIndexOut,
		packed.PackedFormat(packed.PACKED), w.blockChunks, bitsPerStartPointer, 1)
	startPointer = 0
	for i := 0; i < w.blockChunks; i++ {
		startPointer += w.startPointerDeltas[i]
		delta := startPointer - avgChunkSize*int64(i)
		assert(packed.BitsRequired(moveSignToLowOrderBit(delta)) <= writer.BitsPerValue())
		err = writer.Add(moveSignToLowOrderBit(delta))
		if err != nil {
			return err
		}
	}
	return writer.Finish()
}
Example #7
0
func (r *CompressingStoredFieldsReader) VisitDocument(docID int, visitor StoredFieldVisitor) error {
	err := r.fieldsStream.Seek(r.indexReader.startPointer(docID))
	if err != nil {
		return err
	}

	docBase, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	chunkDocs, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	if docID < docBase ||
		docID >= docBase+chunkDocs ||
		docBase+chunkDocs > r.numDocs {
		return errors.New(fmt.Sprintf(
			"Corrupted: docID=%v, docBase=%v, chunkDocs=%v, numDocs=%v (resource=%v)",
			docID, docBase, chunkDocs, r.numDocs, r.fieldsStream))
	}

	var numStoredFields, offset, length, totalLength int
	if chunkDocs == 1 {
		if numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		offset = 0
		if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		totalLength = length
	} else {
		bitsPerStoredFields, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerStoredFields == 0 {
			numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt())
			if err != nil {
				return err
			}
		} else if bitsPerStoredFields > 31 {
			return errors.New(fmt.Sprintf("bitsPerStoredFields=%v (resource=%v)",
				bitsPerStoredFields, r.fieldsStream))
		} else {
			panic("not implemented yet")
		}

		bitsPerLength, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerLength == 0 {
			if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
				return err
			}
			offset = (docID - docBase) * length
			totalLength = chunkDocs * length
		} else if bitsPerLength > 31 {
			return errors.New(fmt.Sprintf("bitsPerLength=%v (resource=%v)",
				bitsPerLength, r.fieldsStream))
		} else {
			it := packed.ReaderIteratorNoHeader(
				r.fieldsStream, packed.PackedFormat(packed.PACKED), r.packedIntsVersion,
				chunkDocs, bitsPerLength, 1)
			var n int64
			off := 0
			for i := 0; i < docID-docBase; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			offset = off
			if n, err = it.Next(); err != nil {
				return err
			}
			length = int(n)
			off += length
			for i := docID - docBase + 1; i < chunkDocs; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			totalLength = off
		}
	}

	if (length == 0) != (numStoredFields == 0) {
		return errors.New(fmt.Sprintf(
			"length=%v, numStoredFields=%v (resource=%v)",
			length, numStoredFields, r.fieldsStream))
	}
	if numStoredFields == 0 {
		// nothing to do
		return nil
	}

	var documentInput util.DataInput
	if r.version >= VERSION_BIG_CHUNKS && totalLength >= 2*r.chunkSize {
		panic("not implemented yet")
	} else {
		var bytes []byte
		if totalLength <= BUFFER_REUSE_THRESHOLD {
			bytes = r.bytes
		} else {
			bytes = make([]byte, 0)
		}
		bytes, err = r.decompressor(r.fieldsStream, totalLength, offset, length, bytes)
		if err != nil {
			return err
		}
		assert(len(bytes) == length)
		documentInput = store.NewByteArrayDataInput(bytes)
	}

	for fieldIDX := 0; fieldIDX < numStoredFields; fieldIDX++ {
		infoAndBits, err := documentInput.ReadVLong()
		if err != nil {
			return err
		}
		fieldNumber := int(uint64(infoAndBits) >> uint64(TYPE_BITS))
		fieldInfo := r.fieldInfos.FieldInfoByNumber(fieldNumber)

		bits := int(infoAndBits & int64(TYPE_MASK))
		assertWithMessage(bits <= NUMERIC_DOUBLE, fmt.Sprintf("bits=%x", bits))

		status, err := visitor.NeedsField(fieldInfo)
		if err != nil {
			return err
		}
		switch status {
		case STORED_FIELD_VISITOR_STATUS_YES:
			r.readField(documentInput, visitor, fieldInfo, bits)
		case STORED_FIELD_VISITOR_STATUS_NO:
			panic("not implemented yet")
		case STORED_FIELD_VISITOR_STATUS_STOP:
			return nil
		}
	}

	return nil
}
Example #8
0
func (nc *NormsConsumer) AddNumericField(field *FieldInfo,
	iter func() func() (interface{}, bool)) (err error) {

	if err = nc.meta.WriteVInt(field.Number); err != nil {
		return
	}
	minValue, maxValue := int64(math.MaxInt64), int64(math.MinInt64)
	// TODO: more efficient?
	uniqueValues := newNormMap()

	count := int64(0)
	next := iter()
	for {
		nv, ok := next()
		if !ok {
			break
		}
		assert2(nv != nil, "illegal norms data for field %v, got null for value: %v", field.Name, count)
		v := nv.(int64)

		if v < minValue {
			minValue = v
		}
		if v > maxValue {
			maxValue = v
		}

		if uniqueValues != nil && uniqueValues.add(v) && uniqueValues.size > 256 {
			uniqueValues = nil
		}

		count++
	}
	assert2(count == int64(nc.maxDoc),
		"illegal norms data for field %v, expected %v values, got %v",
		field.Name, nc.maxDoc, count)

	if uniqueValues != nil && uniqueValues.size == 1 {
		// 0 bpv
		if err = nc.meta.WriteByte(CONST_COMPRESSED); err != nil {
			return
		}
		if err = nc.meta.WriteLong(minValue); err != nil {
			return
		}
	} else if uniqueValues != nil {
		// small number of unique values; this is the typical case:
		// we only use bpv=1,2,4,8
		format := packed.PackedFormat(packed.PACKED_SINGLE_BLOCK)
		bitsPerValue := packed.BitsRequired(int64(uniqueValues.size) - 1)
		if bitsPerValue == 3 {
			bitsPerValue = 4
		} else if bitsPerValue > 4 {
			bitsPerValue = 8
		}

		if bitsPerValue == 8 && minValue >= 0 && maxValue <= 255 {
			if err = store.Stream(nc.meta).WriteByte(UNCOMPRESSED). // uncompressed []byte
										WriteLong(nc.data.FilePointer()).
										Close(); err != nil {
				return err
			}
			next = iter()
			for {
				nv, ok := next()
				if !ok {
					break
				}
				n := byte(0)
				if nv != nil {
					n = byte(nv.(int64))
				}
				if err = nc.data.WriteByte(byte(n)); err != nil {
					return err
				}
			}
		} else {
			if err = store.Stream(nc.meta).WriteByte(TABLE_COMPRESSED). // table-compressed
											WriteLong(nc.data.FilePointer()).
											Close(); err != nil {
				return err
			}
			if err = nc.data.WriteVInt(packed.VERSION_CURRENT); err != nil {
				return err
			}

			decode := uniqueValues.decodeTable()
			// upgrade to power of two sized array
			size := 1 << uint(bitsPerValue)
			if err = nc.data.WriteVInt(int32(size)); err != nil {
				return err
			}
			for _, v := range decode {
				if err = nc.data.WriteLong(v); err != nil {
					return err
				}
			}
			for i := len(decode); i < size; i++ {
				if err = nc.data.WriteLong(0); err != nil {
					return err
				}
			}

			if err = store.Stream(nc.data).WriteVInt(int32(format.Id())).
				WriteVInt(int32(bitsPerValue)).
				Close(); err != nil {
				return err
			}

			writer := packed.WriterNoHeader(nc.data, format, nc.maxDoc, bitsPerValue, packed.DEFAULT_BUFFER_SIZE)
			next = iter()
			for {
				nv, ok := next()
				if !ok {
					break
				}
				if err = writer.Add(int64(uniqueValues.ord(nv.(int64)))); err != nil {
					return err
				}
			}
			if err = writer.Finish(); err != nil {
				return err
			}
		}
	} else {
		panic("not implemented yet")
	}
	return nil
}