Beispiel #1
0
// newBytesStoreFromInput builds a BytesStore by reading numBytes bytes from
// in, split into blocks of a power-of-two size.
//
// The block size starts at 2 and doubles until it is at least numBytes or
// has reached maxBlockSize. Every block except possibly the last one is
// exactly blockSize bytes long; the last block holds the remainder.
//
// On a read error the partially filled store is discarded and (nil, err) is
// returned. When numBytes <= 0 nothing is read and the store returned has no
// blocks appended; its nextWrite is left as newBytesStore() initialised it.
func newBytesStoreFromInput(in util.DataInput, numBytes int64, maxBlockSize uint32) (bs *BytesStore, err error) {
	var blockSize uint32 = 2
	var blockBits uint32 = 1
	for int64(blockSize) < numBytes && blockSize < maxBlockSize {
		blockSize *= 2
		blockBits++
	}

	bs = newBytesStore()
	bs.blockBits = blockBits
	bs.blockSize = blockSize
	bs.blockMask = blockSize - 1

	for left := numBytes; left > 0; {
		chunk := blockSize
		if left < int64(chunk) {
			// Final, short block: left is smaller than blockSize here, so
			// the narrowing conversion cannot truncate.
			chunk = uint32(left)
		}
		block := make([]byte, chunk)
		if err = in.ReadBytes(block); err != nil {
			return nil, err
		}
		bs.blocks = append(bs.blocks, block)
		left -= int64(chunk)
	}

	// So .getPosition still works. Guarded because no block is appended
	// when numBytes <= 0, and indexing the last block would panic.
	if n := len(bs.blocks); n > 0 {
		bs.nextWrite = uint32(len(bs.blocks[n-1]))
	}
	return bs, nil
}
// readField decodes one stored field value from in and hands it to visitor.
//
// The field type is taken from bits & TYPE_MASK. Only STRING is decoded: a
// VInt length followed by that many bytes, delivered through
// visitor.StringField. The remaining known types panic with
// "not implemented yet", and an unrecognised type flag panics as well.
// Read errors are returned to the caller.
func (r *CompressingStoredFieldsReader) readField(in util.DataInput,
	visitor StoredFieldVisitor, info *model.FieldInfo, bits int) (err error) {
	switch fieldType := bits & TYPE_MASK; fieldType {
	case STRING:
		var n int
		n, err = int32AsInt(in.ReadVInt())
		if err != nil {
			return err
		}
		raw := make([]byte, n)
		err = in.ReadBytes(raw)
		if err != nil {
			return err
		}
		visitor.StringField(info, string(raw))
	case BYTE_ARR, NUMERIC_INT, NUMERIC_FLOAT, NUMERIC_LONG, NUMERIC_DOUBLE:
		panic("not implemented yet")
	default:
		panic(fmt.Sprintf("Unknown type flag: %x", bits))
	}
	return nil
}
Beispiel #3
0
// Read decodes one byte-sequence output from in.
//
// The encoding is a VInt length followed by that many raw bytes. A length
// of zero yields out.NoOutput(); otherwise the result is a freshly
// allocated []byte holding the bytes read.
//
// Any error from reading the length or the payload is returned together
// with a nil value.
func (out *ByteSequenceOutputs) Read(in util.DataInput) (e interface{}, err error) {
	log.Printf("Reading from %v...", in)

	// Declared at function scope on purpose: ":=" here reuses the named
	// result err instead of shadowing it, so failures reach the caller.
	length, err := in.ReadVInt()
	if err != nil {
		log.Printf("Failed to read length due to %v", err)
		return nil, err
	}
	log.Printf("Length: %v", length)

	if length == 0 {
		return out.NoOutput(), nil
	}
	buf := make([]byte, length)
	if err = in.ReadBytes(buf); err != nil {
		return nil, err
	}
	return buf, nil
}
Beispiel #4
0
// DecodeTerm fills the postings metadata of a term into _termState.
//
// longs carries the file-pointer deltas for the doc, pos and pay streams
// (indices 0, 1 and 2); the remaining metadata is read from in. When
// absolute is true the three start file pointers are reset to zero before
// the deltas are applied. Readers whose version is older than
// LUCENE41_VERSION_META_ARRAY delegate to _decodeTerm instead.
//
// _termState.Self must hold an *intBlockTermState; otherwise the type
// assertion panics. Read errors from in are returned.
func (r *Lucene41PostingsReader) DecodeTerm(longs []int64,
	in util.DataInput, fieldInfo *FieldInfo,
	_termState *BlockTermState, absolute bool) (err error) {

	state := _termState.Self.(*intBlockTermState)
	hasPositions := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
	hasOffsets := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
	hasPayloads := fieldInfo.HasPayloads()

	if absolute {
		state.docStartFP, state.posStartFP, state.payStartFP = 0, 0, 0
	}

	// backward compatibility
	if r.version < LUCENE41_VERSION_META_ARRAY {
		return r._decodeTerm(in, fieldInfo, state)
	}

	// File pointers are delta-coded in longs.
	state.docStartFP += longs[0]
	if hasPositions {
		state.posStartFP += longs[1]
		if hasOffsets || hasPayloads {
			state.payStartFP += longs[2]
		}
	}

	// A doc id is only stored inline for terms with exactly one document.
	if state.DocFreq != 1 {
		state.singletonDocID = -1
	} else {
		state.singletonDocID, err = asInt(in.ReadVInt())
		if err != nil {
			return err
		}
	}

	if hasPositions {
		if state.TotalTermFreq <= LUCENE41_BLOCK_SIZE {
			state.lastPosBlockOffset = -1
		} else {
			state.lastPosBlockOffset, err = in.ReadVLong()
			if err != nil {
				return err
			}
		}
	}

	if state.DocFreq <= LUCENE41_BLOCK_SIZE {
		state.skipOffset = -1
		return nil
	}
	state.skipOffset, err = in.ReadVLong()
	if err != nil {
		return err
	}
	return nil
}
Beispiel #5
0
// readLabel reads one arc label from in, using the encoding selected by
// t.inputType: a single unsigned byte for INPUT_TYPE_BYTE1, an unsigned
// 16-bit value for INPUT_TYPE_BYTE2, and a VInt for anything else.
//
// Read errors are returned to the caller; in the byte and short cases the
// label returned alongside an error is 0.
func (t *FST) readLabel(in util.DataInput) (v int, err error) {
	switch t.inputType {
	case INPUT_TYPE_BYTE1: // Unsigned byte
		// A distinct error name avoids shadowing the named result, which
		// is how read failures were previously lost.
		b, e := in.ReadByte()
		if e != nil {
			return 0, e
		}
		// The mask keeps the label unsigned regardless of whether the
		// reader's return type is signed.
		return int(b) & 0xFF, nil
	case INPUT_TYPE_BYTE2: // Unsigned short
		s, e := in.ReadShort()
		if e != nil {
			return 0, e
		}
		// Same as above: prevents sign extension of values >= 0x8000 if
		// ReadShort returns a signed 16-bit integer.
		return int(s) & 0xFFFF, nil
	default:
		return AsInt(in.ReadVInt())
	}
}
// VisitDocument locates the stored fields of document docID in the fields
// stream, decompresses them, and feeds each field to visitor.
//
// Steps:
//  1. Seek to the chunk start pointer that the index reader reports for
//     docID and read the chunk header (docBase, chunkDocs).
//  2. Work out this document's field count, its byte offset and length
//     inside the chunk, and the chunk's total uncompressed length.
//  3. Decompress the document's slice of the chunk.
//  4. For every stored field, ask visitor.NeedsField and either decode the
//     field (YES), or stop (STOP).
//
// An error is returned if the chunk header is inconsistent with docID or
// with r.numDocs, if any read or decompression fails, or if decoding a
// field fails. Several code paths are not implemented and panic with
// "not implemented yet": packed per-document field counts, big chunks
// (version >= VERSION_BIG_CHUNKS with totalLength >= 2*chunkSize), and
// skipping a field the visitor does not need.
func (r *CompressingStoredFieldsReader) VisitDocument(docID int, visitor StoredFieldVisitor) error {
	err := r.fieldsStream.Seek(r.indexReader.startPointer(docID))
	if err != nil {
		return err
	}

	// Chunk header: first doc id in the chunk, then number of docs in it.
	docBase, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	chunkDocs, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	// docID must fall inside [docBase, docBase+chunkDocs) and the chunk
	// must not extend past the reader's document count.
	if docID < docBase ||
		docID >= docBase+chunkDocs ||
		docBase+chunkDocs > r.numDocs {
		return errors.New(fmt.Sprintf(
			"Corrupted: docID=%v, docBase=%v, chunkDocs=%v, numDocs=%v (resource=%v)",
			docID, docBase, chunkDocs, r.numDocs, r.fieldsStream))
	}

	var numStoredFields, offset, length, totalLength int
	if chunkDocs == 1 {
		// Single-document chunk: field count and length are plain VInts
		// and the document spans the whole chunk.
		if numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		offset = 0
		if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		totalLength = length
	} else {
		// Note: the ":=" declarations in this branch introduce an err that
		// is local to the branch; every failure is returned immediately.
		bitsPerStoredFields, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerStoredFields == 0 {
			// Zero bits: a single field count is stored for the chunk.
			numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt())
			if err != nil {
				return err
			}
		} else if bitsPerStoredFields > 31 {
			return errors.New(fmt.Sprintf("bitsPerStoredFields=%v (resource=%v)",
				bitsPerStoredFields, r.fieldsStream))
		} else {
			panic("not implemented yet")
		}

		bitsPerLength, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerLength == 0 {
			// Zero bits: a single length is stored and applied to every
			// document, so offsets are simple multiples of it.
			if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
				return err
			}
			offset = (docID - docBase) * length
			totalLength = chunkDocs * length
		} else if bitsPerLength > 31 {
			return errors.New(fmt.Sprintf("bitsPerLength=%v (resource=%v)",
				bitsPerLength, r.fieldsStream))
		} else {
			// Per-document lengths are packed; walk all chunkDocs of them
			// to get this document's offset, its length, and the total.
			it := packed.ReaderIteratorNoHeader(
				r.fieldsStream, packed.PackedFormat(packed.PACKED), r.packedIntsVersion,
				chunkDocs, bitsPerLength, 1)
			var n int64
			off := 0
			// Lengths of the documents that precede docID in the chunk.
			for i := 0; i < docID-docBase; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			offset = off
			// Length of docID itself.
			if n, err = it.Next(); err != nil {
				return err
			}
			length = int(n)
			off += length
			// Lengths of the documents that follow docID in the chunk.
			for i := docID - docBase + 1; i < chunkDocs; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			totalLength = off
		}
	}

	// A document has zero bytes exactly when it has zero stored fields.
	if (length == 0) != (numStoredFields == 0) {
		return errors.New(fmt.Sprintf(
			"length=%v, numStoredFields=%v (resource=%v)",
			length, numStoredFields, r.fieldsStream))
	}
	if numStoredFields == 0 {
		// nothing to do
		return nil
	}

	var documentInput util.DataInput
	if r.version >= VERSION_BIG_CHUNKS && totalLength >= 2*r.chunkSize {
		panic("not implemented yet")
	} else {
		var bytes []byte
		if totalLength <= BUFFER_REUSE_THRESHOLD {
			// Small chunk: hand the reader's own buffer to the decompressor.
			bytes = r.bytes
		} else {
			bytes = make([]byte, 0)
		}
		bytes, err = r.decompressor(r.fieldsStream, totalLength, offset, length, bytes)
		if err != nil {
			return err
		}
		assert(len(bytes) == length)
		documentInput = store.NewByteArrayDataInput(bytes)
	}

	for fieldIDX := 0; fieldIDX < numStoredFields; fieldIDX++ {
		// Each field starts with a VLong: the field number in the upper
		// bits and the type flag in the low TYPE_BITS bits.
		infoAndBits, err := documentInput.ReadVLong()
		if err != nil {
			return err
		}
		fieldNumber := int(uint64(infoAndBits) >> uint64(TYPE_BITS))
		fieldInfo := r.fieldInfos.FieldInfoByNumber(fieldNumber)

		bits := int(infoAndBits & int64(TYPE_MASK))
		assertWithMessage(bits <= NUMERIC_DOUBLE, fmt.Sprintf("bits=%x", bits))

		status, err := visitor.NeedsField(fieldInfo)
		if err != nil {
			return err
		}
		switch status {
		case STORED_FIELD_VISITOR_STATUS_YES:
			// A failed decode must abort the visit rather than be ignored.
			if err = r.readField(documentInput, visitor, fieldInfo, bits); err != nil {
				return err
			}
		case STORED_FIELD_VISITOR_STATUS_NO:
			panic("not implemented yet")
		case STORED_FIELD_VISITOR_STATUS_STOP:
			return nil
		}
	}

	return nil
}
Beispiel #7
0
// loadFST3 loads a previously saved FST from in; maxBlockBits allows you to
// control the size of the byte pages used to hold the FST bytes.
//
// The stream is read in this order: codec header, packed flag, empty-output
// flag (followed by the serialized empty output when the flag is 1), input
// type, the packed node-address table (packed FSTs only), the start node,
// node/arc counters, and finally the FST bytes. Root arcs are cached once
// everything is loaded.
//
// maxBlockBits must be in 1..30, otherwise the function panics; an unknown
// input type byte panics as well. Any read or decode error is logged by the
// deferred hook and returned together with the partially populated FST,
// which callers must not use.
func loadFST3(in util.DataInput, outputs Outputs, maxBlockBits uint32) (fst *FST, err error) {
	log.Printf("Loading FST from %v and output to %v...", in, outputs)
	defer func() {
		if err != nil {
			log.Print("Failed to load FST.")
			log.Printf("DEBUG %v", err)
		}
	}()
	fst = &FST{outputs: outputs, startNode: -1}

	if maxBlockBits < 1 || maxBlockBits > 30 {
		panic(fmt.Sprintf("maxBlockBits should 1..30; got %v", maxBlockBits))
	}

	// NOTE: only reads most recent format; we don't have
	// back-compat promise for FSTs (they are experimental):
	fst.version, err = codec.CheckHeader(in, FST_FILE_FORMAT_NAME, FST_VERSION_PACKED, FST_VERSION_VINT_TARGET)
	if err != nil {
		return fst, err
	}

	// Every ":=" below that lists err sits at function scope, so it
	// assigns to the named result instead of shadowing it.
	packedFlag, err := in.ReadByte()
	if err != nil {
		return fst, err
	}
	fst.packed = (packedFlag == 1)

	emptyFlag, err := in.ReadByte()
	if err != nil {
		return fst, err
	}
	if emptyFlag == 1 {
		// accepts empty string
		// 1 KB blocks:
		emptyBytes := newBytesStoreFromBits(10)
		// This block is a nested scope, so a distinct name is used for
		// the error to avoid shadowing the named result.
		numBytes, e := in.ReadVInt()
		if e != nil {
			return fst, e
		}
		log.Printf("Number of bytes: %v", numBytes)
		emptyBytes.CopyBytes(in, int64(numBytes))

		// De-serialize empty-string output:
		var reader BytesReader
		if fst.packed {
			log.Printf("Forward reader.")
			reader = emptyBytes.forwardReader()
		} else {
			log.Printf("Reverse reader.")
			reader = emptyBytes.reverseReader()
			// NoOutputs uses 0 bytes when writing its output,
			// so we have to check here else BytesStore gets
			// angry:
			if numBytes > 0 {
				reader.setPosition(int64(numBytes - 1))
			}
		}
		log.Printf("Reading final output from %v to %v...", reader, outputs)
		if fst.emptyOutput, err = outputs.ReadFinalOutput(reader); err != nil {
			return fst, err
		}
	} // else emptyOutput = nil

	inputTypeFlag, err := in.ReadByte()
	if err != nil {
		return fst, err
	}
	switch inputTypeFlag {
	case 0:
		fst.inputType = INPUT_TYPE_BYTE1
	case 1:
		fst.inputType = INPUT_TYPE_BYTE2
	case 2:
		fst.inputType = INPUT_TYPE_BYTE4
	default:
		panic(fmt.Sprintf("invalid input type %v", inputTypeFlag))
	}

	if fst.packed {
		fst.nodeRefToAddress, err = packed.NewPackedReader(in)
		if err != nil {
			return fst, err
		}
	} // else nodeRefToAddress = nil

	if fst.startNode, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.nodeCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.arcCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.arcWithOutputCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}

	totalBytes, err := in.ReadVLong()
	if err != nil {
		return fst, err
	}
	if fst.bytes, err = newBytesStoreFromInput(in, totalBytes, 1<<maxBlockBits); err != nil {
		return fst, err
	}
	fst.NO_OUTPUT = outputs.NoOutput()

	err = fst.cacheRootArcs()

	// NOTE: bogus because this is only used during
	// building; we need to break out mutable FST from
	// immutable
	// fst.allowArrayArcs = false
	return fst, err
}
Beispiel #8
0
// _decodeTerm decodes term metadata in the layout used by readers older
// than the "meta array" format, where everything is read inline from in.
// DecodeTerm delegates here when r.version < LUCENE41_VERSION_META_ARRAY.
//
// docStartFP, posStartFP and payStartFP are delta-coded and therefore
// accumulated onto the values already in termState. singletonDocID,
// lastPosBlockOffset and skipOffset are absolute: each is either read from
// in or set to -1 when it does not apply. Read errors are returned.
func (r *Lucene41PostingsReader) _decodeTerm(in util.DataInput,
	fieldInfo *FieldInfo, termState *intBlockTermState) (err error) {

	fieldHasPositions := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
	fieldHasOffsets := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
	fieldHasPayloads := fieldInfo.HasPayloads()

	if termState.DocFreq == 1 {
		// A single-document term stores its doc id instead of a doc
		// file-pointer delta.
		if termState.singletonDocID, err = asInt(in.ReadVInt()); err != nil {
			return err
		}
	} else {
		termState.singletonDocID = -1
		var n int64
		if n, err = in.ReadVLong(); err != nil {
			return err
		}
		termState.docStartFP += n
	}

	if fieldHasPositions {
		var n int64
		if n, err = in.ReadVLong(); err != nil {
			return err
		}
		termState.posStartFP += n

		if termState.TotalTermFreq > LUCENE41_BLOCK_SIZE {
			// Assigned, not accumulated: this is an absolute offset (the
			// other branch resets it to -1), matching DecodeTerm.
			if termState.lastPosBlockOffset, err = in.ReadVLong(); err != nil {
				return err
			}
		} else {
			termState.lastPosBlockOffset = -1
		}

		if (fieldHasPayloads || fieldHasOffsets) && termState.TotalTermFreq >= LUCENE41_BLOCK_SIZE {
			if n, err = in.ReadVLong(); err != nil {
				return err
			}
			termState.payStartFP += n
		}
	}

	if termState.DocFreq > LUCENE41_BLOCK_SIZE {
		if termState.skipOffset, err = in.ReadVLong(); err != nil {
			return err
		}
	} else {
		termState.skipOffset = -1
	}
	return nil
}