// newBytesStoreFromInput reads numBytes from in into a new BytesStore,
// choosing the smallest power-of-two block size (capped at maxBlockSize)
// that covers the data.
func newBytesStoreFromInput(in util.DataInput, numBytes int64, maxBlockSize uint32) (bs *BytesStore, err error) {
	var blockSize uint32 = 2
	var blockBits uint32 = 1
	for int64(blockSize) < numBytes && blockSize < maxBlockSize {
		blockSize *= 2
		blockBits++
	}
	self := newBytesStore()
	self.blockBits = blockBits
	self.blockSize = blockSize
	self.blockMask = blockSize - 1
	left := numBytes
	for left > 0 {
		chunk := blockSize
		if left < int64(chunk) {
			chunk = uint32(left)
		}
		block := make([]byte, chunk)
		if err = in.ReadBytes(block); err != nil {
			return nil, err
		}
		self.blocks = append(self.blocks, block)
		left -= int64(chunk)
	}
	// So .getPosition still works
	self.nextWrite = uint32(len(self.blocks[len(self.blocks)-1]))
	return self, nil
}
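// A minimal standalone sketch (not part of the original source; the function
// name is hypothetical) of how the sizing loop above behaves: it doubles the
// block size until it either covers numBytes or hits the cap, so for
// numBytes=5000 and maxBlockSize=1<<15 it settles on blockSize=8192,
// blockBits=13.
func exampleBlockSizing(numBytes int64, maxBlockSize uint32) (blockSize, blockBits uint32) {
	blockSize, blockBits = 2, 1
	for int64(blockSize) < numBytes && blockSize < maxBlockSize {
		blockSize *= 2
		blockBits++
	}
	// e.g. exampleBlockSizing(5000, 1<<15) == (8192, 13)
	return blockSize, blockBits
}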
// readField dispatches on the low type bits of a stored field and feeds the
// decoded value to the visitor. Only string fields are decoded so far.
func (r *CompressingStoredFieldsReader) readField(in util.DataInput,
	visitor StoredFieldVisitor, info *model.FieldInfo, bits int) (err error) {
	switch bits & TYPE_MASK {
	case BYTE_ARR:
		panic("not implemented yet")
	case STRING:
		var length int
		if length, err = int32AsInt(in.ReadVInt()); err != nil {
			return err
		}
		data := make([]byte, length)
		if err = in.ReadBytes(data); err != nil {
			return err
		}
		visitor.StringField(info, string(data))
	case NUMERIC_INT, NUMERIC_FLOAT, NUMERIC_LONG, NUMERIC_DOUBLE:
		panic("not implemented yet")
	default:
		panic(fmt.Sprintf("Unknown type flag: %x", bits))
	}
	return nil
}
// Read de-serializes a byte-sequence output: a VInt length followed by that
// many bytes. Length 0 denotes the canonical NoOutput value.
func (out *ByteSequenceOutputs) Read(in util.DataInput) (e interface{}, err error) {
	log.Printf("Reading from %v...", in)
	length, err := in.ReadVInt()
	if err != nil {
		log.Printf("Failed to read length due to %v", err)
		return nil, err
	}
	log.Printf("Length: %v", length)
	if length == 0 {
		return out.NoOutput(), nil
	}
	buf := make([]byte, length)
	err = in.ReadBytes(buf)
	return buf, err
}
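// A hypothetical inverse of Read above (not from the source), shown only to
// document the wire format it consumes: a Lucene-style VInt byte count
// followed by the raw bytes, with length 0 reserved for NoOutput.
func exampleEncodeOutput(b []byte) []byte {
	var out []byte
	n := uint32(len(b))
	for n >= 0x80 { // VInt: 7 bits per byte, high bit set means "more follows"
		out = append(out, byte(n)|0x80)
		n >>= 7
	}
	out = append(out, byte(n))
	// e.g. exampleEncodeOutput([]byte("ab")) == []byte{2, 'a', 'b'}
	return append(out, b...)
}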
// DecodeTerm decodes the metadata for a single term. File pointers arrive as
// deltas in longs[] (made absolute, when absolute is true, by first zeroing
// the accumulators); singleton doc IDs, the last position-block offset and
// the skip offset are read inline from in.
func (r *Lucene41PostingsReader) DecodeTerm(longs []int64, in util.DataInput,
	fieldInfo *FieldInfo, _termState *BlockTermState, absolute bool) (err error) {
	termState := _termState.Self.(*intBlockTermState)
	fieldHasPositions := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
	fieldHasOffsets := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
	fieldHasPayloads := fieldInfo.HasPayloads()
	if absolute {
		termState.docStartFP = 0
		termState.posStartFP = 0
		termState.payStartFP = 0
	}
	if r.version < LUCENE41_VERSION_META_ARRAY { // backward compatibility
		return r._decodeTerm(in, fieldInfo, termState)
	}
	termState.docStartFP += longs[0]
	if fieldHasPositions {
		termState.posStartFP += longs[1]
		if fieldHasOffsets || fieldHasPayloads {
			termState.payStartFP += longs[2]
		}
	}
	if termState.DocFreq == 1 {
		// Single-doc term: the doc ID is inlined instead of a postings block.
		if termState.singletonDocID, err = asInt(in.ReadVInt()); err != nil {
			return
		}
	} else {
		termState.singletonDocID = -1
	}
	if fieldHasPositions {
		if termState.TotalTermFreq > LUCENE41_BLOCK_SIZE {
			if termState.lastPosBlockOffset, err = in.ReadVLong(); err != nil {
				return err
			}
		} else {
			termState.lastPosBlockOffset = -1
		}
	}
	if termState.DocFreq > LUCENE41_BLOCK_SIZE {
		if termState.skipOffset, err = in.ReadVLong(); err != nil {
			return
		}
	} else {
		termState.skipOffset = -1
	}
	return nil
}
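// A minimal sketch (the function name is hypothetical, not from the source)
// of the delta scheme DecodeTerm relies on: each term's file pointers are
// stored relative to the previous term's, so an absolute decode zeroes the
// accumulator first and every later decode just adds its delta.
func exampleDeltaDecode(deltas []int64) []int64 {
	abs := make([]int64, len(deltas))
	var fp int64 // running accumulator, like docStartFP above
	for i, d := range deltas {
		fp += d
		abs[i] = fp
	}
	// e.g. exampleDeltaDecode([]int64{100, 20, 35}) == []int64{100, 120, 155}
	return abs
}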
// readLabel reads one input label, whose width depends on the FST's input
// type: one byte, two bytes, or a VInt.
func (t *FST) readLabel(in util.DataInput) (v int, err error) {
	switch t.inputType {
	case INPUT_TYPE_BYTE1: // Unsigned byte
		var b byte
		if b, err = in.ReadByte(); err == nil {
			v = int(b)
		}
	case INPUT_TYPE_BYTE2: // Unsigned short
		var s int16
		if s, err = in.ReadShort(); err == nil {
			v = int(uint16(s)) // mask to unsigned, matching the on-disk format
		}
	default:
		v, err = AsInt(in.ReadVInt())
	}
	return v, err
}
// VisitDocument locates the compressed chunk holding docID, decompresses the
// relevant slice and replays each stored field through the visitor.
func (r *CompressingStoredFieldsReader) VisitDocument(docID int, visitor StoredFieldVisitor) error {
	err := r.fieldsStream.Seek(r.indexReader.startPointer(docID))
	if err != nil {
		return err
	}
	docBase, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	chunkDocs, err := int32AsInt(r.fieldsStream.ReadVInt())
	if err != nil {
		return err
	}
	if docID < docBase || docID >= docBase+chunkDocs || docBase+chunkDocs > r.numDocs {
		return fmt.Errorf("Corrupted: docID=%v, docBase=%v, chunkDocs=%v, numDocs=%v (resource=%v)",
			docID, docBase, chunkDocs, r.numDocs, r.fieldsStream)
	}

	var numStoredFields, offset, length, totalLength int
	if chunkDocs == 1 {
		if numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		offset = 0
		if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
			return err
		}
		totalLength = length
	} else {
		bitsPerStoredFields, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerStoredFields == 0 {
			// Every document in the chunk has the same field count.
			numStoredFields, err = int32AsInt(r.fieldsStream.ReadVInt())
			if err != nil {
				return err
			}
		} else if bitsPerStoredFields > 31 {
			return fmt.Errorf("bitsPerStoredFields=%v (resource=%v)", bitsPerStoredFields, r.fieldsStream)
		} else {
			panic("not implemented yet")
		}

		bitsPerLength, err := int32AsInt(r.fieldsStream.ReadVInt())
		if err != nil {
			return err
		}
		if bitsPerLength == 0 {
			// Every document in the chunk has the same length.
			if length, err = int32AsInt(r.fieldsStream.ReadVInt()); err != nil {
				return err
			}
			offset = (docID - docBase) * length
			totalLength = chunkDocs * length
		} else if bitsPerLength > 31 {
			return fmt.Errorf("bitsPerLength=%v (resource=%v)", bitsPerLength, r.fieldsStream)
		} else {
			// Lengths are packed; sum them to find this document's offset
			// and the chunk's total length.
			it := packed.ReaderIteratorNoHeader(
				r.fieldsStream, packed.PackedFormat(packed.PACKED),
				r.packedIntsVersion, chunkDocs, bitsPerLength, 1)
			var n int64
			off := 0
			for i := 0; i < docID-docBase; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			offset = off
			if n, err = it.Next(); err != nil {
				return err
			}
			length = int(n)
			off += length
			for i := docID - docBase + 1; i < chunkDocs; i++ {
				if n, err = it.Next(); err != nil {
					return err
				}
				off += int(n)
			}
			totalLength = off
		}
	}

	if (length == 0) != (numStoredFields == 0) {
		return fmt.Errorf("length=%v, numStoredFields=%v (resource=%v)",
			length, numStoredFields, r.fieldsStream)
	}
	if numStoredFields == 0 {
		// nothing to do
		return nil
	}

	var documentInput util.DataInput
	if r.version >= VERSION_BIG_CHUNKS && totalLength >= 2*r.chunkSize {
		panic("not implemented yet")
	} else {
		var bytes []byte
		if totalLength <= BUFFER_REUSE_THRESHOLD {
			bytes = r.bytes
		} else {
			bytes = make([]byte, 0)
		}
		bytes, err = r.decompressor(r.fieldsStream, totalLength, offset, length, bytes)
		if err != nil {
			return err
		}
		assert(len(bytes) == length)
		documentInput = store.NewByteArrayDataInput(bytes)
	}

	for fieldIDX := 0; fieldIDX < numStoredFields; fieldIDX++ {
		infoAndBits, err := documentInput.ReadVLong()
		if err != nil {
			return err
		}
		// High bits carry the field number, low TYPE_BITS bits the value type.
		fieldNumber := int(uint64(infoAndBits) >> uint64(TYPE_BITS))
		fieldInfo := r.fieldInfos.FieldInfoByNumber(fieldNumber)
		bits := int(infoAndBits & int64(TYPE_MASK))
		assertWithMessage(bits <= NUMERIC_DOUBLE, fmt.Sprintf("bits=%x", bits))

		status, err := visitor.NeedsField(fieldInfo)
		if err != nil {
			return err
		}
		switch status {
		case STORED_FIELD_VISITOR_STATUS_YES:
			if err = r.readField(documentInput, visitor, fieldInfo, bits); err != nil {
				return err
			}
		case STORED_FIELD_VISITOR_STATUS_NO:
			panic("not implemented yet")
		case STORED_FIELD_VISITOR_STATUS_STOP:
			return nil
		}
	}
	return nil
}
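// A minimal sketch (not from this file) of the infoAndBits packing decoded in
// VisitDocument: the field number lives in the high bits and the value type
// in the low TYPE_BITS bits. The concrete values typeBits=3 and typeMask=7
// are assumptions based on the Lucene 4.x stored-fields format, where 3 bits
// suffice for the largest type constant (NUMERIC_DOUBLE = 5).
func exampleUnpackInfoAndBits(infoAndBits int64) (fieldNumber, bits int) {
	const typeBits = 3               // assumed value of TYPE_BITS
	const typeMask = 1<<typeBits - 1 // assumed value of TYPE_MASK (0x7)
	fieldNumber = int(uint64(infoAndBits) >> typeBits)
	bits = int(infoAndBits & typeMask)
	// e.g. 4<<3|1 yields fieldNumber=4, bits=1 (STRING, if STRING == 1 as in Lucene 4.x)
	return fieldNumber, bits
}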
// Load a previously saved FST; maxBlockBits allows you to
// control the size of the byte[] pages used to hold the FST bytes.
func loadFST3(in util.DataInput, outputs Outputs, maxBlockBits uint32) (fst *FST, err error) {
	log.Printf("Loading FST from %v and output to %v...", in, outputs)
	defer func() {
		if err != nil {
			log.Print("Failed to load FST.")
			log.Printf("DEBUG: %v", err)
		}
	}()
	fst = &FST{outputs: outputs, startNode: -1}

	if maxBlockBits < 1 || maxBlockBits > 30 {
		panic(fmt.Sprintf("maxBlockBits should be 1..30; got %v", maxBlockBits))
	}

	// NOTE: only reads most recent format; we don't have
	// back-compat promise for FSTs (they are experimental):
	fst.version, err = codec.CheckHeader(in, FST_FILE_FORMAT_NAME,
		FST_VERSION_PACKED, FST_VERSION_VINT_TARGET)
	if err != nil {
		return fst, err
	}

	var b byte
	if b, err = in.ReadByte(); err != nil {
		return fst, err
	}
	fst.packed = (b == 1)

	if b, err = in.ReadByte(); err != nil {
		return fst, err
	}
	if b == 1 {
		// accepts empty string
		// 1 KB blocks:
		emptyBytes := newBytesStoreFromBits(10)
		var numBytes int32
		if numBytes, err = in.ReadVInt(); err != nil {
			return fst, err
		}
		log.Printf("Number of bytes: %v", numBytes)
		if err = emptyBytes.CopyBytes(in, int64(numBytes)); err != nil {
			return fst, err
		}

		// De-serialize empty-string output:
		var reader BytesReader
		if fst.packed {
			log.Printf("Forward reader.")
			reader = emptyBytes.forwardReader()
		} else {
			log.Printf("Reverse reader.")
			reader = emptyBytes.reverseReader()
			// NoOutputs uses 0 bytes when writing its output,
			// so we have to check here else BytesStore gets
			// angry:
			if numBytes > 0 {
				reader.setPosition(int64(numBytes - 1))
			}
		}
		log.Printf("Reading final output from %v to %v...", reader, outputs)
		if fst.emptyOutput, err = outputs.ReadFinalOutput(reader); err != nil {
			return fst, err
		}
	} // else emptyOutput = nil

	var t byte
	if t, err = in.ReadByte(); err != nil {
		return fst, err
	}
	switch t {
	case 0:
		fst.inputType = INPUT_TYPE_BYTE1
	case 1:
		fst.inputType = INPUT_TYPE_BYTE2
	case 2:
		fst.inputType = INPUT_TYPE_BYTE4
	default:
		panic(fmt.Sprintf("invalid input type %v", t))
	}

	if fst.packed {
		if fst.nodeRefToAddress, err = packed.NewPackedReader(in); err != nil {
			return fst, err
		}
	} // else nodeRefToAddress = nil

	if fst.startNode, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.nodeCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.arcCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.arcWithOutputCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	var numBytes int64
	if numBytes, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.bytes, err = newBytesStoreFromInput(in, numBytes, 1<<maxBlockBits); err != nil {
		return fst, err
	}

	fst.NO_OUTPUT = outputs.NoOutput()
	err = fst.cacheRootArcs()
	// NOTE: bogus because this is only used during
	// building; we need to break out mutable FST from
	// immutable
	// fst.allowArrayArcs = false
	return fst, err
}
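// A hedged usage sketch (the function name is hypothetical, not from this
// package): callers hand loadFST3 a positioned DataInput plus the same
// Outputs implementation the FST was written with (ByteSequenceOutputs for
// byte-sequence values, as in Read above); how the outputs value is obtained
// here is assumed.
func loadTermsFST(in util.DataInput, outputs Outputs) (*FST, error) {
	// maxBlockBits=15 caps each BytesStore page at 1<<15 = 32 KB.
	return loadFST3(in, outputs, 15)
}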
// _decodeTerm handles the pre-LUCENE41_VERSION_META_ARRAY format, where the
// file-pointer deltas are interleaved in the term metadata stream itself
// rather than passed in through longs[].
func (r *Lucene41PostingsReader) _decodeTerm(in util.DataInput,
	fieldInfo *FieldInfo, termState *intBlockTermState) (err error) {
	fieldHasPositions := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS
	fieldHasOffsets := fieldInfo.IndexOptions() >= INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS
	fieldHasPayloads := fieldInfo.HasPayloads()
	if termState.DocFreq == 1 {
		// Single-doc term: the doc ID is inlined instead of a postings block.
		if termState.singletonDocID, err = asInt(in.ReadVInt()); err != nil {
			return
		}
	} else {
		termState.singletonDocID = -1
		var n int64
		if n, err = in.ReadVLong(); err != nil {
			return
		}
		termState.docStartFP += n
	}
	if fieldHasPositions {
		var n int64
		if n, err = in.ReadVLong(); err != nil {
			return
		}
		termState.posStartFP += n
		if termState.TotalTermFreq > LUCENE41_BLOCK_SIZE {
			if n, err = in.ReadVLong(); err != nil {
				return
			}
			termState.lastPosBlockOffset += n
		} else {
			termState.lastPosBlockOffset = -1
		}
		if (fieldHasPayloads || fieldHasOffsets) && termState.TotalTermFreq >= LUCENE41_BLOCK_SIZE {
			if n, err = in.ReadVLong(); err != nil {
				return
			}
			termState.payStartFP += n
		}
	}
	if termState.DocFreq > LUCENE41_BLOCK_SIZE {
		if termState.skipOffset, err = in.ReadVLong(); err != nil {
			return
		}
	} else {
		termState.skipOffset = -1
	}
	return nil
}