func TestClone(t *testing.T) {
	fmt.Println("Testing Loading FST...")
	path := "../search/testdata/belfrysample"
	d, err := OpenFSDirectory(path)
	if err != nil {
		t.Error(err)
	}
	ctx := NewIOContextBool(false)
	in, err := d.OpenInput("_0_Lucene41_0.tip", ctx)
	if err != nil {
		t.Error(err)
	}
	version, err := codec.CheckHeader(in, "BLOCK_TREE_TERMS_INDEX", 0, 1)
	if err != nil {
		t.Error(err)
	}
	var indexDirOffset int64 = 0
	if version < 1 {
		indexDirOffset, err = in.ReadLong()
		if err != nil {
			t.Error(err)
		}
	} else { // >= 1
		in.Seek(in.Length() - 8)
		indexDirOffset, err = in.ReadLong()
		if err != nil {
			t.Error(err)
		}
	}
	fmt.Println("indexDirOffset:", indexDirOffset)
	in.Seek(indexDirOffset)
	indexStartFP, err := in.ReadVLong()
	if err != nil {
		t.Error(err)
	}
	fmt.Println("indexStartFP:", indexStartFP)

	fmt.Println("Before clone", in)
	clone := in.Clone()
	fmt.Println("After clone", clone)
	if _, ok := clone.(*SimpleFSIndexInput); !ok {
		t.Error("Clone() should return *SimpleFSIndexInput.")
	}
	clone.Seek(indexStartFP)
	fmt.Println("After clone.Seek()", clone)
	_, err = codec.CheckHeader(clone, "FST", 3, 4)
	if err != nil {
		t.Error(err)
	}
	// panic("bingo")
	// _, err = util.LoadFST(in, util.ByteSequenceOutputsSingleton())
	// if err != nil {
	// 	t.Error(err)
	// }
}
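// The contract exercised by TestClone, as a minimal hedged sketch (offsets
// are illustrative): a clone shares the underlying file but carries its own
// file pointer, so seeking the clone must not reposition the original.
//
//	in, _ := d.OpenInput("_0_Lucene41_0.tip", ctx)
//	in.Seek(42)
//	clone := in.Clone()
//	clone.Seek(0) // `in` must remain at 42; clone reads independently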
func NewLucene41PostingsReader(dir store.Directory, fis FieldInfos, si SegmentInfo,
	ctx store.IOContext, segmentSuffix string) (r PostingsReaderBase, err error) {

	log.Print("Initializing Lucene41PostingsReader...")
	success := false
	var docIn, posIn, payIn store.IndexInput = nil, nil, nil
	defer func() {
		if !success {
			log.Print("Failed to initialize Lucene41PostingsReader.")
			if err != nil {
				log.Print("DEBUG ", err)
			}
			util.CloseWhileSuppressingError(docIn, posIn, payIn)
		}
	}()

	docIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_DOC_EXTENSION), ctx)
	if err != nil {
		return r, err
	}
	_, err = codec.CheckHeader(docIn, LUCENE41_DOC_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return r, err
	}
	forUtil, err := NewForUtil(docIn)
	if err != nil {
		return r, err
	}

	if fis.hasProx {
		posIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_POS_EXTENSION), ctx)
		if err != nil {
			return r, err
		}
		_, err = codec.CheckHeader(posIn, LUCENE41_POS_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
		if err != nil {
			return r, err
		}

		if fis.hasPayloads || fis.hasOffsets {
			payIn, err = dir.OpenInput(util.SegmentFileName(si.name, segmentSuffix, LUCENE41_PAY_EXTENSION), ctx)
			if err != nil {
				return r, err
			}
			_, err = codec.CheckHeader(payIn, LUCENE41_PAY_CODEC, LUCENE41_VERSION_CURRENT, LUCENE41_VERSION_CURRENT)
			if err != nil {
				return r, err
			}
		}
	}

	success = true
	return &Lucene41PostingsReader{docIn, posIn, payIn, forUtil}, nil
}
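// The success-flag cleanup above is a recurring idiom in these readers. A
// minimal standalone sketch follows; openBoth and the file names are
// hypothetical, and it assumes util.CloseWhileSuppressingError tolerates nil
// inputs (its use with possibly-nil posIn/payIn above implies it does).
func openBoth(dir store.Directory, ctx store.IOContext) (store.IndexInput, store.IndexInput, error) {
	var a, b store.IndexInput
	success := false
	defer func() {
		if !success {
			// close whatever was opened so far; secondary close errors are
			// suppressed so the caller sees the original failure
			util.CloseWhileSuppressingError(a, b)
		}
	}()
	var err error
	if a, err = dir.OpenInput("a.dat", ctx); err != nil {
		return nil, nil, err
	}
	if b, err = dir.OpenInput("b.dat", ctx); err != nil {
		return nil, nil, err
	}
	success = true
	return a, b, nil
}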
func newLucene42DocValuesProducer(state SegmentReadState, dataCodec, dataExtension,
	metaCodec, metaExtension string) (dvp *Lucene42DocValuesProducer, err error) {

	dvp = &Lucene42DocValuesProducer{}
	dvp.maxDoc = int(state.segmentInfo.docCount)
	metaName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension)
	// read in the entries from the metadata file.
	in, err := state.dir.OpenInput(metaName, state.context)
	if err != nil {
		return dvp, err
	}
	// The metadata file is closed as soon as it has been fully read; the data
	// file must stay open for the producer's lifetime and is closed only if
	// initialization fails, so the two need separate success flags.
	metaSuccess := false
	defer func() {
		if metaSuccess {
			// propagate a close error only if nothing else failed
			if cerr := util.Close(in); err == nil {
				err = cerr
			}
		} else {
			util.CloseWhileSuppressingError(in)
		}
	}()

	version, err := codec.CheckHeader(in, metaCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}
	dvp.numerics = make(map[int]NumericEntry)
	dvp.binaries = make(map[int]BinaryEntry)
	dvp.fsts = make(map[int]FSTEntry)
	err = dvp.readFields(in, state.fieldInfos)
	if err != nil {
		return dvp, err
	}
	metaSuccess = true

	dataSuccess := false
	defer func() {
		if !dataSuccess {
			util.CloseWhileSuppressingError(dvp.data)
		}
	}()

	dataName := util.SegmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension)
	dvp.data, err = state.dir.OpenInput(dataName, state.context)
	if err != nil {
		return dvp, err
	}
	version2, err := codec.CheckHeader(dvp.data, dataCodec, LUCENE42_DV_VERSION_START, LUCENE42_DV_VERSION_CURRENT)
	if err != nil {
		return dvp, err
	}
	if version != version2 {
		return dvp, errors.New("Format versions mismatch")
	}
	dataSuccess = true
	return dvp, nil
}
func (r *BlockTreeTermsReader) readIndexHeader(input store.IndexInput) (version int, err error) {
	version, err = asInt(codec.CheckHeader(input, BTT_INDEX_CODEC_NAME,
		BTT_INDEX_VERSION_START, BTT_INDEX_VERSION_CURRENT))
	if err != nil {
		return version, err
	}
	if version < BTT_INDEX_VERSION_APPEND_ONLY {
		r.indexDirOffset, err = input.ReadLong()
		if err != nil {
			return version, err
		}
	}
	return version, nil
}
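// For orientation, a hedged sketch of the header layout that codec.CheckHeader
// validates, modeled on Lucene 4.x's CodecUtil.checkHeader: a 4-byte magic,
// a VString codec name, and a 4-byte version. The function name and error
// wording below are illustrative, not golucene's actual implementation.
func checkHeaderSketch(in store.DataInput, expectedCodec string, minVersion, maxVersion int32) (int32, error) {
	magic, err := in.ReadInt()
	if err != nil {
		return 0, err
	}
	if magic != 0x3fd76c17 { // Lucene's CODEC_MAGIC
		return 0, fmt.Errorf("codec header mismatch: actual header=%v vs expected header=%v", magic, 0x3fd76c17)
	}
	name, err := in.ReadString()
	if err != nil {
		return 0, err
	}
	if name != expectedCodec {
		return 0, fmt.Errorf("codec mismatch: actual codec=%v vs expected codec=%v", name, expectedCodec)
	}
	version, err := in.ReadInt()
	if err != nil {
		return 0, err
	}
	if version < minVersion || version > maxVersion {
		return 0, fmt.Errorf("unsupported version %v (expected %v..%v)", version, minVersion, maxVersion)
	}
	return version, nil
}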
func newPackedReader(in DataInput) (r PackedIntsReader, err error) {
	version, err := codec.CheckHeader(in, PACKED_CODEC_NAME, PACKED_VERSION_START, PACKED_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	bitsPerValue, err := asUint32(in.ReadVInt())
	if err != nil {
		return nil, err
	}
	// assert bitsPerValue > 0 && bitsPerValue <= 64
	valueCount, err := in.ReadVInt()
	if err != nil {
		return nil, err
	}
	id, err := in.ReadVInt()
	if err != nil {
		return nil, err
	}
	format := PackedFormat(id)
	return NewPackedReaderNoHeader(in, format, version, valueCount, bitsPerValue)
}
func (r *Lucene41PostingsReader) Init(termsIn store.IndexInput) error {
	log.Printf("Initializing from: %v", termsIn)
	// Make sure we are talking to the matching postings writer
	_, err := codec.CheckHeader(termsIn, LUCENE41_TERMS_CODEC, LUCENE41_VERSION_START, LUCENE41_VERSION_CURRENT)
	if err != nil {
		return err
	}
	indexBlockSize, err := termsIn.ReadVInt()
	if err != nil {
		return err
	}
	log.Printf("Index block size: %v", indexBlockSize)
	if indexBlockSize != LUCENE41_BLOCK_SIZE {
		panic(fmt.Sprintf("index-time BLOCK_SIZE (%v) != read-time BLOCK_SIZE (%v)",
			indexBlockSize, LUCENE41_BLOCK_SIZE))
	}
	return nil
}
func TestReadingFromSlicedIndexInputOSX(t *testing.T) {
	path := "../search/testdata/osx/belfrysample"
	d, err := OpenFSDirectory(path)
	if err != nil {
		t.Error(err)
	}
	ctx := NewIOContextBool(false)
	cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false)
	if err != nil {
		t.Error(err)
	}
	name := util.SegmentFileName("_0", "Lucene41_0", "pos")
	posIn, err := cd.OpenInput(name, ctx)
	if err != nil {
		t.Error(err)
	}
	t.Log(posIn)
	_, err = codec.CheckHeader(posIn, "Lucene41PostingsWriterPos", 0, 0)
	if err != nil {
		t.Error(err)
	}
	// Previously observed failure:
	// codec header mismatch: actual header=0 vs expected header=1071082519
	// (resource: SlicedIndexInput(SlicedIndexInput(_0_Lucene41_0.pos in
	// SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs')) in
	// SimpleFSIndexInput(path='/private/tmp/kc/index/belfrysample/_0.cfs')
	// slice=1461:3426))
}
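// A note on the failure recorded above: the "expected header" 1071082519 is
// just Lucene's CODEC_MAGIC in decimal, so the slice returned zero bytes
// where the codec header should begin:
//
//	fmt.Printf("0x%x\n", 1071082519) // prints 0x3fd76c17, the CODEC_MAGIC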
func TestCheckHeaderWin8(t *testing.T) {
	path := "../search/testdata/win8/belfrysample"
	d, err := OpenFSDirectory(path)
	if err != nil {
		t.Error(err)
	}
	ctx := NewIOContextBool(false)
	cd, err := NewCompoundFileDirectory(d, "_0.cfs", ctx, false)
	if err != nil {
		t.Error(err)
	}
	r, err := cd.OpenInput("_0_Lucene41_0.pos", ctx)
	if err != nil {
		t.Error(err)
	}
	_, err = codec.CheckHeader(r, "Lucene41PostingsWriterPos", 0, 0)
	if err != nil {
		t.Error(err)
	}
}
	if err != nil {
		return fi, err
	}
	log.Printf("Reading %v", input)
	success := false
	defer func() {
		if success {
			input.Close()
		} else {
			util.CloseWhileHandlingError(err, input)
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE42_FI_CODEC_NAME,
		LUCENE42_FI_FORMAT_START, LUCENE42_FI_FORMAT_CURRENT)
	if err != nil {
		return fi, err
	}

	size, err := input.ReadVInt() // read in the size
	if err != nil {
		return fi, err
	}
	log.Printf("Found %v FieldInfos.", size)

	infos := make([]FieldInfo, size)
	for i := range infos {
		name, err := input.ReadString()
		if err != nil {
// CompressingStoredFieldsReader.java L90
func newCompressingStoredFieldsReader(d store.Directory, si SegmentInfo, segmentSuffix string,
	fn FieldInfos, ctx store.IOContext, formatName string,
	compressionMode codec.CompressionMode) (r *CompressingStoredFieldsReader, err error) {

	r = &CompressingStoredFieldsReader{}
	r.compressionMode = compressionMode
	segment := si.name
	r.fieldInfos = fn
	r.numDocs = int(si.docCount)
	var indexStream store.IndexInput
	success := false
	defer func() {
		if !success {
			log.Println("Failed to initialize CompressingStoredFieldsReader.")
			if err != nil {
				log.Print(err)
			}
			util.Close(r, indexStream)
		}
	}()

	// Load the index into memory
	indexStreamFN := util.SegmentFileName(segment, segmentSuffix, LUCENE40_SF_FIELDS_INDEX_EXTENSION)
	indexStream, err = d.OpenInput(indexStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameIdx := formatName + CODEC_SFX_IDX
	_, err = codec.CheckHeader(indexStream, codecNameIdx, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameIdx)) != indexStream.FilePointer() {
		panic("assert fail")
	}
	r.indexReader, err = newCompressingStoredFieldsIndexReader(indexStream, si)
	if err != nil {
		return nil, err
	}
	err = indexStream.Close()
	if err != nil {
		return nil, err
	}
	indexStream = nil

	// Open the data file and read metadata
	fieldsStreamFN := util.SegmentFileName(segment, segmentSuffix, LUCENE40_SF_FIELDS_EXTENSION)
	r.fieldsStream, err = d.OpenInput(fieldsStreamFN, ctx)
	if err != nil {
		return nil, err
	}
	codecNameDat := formatName + CODEC_SFX_DAT
	_, err = codec.CheckHeader(r.fieldsStream, codecNameDat, CODEC_SFX_VERSION_START, CODEC_SFX_VERSION_CURRENT)
	if err != nil {
		return nil, err
	}
	if int64(codec.HeaderLength(codecNameDat)) != r.fieldsStream.FilePointer() {
		panic("assert fail")
	}

	n, err := r.fieldsStream.ReadVInt()
	if err != nil {
		return nil, err
	}
	r.packedIntsVersion = int(n)
	r.decompressor = compressionMode.NewDecompressor()
	r.bytes = make([]byte, 0)
	success = true
	return r, nil
}
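// The two FilePointer assertions above only hold if codec.HeaderLength
// accounts for every byte CheckHeader consumes. A hedged sketch of that
// arithmetic, matching Lucene's CodecUtil.headerLength (9 + name length);
// the function name here is illustrative:
func headerLengthSketch(codecName string) int {
	// 4-byte magic + 1-byte string length prefix + codec name + 4-byte version.
	// Assumes the name is under 128 bytes so its VString length prefix fits in
	// one byte, which holds for every codec name used here.
	return 4 + 1 + len(codecName) + 4
}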
// Load a previously saved FST; maxBlockBits allows you to
// control the size of the byte[] pages used to hold the FST bytes.
func loadFST3(in DataInput, outputs Outputs, maxBlockBits uint32) (fst *FST, err error) {
	log.Printf("Loading FST from %v and output to %v...", in, outputs)
	defer func() {
		if err != nil {
			log.Print("Failed to load FST.")
			log.Print("DEBUG ", err)
		}
	}()
	fst = &FST{outputs: outputs, startNode: -1}

	if maxBlockBits < 1 || maxBlockBits > 30 {
		panic(fmt.Sprintf("maxBlockBits should be 1..30; got %v", maxBlockBits))
	}

	// NOTE: only reads most recent format; we don't have
	// back-compat promise for FSTs (they are experimental):
	fst.version, err = codec.CheckHeader(in, FST_FILE_FORMAT_NAME, FST_VERSION_PACKED, FST_VERSION_VINT_TARGET)
	if err != nil {
		return fst, err
	}
	var b byte
	if b, err = in.ReadByte(); err != nil {
		return fst, err
	}
	fst.packed = (b == 1)
	if b, err = in.ReadByte(); err != nil {
		return fst, err
	}
	if b == 1 {
		// accepts empty string
		// 1 KB blocks:
		emptyBytes := newBytesStoreFromBits(10)
		var numBytes int32
		if numBytes, err = in.ReadVInt(); err != nil {
			return fst, err
		}
		log.Printf("Number of bytes: %v", numBytes)
		emptyBytes.CopyBytes(in, int64(numBytes))

		// De-serialize empty-string output:
		var reader BytesReader
		if fst.packed {
			log.Printf("Forward reader.")
			reader = emptyBytes.forwardReader()
		} else {
			log.Printf("Reverse reader.")
			reader = emptyBytes.reverseReader()
			// NoOutputs uses 0 bytes when writing its output,
			// so we have to check here else BytesStore gets
			// angry:
			if numBytes > 0 {
				reader.setPosition(int64(numBytes - 1))
			}
		}
		log.Printf("Reading final output from %v to %v...", reader, outputs)
		if fst.emptyOutput, err = outputs.ReadFinalOutput(reader); err != nil {
			return fst, err
		}
	} // else emptyOutput = nil

	var t byte
	if t, err = in.ReadByte(); err != nil {
		return fst, err
	}
	switch t {
	case 0:
		fst.inputType = INPUT_TYPE_BYTE1
	case 1:
		fst.inputType = INPUT_TYPE_BYTE2
	case 2:
		fst.inputType = INPUT_TYPE_BYTE4
	default:
		panic(fmt.Sprintf("invalid input type %v", t))
	}

	if fst.packed {
		if fst.nodeRefToAddress, err = newPackedReader(in); err != nil {
			return fst, err
		}
	} // else nodeRefToAddress = nil

	if fst.startNode, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.nodeCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.arcCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.arcWithOutputCount, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	var numBytes int64
	if numBytes, err = in.ReadVLong(); err != nil {
		return fst, err
	}
	if fst.bytes, err = newBytesStoreFromInput(in, numBytes, 1<<maxBlockBits); err != nil {
		return fst, err
	}

	log.Println("DEBUG ", outputs.NoOutput())
	fst.NO_OUTPUT = outputs.NoOutput()
	fst.cacheRootArcs()
	// NOTE: bogus because this is only used during
	// building; we need to break out mutable FST from
	// immutable
	fst.allowArrayArcs = false
	return fst, nil
}
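// A hedged usage sketch of loadFST3. The outputs singleton is taken from the
// commented-out test code in TestClone; 15 (32 KB pages) is an arbitrary
// legal value in 1..30, and Lucene's own default is platform-dependent.
//
//	fst, err := loadFST3(in, ByteSequenceOutputsSingleton(), 15)
//	if err != nil {
//		return err
//	}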
func readEntries(handle IndexInputSlicer, dir Directory, name string) (mapping map[string]FileEntry, err error) {
	var stream, entriesStream IndexInput = nil, nil
	defer func() {
		err = util.CloseWhileHandlingError(err, stream, entriesStream)
	}()

	// read the first VInt. If it is negative, it's the version number
	// otherwise it's the count (pre-3.1 indexes)
	mapping = make(map[string]FileEntry)
	stream = handle.openFullSlice()
	log.Printf("Reading from stream: %v", stream)
	firstInt, err := stream.ReadVInt()
	if err != nil {
		return mapping, err
	}
	// impossible for 3.0 to have 63 files in a .cfs, CFS writer was not visible
	// and separate norms/etc are outside of cfs.
	if firstInt == int32(CODEC_MAGIC_BYTE1) {
		var secondByte, thirdByte, fourthByte byte
		if secondByte, err = stream.ReadByte(); err != nil {
			return mapping, err
		}
		if thirdByte, err = stream.ReadByte(); err != nil {
			return mapping, err
		}
		if fourthByte, err = stream.ReadByte(); err != nil {
			return mapping, err
		}
		if secondByte != CODEC_MAGIC_BYTE2 ||
			thirdByte != CODEC_MAGIC_BYTE3 ||
			fourthByte != CODEC_MAGIC_BYTE4 {
			return mapping, fmt.Errorf("Illegal/impossible header for CFS file: %v,%v,%v",
				secondByte, thirdByte, fourthByte)
		}

		_, err = codec.CheckHeaderNoMagic(stream, CFD_DATA_CODEC, CFD_VERSION_START, CFD_VERSION_START)
		if err != nil {
			return mapping, err
		}
		entriesFileName := util.SegmentFileName(util.StripExtension(name), "", COMPOUND_FILE_ENTRIES_EXTENSION)
		entriesStream, err = dir.OpenInput(entriesFileName, IO_CONTEXT_READONCE)
		if err != nil {
			return mapping, err
		}
		_, err = codec.CheckHeader(entriesStream, CFD_ENTRY_CODEC, CFD_VERSION_START, CFD_VERSION_START)
		if err != nil {
			return mapping, err
		}
		numEntries, err := entriesStream.ReadVInt()
		if err != nil {
			return mapping, err
		}
		log.Printf("Entries number: %v", numEntries)
		for i := int32(0); i < numEntries; i++ {
			id, err := entriesStream.ReadString()
			if err != nil {
				return mapping, err
			}
			if _, ok := mapping[id]; ok {
				return mapping, fmt.Errorf("Duplicate cfs entry id=%v in CFS: %v", id, entriesStream)
			}
			log.Printf("Found entry: %v", id)
			offset, err := entriesStream.ReadLong()
			if err != nil {
				return mapping, err
			}
			length, err := entriesStream.ReadLong()
			if err != nil {
				return mapping, err
			}
			mapping[id] = FileEntry{offset, length}
		}
	} else {
		// TODO remove once 3.x is not supported anymore
		panic("not supported yet; will also be obsolete soon")
	}
	return mapping, nil
}
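// For context, a hedged sketch of how CompoundFileDirectory presumably uses
// the mapping when a sub-file is opened. openSlice and its signature are
// assumptions modeled on Lucene's IndexInputSlicer, and the FileEntry field
// names are inferred from the positional literal above; neither is verified
// against this codebase.
//
//	entry, ok := mapping["_0_Lucene41_0.pos"]
//	if !ok {
//		// no such entry in this compound file
//	}
//	sub := handle.openSlice(name, entry.offset, entry.length)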
	fileName := util.SegmentFileName(segment, "", LUCENE40_SI_EXTENSION)
	input, err := dir.OpenInput(fileName, context)
	if err != nil {
		return si, err
	}
	success := false
	defer func() {
		if !success {
			util.CloseWhileSuppressingError(input)
		} else {
			input.Close()
		}
	}()

	_, err = codec.CheckHeader(input, LUCENE40_CODEC_NAME, LUCENE40_VERSION_START, LUCENE40_VERSION_CURRENT)
	if err != nil {
		return si, err
	}
	version, err := input.ReadString()
	if err != nil {
		return si, err
	}
	docCount, err := input.ReadInt()
	if err != nil {
		return si, err
	}
	if docCount < 0 {
		return si, fmt.Errorf("invalid docCount: %v (resource=%v)", docCount, input)
	}
	sicf, err := input.ReadByte()