func (this *CompressedInputStream) readHeader() error {
	defer func() {
		if r := recover(); r != nil {
			panic(NewIOError("Cannot read bitstream header: "+r.(error).Error(), ERR_READ_FILE))
		}
	}()

	// Read stream type
	fileType := this.ibs.ReadBits(32)

	// Sanity check
	if fileType != BITSTREAM_TYPE {
		errMsg := fmt.Sprintf("Invalid stream type: expected %#x, got %#x", BITSTREAM_TYPE, fileType)
		return NewIOError(errMsg, ERR_INVALID_FILE)
	}

	version := this.ibs.ReadBits(7)

	// Sanity check
	if version != BITSTREAM_FORMAT_VERSION {
		errMsg := fmt.Sprintf("Invalid bitstream, cannot read this version of the stream: %d", version)
		return NewIOError(errMsg, ERR_STREAM_VERSION)
	}

	// Read block checksum
	if this.ibs.ReadBit() == 1 {
		var err error
		this.hasher, err = util.NewXXHash(BITSTREAM_TYPE)

		if err != nil {
			return err
		}
	}

	// Read entropy codec
	this.entropyType = byte(this.ibs.ReadBits(5))

	// Read transform
	this.transformType = byte(this.ibs.ReadBits(5))

	// Read block size
	this.blockSize = uint(this.ibs.ReadBits(26)) << 4

	if this.blockSize < MIN_BITSTREAM_BLOCK_SIZE || this.blockSize > MAX_BITSTREAM_BLOCK_SIZE {
		errMsg := fmt.Sprintf("Invalid bitstream, incorrect block size: %d", this.blockSize)
		return NewIOError(errMsg, ERR_BLOCK_SIZE)
	}

	// Read reserved bits
	this.ibs.ReadBits(4)

	if this.debugWriter != nil {
		fmt.Fprintf(this.debugWriter, "Checksum set to %v\n", this.hasher != nil)
		fmt.Fprintf(this.debugWriter, "Block size set to %d bytes\n", this.blockSize)
		w1 := function.GetByteFunctionName(this.transformType)

		if w1 == "NONE" {
			w1 = "no"
		}

		fmt.Fprintf(this.debugWriter, "Using %v transform (stage 1)\n", w1)
		w2 := entropy.GetEntropyCodecName(this.entropyType)

		if w2 == "NONE" {
			w2 = "no"
		}

		fmt.Fprintf(this.debugWriter, "Using %v entropy codec (stage 2)\n", w2)
	}

	return nil
}
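// For reference, a minimal sketch of the fixed 80-bit header layout consumed
// by readHeader above. The bit widths come directly from the ReadBits/ReadBit
// calls; the headerField type and the field names are illustrative only and
// do not exist in the original source.
type headerField struct {
	name string
	bits uint
}

var headerLayout = []headerField{
	{"stream type (must equal BITSTREAM_TYPE)", 32},
	{"format version (must equal BITSTREAM_FORMAT_VERSION)", 7},
	{"block checksum flag", 1},
	{"entropy codec", 5},
	{"transform", 5},
	{"block size divided by 16", 26},
	{"reserved", 4},
} // total: 80 bits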
func main() {
	var filename = flag.String("input", "c:\\temp\\rt.jar", "name of the input file")
	flag.Parse()
	iter := 500
	fmt.Printf("Processing %v\n", *filename)
	fmt.Printf("%v iterations\n", iter)

	{
		fmt.Printf("XXHash speed test\n")
		file, err := os.Open(*filename)

		if err != nil {
			fmt.Printf("Cannot open %s\n", *filename)
			return
		}

		defer file.Close()
		buffer := make([]byte, 16384)
		before := time.Now()
		hash, err := util.NewXXHash(uint32(0))

		if err != nil {
			fmt.Printf("Failed to create hash: %v\n", err)
			return
		}

		length, err := file.Read(buffer)
		size := int64(0)
		res := uint32(0)

		for length > 0 {
			if err != nil {
				fmt.Printf("Failed to read the next chunk of input file '%v': %v\n", *filename, err)
				return
			}

			for i := 0; i < iter; i++ {
				res += hash.Hash(buffer[0:length])
			}

			size += int64(length * iter)
			length, err = file.Read(buffer)
		}

		after := time.Now()
		delta := after.Sub(before).Nanoseconds() / 1000000 // convert to ms
		fmt.Printf("XXHash res=%x\n", res)
		fmt.Printf("Elapsed [ms]: %v\n", delta)
		fmt.Printf("Throughput [MB/s]: %v\n", (size/1024*1000/1024)/delta)
	}

	fmt.Printf("\n")

	{
		fmt.Printf("MurmurHash3 speed test\n")
		file, err := os.Open(*filename)

		if err != nil {
			fmt.Printf("Cannot open %s\n", *filename)
			return
		}

		defer file.Close()
		buffer := make([]byte, 16384)
		before := time.Now()
		hash, err := util.NewMurMurHash3(uint32(0))

		if err != nil {
			fmt.Printf("Failed to create hash: %v\n", err)
			return
		}

		length, err := file.Read(buffer)
		size := int64(0)
		res := uint32(0)

		for length > 0 {
			if err != nil {
				fmt.Printf("Failed to read the next chunk of input file '%v': %v\n", *filename, err)
				return
			}

			for i := 0; i < iter; i++ {
				res += hash.Hash(buffer[0:length])
			}

			size += int64(length * iter)
			length, err = file.Read(buffer)
		}

		after := time.Now()
		delta := after.Sub(before).Nanoseconds() / 1000000 // convert to ms
		fmt.Printf("MurmurHash3 res=%x\n", res)
		fmt.Printf("Elapsed [ms]: %v\n", delta)
		fmt.Printf("Throughput [MB/s]: %v\n", (size/1024*1000/1024)/delta)
	}
}
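// The two benchmark blocks above differ only in the label and the hash
// constructor. A minimal sketch of how the shared loop could be factored out,
// assuming both util.NewXXHash and util.NewMurMurHash3 return a value with a
// Hash([]byte) uint32 method; the hasher32 interface and benchHash helper are
// illustrative and not part of the original source.
type hasher32 interface {
	Hash(data []byte) uint32
}

func benchHash(name string, h hasher32, file *os.File, iter int) {
	buffer := make([]byte, 16384)
	before := time.Now()
	size := int64(0)
	res := uint32(0)
	length, err := file.Read(buffer)

	for length > 0 {
		if err != nil {
			fmt.Printf("Failed to read the next chunk of input: %v\n", err)
			return
		}

		for i := 0; i < iter; i++ {
			res += h.Hash(buffer[0:length])
		}

		size += int64(length * iter)
		length, err = file.Read(buffer)
	}

	delta := time.Now().Sub(before).Nanoseconds() / 1000000 // elapsed in ms

	if delta == 0 {
		delta = 1 // guard against division by zero on very small inputs (not in the original)
	}

	fmt.Printf("%v res=%x\n", name, res)
	fmt.Printf("Elapsed [ms]: %v\n", delta)
	fmt.Printf("Throughput [MB/s]: %v\n", (size/1024*1000/1024)/delta)
}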
func NewCompressedOutputStream(entropyCodec string, functionType string, os io.WriteCloser, blockSize uint, checksum bool, debugWriter io.Writer, jobs uint) (*CompressedOutputStream, error) {
	if os == nil {
		return nil, NewIOError("Invalid null output stream parameter", ERR_CREATE_STREAM)
	}

	if blockSize > MAX_BITSTREAM_BLOCK_SIZE {
		errMsg := fmt.Sprintf("The block size must be at most %d MB", MAX_BITSTREAM_BLOCK_SIZE>>20)
		return nil, NewIOError(errMsg, ERR_CREATE_STREAM)
	}

	if blockSize < MIN_BITSTREAM_BLOCK_SIZE {
		errMsg := fmt.Sprintf("The block size must be at least %d", MIN_BITSTREAM_BLOCK_SIZE)
		return nil, NewIOError(errMsg, ERR_CREATE_STREAM)
	}

	if int(blockSize)&-16 != int(blockSize) {
		return nil, NewIOError("The block size must be a multiple of 16", ERR_CREATE_STREAM)
	}

	if jobs < 1 || jobs > 16 {
		return nil, NewIOError("The number of jobs must be in [1..16]", ERR_CREATE_STREAM)
	}

	this := new(CompressedOutputStream)
	var err error
	bufferSize := blockSize

	if bufferSize > 65536 {
		bufferSize = 65536
	}

	if this.obs, err = bitstream.NewDefaultOutputBitStream(os, bufferSize); err != nil {
		return nil, err
	}

	// Check entropy type validity (panic on error)
	this.entropyType = entropy.GetEntropyCodecType(entropyCodec)

	// Check transform type validity (panic on error)
	this.transformType = function.GetByteFunctionType(functionType)
	this.blockSize = blockSize

	if checksum {
		this.hasher, err = util.NewXXHash(BITSTREAM_TYPE)

		if err != nil {
			return nil, err
		}
	}

	this.data = make([]byte, jobs*blockSize)
	this.buffers = make([][]byte, jobs)

	for i := range this.buffers {
		this.buffers[i] = EMPTY_BYTE_SLICE
	}

	this.debugWriter = debugWriter
	this.jobs = int(jobs)
	this.blockId = 0
	this.channels = make([]chan error, this.jobs+1)

	for i := range this.channels {
		this.channels[i] = make(chan error)
	}

	this.listeners = make([]BlockListener, 0)
	return this, nil
}
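// A minimal usage sketch for NewCompressedOutputStream, assuming the type
// exposes the usual Write([]byte) (int, error) and Close() error methods.
// The entropy codec name "HUFFMAN" and transform name "BWT+MTF" are
// assumptions for illustration; valid names are whatever
// entropy.GetEntropyCodecType and function.GetByteFunctionType accept
// (both panic on unknown names). The 1 MB block size is also assumed to lie
// within [MIN_BITSTREAM_BLOCK_SIZE, MAX_BITSTREAM_BLOCK_SIZE].
func compressTo(dst io.WriteCloser, input []byte) error {
	// 1 MB blocks (a multiple of 16), block checksum enabled, no debug output, 1 job
	cos, err := NewCompressedOutputStream("HUFFMAN", "BWT+MTF", dst, 1024*1024, true, nil, 1)

	if err != nil {
		return err
	}

	if _, err = cos.Write(input); err != nil {
		return err
	}

	// Closing flushes any buffered data to the underlying stream
	return cos.Close()
}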