func GetHash(a string) (hash.Hash, error) {
	var h hash.Hash
	switch a {
	case "adler32":
		h = adler32.New()
	case "crc32", "crc32ieee":
		h = crc32.New(crc32.MakeTable(crc32.IEEE))
	case "crc32castagnoli":
		h = crc32.New(crc32.MakeTable(crc32.Castagnoli))
	case "crc32koopman":
		h = crc32.New(crc32.MakeTable(crc32.Koopman))
	case "crc64", "crc64iso":
		h = crc64.New(crc64.MakeTable(crc64.ISO))
	case "crc64ecma":
		h = crc64.New(crc64.MakeTable(crc64.ECMA))
	case "fnv", "fnv32":
		h = fnv.New32()
	case "fnv32a":
		h = fnv.New32a()
	case "fnv64":
		h = fnv.New64()
	case "fnv64a":
		h = fnv.New64a()
	case "hmac", "hmacsha256":
		h = hmac.New(sha256.New, []byte(key))
	case "hmacmd5":
		h = hmac.New(md5.New, []byte(key))
	case "hmacsha1":
		h = hmac.New(sha1.New, []byte(key))
	case "hmacsha512":
		h = hmac.New(sha512.New, []byte(key))
	case "md4":
		h = md4.New()
	case "md5":
		h = md5.New()
	case "ripemd160":
		h = ripemd160.New()
	case "sha1":
		h = sha1.New()
	case "sha224":
		h = sha256.New224()
	case "sha256":
		h = sha256.New()
	case "sha384":
		h = sha512.New384()
	case "sha512":
		h = sha512.New()
	default:
		return nil, errors.New("Invalid algorithm")
	}
	return h, nil
}
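// Usage sketch (not part of the original source): GetHash is assumed to live
// in the same package, and the hmac* cases depend on a package-level `key`
// string that is not shown above. A non-HMAC algorithm avoids that dependency.
func ExampleGetHash() {
	h, err := GetHash("crc32castagnoli")
	if err != nil {
		log.Fatal(err)
	}
	h.Write([]byte("hello world"))
	fmt.Printf("%x\n", h.Sum(nil))
}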
func computeOffsets(index *nodeIndex, n *trieNode) uint16 {
	if n.leaf {
		return n.value
	}
	hasher := crc32.New(crc32.MakeTable(crc32.IEEE))
	// We only index continuation bytes.
	for i := 0; i < 64; i++ {
		var v uint16 = 0
		if nn := n.table[0x80+i]; nn != nil {
			v = computeOffsets(index, nn)
		}
		hasher.Write([]byte{uint8(v >> 8), uint8(v)})
	}
	h := hasher.Sum32()
	if n.isInternal() {
		v, ok := index.lookupBlockIdx[h]
		if !ok {
			v = uint16(len(index.lookupBlocks))
			index.lookupBlocks = append(index.lookupBlocks, n)
			index.lookupBlockIdx[h] = v
		}
		n.value = v
	} else {
		v, ok := index.valueBlockIdx[h]
		if !ok {
			v = uint16(len(index.valueBlocks))
			index.valueBlocks = append(index.valueBlocks, n)
			index.valueBlockIdx[h] = v
		}
		n.value = v
	}
	return n.value
}
func (b *backend) Hash(ignores map[IgnoreKey]struct{}) (uint32, error) {
	h := crc32.New(crc32.MakeTable(crc32.Castagnoli))

	b.mu.RLock()
	defer b.mu.RUnlock()
	err := b.db.View(func(tx *bolt.Tx) error {
		c := tx.Cursor()
		for next, _ := c.First(); next != nil; next, _ = c.Next() {
			b := tx.Bucket(next)
			if b == nil {
				return fmt.Errorf("cannot get hash of bucket %s", string(next))
			}
			h.Write(next)
			b.ForEach(func(k, v []byte) error {
				bk := IgnoreKey{Bucket: string(next), Key: string(k)}
				if _, ok := ignores[bk]; !ok {
					h.Write(k)
					h.Write(v)
				}
				return nil
			})
		}
		return nil
	})
	if err != nil {
		return 0, err
	}

	return h.Sum32(), nil
}
func process_file(filename string, complete chan Sumlist) {
	sumlist := Sumlist{}
	sumlist.filename = filename

	// Open the file and bail if we fail
	infile, err := os.Open(filename)
	if err != nil {
		log.Printf("Unable to open %s: %s", filename, err)
		complete <- sumlist
		return
	}
	defer infile.Close()

	// Create the checksum objects
	if flag_crc32 {
		sumlist.sums = append(sumlist.sums, Checksum{"CRC32", crc32.New(crc32.IEEETable)})
	}
	if flag_crc64 {
		sumlist.sums = append(sumlist.sums, Checksum{"CRC64", crc64.New(crc64.MakeTable(crc64.ISO))})
	}
	if flag_sha224 {
		sumlist.sums = append(sumlist.sums, Checksum{"SHA224", sha256.New224()})
	}
	if flag_sha256 {
		sumlist.sums = append(sumlist.sums, Checksum{"SHA256", sha256.New()})
	}
	if flag_sha384 {
		sumlist.sums = append(sumlist.sums, Checksum{"SHA384", sha512.New384()})
	}
	if flag_sha512 {
		sumlist.sums = append(sumlist.sums, Checksum{"SHA512", sha512.New()})
	}

	// Create our file reader
	reader := bufio.NewReader(infile)

	// Start a buffer and loop to read the entire file
	buf := make([]byte, 4096)
	for {
		read_count, err := reader.Read(buf)
		// If we get an error that is not EOF, then we have a problem
		if err != nil && err != io.EOF {
			log.Printf("Unable to read %s: %s", filename, err)
			complete <- sumlist
			return
		}
		// If the returned size is zero, we're at the end of the file
		if read_count == 0 {
			break
		}

		// Add the buffer contents to the checksum calculation
		for _, sum := range sumlist.sums {
			sum.hashFunc.Write(buf[:read_count])
		}
	}

	complete <- sumlist
}
/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */
func decStreamHeader(s *xzDec) xzRet {
	if string(s.temp.buf[:len(headerMagic)]) != headerMagic {
		return xzFormatError
	}
	if xzCRC32(s.temp.buf[len(headerMagic):len(headerMagic)+2], 0) !=
		getLE32(s.temp.buf[len(headerMagic)+2:]) {
		return xzDataError
	}
	if s.temp.buf[len(headerMagic)] != 0 {
		return xzOptionsError
	}
	/*
	 * Of the integrity checks, we support none (Check ID = 0),
	 * CRC32 (Check ID = 1), CRC64 (Check ID = 4) and SHA256 (Check ID = 10).
	 * We will accept other check types too, but then the check won't be
	 * verified and a warning (xzUnsupportedCheck) will be given.
	 */
	s.checkType = xzCheck(s.temp.buf[len(headerMagic)+1])
	if s.checkType > xzCheckMax {
		return xzOptionsError
	}
	switch s.checkType {
	case xzCheckNone:
		// xzCheckNone: no action needed
	case xzCheckCRC32:
		s.check = crc32.New(xzCRC32Table)
	case xzCheckCRC64:
		s.check = crc64.New(xzCRC64Table)
	case xzCheckSHA256:
		s.check = sha256.New()
	default:
		return xzUnsupportedCheck
	}
	return xzOK
}
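// Not part of the snippet above: the check tables it references are
// presumably built once at package level. The .xz format uses the standard
// CRC-32 and the ECMA CRC-64 polynomials, so a plausible definition is:
var (
	xzCRC32Table = crc32.MakeTable(crc32.IEEE)
	xzCRC64Table = crc64.MakeTable(crc64.ECMA)
)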
func (e *Engine) crc32_koopman() error {
	data, err := computeHash(crc32.New(crc32.MakeTable(crc32.Koopman)), e.stack.Pop())
	if err == nil {
		e.stack.Push(data)
	}
	return err
}
// Wire format: "AA" preamble, little-endian int32 payload length, the
// payload itself, then a little-endian CRC-32 of the payload.
func sendMessage(conn io.Writer, message *Message) {
	_, err := io.WriteString(conn, "AA") // preamble
	if err != nil {
		log.Fatal("unable to send data: ", err)
	}

	data, err := proto.Marshal(message)
	if err != nil {
		log.Fatal("marshaling error: ", err)
	}

	err = binary.Write(conn, binary.LittleEndian, int32(len(data)))
	if err != nil {
		log.Fatal("unable to send data: ", err)
	}

	_, err = conn.Write(data)
	if err != nil {
		log.Fatal("unable to send data: ", err)
	}

	crc := crc32.New(crcTable)
	crc.Write(data)
	err = binary.Write(conn, binary.LittleEndian, int32(crc.Sum32()))
	if err != nil {
		log.Fatal("unable to send data: ", err)
	}
}
func (e *Engine) crc32_castagnoli() error {
	data, err := computeHash(crc32.New(crc32.MakeTable(crc32.Castagnoli)), e.stack.Pop())
	if err == nil {
		e.stack.Push(data)
	}
	return err
}
func GetFileChecksum(file *os.File) uint32 {
	fileInfo, err := file.Stat()
	if err != nil {
		log.Println(err)
		return 0
	}
	if fileInfo.Size() > CheckSumMaxSize && CheckSumMaxSize != -1 {
		return 0
	}

	hasher := crc32.New(crc32.MakeTable(crc32.Castagnoli))
	byteBuf := make([]byte, ChunkSize)
	byteChan := make(chan []byte, ChunkSize)
	hashed := make(chan struct{})
	go func() {
		for val := range byteChan {
			hasher.Write(val)
		}
		close(hashed) // signal that every chunk has been hashed
	}()
	for done := false; !done; {
		numRead, err := file.Read(byteBuf)
		if err != nil && err != io.EOF {
			log.Println(err)
		}
		if numRead < ChunkSize {
			done = true
		}
		// Send a copy: byteBuf is reused by the next Read, so sending it
		// directly would race with the hashing goroutine.
		chunk := make([]byte, numRead)
		copy(chunk, byteBuf[:numRead])
		byteChan <- chunk
	}
	close(byteChan)
	<-hashed // wait for all writes to finish before reading the sum
	return hasher.Sum32()
}
func (b *backend) Hash() (uint32, error) {
	h := crc32.New(crc32.MakeTable(crc32.Castagnoli))

	err := b.db.View(func(tx *bolt.Tx) error {
		c := tx.Cursor()
		for next, _ := c.First(); next != nil; next, _ = c.Next() {
			b := tx.Bucket(next)
			if b == nil {
				return fmt.Errorf("cannot get hash of bucket %s", string(next))
			}
			h.Write(next)
			b.ForEach(func(k, v []byte) error {
				h.Write(k)
				h.Write(v)
				return nil
			})
		}
		return nil
	})
	if err != nil {
		return 0, err
	}

	return h.Sum32(), nil
}
func DecodePage(in io.Reader) (Page, error) {
	var page Page

	err := binary.Read(in, binary.LittleEndian, &page.HeaderFixed)
	if err != nil {
		return page, err
	}

	page.Segment_table = make([]uint8, int(page.Page_segments))
	_, err = io.ReadFull(in, page.Segment_table)
	if err != nil {
		return page, err
	}

	remaining_data := 0
	for _, v := range page.Segment_table {
		remaining_data += int(v)
	}
	page.Data = make([]byte, remaining_data)
	_, err = io.ReadFull(in, page.Data)
	if err != nil {
		return page, err
	}

	// The checksum is made by zeroing the checksum value and CRC-ing the entire page
	checksum := page.Crc_checksum
	page.Crc_checksum = 0
	crc := crc32.New(ogg_table)
	binary.Write(crc, binary.LittleEndian, &page.HeaderFixed)
	crc.Write(page.Segment_table)
	crc.Write(page.Data)
	if crc.Sum32() != checksum {
		// TODO: Figure out why this CRC isn't working
		// Likely cause: the Ogg page CRC is a non-reflected CRC-32
		// (polynomial 0x04c11db7, initial value 0, no final XOR), while
		// hash/crc32 implements reflected CRCs, so a table built with
		// crc32.MakeTable cannot reproduce the Ogg checksum.
		// return page, os.NewError(fmt.Sprintf("CRC failed: expected %x, got %x.", checksum, crc.Sum32()))
	}

	return page, nil
}
func (s *store) Hash() (uint32, error) {
	h := crc32.New(crc32.MakeTable(crc32.Castagnoli))

	_, err := s.Snapshot(h)
	if err != nil {
		return 0, err
	}
	return h.Sum32(), nil
}
func emptyHashes() []HashSum {
	return []HashSum{
		{Name: "md5", hash: md5.New()},
		{Name: "sha1", hash: sha1.New()},
		{Name: "sha256", hash: sha256.New()},
		{Name: "sha512", hash: sha512.New()},
		{Name: "adler32", hash: adler32.New()},
		{Name: "crc32 (IEEE)", hash: crc32.New(crc32.MakeTable(crc32.IEEE))},
		{Name: "crc32 (Castagnoli)", hash: crc32.New(crc32.MakeTable(crc32.Castagnoli))},
		{Name: "crc32 (Koopman)", hash: crc32.New(crc32.MakeTable(crc32.Koopman))},
		{Name: "crc64 (ISO)", hash: crc64.New(crc64.MakeTable(crc64.ISO))},
		{Name: "crc64 (ECMA)", hash: crc64.New(crc64.MakeTable(crc64.ECMA))},
		{Name: "fnv32-1", hash: fnv.New32()},
		{Name: "fnv32-1a", hash: fnv.New32a()},
		{Name: "fnv64-1", hash: fnv.New64()},
		{Name: "fnv64-1a", hash: fnv.New64a()},
	}
}
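// Sketch (not in the original source, and assumed to live in the same package
// since HashSum's hash field is unexported): one way to drive every hash from
// a single reader is io.MultiWriter, so the input is traversed only once.
// hash.Hash satisfies io.Writer, which is all MultiWriter needs.
func sumAll(r io.Reader) ([]HashSum, error) {
	sums := emptyHashes()
	writers := make([]io.Writer, len(sums))
	for i := range sums {
		writers[i] = sums[i].hash
	}
	if _, err := io.Copy(io.MultiWriter(writers...), r); err != nil {
		return nil, err
	}
	return sums, nil
}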
func main() {
	flag.Parse()
	if flag.NArg() < 1 || flag.Arg(0) == "" {
		fmt.Printf("usage: crc32 <file>\n")
		os.Exit(1)
	}
	filename := flag.Arg(0)

	var poly uint32
	switch strings.ToLower(*polynomial) {
	case "ieee":
		poly = crc32.IEEE
	case "castagnoli":
		poly = crc32.Castagnoli
	case "koopman":
		poly = crc32.Koopman
	default:
		fmt.Printf("unknown -polynomial %s\n", *polynomial)
		os.Exit(1)
	}

	var format string
	switch strings.ToLower(*output) {
	case "hex":
		format = "%x\n"
	case "dec":
		format = "%d\n"
	case "oct":
		format = "%o\n"
	default:
		fmt.Printf("unknown -output %s\n", *output)
		os.Exit(1)
	}

	f, err := os.Open(filename)
	if err != nil {
		fmt.Printf("%s: %s\n", filename, err)
		os.Exit(1)
	}
	defer f.Close()

	// http://blog.vzv.ca/2012/06/crc64-file-hash-in-gogolang.html
	h := crc32.New(crc32.MakeTable(poly))
	buf := make([]byte, 8192)
	read, err := f.Read(buf)
	for read > -1 && err == nil {
		// Hash only the bytes actually read; writing the whole buffer
		// would corrupt the checksum on a short read.
		h.Write(buf[:read])
		read, err = f.Read(buf)
	}
	s := h.Sum32()
	fmt.Printf(format, s)
}
// Sum - io.Reader based crc helper
func Sum(reader io.Reader) (uint32, error) {
	h := crc32.New(castanagoliTable)
	byteBuffer := make([]byte, 1024*1024) // reused across reads
	var err error
	for err == nil {
		var length int
		length, err = reader.Read(byteBuffer)
		h.Write(byteBuffer[0:length])
	}
	if err != io.EOF {
		return 0, err
	}
	return h.Sum32(), nil
}
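// Not shown in these snippets: castanagoliTable (the misspelling of
// Castagnoli is kept as-is, since it is the identifier the code uses) is
// presumably a package-level table, e.g.:
var castanagoliTable = crc32.MakeTable(crc32.Castagnoli)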
// New creates a new Checksum of the given type
func (t ChecksumType) New() Checksum {
	switch t {
	case ChecksumTypeNone:
		return nullChecksum{}
	case ChecksumTypeCrc32:
		return newHashChecksum(t, crc32.NewIEEE())
	case ChecksumTypeCrc32C:
		return newHashChecksum(t, crc32.New(crc32CastagnoliTable))
	case ChecksumTypeFarmhash:
		// TODO(mmihic): Implement
		return nil
	default:
		return nil
	}
}
func dbStatus(p string) dbstatus {
	if _, err := os.Stat(p); err != nil {
		ExitWithError(ExitError, err)
	}

	ds := dbstatus{}

	db, err := bolt.Open(p, 0400, nil)
	if err != nil {
		ExitWithError(ExitError, err)
	}
	defer db.Close()

	h := crc32.New(crc32.MakeTable(crc32.Castagnoli))

	err = db.View(func(tx *bolt.Tx) error {
		ds.TotalSize = tx.Size()
		c := tx.Cursor()
		for next, _ := c.First(); next != nil; next, _ = c.Next() {
			b := tx.Bucket(next)
			if b == nil {
				return fmt.Errorf("cannot get hash of bucket %s", string(next))
			}
			h.Write(next)
			iskeyb := (string(next) == "key")
			b.ForEach(func(k, v []byte) error {
				h.Write(k)
				h.Write(v)
				if iskeyb {
					rev := bytesToRev(k)
					ds.Revision = rev.main
				}
				ds.TotalKey++
				return nil
			})
		}
		return nil
	})
	if err != nil {
		ExitWithError(ExitError, err)
	}

	ds.Hash = h.Sum32()
	return ds
}
func checkHashCRC32C(hashValue []byte, file string) error {
	fr, err := os.Open(file)
	if err != nil {
		return errors.Wrap(err, 1)
	}
	defer fr.Close()

	hasher := crc32.New(crc32c.Table)
	if _, err = io.Copy(hasher, fr); err != nil {
		return errors.Wrap(err, 1)
	}
	hashComputed := hasher.Sum(nil)
	if !bytes.Equal(hashValue, hashComputed) {
		err = fmt.Errorf("crc32c hash mismatch: got %x, expected %x", hashComputed, hashValue)
	}
	return err
}
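// Assumption: crc32c.Table above is a Castagnoli table exposed by a small
// helper package, along the lines of:
//
//	var Table = crc32.MakeTable(crc32.Castagnoli)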
// TestHash32 tests that Hash32 provided by this package can take an initial
// crc and behaves exactly the same as the standard one in the following calls.
func TestHash32(t *testing.T) {
	stdhash := crc32.New(crc32.IEEETable)
	if _, err := stdhash.Write([]byte("test data")); err != nil {
		t.Fatalf("unexpected write error: %v", err)
	}
	// create a new hash with stdhash.Sum32() as initial crc
	hash := New(stdhash.Sum32(), crc32.IEEETable)

	wsize := stdhash.Size()
	if g := hash.Size(); g != wsize {
		t.Errorf("size = %d, want %d", g, wsize)
	}
	wbsize := stdhash.BlockSize()
	if g := hash.BlockSize(); g != wbsize {
		t.Errorf("block size = %d, want %d", g, wbsize)
	}
	wsum32 := stdhash.Sum32()
	if g := hash.Sum32(); g != wsum32 {
		t.Errorf("Sum32 = %d, want %d", g, wsum32)
	}
	wsum := stdhash.Sum(make([]byte, 32))
	if g := hash.Sum(make([]byte, 32)); !reflect.DeepEqual(g, wsum) {
		t.Errorf("sum = %v, want %v", g, wsum)
	}

	// write something
	if _, err := stdhash.Write([]byte("test data")); err != nil {
		t.Fatalf("unexpected write error: %v", err)
	}
	if _, err := hash.Write([]byte("test data")); err != nil {
		t.Fatalf("unexpected write error: %v", err)
	}
	wsum32 = stdhash.Sum32()
	if g := hash.Sum32(); g != wsum32 {
		t.Errorf("Sum32 after write = %d, want %d", g, wsum32)
	}

	// reset
	stdhash.Reset()
	hash.Reset()
	wsum32 = stdhash.Sum32()
	if g := hash.Sum32(); g != wsum32 {
		t.Errorf("Sum32 after reset = %d, want %d", g, wsum32)
	}
}
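// A minimal sketch (an assumption, not the tested package's actual code) of
// the New(initial, table) constructor this test exercises. The standard
// library's crc32.New has no initial-crc parameter, but crc32.Update makes a
// resumable wrapper straightforward:
type digest struct {
	crc uint32
	tab *crc32.Table
}

// New returns a hash.Hash32 that continues from the given crc.
func New(crc uint32, tab *crc32.Table) hash.Hash32 { return &digest{crc, tab} }

func (d *digest) Write(p []byte) (int, error) {
	d.crc = crc32.Update(d.crc, d.tab, p)
	return len(p), nil
}
func (d *digest) Sum32() uint32  { return d.crc }
func (d *digest) Reset()         { d.crc = 0 }
func (d *digest) Size() int      { return 4 }
func (d *digest) BlockSize() int { return 1 }
func (d *digest) Sum(in []byte) []byte {
	s := d.Sum32()
	// Append big-endian, matching the standard crc32 digest's Sum.
	return append(in, byte(s>>24), byte(s>>16), byte(s>>8), byte(s))
}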
func init() {
	crc32CastagnoliTable := crc32.MakeTable(crc32.Castagnoli)

	ChecksumTypeNone.pool().New = func() interface{} {
		return nullChecksum{}
	}
	ChecksumTypeCrc32.pool().New = func() interface{} {
		return newHashChecksum(ChecksumTypeCrc32, crc32.NewIEEE())
	}
	ChecksumTypeCrc32C.pool().New = func() interface{} {
		return newHashChecksum(ChecksumTypeCrc32C, crc32.New(crc32CastagnoliTable))
	}

	// TODO: Implement farm hash.
	ChecksumTypeFarmhash.pool().New = func() interface{} {
		return nullChecksum{}
	}
}
func computeOffsets(index *nodeIndex, n *trieNode) int {
	if n.leaf {
		return n.value
	}
	hasher := crc32.New(crc32.MakeTable(crc32.IEEE))
	// We only index continuation bytes.
	for i := 0; i < blockSize; i++ {
		v := 0
		if nn := n.table[0x80+i]; nn != nil {
			v = computeOffsets(index, nn)
		}
		hasher.Write([]byte{uint8(v >> 8), uint8(v)})
	}
	h := hasher.Sum32()
	if n.isInternal() {
		v, ok := index.lookupBlockIdx[h]
		if !ok {
			v = len(index.lookupBlocks)
			index.lookupBlocks = append(index.lookupBlocks, n)
			index.lookupBlockIdx[h] = v
		}
		n.value = v
	} else {
		v, ok := index.valueBlockIdx[h]
		if !ok {
			if c := n.countSparseEntries(); c > maxSparseEntries {
				v = len(index.valueBlocks)
				index.valueBlocks = append(index.valueBlocks, n)
				index.valueBlockIdx[h] = v
			} else {
				v = -len(index.sparseOffset)
				index.sparseBlocks = append(index.sparseBlocks, n)
				index.sparseOffset = append(index.sparseOffset, uint16(index.sparseCount))
				index.sparseCount += c + 1
				index.valueBlockIdx[h] = v
			}
		}
		n.value = v
	}
	return n.value
}
// NewCRC32C creates a new hash.Hash32 computing the CRC-32 checksum using
// Castagnoli's polynomial.
func NewCRC32C() hash.Hash32 {
	return crc32.New(crc32tab)
}
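// Not shown in the snippet above: crc32tab is presumably built once at
// package level from the Castagnoli polynomial, e.g.:
var crc32tab = crc32.MakeTable(crc32.Castagnoli)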
// writeBlock appends the specified raw block bytes to the store's write cursor
// location and increments it accordingly. When the block would exceed the max
// file size for the current flat file, this function will close the current
// file, create the next file, update the write cursor, and write the block to
// the new file.
//
// The write cursor will also be advanced by the number of bytes actually
// written in the event of failure.
//
// Format: <network><block length><serialized block><checksum>
func (s *blockStore) writeBlock(rawBlock []byte) (blockLocation, error) {
	// Compute how many bytes will be written.
	// 4 bytes each for block network + 4 bytes for block length +
	// length of raw block + 4 bytes for checksum.
	blockLen := uint32(len(rawBlock))
	fullLen := blockLen + 12

	// Move to the next block file if adding the new block would exceed the
	// max allowed size for the current block file. Also detect overflow
	// to be paranoid; even though it isn't possible currently, numbers
	// might change in the future to make it possible.
	//
	// NOTE: The writeCursor.offset field isn't protected by the mutex
	// since it's only read/changed during this function, which can only be
	// called during a write transaction, of which there can be only one at
	// a time.
	wc := s.writeCursor
	finalOffset := wc.curOffset + fullLen
	if finalOffset < wc.curOffset || finalOffset > s.maxBlockFileSize {
		// This is done under the write cursor lock since the curFileNum
		// field is accessed elsewhere by readers.
		//
		// Close the current write file to force a read-only reopen
		// with LRU tracking. The close is done under the write lock
		// for the file to prevent it from being closed out from under
		// any readers currently reading from it.
		wc.Lock()
		wc.curFile.Lock()
		if wc.curFile.file != nil {
			_ = wc.curFile.file.Close()
			wc.curFile.file = nil
		}
		wc.curFile.Unlock()

		// Start writes into next file.
		wc.curFileNum++
		wc.curOffset = 0
		wc.Unlock()
	}

	// All writes are done under the write lock for the file to ensure any
	// readers are finished and blocked first.
	wc.curFile.Lock()
	defer wc.curFile.Unlock()

	// Open the current file if needed. This will typically only be the
	// case when moving to the next file to write to or on initial database
	// load. However, it might also be the case if rollbacks happened after
	// file writes started during a transaction commit.
	if wc.curFile.file == nil {
		file, err := s.openWriteFileFunc(wc.curFileNum)
		if err != nil {
			return blockLocation{}, err
		}
		wc.curFile.file = file
	}

	// Bitcoin network.
	origOffset := wc.curOffset
	hasher := crc32.New(castagnoli)
	var scratch [4]byte
	byteOrder.PutUint32(scratch[:], uint32(s.network))
	if err := s.writeData(scratch[:], "network"); err != nil {
		return blockLocation{}, err
	}
	_, _ = hasher.Write(scratch[:])

	// Block length.
	byteOrder.PutUint32(scratch[:], blockLen)
	if err := s.writeData(scratch[:], "block length"); err != nil {
		return blockLocation{}, err
	}
	_, _ = hasher.Write(scratch[:])

	// Serialized block.
	if err := s.writeData(rawBlock, "block"); err != nil {
		return blockLocation{}, err
	}
	_, _ = hasher.Write(rawBlock)

	// Castagnoli CRC-32 as a checksum of all the previous.
	if err := s.writeData(hasher.Sum(nil), "checksum"); err != nil {
		return blockLocation{}, err
	}

	loc := blockLocation{
		blockFileNum: wc.curFileNum,
		fileOffset:   origOffset,
		blockLen:     fullLen,
	}
	return loc, nil
}
// Sum32 - single caller crc helper
func Sum32(buffer []byte) uint32 {
	crc := crc32.New(castanagoliTable)
	crc.Reset()
	crc.Write(buffer)
	return crc.Sum32()
}
func HandleProtoClient(conn io.ReadCloser, updateChan chan *CounterUpdate, settingsChan chan *Settings) {
	// msgLen avoids shadowing the builtin len, which the original did.
	var msgLen uint32
	log.Println("Connection established")

	// Close the connection when the function exits
	defer conn.Close()

	for {
		log.Print("Waiting for data")

		// Find the first occurrence of the magic string, which is "AA"
		buf := make([]byte, 1)
		seen := 0
		garbage := 0
		for seen != 2 {
			_, err := conn.Read(buf)
			if err == io.EOF {
				log.Printf("Preamble: EOF reached\n")
				break
			} else if err != nil {
				log.Printf("Preamble: Error %s\n", err)
				break
			}
			if buf[0] == 'A' {
				seen++
			} else {
				if seen > 0 {
					log.Printf("Discarded: %d", int('A'))
				}
				log.Printf("Discarded: %d", int(buf[0]))
				seen = 0
				garbage++
			}
		}
		if garbage > 0 {
			log.Printf("Discarded %d bytes of garbage\n", garbage)
		}

		// Read the length field
		err := binary.Read(conn, binary.LittleEndian, &msgLen)
		if err == io.EOF {
			log.Printf("Length: EOF reached\n")
			break
		} else if err != nil {
			log.Printf("Length: Error %s\n", err)
			break
		}
		log.Println("len=", msgLen)

		if msgLen > MAX_PROTOBUF_MSG_LEN {
			log.Printf("Message length unrealistically large. Skipping. len=%d\n", msgLen)
			continue
		}

		// Create a data buffer of type byte slice with capacity for the message
		data := make([]byte, msgLen)

		// Read the data waiting on the connection and put it in the data buffer
		n, err := io.ReadFull(conn, data)
		if err == io.EOF {
			log.Printf("Message: EOF reached\n")
			break
		} else if err != nil {
			log.Printf("Message: Error %s\n", err)
			break
		}
		log.Printf("n=%d", n)

		fp, _ := os.Create("/tmp/message.pdata")
		fp.Write(data)
		fp.Close()

		// Read the checksum and match it against the received data
		crc := crc32.New(crcTable)
		crc.Write(data)
		var expectedCRC uint32
		err = binary.Read(conn, binary.LittleEndian, &expectedCRC)
		if err == io.EOF {
			log.Printf("Checksum: EOF reached\n")
			break
		} else if err != nil {
			log.Printf("Checksum: Error %s\n", err)
			break
		}
		if crc.Sum32() != expectedCRC {
			log.Printf("Checksum mismatch, skipping. Header says 0x%08x, calculated 0x%08x\n",
				expectedCRC, crc.Sum32())
			continue
		}

		// Convert all the data retrieved into the Message struct type
		protodata := new(Message)
		err = proto.Unmarshal(data[0:n], protodata)
		if err != nil {
			log.Printf("Unmarshal: Error %s\n", err)
			break
		}

		counterUpdate := protodata.GetUpdate()
		if counterUpdate != nil {
			updateChan <- counterUpdate
		}

		settings := protodata.GetSettings()
		if settings != nil {
			settingsChan <- settings
		}

		logmsg := protodata.GetLog()
		if logmsg != nil {
			log.Printf("%s : Received '%s'", logmsg.GetType().String(), logmsg.GetText())
		}
	}
}
func testChecksums(t *testing.T, data []byte) {
	t.Log("Checksums:")

	// crc64 with go library
	goCrc64 := crc64.New(crc64.MakeTable(crc64.ECMA))
	toChecksum := bytes.NewBuffer(data)
	pt := newPrettyTimer("go crc64")
	_, err := io.Copy(goCrc64, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	pt.stopAndPrintUncompress(t, len(data))

	// adler32 with go library
	goAdler32 := adler32.New()
	toChecksum = bytes.NewBuffer(data)
	pt = newPrettyTimer("go adler32")
	_, err = io.Copy(goAdler32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	goResult := goAdler32.Sum32()
	pt.stopAndPrintUncompress(t, len(data))
	t.Log(" sum :", goResult)

	// adler32 with cgzip library
	cgzipAdler32 := NewAdler32()
	toChecksum = bytes.NewBuffer(data)
	pt = newPrettyTimer("cgzip adler32")
	_, err = io.Copy(cgzipAdler32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	cgzipResult := cgzipAdler32.Sum32()
	pt.stopAndPrintUncompress(t, len(data))
	t.Log(" sum :", cgzipResult)

	// test both results are the same
	if goResult != cgzipResult {
		t.Errorf("go and cgzip adler32 mismatch")
	}

	// now test partial checksumming also works with adler32
	cutoff := len(data) / 3
	toChecksum = bytes.NewBuffer(data[0:cutoff])
	cgzipAdler32.Reset()
	_, err = io.Copy(cgzipAdler32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	adler1 := cgzipAdler32.Sum32()
	t.Log(" a1 :", adler1)
	t.Log(" len1 :", cutoff)

	toChecksum = bytes.NewBuffer(data[cutoff:])
	cgzipAdler32.Reset()
	_, err = io.Copy(cgzipAdler32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	adler2 := cgzipAdler32.Sum32()
	t.Log(" a2 :", adler2)
	t.Log(" len2 :", len(data)-cutoff)

	adlerCombined := Adler32Combine(adler1, adler2, len(data)-cutoff)
	t.Log(" comb :", adlerCombined)
	if cgzipResult != adlerCombined {
		t.Errorf("full and combined adler32 mismatch")
	}

	// crc32 with go library
	goCrc32 := crc32.New(crc32.MakeTable(crc32.IEEE))
	toChecksum = bytes.NewBuffer(data)
	pt = newPrettyTimer("go crc32")
	_, err = io.Copy(goCrc32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	goResult = goCrc32.Sum32()
	pt.stopAndPrintUncompress(t, len(data))
	t.Log(" sum :", goResult)

	// crc32 with cgzip library
	cgzipCrc32 := NewCrc32()
	toChecksum = bytes.NewBuffer(data)
	pt = newPrettyTimer("cgzip crc32")
	_, err = io.Copy(cgzipCrc32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	cgzipResult = cgzipCrc32.Sum32()
	pt.stopAndPrintUncompress(t, len(data))
	t.Log(" sum :", cgzipResult)

	// test both results are the same
	if goResult != cgzipResult {
		t.Errorf("go and cgzip crc32 mismatch")
	}

	// now test partial checksumming also works with crc32
	toChecksum = bytes.NewBuffer(data[0:cutoff])
	cgzipCrc32.Reset()
	_, err = io.Copy(cgzipCrc32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	crc1 := cgzipCrc32.Sum32()
	t.Log(" crc1 :", crc1)
	t.Log(" len1 :", cutoff)

	toChecksum = bytes.NewBuffer(data[cutoff:])
	cgzipCrc32.Reset()
	_, err = io.Copy(cgzipCrc32, toChecksum)
	if err != nil {
		t.Errorf("Copy failed: %v", err)
	}
	crc2 := cgzipCrc32.Sum32()
	t.Log(" crc2 :", crc2)
	t.Log(" len2 :", len(data)-cutoff)

	crcCombined := Crc32Combine(crc1, crc2, len(data)-cutoff)
	t.Log(" comb :", crcCombined)
	if cgzipResult != crcCombined {
		t.Errorf("full and combined crc32 mismatch")
	}
}
// Backup exports a snapshot of every kv entry into ranged sstables.
//
// The output is an sstable per range with files in the following locations:
//   - /<base>/<node_id>/<key_range>/data.sst
//   - <base> is given by the user and is expected to eventually be cloud storage
//   - The <key_range>s are non-overlapping.
//
// TODO(dan): Bikeshed this directory structure and naming.
func Backup(
	ctx context.Context, db client.DB, base string, endTime hlc.Timestamp,
) (desc sqlbase.BackupDescriptor, retErr error) {
	// TODO(dan): Optionally take a start time for an incremental backup.
	// TODO(dan): Take a uri for the path prefix and support various cloud storages.
	// TODO(dan): Figure out how permissions should work. #6713 is tracking this
	// for grpc.

	var rangeDescs []roachpb.RangeDescriptor
	var sqlDescs []sqlbase.Descriptor

	opt := client.TxnExecOptions{
		AutoRetry:  true,
		AutoCommit: true,
	}

	{
		// TODO(dan): Pick an appropriate end time and set it in the txn.
		txn := client.NewTxn(ctx, db)
		err := txn.Exec(opt, func(txn *client.Txn, opt *client.TxnExecOptions) error {
			var err error
			SetTxnTimestamps(txn, endTime)

			rangeDescs, err = AllRangeDescriptors(txn)
			if err != nil {
				return err
			}
			sqlDescs, err = allSQLDescriptors(txn)
			return err
		})
		if err != nil {
			return sqlbase.BackupDescriptor{}, err
		}
	}

	var dataSize int64
	backupDescs := make([]sqlbase.BackupRangeDescriptor, len(rangeDescs))
	crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
	for i, rangeDesc := range rangeDescs {
		backupDescs[i] = sqlbase.BackupRangeDescriptor{
			StartKey:  rangeDesc.StartKey.AsRawKey(),
			EndKey:    rangeDesc.EndKey.AsRawKey(),
			StartTime: hlc.Timestamp{},
		}
		if backupDescs[i].StartKey.Compare(keys.LocalMax) < 0 {
			backupDescs[i].StartKey = keys.LocalMax
		}

		nodeID := 0
		dir := filepath.Join(base, fmt.Sprintf("%03d", nodeID))
		dir = filepath.Join(dir, fmt.Sprintf("%x-%x", rangeDesc.StartKey, rangeDesc.EndKey))
		if err := os.MkdirAll(dir, 0700); err != nil {
			return sqlbase.BackupDescriptor{}, err
		}

		var kvs []client.KeyValue
		txn := client.NewTxn(ctx, db)
		err := txn.Exec(opt, func(txn *client.Txn, opt *client.TxnExecOptions) error {
			var err error
			SetTxnTimestamps(txn, endTime)

			// TODO(dan): Iterate with some batch size.
			kvs, err = txn.Scan(backupDescs[i].StartKey, backupDescs[i].EndKey, 0)
			return err
		})
		if err != nil {
			return sqlbase.BackupDescriptor{}, err
		}
		if len(kvs) == 0 {
			if log.V(1) {
				log.Infof(ctx, "skipping backup of empty range %s-%s",
					backupDescs[i].StartKey, backupDescs[i].EndKey)
			}
			continue
		}

		backupDescs[i].Path = filepath.Join(dir, dataSSTableName)

		writeSST := func() (writeSSTErr error) {
			// This is a function so the deferred Close (and resultant flush) is
			// called before the checksum is computed.
			sst := engine.MakeRocksDBSstFileWriter()
			if err := sst.Open(backupDescs[i].Path); err != nil {
				return err
			}
			defer func() {
				if closeErr := sst.Close(); closeErr != nil && writeSSTErr == nil {
					writeSSTErr = closeErr
				}
			}()

			// TODO(dan): Move all this iteration into cpp to avoid the cgo calls.
			for _, kv := range kvs {
				mvccKV := engine.MVCCKeyValue{
					Key:   engine.MVCCKey{Key: kv.Key, Timestamp: kv.Value.Timestamp},
					Value: kv.Value.RawBytes,
				}
				if err := sst.Add(mvccKV); err != nil {
					return err
				}
			}
			dataSize += sst.DataSize
			return nil
		}
		if err := writeSST(); err != nil {
			return sqlbase.BackupDescriptor{}, err
		}

		crc.Reset()
		f, err := os.Open(backupDescs[i].Path)
		if err != nil {
			return sqlbase.BackupDescriptor{}, err
		}
		defer f.Close()
		if _, err := io.Copy(crc, f); err != nil {
			return sqlbase.BackupDescriptor{}, err
		}
		backupDescs[i].CRC = crc.Sum32()
	}

	desc = sqlbase.BackupDescriptor{
		EndTime:  endTime,
		Ranges:   backupDescs,
		SQL:      sqlDescs,
		DataSize: dataSize,
	}
	descBuf, err := desc.Marshal()
	if err != nil {
		return sqlbase.BackupDescriptor{}, err
	}
	if err = ioutil.WriteFile(filepath.Join(base, backupDescriptorName), descBuf, 0600); err != nil {
		return sqlbase.BackupDescriptor{}, err
	}

	return desc, nil
}
// Ingest loads some data in an sstable into an empty range. Only the keys
// between startKey and endKey are loaded. If newTableID is non-zero, every
// row's key is rewritten to be for that table.
func Ingest(
	ctx context.Context,
	txn *client.Txn,
	path string,
	checksum uint32,
	startKey, endKey roachpb.Key,
	newTableID sqlbase.ID,
) error {
	// TODO(mjibson): An appropriate value for this should be determined. The
	// current value was guessed at but appears to work well.
	const batchSize = 10000

	// TODO(dan): Check if the range being ingested into is empty. If newTableID
	// is non-zero, it'll have to be derived from startKey and endKey.

	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()
	crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
	if _, err := io.Copy(crc, f); err != nil {
		return err
	}
	if c := crc.Sum32(); c != checksum {
		return errors.Errorf("%s: checksum mismatch got %d expected %d", path, c, checksum)
	}

	sst, err := engine.MakeRocksDBSstFileReader()
	if err != nil {
		return err
	}
	defer sst.Close()
	if err := sst.AddFile(path); err != nil {
		return err
	}

	b := txn.NewBatch()
	var v roachpb.Value
	count := 0
	ingestFunc := func(kv engine.MVCCKeyValue) (bool, error) {
		v = roachpb.Value{RawBytes: kv.Value}
		v.ClearChecksum()
		if log.V(3) {
			log.Infof(ctx, "Put %s %s\n", kv.Key.Key, v.PrettyPrint())
		}
		b.Put(kv.Key.Key, &v)
		count++
		if count > batchSize {
			if err := txn.Run(b); err != nil {
				return true, err
			}
			b = txn.NewBatch()
			count = 0
		}
		return false, nil
	}
	if newTableID != 0 {
		// MakeRekeyMVCCKeyValFunc modifies the keys, but this is safe because
		// the one we get back from rocksDBIterator.Key is a copy (not a
		// reference to the mmaped file.)
		ingestFunc = MakeRekeyMVCCKeyValFunc(newTableID, ingestFunc)
	}
	startKeyMVCC, endKeyMVCC := engine.MVCCKey{Key: startKey}, engine.MVCCKey{Key: endKey}
	if err := sst.Iterate(startKeyMVCC, endKeyMVCC, ingestFunc); err != nil {
		return err
	}
	return txn.Run(b)
}
func crc32(bytes []byte) uint32 {
	crc := crc32P.New(crc32P.IEEETable)
	crc.Write(bytes)
	return crc.Sum32()
}
func BenchmarkStdCastagnoli32KB(b *testing.B) {
	benchmark(b, crc32.New(crc32.MakeTable(Castagnoli)), 32*1024)
}