// newFile wraps f in a paged *file. pgBits requests a page size of 1<<pgBits
// bytes; the request is normalized below so that the effective page size is a
// power of two no smaller than the system page size.
func newFile(f *os.File, maxSize int64, pgBits uint) (*file, error) {
	if maxSize < 0 {
		panic("internal error")
	}

	// Normalize the requested page size.
	pgSize := 1 << pgBits
	switch {
	case sysPage > pgSize:
		pgBits = uint(mathutil.Log2Uint64(uint64(sysPage)))
	default:
		pgBits = uint(mathutil.Log2Uint64(uint64(pgSize / sysPage * sysPage)))
	}
	pgSize = 1 << pgBits
	fi := &file{
		f: f,
		m: fileMap{},
		// Cap the page count derived from maxSize to the range [1, 1024].
		maxPages: int(mathutil.MinInt64(
			1024,
			mathutil.MaxInt64(maxSize/int64(pgSize), 1)),
		),
		pgBits: pgBits,
		pgMask: pgSize - 1,
		pgSize: pgSize,
	}
	info, err := f.Stat()
	if err != nil {
		return nil, err
	}

	if err = fi.Truncate(info.Size()); err != nil {
		return nil, err
	}

	return fi, nil
}
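
// A quick worked example of the normalization in newFile, assuming
// sysPage == 4096 (typical on most platforms; illustrative numbers only):
//
//	pgBits = 9:  pgSize = 512 < sysPage, so pgBits becomes log2(4096) = 12
//	pgBits = 16: pgSize = 65536 and 65536/4096*4096 = 65536, so pgBits stays 16
//
// Either way the effective page size ends up a power of two no smaller than
// the system page size.
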
// Verify attempts to find any structural errors in a Filer wrt the
// organization of it as defined by Allocator. 'bitmap' is a scratch pad for
// necessary bookkeeping and will grow to at most Allocator's
// Filer.Size()/128 (0.78%). Any problems found are reported to 'log' except
// non-verify-related errors like disk read failures etc. If 'log' returns
// false or the error doesn't allow the verification to (reliably) continue,
// the verification process is stopped and an error is returned from the
// Verify function. Passing a nil log works like providing a log function
// that always returns false. Any non-structural errors, like for instance
// Filer read errors, are NOT reported to 'log', but returned as Verify's
// return value, because Verify cannot proceed in such cases. Verify returns
// nil only if it fully completed verifying Allocator's Filer without
// detecting any error.
//
// It is recommended to limit the number of reported problems by returning
// false from 'log' after reaching some limit. A huge and corrupted DB can
// produce an overwhelming error report dataset.
//
// The verifying process will scan the whole DB at least 3 times (a trade-off
// between processing space and time consumed). It doesn't read the content of
// free blocks above the head/tail info bytes. If the 3rd phase detects lost
// free space, then a 4th scan (a faster one) is performed to precisely report
// all of the lost blocks.
//
// If the DB/Filer to be verified is reasonably small, i.e. if its size/128
// can comfortably fit within the process's free memory, then it is
// recommended to consider using a MemFiler for the bit map.
//
// Statistics are returned via 'stats' if non-nil. The statistics are valid
// only if Verify succeeded, i.e. it didn't report anything to log and it
// returned a nil error.
func (a *Allocator) Verify(bitmap Filer, log func(error) bool, stats *AllocStats) (err error) {
	if log == nil {
		log = nolog
	}

	n, err := bitmap.Size()
	if err != nil {
		return
	}

	if n != 0 {
		return &ErrINVAL{"Allocator.Verify: bit map initial size non zero (%d)", n}
	}

	var bits int64
	bitMask := [8]byte{1, 2, 4, 8, 16, 32, 64, 128}
	byteBuf := []byte{0}

	//DONE
	// +performance, this implementation is hopefully correct but _very_
	// naive, probably good as a prototype only. Use maybe a MemFiler
	// "cache" etc.
	// ----
	// Turns out the OS caching is as effective as it can probably get.
	bit := func(on bool, h int64) (wasOn bool, err error) {
		m := bitMask[h&7]
		off := h >> 3
		var v byte
		sz, err := bitmap.Size()
		if err != nil {
			return
		}

		if off < sz {
			if n, err := bitmap.ReadAt(byteBuf, off); n != 1 {
				return false, &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("Allocator.Verify - reading bitmap: %s", err)}
			}

			v = byteBuf[0]
		}
		switch wasOn = v&m != 0; on {
		case true:
			if !wasOn {
				v |= m
				bits++
			}
		case false:
			if wasOn {
				v ^= m
				bits--
			}
		}
		byteBuf[0] = v
		if n, err := bitmap.WriteAt(byteBuf, off); n != 1 || err != nil {
			return false, &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("Allocator.Verify - writing bitmap: %s", err)}
		}

		return
	}

	// Phase 1 - sequentially scan a.f to reliably determine block
	// boundaries. Set a bit for every block start.
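	// A bit in 'bitmap' corresponds to one atom (16 bytes) of a.f: handle
	// h maps to bit h&7 of byte h>>3. That is where the Size()/128 bound
	// quoted in the doc comment comes from: one bit per 16 bytes is
	// 1/(16*8) = 1/128 of the verified Filer's size.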
	var (
		buf, ubuf       [maxRq]byte
		prevH, h, atoms int64
		wasOn           bool
		tag             byte
		st              = AllocStats{
			AllocMap: map[int64]int64{},
			FreeMap:  map[int64]int64{},
		}
		dlen int
	)

	fsz, err := a.f.Size()
	if err != nil {
		return
	}

	ok := fsz%16 == 0
	totalAtoms := (fsz - fltSz) / atomLen
	if !ok {
		err = &ErrILSEQ{Type: ErrFileSize, Name: a.f.Name(), Arg: fsz}
		log(err)
		return
	}

	st.TotalAtoms = totalAtoms
	prevTag := -1
	lastH := int64(-1)

	for h = 1; h <= totalAtoms; h += atoms {
		prevH = h // For checking last block == used

		off := h2off(h)
		if err = a.read(buf[:1], off); err != nil {
			return
		}

		switch tag = buf[0]; tag {
		default: // Short used
			fallthrough
		case tagUsedLong, tagUsedRelocated:
			var compressed bool
			if compressed, dlen, atoms, _, err = a.verifyUsed(h, totalAtoms, tag, buf[:], ubuf[:], log, false); err != nil {
				return
			}

			if compressed {
				st.Compression++
			}
			st.AllocAtoms += atoms
			switch {
			case tag == tagUsedRelocated:
				st.AllocMap[1]++
				st.Relocations++
			default:
				st.AllocMap[atoms]++
				st.AllocBytes += int64(dlen)
				st.Handles++
			}
		case tagFreeShort, tagFreeLong:
			if prevTag == tagFreeShort || prevTag == tagFreeLong {
				err = &ErrILSEQ{Type: ErrAdjacentFree, Off: h2off(lastH), Arg: off}
				log(err)
				return
			}

			if atoms, _, _, err = a.verifyUnused(h, totalAtoms, tag, log, false); err != nil {
				return
			}

			st.FreeMap[atoms]++
			st.FreeAtoms += atoms
		}

		if wasOn, err = bit(true, h); err != nil {
			return
		}

		if wasOn {
			panic("internal error")
		}

		prevTag = int(tag)
		lastH = h
	}

	if totalAtoms != 0 && (tag == tagFreeShort || tag == tagFreeLong) {
		err = &ErrILSEQ{Type: ErrFreeTailBlock, Off: h2off(prevH)}
		log(err)
		return
	}

	// Phase 2 - check used blocks, turn off the map bit for every used
	// block.
	for h = 1; h <= totalAtoms; h += atoms {
		off := h2off(h)
		if err = a.read(buf[:1], off); err != nil {
			return
		}

		var link int64
		switch tag = buf[0]; tag {
		default: // Short used
			fallthrough
		case tagUsedLong, tagUsedRelocated:
			if _, _, atoms, link, err = a.verifyUsed(h, totalAtoms, tag, buf[:], ubuf[:], log, true); err != nil {
				return
			}
		case tagFreeShort, tagFreeLong:
			if atoms, _, _, err = a.verifyUnused(h, totalAtoms, tag, log, true); err != nil {
				return
			}
		}

		turnoff := true
		switch tag {
		case tagUsedRelocated:
			if err = a.read(buf[:1], h2off(link)); err != nil {
				return
			}

			switch linkedTag := buf[0]; linkedTag {
			case tagFreeShort, tagFreeLong, tagUsedRelocated:
				err = &ErrILSEQ{Type: ErrInvalidRelocTarget, Off: off, Arg: link}
				log(err)
				return
			}

		case tagFreeShort, tagFreeLong:
			turnoff = false
		}

		if !turnoff {
			continue
		}

		if wasOn, err = bit(false, h); err != nil {
			return
		}

		if !wasOn {
			panic("internal error")
		}
	}

	// Phase 3 - using the flt, check that the heads link to proper free
	// blocks. For every free block, walk the list, verify the {next,
	// prev} links and turn the respective map bit off. After processing
	// all free lists, the map bits count should be zero; otherwise there
	// are "lost" free blocks.
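	// Free blocks of one size class form a doubly linked list anchored at
	// list.head. 'prev' tracks the handle the walk arrived from, so a
	// healthy block must report fprev == prev and span at least
	// list.minSize atoms.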
	var prev, next, fprev, fnext int64
	rep := a.flt

	for _, list := range rep {
		prev, next = 0, list.head
		for ; next != 0; prev, next = next, fnext {
			if wasOn, err = bit(false, next); err != nil {
				return
			}

			if !wasOn {
				err = &ErrILSEQ{Type: ErrFLT, Off: h2off(next), Arg: h}
				log(err)
				return
			}

			off := h2off(next)
			if err = a.read(buf[:1], off); err != nil {
				return
			}

			switch tag = buf[0]; tag {
			default:
				panic("internal error")
			case tagFreeShort, tagFreeLong:
				if atoms, fprev, fnext, err = a.verifyUnused(next, totalAtoms, tag, log, true); err != nil {
					return
				}

				if min := list.minSize; atoms < min {
					err = &ErrILSEQ{Type: ErrFLTSize, Off: h2off(next), Arg: atoms, Arg2: min}
					log(err)
					return
				}

				if fprev != prev {
					err = &ErrILSEQ{Type: ErrFreeChaining, Off: h2off(next)}
					log(err)
					return
				}
			}
		}
	}

	if bits == 0 { // Verify succeeded
		if stats != nil {
			*stats = st
		}
		return
	}

	// Phase 4 - if after phase 3 there are lost free blocks, report all of
	// them to 'log'
	for i := range ubuf { // setup zeros for compares
		ubuf[i] = 0
	}

	var off, lh int64
	rem, err := bitmap.Size()
	if err != nil {
		return err
	}

	for rem != 0 {
		rq := int(mathutil.MinInt64(64*1024, rem))
		var n int
		if n, err = bitmap.ReadAt(buf[:rq], off); n != rq {
			return &ErrILSEQ{Type: ErrOther, Off: off, More: fmt.Errorf("bitmap ReadAt(size %d, off %#x): %s", rq, off, err)}
		}

		if !bytes.Equal(buf[:rq], ubuf[:rq]) {
			for d, v := range buf[:rq] {
				if v != 0 {
					for i, m := range bitMask {
						if v&m != 0 {
							lh = 8*(off+int64(d)) + int64(i)
							err = &ErrILSEQ{Type: ErrLostFreeBlock, Off: h2off(lh)}
							log(err)
							return
						}
					}
				}
			}
		}

		off += int64(rq)
		rem -= int64(rq)
	}

	return
}
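
// A minimal usage sketch for Verify, following the recommendations in its doc
// comment. NewAllocator, Options and NewMemFiler are assumed to be the
// surrounding package's usual constructors; adjust to the actual API at hand:
//
//	a, err := NewAllocator(filer, &Options{})
//	if err != nil {
//		// handle the error
//	}
//
//	limit := 10
//	var stats AllocStats
//	err = a.Verify(NewMemFiler(), func(e error) bool {
//		fmt.Println(e)
//		limit--
//		return limit > 0 // cap the report at 10 problems
//	}, &stats)
//
// Returning false from the log function once the limit is reached keeps a
// badly corrupted DB from flooding the report, and the in-memory bit map
// avoids extra disk traffic for reasonably small Filers.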