// Creates an in memory DAG from data in the given reader func NewDagFromReaderWithSplitter(r io.Reader, spl chunk.BlockSplitter) (*dag.Node, error) { blkChan := spl.Split(r) first := <-blkChan root := &dag.Node{} mbf := new(ft.MultiBlock) for blk := range blkChan { log.Debugf("created block, size %d", len(blk)) mbf.AddBlockSize(uint64(len(blk))) child := &dag.Node{Data: ft.WrapData(blk)} err := root.AddNodeLink("", child) if err != nil { return nil, err } } mbf.Data = first data, err := mbf.GetBytes() if err != nil { return nil, err } root.Data = data return root, nil }
// Builds a DAG from the data in the given reader, writing created blocks to disk // as they are created func BuildDagFromReader(r io.Reader, ds dag.DAGService, mp pin.ManualPinner, spl chunk.BlockSplitter) (*dag.Node, error) { blkChan := spl.Split(r) // grab first block, it will go in the index MultiBlock (faster io) first := <-blkChan root := &dag.Node{} mbf := new(ft.MultiBlock) for blk := range blkChan { // Store the block size in the root node mbf.AddBlockSize(uint64(len(blk))) node := &dag.Node{Data: ft.WrapData(blk)} nk, err := ds.Add(node) if err != nil { return nil, err } if mp != nil { mp.PinWithMode(nk, pin.Indirect) } // Add a link to this node without storing a reference to the memory err = root.AddNodeLinkClean("", node) if err != nil { return nil, err } } // Generate the root node data mbf.Data = first data, err := mbf.GetBytes() if err != nil { return nil, err } root.Data = data // Add root node to the dagservice rootk, err := ds.Add(root) if err != nil { return nil, err } if mp != nil { mp.PinWithMode(rootk, pin.Recursive) } return root, nil }
// WriteAt will modify a dag file in place, writing b starting at byte offset
// `offset`. It returns the number of bytes written (always len(b) on success).
// NOTE: it currently assumes only a single level of indirection — the root's
// links point directly at data blocks.
func (dm *DagModifier) WriteAt(b []byte, offset uint64) (int, error) {
	// Check bounds: writes may extend the file, but may not begin past its end.
	if dm.pbdata.GetFilesize() < offset {
		return 0, errors.New("Attempted to perform write starting past end of file")
	}

	// First need to find where we are writing at.
	// `end` is the exclusive end offset of the write, in file coordinates.
	end := uint64(len(b)) + offset

	// This shouldnt be necessary if we do subblocks sizes properly
	newsize := dm.pbdata.GetFilesize()
	if end > dm.pbdata.GetFilesize() {
		newsize = end
	}

	// The "zero block" is data embedded directly in the root node, preceding
	// all linked sub-blocks in file order.
	zeroblocklen := uint64(len(dm.pbdata.Data))
	// b is mutated below (prefixed/suffixed with neighboring block data), so
	// remember the caller's original length for the return value.
	origlen := len(b)

	if end <= zeroblocklen {
		log.Debug("Writing into zero block")
		// Replacing zeroeth data block (embedded in the root node)
		//TODO: check chunking here
		copy(dm.pbdata.Data[offset:], b)
		return len(b), nil
	}

	// Find where write should start.
	// `traversed` tracks the file offset reached so far while scanning blocks;
	// `startsubblk` is the index of the first sub-block touched by the write.
	var traversed uint64
	startsubblk := len(dm.pbdata.Blocksizes)
	if offset < zeroblocklen {
		// Write begins inside the zero block: truncate it at the write start
		// and rebuild everything from the first sub-block onward.
		// NOTE(review): in this branch `traversed` stays 0 even though the
		// sub-blocks begin at zeroblocklen in file coordinates — the `changed`
		// scan below compares `end` against `traversed`; verify this is
		// intentional and not an off-by-zeroblocklen.
		dm.pbdata.Data = dm.pbdata.Data[:offset]
		startsubblk = 0
	} else {
		traversed = uint64(zeroblocklen)
		for i, size := range dm.pbdata.Blocksizes {
			if uint64(offset) < traversed+size {
				log.Debugf("Starting mod at block %d. [%d < %d + %d]", i, offset, traversed, size)
				// Here is where we start
				startsubblk = i
				lnk := dm.curNode.Links[i]
				node, err := dm.dagserv.Get(u.Key(lnk.Hash))
				if err != nil {
					return 0, err
				}
				data, err := ft.UnwrapData(node.Data)
				if err != nil {
					return 0, err
				}

				// We have to rewrite the data before our write in this block,
				// so prepend the preserved prefix of the block onto b.
				b = append(data[:offset-traversed], b...)
				break
			}
			traversed += size
		}
		if startsubblk == len(dm.pbdata.Blocksizes) {
			// TODO: Im not sure if theres any case that isnt being handled here.
			// leaving this note here as a future reference in case something breaks
		}
	}

	// Find blocks that need to be overwritten.
	// `changed` collects indices of sub-blocks the write fully or partially
	// covers; `mid`/`midoff` identify the block (and offset within it) where
	// the write ends mid-block, so its tail must be preserved.
	var changed []int
	mid := -1
	var midoff uint64
	for i, size := range dm.pbdata.Blocksizes[startsubblk:] {
		if end > traversed {
			changed = append(changed, i+startsubblk)
		} else {
			break
		}
		traversed += size
		if end < traversed {
			mid = i + startsubblk
			midoff = end - (traversed - size)
			break
		}
	}

	// If our write ends in the middle of a block, fetch that block and append
	// its surviving tail (from midoff on) to b so it is re-chunked with the
	// new data.
	var midlnk *mdag.Link
	if mid >= 0 {
		midlnk = dm.curNode.Links[mid]
		midnode, err := dm.dagserv.Get(u.Key(midlnk.Hash))
		if err != nil {
			return 0, err
		}

		// NOTE: this may have to be changed later when we have multiple
		// layers of indirection
		data, err := ft.UnwrapData(midnode.Data)
		if err != nil {
			return 0, err
		}
		b = append(b, data[midoff:]...)
	}

	// Generate new sub-blocks, and sizes: re-chunk the combined buffer, store
	// each new block in the dagservice, and build the replacement link list.
	subblocks := splitBytes(b, dm.splitter)
	var links []*mdag.Link
	var sizes []uint64
	for _, sb := range subblocks {
		n := &mdag.Node{Data: ft.WrapData(sb)}
		_, err := dm.dagserv.Add(n)
		if err != nil {
			log.Errorf("Failed adding node to DAG service: %s", err)
			return 0, err
		}
		lnk, err := mdag.MakeLink(n)
		if err != nil {
			return 0, err
		}
		links = append(links, lnk)
		sizes = append(sizes, uint64(len(sb)))
	}

	// This is disgusting (and can be rewritten if performance demands):
	// splice the new links/sizes over the changed range. NOTE(review): these
	// appends rely on Go append aliasing semantics — `sechalflink` may share
	// `links`' backing array, and the second append writes into `curNode.Links`'
	// array; the ordering here matters. Do not reorder these statements.
	if len(changed) > 0 {
		sechalflink := append(links, dm.curNode.Links[changed[len(changed)-1]+1:]...)
		dm.curNode.Links = append(dm.curNode.Links[:changed[0]], sechalflink...)
		sechalfblks := append(sizes, dm.pbdata.Blocksizes[changed[len(changed)-1]+1:]...)
		dm.pbdata.Blocksizes = append(dm.pbdata.Blocksizes[:changed[0]], sechalfblks...)
	} else {
		// Nothing overwritten: the write is a pure extension past the last block.
		dm.curNode.Links = append(dm.curNode.Links, links...)
		dm.pbdata.Blocksizes = append(dm.pbdata.Blocksizes, sizes...)
	}
	dm.pbdata.Filesize = proto.Uint64(newsize)

	return origlen, nil
}