func storeItems(ctx context.Context, dag merkledag.DAGService, estimatedLen uint64, iter itemIterator, internalKeys keyObserver) (*merkledag.ProtoNode, error) { seed, err := randomSeed() if err != nil { return nil, err } links := make([]*node.Link, 0, defaultFanout+maxItems) for i := 0; i < defaultFanout; i++ { links = append(links, &node.Link{Cid: emptyKey}) } // add emptyKey to our set of internal pinset objects n := &merkledag.ProtoNode{} n.SetLinks(links) internalKeys(emptyKey) hdr := &pb.Set{ Version: proto.Uint32(1), Fanout: proto.Uint32(defaultFanout), Seed: proto.Uint32(seed), } if err := writeHdr(n, hdr); err != nil { return nil, err } if estimatedLen < maxItems { // it'll probably fit links := n.Links() for i := 0; i < maxItems; i++ { k, ok := iter() if !ok { // all done break } links = append(links, &node.Link{Cid: k}) } n.SetLinks(links) // sort by hash, also swap item Data s := sortByHash{ links: n.Links()[defaultFanout:], } sort.Stable(s) } hashed := make([][]*cid.Cid, defaultFanout) for { // This loop essentially enumerates every single item in the set // and maps them all into a set of buckets. Each bucket will be recursively // turned into its own sub-set, and so on down the chain. Each sub-set // gets added to the dagservice, and put into its place in a set nodes // links array. // // Previously, the bucket was selected by taking an int32 from the hash of // the input key + seed. This was erroneous as we would later be assigning // the created sub-sets into an array of length 256 by the modulus of the // int32 hash value with 256. This resulted in overwriting existing sub-sets // and losing pins. The fix (a few lines down from this comment), is to // map the hash value down to the 8 bit keyspace here while creating the // buckets. This way, we avoid any overlapping later on. k, ok := iter() if !ok { break } h := hash(seed, k) % defaultFanout hashed[h] = append(hashed[h], k) } for h, items := range hashed { if len(items) == 0 { // recursion base case continue } childIter := getCidListIterator(items) // recursively create a pinset from the items for this bucket index child, err := storeItems(ctx, dag, uint64(len(items)), childIter, internalKeys) if err != nil { return nil, err } size, err := child.Size() if err != nil { return nil, err } childKey, err := dag.Add(child) if err != nil { return nil, err } internalKeys(childKey) // overwrite the 'empty key' in the existing links array n.Links()[h] = &node.Link{ Cid: childKey, Size: size, } } return n, nil }
func storeItems(ctx context.Context, dag merkledag.DAGService, estimatedLen uint64, iter itemIterator, internalKeys keyObserver) (*merkledag.Node, error) { seed, err := randomSeed() if err != nil { return nil, err } n := &merkledag.Node{ Links: make([]*merkledag.Link, 0, defaultFanout+maxItems), } for i := 0; i < defaultFanout; i++ { n.Links = append(n.Links, &merkledag.Link{Hash: emptyKey.ToMultihash()}) } internalKeys(emptyKey) hdr := &pb.Set{ Version: proto.Uint32(1), Fanout: proto.Uint32(defaultFanout), Seed: proto.Uint32(seed), } if err := writeHdr(n, hdr); err != nil { return nil, err } hdrLen := len(n.Data()) if estimatedLen < maxItems { // it'll probably fit for i := 0; i < maxItems; i++ { k, data, ok := iter() if !ok { // all done break } n.Links = append(n.Links, &merkledag.Link{Hash: k.ToMultihash()}) n.SetData(append(n.Data(), data...)) } // sort by hash, also swap item Data s := sortByHash{ links: n.Links[defaultFanout:], data: n.Data()[hdrLen:], } sort.Stable(s) } // wasteful but simple type item struct { k key.Key data []byte } hashed := make(map[uint32][]item) for { k, data, ok := iter() if !ok { break } h := hash(seed, k) hashed[h] = append(hashed[h], item{k, data}) } for h, items := range hashed { childIter := func() (k key.Key, data []byte, ok bool) { if len(items) == 0 { return "", nil, false } first := items[0] items = items[1:] return first.k, first.data, true } child, err := storeItems(ctx, dag, uint64(len(items)), childIter, internalKeys) if err != nil { return nil, err } size, err := child.Size() if err != nil { return nil, err } childKey, err := dag.Add(child) if err != nil { return nil, err } internalKeys(childKey) l := &merkledag.Link{ Name: "", Hash: childKey.ToMultihash(), Size: size, } n.Links[int(h%defaultFanout)] = l } return n, nil }