Example #1
0
// storeItems serializes the keys produced by iter into a pinset node and
// returns that node; the returned node itself is not added to dag here.
//
// The first defaultFanout links of the node are slots for child sub-sets, each
// initially pointing at emptyKey. When estimatedLen is below maxItems, up to
// maxItems keys are read from iter and stored directly as extra links after
// those slots, ordered with sortByHash. Everything iter still yields after
// that is spread over defaultFanout buckets using hash(seed, key); each
// non-empty bucket is stored recursively, added to dag, and linked from the
// slot whose index equals the bucket index.
//
// internalKeys is invoked with emptyKey and with the key of every child node
// that gets added to dag. The first error encountered is returned as-is.
func storeItems(ctx context.Context, dag merkledag.DAGService, estimatedLen uint64, iter itemIterator, internalKeys keyObserver) (*merkledag.ProtoNode, error) {
	seed, err := randomSeed()
	if err != nil {
		return nil, err
	}

	// One placeholder link per bucket slot, with spare capacity reserved for
	// items that may be stored directly in this node.
	slots := make([]*node.Link, defaultFanout, defaultFanout+maxItems)
	for i := range slots {
		slots[i] = &node.Link{Cid: emptyKey}
	}

	n := &merkledag.ProtoNode{}
	n.SetLinks(slots)

	// Report emptyKey through the observer as an internal pinset object.
	internalKeys(emptyKey)

	if err := writeHdr(n, &pb.Set{
		Version: proto.Uint32(1),
		Fanout:  proto.Uint32(defaultFanout),
		Seed:    proto.Uint32(seed),
	}); err != nil {
		return nil, err
	}

	if estimatedLen < maxItems {
		// The whole set is expected to fit: store up to maxItems keys as
		// direct links following the bucket slots.
		direct := n.Links()
		for taken := 0; taken < maxItems; taken++ {
			c, more := iter()
			if !more {
				break
			}
			direct = append(direct, &node.Link{Cid: c})
		}
		n.SetLinks(direct)

		// Only the directly stored links are sorted; the slots keep their
		// positions.
		sort.Stable(sortByHash{links: n.Links()[defaultFanout:]})
	}

	// Drain whatever the iterator still has and assign every key to a bucket.
	// Each bucket later becomes its own recursively built sub-set that is
	// added to the dagservice and linked from the matching slot of this node.
	//
	// The bucket index is reduced modulo defaultFanout right here. An earlier
	// revision bucketed by the full hash value and only applied the modulus
	// when placing the finished sub-set into the links array, so distinct
	// buckets could land on the same slot, overwrite each other and lose
	// pins. Reducing at bucketing time rules that overlap out.
	buckets := make([][]*cid.Cid, defaultFanout)
	for c, more := iter(); more; c, more = iter() {
		idx := hash(seed, c) % defaultFanout
		buckets[idx] = append(buckets[idx], c)
	}

	for slot, members := range buckets {
		if len(members) == 0 {
			// Nothing hashed here; this is where the recursion bottoms out.
			continue
		}

		// Build the sub-set holding this bucket's keys.
		child, err := storeItems(ctx, dag, uint64(len(members)), getCidListIterator(members), internalKeys)
		if err != nil {
			return nil, err
		}

		childSize, err := child.Size()
		if err != nil {
			return nil, err
		}

		childKey, err := dag.Add(child)
		if err != nil {
			return nil, err
		}

		internalKeys(childKey)

		// Replace the emptyKey placeholder for this slot with the child link.
		n.Links()[slot] = &node.Link{
			Cid:  childKey,
			Size: childSize,
		}
	}

	return n, nil
}
Example #2
0
// storeItems serializes the (key, data) pairs produced by iter into a pinset
// node and returns that node; the returned node itself is not added to dag
// here.
//
// The first defaultFanout links of the node are slots for child sub-sets, each
// initially pointing at emptyKey. When estimatedLen is below maxItems, up to
// maxItems items are read from iter and stored directly: their keys as extra
// links after the slots and their data appended after the header in the node's
// data, both ordered together with sortByHash. Everything iter still yields
// after that is spread over defaultFanout buckets using
// hash(seed, key) % defaultFanout; each non-empty bucket is stored
// recursively, added to dag, and linked from the slot whose index equals the
// bucket index.
//
// internalKeys is invoked with emptyKey and with the key of every child node
// that gets added to dag. The first error encountered is returned as-is.
func storeItems(ctx context.Context, dag merkledag.DAGService, estimatedLen uint64, iter itemIterator, internalKeys keyObserver) (*merkledag.Node, error) {
	seed, err := randomSeed()
	if err != nil {
		return nil, err
	}
	n := &merkledag.Node{
		Links: make([]*merkledag.Link, 0, defaultFanout+maxItems),
	}
	// Fill every bucket slot with a placeholder link to emptyKey.
	for i := 0; i < defaultFanout; i++ {
		n.Links = append(n.Links, &merkledag.Link{Hash: emptyKey.ToMultihash()})
	}
	internalKeys(emptyKey)
	hdr := &pb.Set{
		Version: proto.Uint32(1),
		Fanout:  proto.Uint32(defaultFanout),
		Seed:    proto.Uint32(seed),
	}
	if err := writeHdr(n, hdr); err != nil {
		return nil, err
	}
	// Remember where the header ends so item data can be sorted separately.
	hdrLen := len(n.Data())

	if estimatedLen < maxItems {
		// it'll probably fit
		for i := 0; i < maxItems; i++ {
			k, data, ok := iter()
			if !ok {
				// all done
				break
			}
			n.Links = append(n.Links, &merkledag.Link{Hash: k.ToMultihash()})
			n.SetData(append(n.Data(), data...))
		}
		// sort by hash, also swap item Data
		s := sortByHash{
			links: n.Links[defaultFanout:],
			data:  n.Data()[hdrLen:],
		}
		sort.Stable(s)
	}

	// wasteful but simple
	type item struct {
		k    key.Key
		data []byte
	}

	// Bucket the remaining items by hash reduced to the slot range. The
	// reduction must happen here, not when the child link is stored: bucketing
	// by the full hash value lets two different buckets map to the same slot,
	// in which case the later child overwrites the earlier one and the
	// earlier child's items are lost.
	hashed := make([][]item, defaultFanout)
	for {
		k, data, ok := iter()
		if !ok {
			break
		}
		h := hash(seed, k) % defaultFanout
		hashed[h] = append(hashed[h], item{k, data})
	}

	for h, items := range hashed {
		if len(items) == 0 {
			// No items for this slot; it keeps its emptyKey placeholder.
			continue
		}

		// Iterator handing out this bucket's items one at a time.
		remaining := items
		childIter := func() (k key.Key, data []byte, ok bool) {
			if len(remaining) == 0 {
				return "", nil, false
			}
			first := remaining[0]
			remaining = remaining[1:]
			return first.k, first.data, true
		}

		child, err := storeItems(ctx, dag, uint64(len(items)), childIter, internalKeys)
		if err != nil {
			return nil, err
		}
		size, err := child.Size()
		if err != nil {
			return nil, err
		}
		childKey, err := dag.Add(child)
		if err != nil {
			return nil, err
		}
		internalKeys(childKey)

		// Overwrite the placeholder in this bucket's slot with the child link.
		n.Links[h] = &merkledag.Link{
			Name: "",
			Hash: childKey.ToMultihash(),
			Size: size,
		}
	}
	return n, nil
}