Example #1
// newTestingFile initializes a file object with random parameters.
func newTestingFile() *file {
	key, _ := crypto.GenerateTwofishKey()
	data, _ := crypto.RandBytes(8)
	nData, _ := crypto.RandIntn(10)
	nParity, _ := crypto.RandIntn(10)
	rsc, _ := NewRSCode(nData+1, nParity+1)

	return &file{
		name:        "testfile-" + strconv.Itoa(int(data[0])),
		size:        encoding.DecUint64(data[1:5]),
		masterKey:   key,
		erasureCode: rsc,
		pieceSize:   encoding.DecUint64(data[6:8]),
	}
}
Example #2
// probabilisticReset will probabilistically reboot the storage manager before
// continuing. This helps to verify that the persistence is working correctly.
// The reset is probabilistic so that a passing test is not passing merely
// because of the reset.
func (smt *storageManagerTester) probabilisticReset() error {
	rand, err := crypto.RandIntn(3)
	if err != nil {
		return err
	}
	if rand == 1 {
		// Grab the potentially faulty dependencies and replace them with good
		// dependencies so that closing happens without issues.
		deps := smt.sm.dependencies
		smt.sm.dependencies = productionDependencies{}
		// Close the storage manager, then create a new storage manager to
		// replace it.
		err = smt.sm.Close()
		if err != nil {
			return err
		}
		// Open the storage manager with production dependencies so that there
		// are no errors.
		sm, err := New(filepath.Join(smt.persistDir, modules.StorageManagerDir))
		if err != nil {
			return err
		}
		sm.dependencies = deps
		smt.sm = sm
	}
	return nil
}
Example #3
// threadedUploadPiece will upload the piece of a file to a randomly chosen
// host. If the wallet has insufficient balance to support uploading,
// threadedUploadPiece will give up. The file upload can be continued using a
// repair tool. Upon completion, the memory containing the piece's information
// is updated.
func (r *Renter) threadedUploadPiece(host modules.HostSettings, up modules.FileUploadParams, piece *filePiece) error {
	// Set 'Repairing' for the piece to true.
	lockID := r.mu.Lock()
	piece.Repairing = true
	r.mu.Unlock(lockID)

	// Try 'maxUploadAttempts' hosts before giving up.
	for attempts := 0; attempts < maxUploadAttempts; attempts++ {
		// Negotiate the contract with the host. If the negotiation is
		// unsuccessful, we need to try again with a new host.
		err := r.negotiateContract(host, up, piece)
		if err == nil {
			return nil
		}

		// The previous attempt didn't work. We will try again after
		// sleeping for a randomized amount of time to increase our chances
		// of success. This will help spread things out if there are
		// problems with network congestion or other randomized issues.
		r, _ := crypto.RandIntn(256)
		time.Sleep(100 * time.Millisecond * time.Duration(r))
	}

	// All attempts failed.
	return errors.New("failed to upload filePiece")
}
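The retry loop above sleeps for a randomized interval between attempts so that concurrent uploads do not retry in lockstep. Below is a minimal, self-contained sketch of the same jittered-retry pattern; it substitutes the standard library's crypto/rand for the Sia crypto.RandIntn helper used in these examples, and the retryWithJitter wrapper and its limits are hypothetical.

package main

import (
	"crypto/rand"
	"errors"
	"fmt"
	"math/big"
	"time"
)

// randIntn returns a uniform random int in [0, n), mirroring the behavior of
// the crypto.RandIntn helper used in the examples above.
func randIntn(n int) (int, error) {
	if n <= 0 {
		return 0, errors.New("n must be positive")
	}
	r, err := rand.Int(rand.Reader, big.NewInt(int64(n)))
	if err != nil {
		return 0, err
	}
	return int(r.Int64()), nil
}

// retryWithJitter attempts op up to maxAttempts times, sleeping a random
// 0-255ms between attempts so that concurrent callers spread out.
func retryWithJitter(op func() error, maxAttempts int) error {
	for attempt := 0; attempt < maxAttempts; attempt++ {
		if err := op(); err == nil {
			return nil
		}
		n, _ := randIntn(256)
		time.Sleep(time.Millisecond * time.Duration(n))
	}
	return errors.New("all attempts failed")
}

func main() {
	calls := 0
	err := retryWithJitter(func() error {
		calls++
		if calls < 3 {
			return errors.New("transient failure")
		}
		return nil
	}, 5)
	fmt.Println("calls:", calls, "err:", err)
}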
Example #4
// managedLogError will take an error and log it to the host, depending on the
// type of error and whether or not the DEBUG flag has been set.
func (h *Host) managedLogError(err error) {
	// Determine the type of error and the number of times that this error has
	// been logged.
	var num uint64
	var probability int // Error will be logged with 1/probability chance.
	switch err.(type) {
	case ErrorCommunication:
		num = atomic.LoadUint64(&h.atomicCommunicationErrors)
		probability = errorCommunicationProbability
	case ErrorConnection:
		num = atomic.LoadUint64(&h.atomicConnectionErrors)
		probability = errorConnectionProbability
	case ErrorConsensus:
		num = atomic.LoadUint64(&h.atomicConsensusErrors)
		probability = errorConsensusProbability
	case ErrorInternal:
		num = atomic.LoadUint64(&h.atomicInternalErrors)
		probability = errorInternalProbability
	default:
		num = atomic.LoadUint64(&h.atomicNormalErrors)
		probability = errorNormalProbability
	}

	// If num > logFewLimit, substantially decrease the probability that the error
	// gets logged.
	if num > logFewLimit {
		probability = probability * 25
	}

	// If we've seen less than logAllLimit of that type of error before, log
	// the error as a normal logging statement. Otherwise, probabilistically
	// log the statement. In debugging mode, log all statements.
	logged := false
	rand, randErr := crypto.RandIntn(probability + 1)
	if randErr != nil {
		h.log.Critical("random number generation failed")
	}
	if num < logAllLimit || rand == probability {
		logged = true
		h.log.Println(err)
	} else {
		h.log.Debugln(err)
	}

	// If the error was logged, increment the log counter.
	if logged {
		switch err.(type) {
		case ErrorCommunication:
			atomic.AddUint64(&h.atomicCommunicationErrors, 1)
		case ErrorConnection:
			atomic.AddUint64(&h.atomicConnectionErrors, 1)
		case ErrorConsensus:
			atomic.AddUint64(&h.atomicConsensusErrors, 1)
		case ErrorInternal:
			atomic.AddUint64(&h.atomicInternalErrors, 1)
		default:
			atomic.AddUint64(&h.atomicNormalErrors, 1)
		}
	}
}
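Once an error type has been seen more than logAllLimit times, the rand == probability comparison above logs it with roughly a 1-in-(probability+1) chance. A minimal standalone sketch of that throttling decision is below; it uses crypto/rand from the standard library instead of crypto.RandIntn, and the logAllLimit and probability values are hypothetical.

package main

import (
	"crypto/rand"
	"fmt"
	"math/big"
)

const (
	logAllLimit = 10 // below this count, every occurrence is logged (hypothetical value)
	probability = 20 // above the limit, log with roughly a 1-in-(probability+1) chance
)

// shouldLog reports whether an error that has already been seen num times
// should be written to the log, mirroring managedLogError's throttle.
func shouldLog(num uint64) bool {
	r, err := rand.Int(rand.Reader, big.NewInt(probability+1))
	if err != nil {
		return true // if randomness fails, err on the side of logging
	}
	return num < logAllLimit || r.Int64() == probability
}

func main() {
	logged := 0
	for i := uint64(0); i < 1000; i++ {
		if shouldLog(i) {
			logged++
		}
	}
	// Expect roughly logAllLimit + (1000-logAllLimit)/(probability+1) logged lines.
	fmt.Println("logged", logged, "of 1000 occurrences")
}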
Example #5
// maybeCheckConsistency runs a consistency check with a small probability.
// Useful for detecting database corruption in production without needing to go
// through the extremely slow process of running a consistency check every
// block.
func (cs *ConsensusSet) maybeCheckConsistency(tx *bolt.Tx) {
	n, err := crypto.RandIntn(1000)
	if err != nil {
		manageErr(tx, err)
	}
	if n == 0 {
		cs.checkConsistency(tx)
	}
}
Example #6
func (f *testFetcher) fetch(p pieceData) ([]byte, error) {
	f.nAttempt++
	time.Sleep(f.delay)
	// randomly fail
	if n, _ := crypto.RandIntn(f.failRate); n == 0 {
		return nil, io.EOF
	}
	f.nFetch++
	return f.sectors[p.MerkleRoot], nil
}
Example #7
func (f *testFetcher) fetch(p pieceData) ([]byte, error) {
	f.nAttempt++
	time.Sleep(f.delay)
	// randomly fail
	if n, _ := crypto.RandIntn(f.failRate); n == 0 {
		return nil, io.EOF
	}
	f.nFetch++
	return f.data[p.Offset : p.Offset+f.pieceSize], nil
}
Example #8
func (h *testHost) fetch(p pieceData) ([]byte, error) {
	h.nAttempt++
	time.Sleep(h.delay)
	// randomly fail
	if n, _ := crypto.RandIntn(h.failRate); n == 0 {
		return nil, io.EOF
	}
	h.nFetch++
	return h.data[p.Offset : p.Offset+h.pieceSize], nil
}
Example #9
func (g *Gateway) randomNode() (modules.NetAddress, error) {
	if len(g.nodes) > 0 {
		r, _ := crypto.RandIntn(len(g.nodes))
		for node := range g.nodes {
			if r <= 0 {
				return node, nil
			}
			r--
		}
	}

	return "", errNoPeers
}
Example #10
// randomPeer returns a random peer from the gateway's peer list.
func (g *Gateway) randomPeer() (modules.NetAddress, error) {
	if len(g.peers) > 0 {
		r, _ := crypto.RandIntn(len(g.peers))
		for addr := range g.peers {
			if r <= 0 {
				return addr, nil
			}
			r--
		}
	}

	return "", errNoPeers
}
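randomNode and randomPeer both draw r in [0, len) and then range over the map, returning the entry at which the counter reaches zero. Go's map iteration order is randomized but not uniformly distributed, so it is the explicit counter that makes the selection uniform. A minimal standalone sketch of the pattern is below, using crypto/rand from the standard library; the peer addresses are made up.

package main

import (
	"crypto/rand"
	"errors"
	"fmt"
	"math/big"
)

var errNoPeers = errors.New("no peers")

// randomKey returns a uniformly random key of m by drawing an index in
// [0, len(m)) and counting it down while ranging over the map.
func randomKey(m map[string]struct{}) (string, error) {
	if len(m) == 0 {
		return "", errNoPeers
	}
	n, err := rand.Int(rand.Reader, big.NewInt(int64(len(m))))
	if err != nil {
		return "", err
	}
	r := n.Int64()
	for k := range m {
		if r <= 0 {
			return k, nil
		}
		r--
	}
	return "", errNoPeers // unreachable when the map is non-empty
}

func main() {
	peers := map[string]struct{}{
		"1.2.3.4:9981": {},
		"5.6.7.8:9981": {},
		"9.9.9.9:9981": {},
	}
	addr, err := randomKey(peers)
	fmt.Println(addr, err)
}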
Example #11
// Upload adds a piece to the testHost. It randomly fails according to the
// testHost's parameters.
func (h *testHost) Upload(data []byte) (offset uint64, err error) {
	// simulate I/O delay
	time.Sleep(h.delay)

	h.Lock()
	defer h.Unlock()

	// randomly fail
	if n, _ := crypto.RandIntn(h.failRate); n == 0 {
		return 0, errors.New("no data")
	}

	h.data = append(h.data, data...)
	return uint64(len(h.data) - len(data)), nil
}
Example #12
// Upload adds a piece to the testHost. It randomly fails according to the
// testHost's parameters.
func (h *testHost) Upload(data []byte) (crypto.Hash, error) {
	// simulate I/O delay
	time.Sleep(h.delay)

	h.Lock()
	defer h.Unlock()

	// randomly fail
	if n, _ := crypto.RandIntn(h.failRate); n == 0 {
		return crypto.Hash{}, errors.New("no data")
	}

	root := crypto.MerkleRoot(data)
	h.sectors[root] = data
	return root, nil
}
Example #13
// Download downloads a file, identified by its nickname, to the destination
// specified.
func (r *Renter) Download(nickname, destination string) error {
	lockID := r.mu.Lock()
	// Lookup the file associated with the nickname.
	file, exists := r.files[nickname]
	if !exists {
		r.mu.Unlock(lockID)
		return errors.New("no file of that nickname")
	}

	// Create the download object and spawn the download process.
	d, err := newDownload(file, destination)
	if err != nil {
		r.mu.Unlock(lockID)
		return err
	}

	// Add the download to the download queue.
	r.downloadQueue = append(r.downloadQueue, d)
	r.mu.Unlock(lockID)

	// Download the file. We only need one piece, so iterate through the hosts
	// until a download succeeds.
	for i := 0; i < downloadAttempts; i++ {
		for _, piece := range d.pieces {
			downloadErr := d.downloadPiece(piece)
			if downloadErr == nil {
				// done
				d.complete = true
				d.file.Close()
				return nil
			}
			// Reset seek, since the file may have been partially written. The
			// next attempt will overwrite these bytes.
			d.file.Seek(0, 0)
			atomic.SwapUint64(&d.received, 0)
		}

		// This iteration failed, no hosts returned the piece. Try again
		// after waiting a random amount of time.
		r, _ := crypto.RandIntn(i * i * 256)
		time.Sleep(time.Second * time.Duration(r))
	}

	// File could not be downloaded; delete the copy on disk.
	d.file.Close()
	os.Remove(destination)

	return errors.New("could not download any file pieces")
}
Example #14
func (h *testHost) addPiece(p uploadPiece) error {
	// simulate I/O delay
	time.Sleep(h.delay)

	// randomly fail
	if n, _ := crypto.RandIntn(h.failRate); n == 0 {
		return crypto.ErrNilInput
	}

	h.pieceMap[p.chunkIndex] = append(h.pieceMap[p.chunkIndex], pieceData{
		p.chunkIndex,
		p.pieceIndex,
		uint64(len(h.data)),
	})
	h.data = append(h.data, p.data...)
	return nil
}
Example #15
// randomInboundPeer returns a random peer that initiated its connection.
func (g *Gateway) randomInboundPeer() (modules.NetAddress, error) {
	if len(g.peers) > 0 {
		r, _ := crypto.RandIntn(len(g.peers))
		for addr, peer := range g.peers {
			// only select inbound peers
			if !peer.inbound {
				continue
			}
			if r <= 0 {
				return addr, nil
			}
			r--
		}
	}

	return "", errNoPeers
}
Example #16
// Discover scans the local network for routers and returns the first
// UPnP-enabled router it encounters.  It will try up to 3 times to find a
// router, sleeping a random duration between each attempt.  This is to
// mitigate a race condition with many callers attempting to discover
// simultaneously.
//
// TODO: if more than one client is found, only return those on the same
// subnet as the user?
func Discover() (*IGD, error) {
	maxTries := 3
	sleepMs, _ := crypto.RandIntn(5000)
	for try := 0; try < maxTries; try++ {
		time.Sleep(time.Millisecond * time.Duration(sleepMs))
		pppclients, _, _ := internetgateway1.NewWANPPPConnection1Clients()
		if len(pppclients) > 0 {
			return &IGD{pppclients[0]}, nil
		}
		ipclients, _, _ := internetgateway1.NewWANIPConnection1Clients()
		if len(ipclients) > 0 {
			return &IGD{ipclients[0]}, nil
		}
		sleepMs *= 2
	}
	return nil, errors.New("no UPnP-enabled gateway found")
}
Example #17
// randomOutboundPeer returns a random outbound peer.
func (g *Gateway) randomOutboundPeer() (modules.NetAddress, error) {
	// Get the list of outbound peers.
	var addrs []modules.NetAddress
	for addr, peer := range g.peers {
		if peer.Inbound {
			continue
		}
		addrs = append(addrs, addr)
	}
	if len(addrs) == 0 {
		return "", errNoPeers
	}

	// Of the remaining options, select one at random.
	r, err := crypto.RandIntn(len(addrs))
	if err != nil {
		g.log.Severe("Random number generation failure:", err)
	}
	return addrs[r], nil
}
Example #18
// acceptPeer makes room for the peer if necessary by kicking out existing
// peers, then adds the peer to the peer list.
func (g *Gateway) acceptPeer(p *peer) {
	// If we are not fully connected, add the peer without kicking any out.
	if len(g.peers) < fullyConnectedThreshold {
		g.addPeer(p)
		return
	}

	// Select a peer to kick. Outbound peers and local peers are not
	// available to be kicked.
	var addrs []modules.NetAddress
	for addr, peer := range g.peers {
		// Do not kick outbound peers or local peers.
		if !peer.Inbound || peer.Local {
			continue
		}

		// Prefer kicking a peer with the same hostname.
		if addr.Host() == p.NetAddress.Host() {
			addrs = []modules.NetAddress{addr}
			break
		}
		addrs = append(addrs, addr)
	}
	if len(addrs) == 0 {
		// There is nobody suitable to kick, therefore do not kick anyone.
		g.addPeer(p)
		return
	}

	// Of the remaining options, select one at random.
	r, err := crypto.RandIntn(len(addrs))
	if err != nil {
		g.log.Severe("random number generation failure:", err)
	}
	kick := addrs[r]
	g.peers[kick].sess.Close()
	delete(g.peers, kick)
	g.log.Printf("INFO: disconnected from %v to make room for %v\n", kick, p.NetAddress)
	g.addPeer(p)
}
Example #19
// randomNode returns a random node from the gateway. An error can be returned
// if there are no nodes in the node list.
func (g *Gateway) randomNode() (modules.NetAddress, error) {
	if len(g.nodes) == 0 {
		return "", errNoPeers
	}

	// Select a random node. Note that the algorithm below is roughly linear in
	// the number of nodes known by the gateway, and this number can approach
	// every node on the network. If the network gets large, this algorithm
	// will either need to be refactored, or more likely a cap on the size of
	// g.nodes will need to be added.
	r, err := crypto.RandIntn(len(g.nodes))
	if err != nil {
		return "", err
	}
	for node := range g.nodes {
		if r <= 0 {
			return node, nil
		}
		r--
	}
	return "", errNoPeers
}
Example #20
// threadedHandleConn handles an incoming connection to the host, typically an
// RPC.
func (h *Host) threadedHandleConn(conn net.Conn) {
	// Close the connection on all return paths, including the early returns.
	defer conn.Close()

	h.resourceLock.RLock()
	defer h.resourceLock.RUnlock()
	if h.closed {
		return
	}

	// Set an initial duration that is generous, but finite. RPCs can extend
	// this if desired.
	err := conn.SetDeadline(time.Now().Add(5 * time.Minute))
	if err != nil {
		h.log.Println("WARN: could not set deadline on connection:", err)
		return
	}

	// Read a specifier indicating which action is being called.
	var id types.Specifier
	if err := encoding.ReadObject(conn, &id, 16); err != nil {
		atomic.AddUint64(&h.atomicUnrecognizedCalls, 1)
		atomic.AddUint64(&h.atomicErroredCalls, 1)

		// Don't clutter the logs with repeat messages - after 1000 messages
		// have been printed, only print 1-in-200.
		randInt, randErr := crypto.RandIntn(200)
		if randErr != nil {
			return
		}
		unrecognizedCalls := atomic.LoadUint64(&h.atomicUnrecognizedCalls)
		if unrecognizedCalls < 1e3 || (unrecognizedCalls > 1e3 && randInt == 0) {
			h.log.Printf("WARN: incoming conn %v was malformed: %v", conn.RemoteAddr(), err)
		}
		return
	}

	switch id {
	case modules.RPCDownload:
		atomic.AddUint64(&h.atomicDownloadCalls, 1)
		err = h.managedRPCDownload(conn)
	case modules.RPCRenew:
		atomic.AddUint64(&h.atomicRenewCalls, 1)
		err = h.managedRPCRenew(conn)
	case modules.RPCRevise:
		atomic.AddUint64(&h.atomicReviseCalls, 1)
		err = h.managedRPCRevise(conn)
	case modules.RPCSettings:
		atomic.AddUint64(&h.atomicSettingsCalls, 1)
		err = h.managedRPCSettings(conn)
	case modules.RPCUpload:
		atomic.AddUint64(&h.atomicUploadCalls, 1)
		err = h.managedRPCUpload(conn)
	default:
		atomic.AddUint64(&h.atomicErroredCalls, 1)

		// Don't clutter the logs with repeat messages - after 1000 messages
		// have been printed, only print 1-in-200.
		randInt, randErr := crypto.RandIntn(200)
		if randErr != nil {
			return
		}
		erroredCalls := atomic.LoadUint64(&h.atomicErroredCalls)
		if erroredCalls < 1e3 || (erroredCalls > 1e3 && randInt == 0) {
			h.log.Printf("WARN: incoming conn %v requested unknown RPC \"%v\"", conn.RemoteAddr(), id)
		}
		return
	}
	if err != nil {
		atomic.AddUint64(&h.atomicErroredCalls, 1)

		// Don't clutter the logs with repeat messages - after 1000 messages
		// have been printed, only print 1-in-200.
		randInt, randErr := crypto.RandIntn(200)
		if randErr != nil {
			return
		}
		erroredCalls := atomic.LoadUint64(&h.atomicErroredCalls)
		if erroredCalls < 1e3 || (erroredCalls > 1e3 && randInt == 0) {
			h.log.Printf("WARN: incoming RPC \"%v\" failed: %v", id, err)
		}
	}
}
Example #21
// TestAddStorageFolderUIDCollisions checks that storage folders can be added
// with no risk of producing collisions in the storage folder UIDs. This test
// relies on (explicitly checked) assumptions about the size of the name and
// the number of allowed storage folders.
func TestAddStorageFolderUIDCollisions(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}
	t.Parallel()
	smt, err := newStorageManagerTester("TestAddStorageFolderUIDCollisions")
	if err != nil {
		t.Fatal(err)
	}
	defer smt.Close()

	// Check that the environment requirements for the test have been met.
	if storageFolderUIDSize != 1 {
		t.Fatal("For this test, the storage manager must be using storage folder UIDs that are 1 byte")
	}
	if maximumStorageFolders < 100 {
		t.Fatal("For this test, the storage manager must be allowed to have at least 100 storage folders")
	}

	// Create 100 storage folders, and check that there are no collisions
	// between any of them. Because the UID is only using 1 byte, once there
	// are more than 64 there will be at least 1/4 chance of a collision for
	// each randomly selected UID. Running into collisions is virtually
	// guaranteed, and running into repeated collisions (where two UIDs
	// consecutively collide with existing UIDs) is highly likely.
	for i := 0; i < maximumStorageFolders; i++ {
		err = smt.addRandFolder(minimumStorageFolderSize)
		if err != nil {
			t.Fatal(err)
		}
	}
	// Check that there are no collisions.
	uidMap := make(map[uint8]struct{})
	for _, sf := range smt.sm.storageFolders {
		_, exists := uidMap[uint8(sf.UID[0])]
		if exists {
			t.Error("Collision")
		}
		uidMap[uint8(sf.UID[0])] = struct{}{}
	}
	// For coverage purposes, try adding a storage folder after the maximum
	// number of storage folders has been reached.
	err = smt.addRandFolder(minimumStorageFolderSize)
	if err != errMaxStorageFolders {
		t.Fatal("expecting errMaxStorageFolders:", err)
	}

	// Try again, this time removing a random storage folder and then adding
	// another one repeatedly - enough times to exceed the 256 possible folder
	// UIDs that can be chosen in the testing environment.
	for i := 0; i < 300; i++ {
		// Replace the very first storage folder.
		err = smt.sm.RemoveStorageFolder(0, false)
		if err != nil {
			t.Fatal(err)
		}
		err = smt.addRandFolder(minimumStorageFolderSize)
		if err != nil {
			t.Fatal(err)
		}

		// Replace a random storage folder.
		n, err := crypto.RandIntn(100)
		if err != nil {
			t.Fatal(err)
		}
		err = smt.sm.RemoveStorageFolder(n, false)
		if err != nil {
			t.Fatal(err)
		}
		err = smt.addRandFolder(minimumStorageFolderSize)
		if err != nil {
			t.Fatal(err)
		}
	}
	uidMap = make(map[uint8]struct{})
	for _, sf := range smt.sm.storageFolders {
		_, exists := uidMap[uint8(sf.UID[0])]
		if exists {
			t.Error("Collision")
		}
		uidMap[uint8(sf.UID[0])] = struct{}{}
	}
}
Example #22
// threadedRepairUploads improves the health of files tracked by the renter by
// reuploading their missing pieces. Multiple repair attempts may be necessary
// before the file reaches full redundancy.
func (r *Renter) threadedRepairUploads() {
	// a primitive blacklist is used to augment the hostdb's weights. Each
	// negotiation failure increments the integer, and the probability of
	// selecting the host for upload is 1/n.
	blacklist := make(map[modules.NetAddress]int)

	for {
		time.Sleep(5 * time.Second)

		if !r.wallet.Unlocked() {
			continue
		}

		// make copy of repair set under lock
		repairing := make(map[string]string)
		id := r.mu.RLock()
		for name, path := range r.repairSet {
			repairing[name] = path
		}
		r.mu.RUnlock(id)

		for name, path := range repairing {
			// retrieve file object and get current height
			id = r.mu.RLock()
			f, ok := r.files[name]
			//height := r.blockHeight
			r.mu.RUnlock(id)
			if !ok {
				r.log.Printf("failed to repair %v: no longer tracking that file", name)
				id = r.mu.Lock()
				delete(r.repairSet, name)
				r.mu.Unlock(id)
				continue
			}

			// delete any expired contracts
			//f.removeExpiredContracts(height)

			// determine file health
			badChunks := f.incompleteChunks()
			if len(badChunks) == 0 {
				//badChunks = f.expiringChunks(height)
				// if len(badChunks) == 0 {
				// 	// nothing to do
				// 	continue
				// }
				continue
			}

			r.log.Printf("repairing %v chunks of %v", len(badChunks), name)

			// defer is really convenient for cleaning up resources, so an
			// inline function is justified
			err := func() error {
				// open file handle
				handle, err := os.Open(path)
				if err != nil {
					return err
				}
				defer handle.Close()

				// build host list
				bytesPerHost := f.pieceSize * f.numChunks() * 2 // 2x buffer to prevent running out of money
				var hosts []uploader
				randHosts := r.hostDB.RandomHosts(f.erasureCode.NumPieces() * 2)
				for _, h := range randHosts {
					// probabilistically filter out known bad hosts
					// unresponsive hosts will be selected with probability 1/(1+nFailures)
					nFailures, ok := blacklist[h.IPAddress]
					if n, _ := crypto.RandIntn(1 + nFailures); ok && n != 0 {
						continue
					}

					// TODO: use smarter duration
					hostUploader, err := r.newHostUploader(h, bytesPerHost, defaultDuration, f.masterKey)
					if err != nil {
						// penalize unresponsive hosts
						if strings.Contains(err.Error(), "timeout") {
							blacklist[h.IPAddress]++
						}
						continue
					}
					defer hostUploader.Close()

					hosts = append(hosts, hostUploader)
					if len(hosts) >= f.erasureCode.NumPieces() {
						break
					}
				}

				if len(hosts) < f.erasureCode.MinPieces() {
					// don't return an error in this case, since the file
					// should not be removed from the repair set
					r.log.Printf("failed to repair %v: not enough hosts", name)
					return nil
				}

				return f.repair(handle, badChunks, hosts)
			}()

			if err != nil {
				r.log.Printf("%v cannot be repaired: %v", name, err)
				id = r.mu.Lock()
				delete(r.repairSet, name)
				r.mu.Unlock(id)
			}

			// save the repaired file data
			err = r.saveFile(f)
			if err != nil {
				// definitely bad, but we probably shouldn't delete from the
				// repair set if this happens
				r.log.Printf("failed to save repaired file %v: %v", name, err)
			}
		}
	}
}
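The blacklist filter above keeps a previously failing host only when RandIntn(1+nFailures) comes up zero, i.e. with probability 1/(1+nFailures). A minimal standalone sketch of that filter is below, again using crypto/rand in place of crypto.RandIntn; the host names and failure counts are invented.

package main

import (
	"crypto/rand"
	"fmt"
	"math/big"
)

// keepHost reports whether a host with nFailures past negotiation failures
// should still be considered for upload; the chance of keeping it is
// 1/(1+nFailures), so hosts that have never failed are always kept.
func keepHost(nFailures int) bool {
	n, err := rand.Int(rand.Reader, big.NewInt(int64(1+nFailures)))
	if err != nil {
		return false // on RNG failure, err on the side of skipping the host
	}
	return n.Int64() == 0
}

func main() {
	blacklist := map[string]int{
		"host-a": 0, // never failed: always kept
		"host-b": 3, // kept roughly one time in four
	}
	for host, failures := range blacklist {
		fmt.Println(host, "kept:", keepHost(failures))
	}
}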
Example #23
File: repair.go Project: mantyr/Sia
// threadedRepairUploads improves the health of files tracked by the renter by
// reuploading their missing pieces. Multiple repair attempts may be necessary
// before the file reaches full redundancy.
func (r *Renter) threadedRepairUploads() {
	// a primitive blacklist is used to augment the hostdb's weights. Each
	// negotiation failure increments the integer, and the probability of
	// selecting the host for upload is 1/n.
	blacklist := make(map[modules.NetAddress]int)

	for {
		time.Sleep(5 * time.Second)

		if !r.wallet.Unlocked() {
			continue
		}

		// make copy of repair set under lock
		repairing := make(map[string]trackedFile)
		id := r.mu.RLock()
		for name, meta := range r.tracking {
			repairing[name] = meta
		}
		r.mu.RUnlock(id)

		for name, meta := range repairing {
			// retrieve file object and get current height
			id = r.mu.RLock()
			f, ok := r.files[name]
			height := r.blockHeight
			r.mu.RUnlock(id)
			if !ok {
				r.log.Printf("failed to repair %v: no longer tracking that file", name)
				id = r.mu.Lock()
				delete(r.tracking, name)
				r.mu.Unlock(id)
				continue
			}

			// calculate duration
			var duration types.BlockHeight
			if meta.EndHeight == 0 {
				duration = defaultDuration
			} else if meta.EndHeight > height {
				duration = meta.EndHeight - height
			} else {
				r.log.Printf("removing %v from repair set: storage period has ended", name)
				id = r.mu.Lock()
				delete(r.tracking, name)
				r.mu.Unlock(id)
				continue
			}

			// check for un-uploaded pieces
			badChunks := f.incompleteChunks()
			if len(badChunks) == 0 {
				// check for expiring contracts
				if meta.EndHeight == 0 {
					// if auto-renewing, mark any chunks expiring soon
					badChunks = f.chunksBelow(height + renewThreshold)
				} else {
					// otherwise mark any chunks expiring before desired end
					badChunks = f.chunksBelow(meta.EndHeight)
				}
				if len(badChunks) == 0 {
					// check for offline hosts (slow)
					// TODO: reenable
					//badChunks = f.offlineChunks()
					continue
				}
			}

			r.log.Printf("repairing %v chunks of %v", len(badChunks), name)

			// defer is really convenient for cleaning up resources, so an
			// inline function is justified
			err := func() error {
				// open file handle
				handle, err := os.Open(meta.RepairPath)
				if err != nil {
					return err
				}
				defer handle.Close()

				// build host list
				bytesPerHost := f.pieceSize * f.numChunks() * 2 // 2x buffer to prevent running out of money
				var hosts []uploader
				randHosts := r.hostDB.RandomHosts(f.erasureCode.NumPieces() * 2)
				for _, h := range randHosts {
					// probabilistically filter out known bad hosts
					// unresponsive hosts will be selected with probability 1/(1+nFailures)
					nFailures, ok := blacklist[h.IPAddress]
					if n, _ := crypto.RandIntn(1 + nFailures); ok && n != 0 {
						continue
					}

					hostUploader, err := r.newHostUploader(h, bytesPerHost, duration, f.masterKey)
					if err != nil {
						// penalize unresponsive hosts
						if strings.Contains(err.Error(), "timeout") {
							blacklist[h.IPAddress]++
						}
						continue
					}
					defer hostUploader.Close()

					hosts = append(hosts, hostUploader)
					if len(hosts) >= f.erasureCode.NumPieces() {
						break
					}
				}

				if len(hosts) < f.erasureCode.MinPieces() {
					// don't return an error in this case, since the file
					// should not be removed from the repair set
					r.log.Printf("failed to repair %v: not enough hosts", name)
					return nil
				}

				return f.repair(handle, badChunks, hosts)
			}()

			if err != nil {
				r.log.Printf("%v cannot be repaired: %v", name, err)
				id = r.mu.Lock()
				delete(r.tracking, name)
				r.mu.Unlock(id)
			}

			// save the repaired file data
			err = r.saveFile(f)
			if err != nil {
				// definitely bad, but we probably shouldn't delete from the
				// repair set if this happens
				r.log.Printf("failed to save repaired file %v: %v", name, err)
			}
		}
	}
}
Example #24
// TestHealthyNodeListPruning checks that gateways will purge nodes if they are at
// a healthy node threshold and the nodes are offline.
func TestHealthyNodeListPruning(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}
	t.Parallel()

	// Create and connect healthyNodeListLen*2 gateways.
	var gs []*Gateway
	for i := 0; i < healthyNodeListLen*2; i++ {
		gname := "TestHealthyNodeListPruning" + strconv.Itoa(i)
		gs = append(gs, newTestingGateway(gname, t))

		// Connect this gateway to the previous gateway.
		if i != 0 {
			err := gs[i].Connect(gs[i-1].myAddr)
			if err != nil {
				t.Fatal(err)
			}
		}
		// To help speed the test up, also connect this gateway to the peer two
		// back.
		if i > 1 {
			err := gs[i].Connect(gs[i-2].myAddr)
			if err != nil {
				t.Fatal(err)
			}
		}
		// To help speed the test up, also connect this gateway to a random
		// previous peer.
		if i > 2 {
			choice, err := crypto.RandIntn(i - 2)
			if err != nil {
				t.Fatal(err)
			}
			err = gs[i].Connect(gs[choice].myAddr)
			if err != nil {
				t.Fatal(err)
			}
		}
	}

	// Spin until all gateways have a nearly full node list.
	success := false
	for i := 0; i < 80; i++ {
		success = true
		for _, g := range gs {
			g.mu.RLock()
			gNodeLen := len(g.nodes)
			g.mu.RUnlock()
			if gNodeLen < healthyNodeListLen {
				success = false
				break
			}
		}
		if !success {
			time.Sleep(time.Second * 1)
		}
	}
	if !success {
		t.Fatal("peers are not sharing nodes with eachother")
	}

	// Gateway node lists have been filled out. Take a bunch of gateways
	// offline and verify that the remaining gateways begin pruning their
	// nodelist.
	var wg sync.WaitGroup
	for i := 2; i < len(gs); i++ {
		wg.Add(1)
		go func(i int) {
			err := gs[i].Close()
			if err != nil {
				panic(err)
			}
			wg.Done()
		}(i)
	}
	wg.Wait()

	// Wait for enough iterations of the node purge loop that over-pruning is
	// possible. (Over-pruning does not need to be guaranteed; causing this
	// test to fail once in a while is sufficient.)
	time.Sleep(nodePurgeDelay * time.Duration(healthyNodeListLen-pruneNodeListLen) * 12)

	// Check that the remaining gateways have pruned nodes.
	gs[0].mu.RLock()
	gs0Nodes := len(gs[0].nodes)
	gs[0].mu.RUnlock()
	gs[1].mu.RLock()
	gs1Nodes := len(gs[1].nodes)
	gs[1].mu.RUnlock()
	if gs0Nodes >= healthyNodeListLen-1 {
		t.Error("gateway is not pruning nodes", healthyNodeListLen, gs0Nodes)
	}
	if gs1Nodes >= healthyNodeListLen-1 {
		t.Error("gateway is not pruning nodes", healthyNodeListLen, gs1Nodes)
	}
	if gs0Nodes < pruneNodeListLen {
		t.Error("gateway is pruning too many nodes", gs0Nodes, pruneNodeListLen)
	}
	if gs1Nodes < pruneNodeListLen {
		t.Error("gateway is pruning too many nodes", gs1Nodes, pruneNodeListLen)
	}

	// Close the remaining gateways.
	err := gs[0].Close()
	if err != nil {
		t.Error(err)
	}
	err = gs[1].Close()
	if err != nil {
		t.Error(err)
	}
}