Example #1
func TestLRU(t *testing.T) {
	gopath := os.Getenv("GOPATH")
	if _, err := os.Stat(gopath + "/src/segments"); err != nil {
		if os.IsNotExist(err) {
			os.Mkdir(gopath+"/src/segments", 0777)
		} else {
			panic(err)
		}
	}
	tuple1 := Tuple{Slice: []string{"Vedha", "Vikas", "Jeffrey", "Zack"}}
	tuple2 := Tuple{Slice: []string{"Vivek", "Anuhya", "Esha"}}
	tuple3 := Tuple{Slice: []string{"Christina", "Keerti"}}
	tuple4 := Tuple{Slice: []string{"Suganya", "Arooshi"}}

	var segment1 Segment
	segment1.Partitions = make([][]Tuple, 2)
	segment1.Partitions[0] = []Tuple{tuple1, tuple2}
	segment1.Partitions[1] = []Tuple{tuple3, tuple4}
	segment1.Id = 1234

	var segment2 Segment
	segment2.Partitions = make([][]Tuple, 2)
	segment2.Partitions[0] = []Tuple{tuple1, tuple3}
	segment2.Partitions[1] = []Tuple{tuple2, tuple4}
	segment2.Id = 1111

	lru := NewLRU(1, 4)
	lru.Insert(1234, &segment1)
	lru.Insert(1111, &segment2)
	s := lru.Get(1234)
	s2 := lru.Get(1111)
	client.Debug("Here's what I got", s)
	client.Debug(s2)
	client.Debug("Length", lru.Length())
}
Example #2
func main() {
	hd := master.GetDbConnection()

	workflows := master.GetWorkflows(hd)

	for _, w := range workflows {
		client.Debug("--------------------------Begin--------------------------")
		client.Debug("Workflow ID:", w.Id)
		client.Debug(workflow.WorkflowToString(hd, w))
		client.Debug("---------------------------End---------------------------")
	}
}
Example #3
func main() {
	if len(os.Args) != 3 {
		printUsage()
		return
	}

	workerhost := os.Args[1]
	masterhost := os.Args[2]
	client.Debug("Starting server on", workerhost)
	client.Debug("Press Ctrl-C to stop")
	worker.StartServer(workerhost, masterhost)

	waitForInterrupt()
}
Example #4
func main() {
	if len(os.Args) != 2 {
		printUsage()
		return
	}

	host := os.Args[1]
	hd := master.GetDbConnection()
	client.Debug("Starting server on", host)
	client.Debug("Press Ctrl-C to stop")
	master.StartServer(host, hd)

	waitForInterrupt()
}
Example #5
// Execute a UDF command that accepts zero or more input lists of tuples, and
// returns one output list of tuples. This function blocks until the UDF is
// done executing.
func runUDF(command string, inputTuples map[int][]Tuple) []Tuple {
	// spawn the external process
	splits := strings.Split(command, " ")
	client.Debug(preprocessCommand(splits[0]))
	cmd := exec.Command(preprocessCommand(splits[0]), splits[1:]...)
	stdin, err := cmd.StdinPipe()
	if err != nil {
		log.Panic(err)
	}
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		log.Panic(err)
	}
	if err := cmd.Start(); err != nil {
		log.Panic(err)
	}

	// write tuples to standard input on a background goroutine
	go func() {
		for index, tupleList := range inputTuples {
			for _, tuple := range tupleList {
				stdin.Write(tuple.SerializeTuple(index))
				stdin.Write([]byte{'\n'})
			}
		}
		stdin.Close()
	}()

	// read from standard output to get the output tuples
	outputTuples := make([]Tuple, 0)
	ReadTupleStream(stdout, func(tuple Tuple, index int) {
		outputTuples = append(outputTuples, tuple)
	})

	return outputTuples
}
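A quick note on the pattern above: runUDF serializes each input tuple to the child process's standard input together with its input-list index, then reads output tuples back from standard output until the pipe closes. The sketch below shows how it might be called; the "./dedup_udf" command string and the tuple contents are hypothetical, purely for illustration.

func runUDFExample() {
	// Two input lists, keyed by the index the UDF expects them under
	// (hypothetical data; Tuple is the struct used throughout these examples).
	inputs := map[int][]Tuple{
		0: {{Slice: []string{"Vedha", "Vikas"}}},
		1: {{Slice: []string{"Vivek", "Anuhya"}}},
	}
	// runUDF blocks until the external process exits and its stdout is drained.
	out := runUDF("./dedup_udf -k 0", inputs)
	client.Debug("UDF produced", len(out), "tuples")
}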
Example #6
func (m *Master) execLaunchJob(rddId int64, data interface{}) {
	client.Debug("execLaunchJob", rddId)

	m.mu.Lock()
	defer m.mu.Unlock()
	tx := m.hd.Begin()

	// TODO: check that all of the input RDDS are available

	rdd := GetRdd(tx, rddId)

	// check whether the rdd is already complete
	if rdd.State != RDD_COMPLETE {
		sourceRdds := rdd.GetSourceRdds(tx)

		readyToContinue := true
		for _, srcRdd := range sourceRdds {
			if srcRdd.State != RDD_COMPLETE {
				// relaunch any dependencies that are not complete
				readyToContinue = false
				e := Event{
					Type: LAUNCH_JOB,
					Id:   int64(srcRdd.Id),
				}
				m.queueEvent(e)
			}
		}

		// If all the dependencies are met, then launch the next
		// Rdd (dependencies are also checked for each individual task)
		if readyToContinue {
			// check whether we already created segments for this rdd
			segments := rdd.GetSegments(tx)
			if len(segments) > 0 {
				// if segments already present, just run the ones that are not complete
				// (this is part of the recovery protocol)
				for _, segment := range segments {
					if segment.Status != SEGMENT_COMPLETE {
						e := Event{
							Type: LAUNCH_TASK,
							Id:   int64(segment.Id),
						}
						m.queueEvent(e)
					}
				}
			} else {
				// Otherwise, create the new segments and run them
				segments, _ := rdd.CreateSegments(tx)
				for _, segment := range segments {
					e := Event{
						Type: LAUNCH_TASK,
						Id:   int64(segment.Id),
					}
					m.queueEvent(e)
				}
			}
		}
	}
	commitOrPanic(tx)
}
Example #7
func (m *Master) tryLaunchingDependentJobs(tx *hood.Hood, rdd *Rdd, pj *Protojob) {

	destRdds := rdd.GetDestRdds(tx)

	// For each destRdd, check whether all of the srcRdds
	// for that destRdd are complete. If so, launch the job
	// for destRdd
	// TODO: this logic will have to be re-written when fault-tolerance
	// is implemented
	for _, destRdd := range destRdds {
		srcRdds := destRdd.GetSourceRdds(tx)
		isComplete := true
		for _, srcRdd := range srcRdds {
			if (srcRdd.State != RDD_COMPLETE) && (srcRdd.Id != rdd.Id) {
				isComplete = false
			}
		}
		if isComplete {
			client.Debug("launching next job", destRdd)
			e := Event{
				Type: LAUNCH_JOB,
				Id:   int64(destRdd.Id),
			}
			m.queueEvent(e)
		}
	}
}
Example #8
func (m *Master) execTaskSuccess(segmentId int64, data interface{}) {
	client.Debug("execTaskSuccess", segmentId)

	m.mu.Lock()
	defer m.mu.Unlock()
	tx := m.hd.Begin()

	segment := GetSegment(tx, segmentId)
	rdd := segment.GetRdd(tx)
	pj := rdd.GetProtojob(tx)

	segment.Status = SEGMENT_COMPLETE
	saveOrPanic(tx, segment)

	numComplete := rdd.GetNumSegmentsComplete(tx, segment)

	if numComplete == pj.NumSegments {
		batch := rdd.GetWorkflowBatch(tx)
		workflow := batch.GetWorkflow(tx)
		fmt.Println("Job complete", rdd.Id, pj.Command, time.Now().UnixNano()/1000000-batch.StartTime-workflow.Duration)
		rdd.State = RDD_COMPLETE
		saveOrPanic(tx, rdd)
		m.tryLaunchingDependentJobs(tx, rdd, pj)
	}

	commitOrPanic(tx)
}
Example #9
//
// server Register RPC handler.
//
func (m *Master) Register(args *client.RegisterArgs, reply *client.RegisterReply) error {
	client.Debug("Registering", args)

	m.mu.Lock()
	defer m.mu.Unlock()

	tx := m.hd.Begin()
	existingWorkers := GetWorkersAtAddress(tx, args.Me)
	for _, w := range existingWorkers {
		w.Status = WORKER_DEAD
		tx.Save(w)
	}
	newWorker := Worker{
		Url: args.Me,
	}
	tx.Save(&newWorker)
	commitOrPanic(tx)

	tx = m.hd.Begin()
	m.getNumAliveWorkers(tx)
	commitOrPanic(tx)

	reply.Err = client.OK
	reply.Id = int64(newWorker.Id)

	return nil
}
Example #10
func waitForInterrupt() {
	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt)
	for sig := range c {
		client.Debug("\ncaptured signal, stopping and exiting.\n", sig)
		return
	}
}
Example #11
func (m *Master) HandleFailureData(data *FailureData) {
	client.Debug("HANDLING FAILURE", data)
	tx := m.hd.Begin()
	worker := GetWorker(tx, data.WorkerId)
	worker.Status = WORKER_DEAD
	saveOrPanic(tx, worker)
	m.getNumAliveWorkers(tx)
	commitOrPanic(tx)
}
Example #12
func (w *Worker) ExecTask(args *client.ExecArgs, reply *client.ExecReply) error {
	inputTuples := make(map[int][]Tuple)
	fmt.Println("executing task", args)
	for _, segment := range args.Segments {
		localSegment := w.LocalGetSegment(segment.SegmentId)
		// fetch the segment if it is not already stored locally
		if localSegment == nil {
			client.Debug("fetching tuples", segment)
			clerk := MakeWorkerInternalClerk(segment.WorkerUrl)
			args2 := GetTuplesArgs{SegmentId: segment.SegmentId, PartitionIndex: segment.PartitionIndex}
			reply2 := clerk.GetTuples(&args2, 3)
			if reply2 != nil {
				if reply2.Err == client.OK {
					client.Debug("fetched tuples", len(reply2.Tuples))
					inputTuples[segment.Index] = append(inputTuples[segment.Index], reply2.Tuples...)
				} else {
					reply.Err = reply2.Err
					reply.WorkerId = segment.WorkerId
					client.Debug(reply.Err)
					return nil
				}
			} else {
				reply.Err = client.DEAD_SEGMENT
				reply.WorkerId = segment.WorkerId
				client.Debug(reply.Err)
				return nil
			}
		} else {
			// use the locally stored copy
			inputTuples[segment.Index] = append(inputTuples[segment.Index], localSegment.Partitions[segment.PartitionIndex]...)
		}
	}

	client.Debug("running udf")
	start := time.Now()
	outputTuples := runUDF(args.Command, inputTuples)
	end := time.Now()
	client.Debug("duration:", end.Sub(start))
	client.Debug("got output tuples", len(outputTuples))

	client.Debug("writing segment")
	segment := MakeSegment(outputTuples, args.Indices, args.Parts)
	w.LocalPutSegment(args.OutputSegmentId, segment)

	client.Debug("success")
	reply.Err = client.OK
	return nil
}
Example #13
func (m *Master) eventLoop() {
	to := 1
	iteration := 0
	for {
		if iteration%10 == 0 {
			fmt.Println("on iteration", iteration)
		}
		iteration += 1
		if atomic.LoadInt64(&m.numAliveWorkers) >= atomic.LoadInt64(&m.minWorkers) {
			start := time.Now()
			to = 1
			e := <-m.events
			atomic.AddInt64(&m.numQueuedEvents, -1)
			switch e.Type {
			case NEW_BATCH:
				m.execNewBatch(e.Id, e.Data)
			case LAUNCH_TASK:
				m.execLaunchTask(e.Id, e.Data)
			case TASK_SUCCESS:
				m.execTaskSuccess(e.Id, e.Data)
			case TASK_FAILURE:
				m.execTaskFailure(e.Id, e.Data)
			case LAUNCH_JOB:
				m.execLaunchJob(e.Id, e.Data)
			case COPY_SUCCESS:
				m.execCopySuccess(e.Id, e.Data)
			case COPY_FAILURE:
				m.execCopyFailure(e.Id, e.Data)
			case LAUNCH_COPY:
				m.execLaunchCopy(e.Id, e.Data)
			}
			diff := time.Since(start)
			client.Debug("duration", diff)
		} else {
			client.Debug("sleeping", to)
			time.Sleep(time.Duration(to) * time.Millisecond)
			if to < 1000 {
				to *= 2
			}
		}
	}
}
Example #14
func (w *Worker) CopySegment(args *client.CopySegmentArgs, reply *client.CopySegmentReply) error {
	client.Debug("copying segment", args)
	if w.LocalGetSegment(args.SegmentId) != nil {
		// this should never happen during normal operation (though it might
		// happen during the master recovery procedure)
		client.Debug("already have segment, overwriting...")
	}
	client.Debug("fetching segment", args.SegmentId)
	clerk := MakeWorkerInternalClerk(args.WorkerUrl)
	args2 := GetSegmentArgs{SegmentId: args.SegmentId}
	reply2 := clerk.GetSegment(&args2, 3)
	if reply2 != nil {
		if reply2.Err == client.OK {
			client.Debug("fetched segment", args.SegmentId)
			w.LocalPutSegment(args.SegmentId, reply2.Segment)
			reply.Err = client.OK
		} else {
			reply.Err = reply2.Err
			reply.WorkerId = args.WorkerId
			client.Debug(reply.Err)
			return nil
		}
	} else {
		reply.Err = client.DEAD_SEGMENT
		reply.WorkerId = args.WorkerId
		client.Debug(reply.Err)
		return nil
	}
	return nil
}
Example #15
func (m *Master) execTaskFailure(segmentId int64, data interface{}) {
	client.Debug("execTaskFailure", segmentId)

	m.mu.Lock()
	defer m.mu.Unlock()

	m.HandleFailureData(data.(*FailureData))
	e := Event{
		Type: LAUNCH_TASK,
		Id:   int64(segmentId),
	}
	m.queueEvent(e)
}
Example #16
func (m *Master) execNewBatch(workflowId int64, data interface{}) {
	client.Debug("execNewBatch", workflowId)

	m.mu.Lock()
	defer m.mu.Unlock()
	tx := m.hd.Begin()

	// look up workflow
	workflow := GetWorkflow(tx, workflowId)
	// create new workflowbatch
	lastBatch := workflow.GetLastWorkflowBatch(tx)

	now := time.Now().UnixNano() / 1000000
	var batch *WorkflowBatch
	if lastBatch == nil {
		// if there is no last batch, create the first batch starting at now - duration - TIME_ERROR
		client.Debug("No last batch")
		batch = workflow.MakeBatch(tx, now-workflow.Duration-TIME_ERROR)
	} else {
		// TODO: figure out what exactly to do if there are multiple
		// batches to catch up on, or if it is not yet time to execute
		// the next job

		// for now, only launch a new batch if the proper time has arrived
		// (e.g. the end time of the new batch has definitely passed)
		client.Debug(now, lastBatch.StartTime)
		if now > lastBatch.StartTime+2*workflow.Duration+TIME_ERROR {
			client.Debug("add new batch", workflow.Duration)
			batch = workflow.MakeBatch(tx, lastBatch.StartTime+workflow.Duration)
		}
	}
	commitOrPanic(tx)

	if batch != nil {
		m.launchBatchSourceJobs(batch)
	}
}
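To make the timing rule above concrete, here is a small sketch with made-up numbers (a 60-second Duration and a 5-second TIME_ERROR; the real values come from the workflow row and the master package's constant). A new batch starting at lastStart + Duration is only created once now has passed lastStart + 2*Duration + TIME_ERROR, i.e. once the new batch's end time has safely elapsed.

func batchTimingExample() {
	// Hypothetical values in milliseconds, for illustration only.
	const duration, timeError int64 = 60000, 5000
	lastStart := int64(1700000000000) // StartTime of the previous batch

	now := lastStart + 2*duration + timeError + 1
	if now > lastStart+2*duration+timeError {
		// Mirrors the check in execNewBatch: the new batch's window
		// [lastStart+duration, lastStart+2*duration] has fully passed.
		nextStart := lastStart + duration
		client.Debug("create batch starting at", nextStart)
	}
}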
Example #17
func main() {
	if len(os.Args) != 2 {
		printUsage()
		return
	}

	hd := master.GetDbConnection()

	reader, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}

	w, err := workflow.ReadWorkflow(hd, reader)
	if err != nil {
		panic(err)
	}

	client.Debug("Loaded workflow")
	client.Debug("--------------------------Begin--------------------------")
	client.Debug("Workflow ID:", w.Id)
	client.Debug(workflow.WorkflowToString(hd, w))
	client.Debug("---------------------------End---------------------------")
}
Example #18
func (w *Worker) GetTuples(args *GetTuplesArgs, reply *GetTuplesReply) error {
	client.Debug("GET TUPLES RPC")
	if args.WorkerId != w.master.GetId() {
		segment := w.LocalGetSegment(args.SegmentId)
		if segment != nil {
			reply.Tuples = segment.Partitions[args.PartitionIndex]
			reply.Err = client.OK
		} else {
			reply.Err = client.SEGMENT_NOT_FOUND
		}
	} else {
		// The request is old, and this worker has died, rebooted,
		// and re-registered
		reply.Err = client.DEAD_SEGMENT
	}
	return nil
}
Example #19
func (w *Worker) GetSegment(args *GetSegmentArgs, reply *GetSegmentReply) error {
	client.Debug("GET SEGMENT RPC")
	if args.WorkerId != w.master.GetId() {
		segment := w.LocalGetSegment(args.SegmentId)
		if segment != nil {
			reply.Segment = segment
			reply.Err = client.OK
		} else {
			reply.Err = client.SEGMENT_NOT_FOUND
		}
	} else {
		// The request is old, and this worker has died, rebooted,
		// and re-registered
		reply.Err = client.DEAD_SEGMENT
	}
	return nil
}
Example #20
//
// server Ping RPC handler.
//
func (m *Master) Ping(args *client.PingArgs, reply *client.PingReply) error {
	client.Debug("Pinging", args.Id)

	m.mu.Lock()
	defer m.mu.Unlock()

	tx := m.hd.Begin()
	w := GetWorker(tx, args.Id)
	if w != nil {
		reply.Err = client.OK
		w.Status = WORKER_ALIVE
		tx.Save(w)
	} else {
		// The worker was not found in our database, so tell it to reset
		reply.Err = client.RESET
	}
	// Timestamp is automatically updated on save
	commitOrPanic(tx)

	return nil
}
Example #21
func (m *Master) execCopySuccess(segmentCopyId int64, data interface{}) {
	client.Debug("copySuccess", segmentCopyId)

	m.mu.Lock()
	defer m.mu.Unlock()
	tx := m.hd.Begin()

	cp := GetSegmentCopy(tx, segmentCopyId)
	cp.Status = SEGMENT_COPY_COMPLETE
	saveOrPanic(tx, cp)

	segment := cp.GetSegment(tx)
	otherCopies := segment.GetSegmentCopies(tx)

	numComplete := 0
	for _, c := range otherCopies {
		if (c.Status == SEGMENT_COPY_COMPLETE) || (c.Id == cp.Id) {
			numComplete += 1
		}
	}

	rdd := segment.GetRdd(tx)
	pj := rdd.GetProtojob(tx)

	// If all of the segment copies are finished transmitting, declare
	// the task complete
	if numComplete >= pj.Copies {
		e := Event{
			Type: TASK_SUCCESS,
			Id:   int64(segment.Id),
		}
		m.queueEvent(e)
	}

	commitOrPanic(tx)
}
Example #22
/* Reset and initialize the master database tables. Note that this
 * will delete any existing data.
 *
 * Usage:
 *   go run init_database.go
 *
 */
func main() {
	hd := master.GetDbConnection()
	master.ResetDb(hd)
	master.CreateTables(hd)
	client.Debug("Success")
}
Example #23
func StartServer(hostname string, masterhost string) *Worker {
	// call gob.Register on structures you want
	// Go's RPC library to marshal/unmarshal.
	// gob.Register()

	runtime.GOMAXPROCS(7)

	gopath := os.Getenv("GOPATH")
	if _, err := os.Stat(gopath + "/src/segments"); err != nil {
		if os.IsNotExist(err) {
			os.Mkdir(gopath+"/src/segments", 0777)
		} else {
			panic(err)
		}
	}

	client.Debug("Starting worker")
	worker := new(Worker)
	worker.master = client.MakeMasterClerk(hostname, masterhost)
	worker.batches = make(map[int][]int64)
	worker.max_segments = 50000

	rpcs := rpc.NewServer()
	rpcs.Register(worker)

	// ignore the domain name: listen on all urls
	splitName := strings.Split(hostname, ":")
	l, e := net.Listen("tcp", ":"+splitName[1])
	if e != nil {
		log.Fatal("listen error: ", e)
	}
	worker.l = l

	// Register the worker to master
	client.Debug("Registering worker")
	worker.master.Register(true)
	client.Debug("Registered worker")

	worker.segments = NewLRU(worker.max_segments, worker.master.GetId())

	go func() {
		for {
			if conn, err := worker.l.Accept(); err == nil {
				go rpcs.ServeConn(conn)
			} else {
				worker.kill()
			}
		}
	}()

	go func() {
		for {
			// Continuously ping the master so that the master is notified
			// when a network partition is resolved.
			reply := worker.master.Ping(true)
			if reply == client.RESET {
				panic("ping rejected by master")
			}
			time.Sleep(1 * time.Second)
		}
	}()

	return worker
}
Example #24
func printUsage() {
	client.Debug("Usage\n  go run start_worker.go worker_interface:port master_interface:port\n")
	client.Debug("Example ports\n  localhost:1324\n  :2112\n  192.168.0.15:3333")
}
Example #25
func (m *Master) execLaunchTask(segmentId int64, data interface{}) {
	client.Debug("execLaunchTask", segmentId)

	m.mu.Lock()
	defer m.mu.Unlock()
	tx := m.hd.Begin()

	segment := GetSegment(tx, segmentId)

	if segment.Status == SEGMENT_UNASSIGNED {
		worker := GetRandomAliveWorker(tx)

		if worker != nil {
			segment.WorkerId = int64(worker.Id)
		} else {
			segment.WorkerId = 0
		}
		saveOrPanic(tx, segment)

		if segment.WorkerId != 0 {
			// if a worker was available
			inputs, missingRdds := segment.CalculateInputSegments(tx)
			if len(missingRdds) != 0 {
				// if any of the input rdds are incomplete, then re-execute them
				for _, rdd := range missingRdds {
					client.Debug("missing rdd, reexecuting", rdd)
					e := Event{
						Type: LAUNCH_JOB,
						Id:   int64(rdd.Id),
					}
					m.queueEvent(e)
				}
				commitOrPanic(tx)
			} else {
				// otherwise, launch the task
				rdd := segment.GetRdd(tx)
				pj := rdd.GetProtojob(tx)
				batch := rdd.GetWorkflowBatch(tx)
				workflow := batch.GetWorkflow(tx)
				segmentCopies := segment.GetSegmentCopies(tx)
				commitOrPanic(tx)

				command := preprocessMasterCommand(pj.Command, batch, segment, workflow)

				args := &client.ExecArgs{
					Command:         command,
					Segments:        inputs,
					OutputSegmentId: int64(segment.Id),
					Indices:         parseIndex(pj.PartitionIndex),
					Parts:           pj.NumBuckets,
				}

				c := client.MakeWorkerClerk(worker.Url)

				// Launch the task on a background goroutine
				go func() {
					reply := c.ExecTask(args, 3)
					if reply != nil {
						if reply.Err == client.OK {
							// task success
							if len(segmentCopies) > 0 {
								for _, cp := range segmentCopies {
									e := Event{
										Type: LAUNCH_COPY,
										Id:   int64(cp.Id),
									}
									m.queueEvent(e)
								}
							} else {
								e := Event{
									Type: TASK_SUCCESS,
									Id:   segmentId,
								}
								m.queueEvent(e)
							}
						} else {
							if reply.Err == client.DEAD_SEGMENT {
								client.Debug(client.DEAD_SEGMENT)
								// task failed due to dead segment host
								e := Event{
									Type: TASK_FAILURE,
									Id:   segmentId,
									Data: &FailureData{
										Type:     FAILURE_DEAD_SEGMENT,
										WorkerId: reply.WorkerId,
									},
								}
								m.queueEvent(e)
							} else {
								client.Debug(client.SEGMENT_NOT_FOUND)
								// task failed due to a segment host that forgot an RDD
								e := Event{
									Type: TASK_FAILURE,
									Id:   segmentId,
									Data: &FailureData{
										Type:     FAILURE_MISSING_SEGMENT,
										WorkerId: reply.WorkerId,
									},
								}
								m.queueEvent(e)
							}
						}
					} else {
						client.Debug("DEAD_WORKER")
						// Conclude that the worker is dead
						e := Event{
							Type: TASK_FAILURE,
							Id:   segmentId,
							Data: &FailureData{
								Type:     FAILURE_DEAD_WORKER,
								WorkerId: int64(worker.Id),
							},
						}
						m.queueEvent(e)
					}
				}()
			}
		} else {
			// if no workers are available, just re-queue the task
			client.Debug("no workers available")
			e := Event{
				Type: LAUNCH_TASK,
				Id:   segmentId,
			}
			m.queueEvent(e)
			commitOrPanic(tx)
		}
	}
}
Example #26
func printUsage() {
	client.Debug("Usage\n  go run load_workflow.go filename\n")
}
Example #27
func (m *Master) execLaunchCopy(segmentCopyId int64, data interface{}) {
	client.Debug("launchCopy", segmentCopyId)

	m.mu.Lock()
	defer m.mu.Unlock()
	tx := m.hd.Begin()

	cp := GetSegmentCopy(tx, segmentCopyId)

	if cp.Status == SEGMENT_COPY_UNASSIGNED {
		segment := cp.GetSegment(tx)
		rdd := segment.GetRdd(tx)
		pj := rdd.GetProtojob(tx)
		workers := GetAliveWorkers(tx)
		otherCopies := segment.GetSegmentCopies(tx)
		if len(workers) < pj.Copies+1 {
			// Stop the event loop until enough workers join the system
			// to meet the required replication level
			client.Debug("not enough workers, need at least", pj.Copies+1)
			m.increaseMinWorkersTo(int64(pj.Copies + 1))
			e := Event{
				Type: LAUNCH_COPY,
				Id:   int64(cp.Id),
			}
			m.queueEvent(e)
		} else {
			// it is safe to launch the copy, so choose a random worker that
			// doesn't already have an identical segment or a copy
			workerIds := make(map[int64]*Worker)
			for _, worker := range workers {
				workerIds[int64(worker.Id)] = worker
			}
			sourceWorker := workerIds[segment.WorkerId]
			// sourceWorker might be nil if it has already died. In this case,
			// abort this event and reschedule the RDD
			if sourceWorker == nil {
				e := Event{
					Type: LAUNCH_JOB,
					Id:   int64(rdd.Id),
				}
				m.queueEvent(e)
			} else {
				delete(workerIds, segment.WorkerId)
				for _, c := range otherCopies {
					if c.Id != cp.Id {
						delete(workerIds, c.WorkerId)
					}
				}
				workerList := make([]*Worker, 0, len(workerIds))
				for _, w := range workerIds {
					workerList = append(workerList, w)
				}
				worker := workerList[rand.Int()%len(workerList)]
				cp.WorkerId = int64(worker.Id)
				cp.Status = SEGMENT_COPY_PENDING
				saveOrPanic(tx, cp)
				// launch the rpc in the background
				c := client.MakeWorkerClerk(worker.Url)
				args := &client.CopySegmentArgs{
					SegmentId: int64(segment.Id),
					WorkerUrl: sourceWorker.Url,
					WorkerId:  int64(sourceWorker.Id),
				}
				go func() {
					reply := c.CopySegment(args, 3)
					if reply != nil {
						if reply.Err == client.OK {
							// task success
							e := Event{
								Type: COPY_SUCCESS,
								Id:   segmentCopyId,
							}
							m.queueEvent(e)
						} else {
							if reply.Err == client.DEAD_SEGMENT {
								client.Debug(client.DEAD_SEGMENT)
								// task failed due to dead segment host
								e := Event{
									Type: COPY_FAILURE,
									Id:   segmentCopyId,
									Data: &FailureData{
										Type:     FAILURE_DEAD_SEGMENT,
										WorkerId: reply.WorkerId,
									},
								}
								m.queueEvent(e)
							} else {
								client.Debug(client.SEGMENT_NOT_FOUND)
								// task failed due to a segment host that forgot an RDD
								e := Event{
									Type: COPY_FAILURE,
									Id:   segmentCopyId,
									Data: &FailureData{
										Type:     FAILURE_MISSING_SEGMENT,
										WorkerId: reply.WorkerId,
									},
								}
								m.queueEvent(e)
							}
						}
					} else {
						client.Debug("DEAD_WORKER")
						// Conclude that the worker is dead
						e := Event{
							Type: COPY_FAILURE,
							Id:   segmentCopyId,
							Data: &FailureData{
								Type:     FAILURE_DEAD_WORKER,
								WorkerId: int64(worker.Id),
							},
						}
						m.queueEvent(e)
					}
				}()
			}
		}
	}

	commitOrPanic(tx)
}