func main() { programType := flag.String("type", "", "(c) controller or (t) task") job := flag.String("job", "", "job name") etcdURLs := []string{"http://localhost:4001"} flag.Parse() if *job == "" { log.Fatalf("Please specify a job name") } ntask := uint64(2) switch *programType { case "c": log.Printf("controller") controller := controller.New(*job, etcd.NewClient(etcdURLs), ntask) controller.Start() controller.WaitForJobDone() case "t": log.Printf("task") bootstrap := framework.NewBootStrap(*job, etcdURLs, createListener(), nil) taskBuilder := ®ression.SimpleTaskBuilder{ GDataChan: make(chan int32, 11), NumberOfIterations: 10, MasterConfig: map[string]string{"writefile": "result.txt"}, } bootstrap.SetTaskBuilder(taskBuilder) bootstrap.AddLinkage("Parents" : topo.NewTreeTopologyOfParent(2, ntask)) bootstrap.AddLinkage("Children" : topo.NewTreeTopologyOfChildren(2, ntask)) bootstrap.Start() default: log.Fatal("Please choose a type: (c) controller, (t) task") } }
func TestRegressionFramework(t *testing.T) { etcdURLs := []string{"http://localhost:4001"} job := "framework_regression_test" numOfTasks := uint64(15) numOfIterations := uint64(10) // controller start first to setup task directories in etcd controller := controller.New(job, etcd.NewClient(etcdURLs), numOfTasks, []string{"Parents", "Children"}) controller.Start() // We need to set etcd so that nodes know what to do. taskBuilder := ®ression.SimpleTaskBuilder{ GDataChan: make(chan int32, 11), NumberOfIterations: numOfIterations, } for i := uint64(0); i < numOfTasks; i++ { go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder) } wantData := []int32{0, 105, 210, 315, 420, 525, 630, 735, 840, 945, 1050} getData := make([]int32, numOfIterations+1) for i := uint64(0); i <= numOfIterations; i++ { getData[i] = <-taskBuilder.GDataChan } for i := range wantData { if wantData[i] != getData[i] { t.Errorf("#%d: data want = %d, get = %d\n", i, wantData[i], getData[i]) } } controller.WaitForJobDone() controller.Stop() }
func main() { etcdUrlList := flag.String("etcd_urls", "", "ETCD server lists, sep by a comma.") jobName := flag.String("job_name", "bwmf", "Job name in etcd path.") jobType := flag.String("job_type", "c", "Job type, either 'c' for controller or 't' for task.") numTasks := flag.Int("num_tasks", 1, "Num of tasks.") taskConfigFile := flag.String("task_config", "", "Path to task config json file.") flag.Parse() if *jobName == "" { log.Fatal("Job name is required.") } crd, oErr := filesystem.NewLocalFSClient().OpenReadCloser(*taskConfigFile) if oErr != nil { log.Fatalf("Failed opening task config file. %s", oErr) } confData, rdErr := ioutil.ReadAll(crd) if rdErr != nil { log.Fatalf("Failed reading task config. %s", rdErr) } log.Printf("conf data: %s", confData) if *etcdUrlList == "" { log.Fatal("Please specify the etcd server urls.") } etcdUrls := strings.Split(*etcdUrlList, ",") log.Println("etcd urls: ", etcdUrls) topoMaster := topo.NewFullTopologyOfMaster(uint64(*numTasks)) topoNeighbors := topo.NewFullTopologyOfNeighbor(uint64(*numTasks)) switch *jobType { case "t": bootstrap := framework.NewBootStrap(*jobName, etcdUrls, createListener(), nil) taskBuilder := &bwmf.BWMFTaskBuilder{ NumOfTasks: uint64(*numTasks), ConfBytes: confData, } bootstrap.SetTaskBuilder(taskBuilder) bootstrap.AddLinkage("Master", topoMaster) bootstrap.AddLinkage("Neighbors", topoNeighbors) log.Println("Starting task..") bootstrap.Start() case "c": controller := controller.New(*jobName, etcd.NewClient(etcdUrls), uint64(*numTasks), []string{"Master", "Neighbors"}) controller.Start() log.Println("Controller started.") controller.WaitForJobDone() controller.Stop() default: log.Fatal("Please choose a type via '-jobtype': (c) controller, (t) task") } }
// TestMasterSetEpochFailure checks if a master task failed at SetEpoch, // 1. a new boostrap will be created to take over // 2. continue what's left; // 3. finish the job with the same result. func TestMasterSetEpochFailure(t *testing.T) { job := "TestMasterSetEpochFailure" etcdURLs := []string{"http://localhost:4001"} numOfTasks := uint64(15) numOfIterations := uint64(10) // controller start first to setup task directories in etcd controller := controller.New(job, etcd.NewClient(etcdURLs), numOfTasks, []string{"Parents", "Children"}) controller.Start() taskBuilder := ®ression.SimpleTaskBuilder{ GDataChan: make(chan int32, 11), NodeProducer: make(chan bool, 1), MasterConfig: map[string]string{ "SetEpoch": "fail", "failepoch": "1", "faillevel": "100", }, NumberOfIterations: numOfIterations, } for i := uint64(0); i < numOfTasks; i++ { go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder) } if <-taskBuilder.NodeProducer { taskBuilder.MasterConfig = nil log.Println("Starting a new node") // this time we start a new bootstrap whose task master doesn't fail. go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder) } wantData := []int32{0, 105, 210, 315, 420, 525, 630, 735, 840, 945, 1050} getData := make([]int32, numOfIterations+1) for i := uint64(0); i <= numOfIterations; i++ { getData[i] = <-taskBuilder.GDataChan } for i := range wantData { if wantData[i] != getData[i] { t.Errorf("#%d: data want = %d, get = %d", i, wantData[i], getData[i]) } } controller.WaitForJobDone() controller.Stop() }
func testSlaveFailure(t *testing.T, job string, slaveConfig map[string]string) { etcdURLs := []string{"http://localhost:4001"} numOfTasks := uint64(15) numOfIterations := uint64(10) // controller start first to setup task directories in etcd controller := controller.New(job, etcd.NewClient(etcdURLs), numOfTasks, []string{"Parents", "Children"}) controller.Start() defer controller.Stop() taskBuilder := ®ression.SimpleTaskBuilder{ GDataChan: make(chan int32, 11), NodeProducer: make(chan bool, 1), SlaveConfig: slaveConfig, NumberOfIterations: numOfIterations, } go func() { for _ = range taskBuilder.NodeProducer { log.Println("Starting a new node") go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder) } }() for i := uint64(0); i < numOfTasks; i++ { taskBuilder.NodeProducer <- true } wantData := []int32{0, 105, 210, 315, 420, 525, 630, 735, 840, 945, 1050} getData := make([]int32, numOfIterations+1) for i := uint64(0); i <= numOfIterations; i++ { getData[i] = <-taskBuilder.GDataChan } for i := range wantData { if wantData[i] != getData[i] { t.Errorf("#%d: data want = %d, get = %d", i, wantData[i], getData[i]) } } controller.WaitForJobDone() controller.Stop() close(taskBuilder.NodeProducer) }