Beispiel #1
0
func main() {
	programType := flag.String("type", "", "(c) controller or (t) task")
	job := flag.String("job", "", "job name")
	etcdURLs := []string{"http://localhost:4001"}
	flag.Parse()

	if *job == "" {
		log.Fatalf("Please specify a job name")
	}

	ntask := uint64(2)
	switch *programType {
	case "c":
		log.Printf("controller")
		controller := controller.New(*job, etcd.NewClient(etcdURLs), ntask)
		controller.Start()
		controller.WaitForJobDone()
	case "t":
		log.Printf("task")
		bootstrap := framework.NewBootStrap(*job, etcdURLs, createListener(), nil)
		taskBuilder := &regression.SimpleTaskBuilder{
			GDataChan:          make(chan int32, 11),
			NumberOfIterations: 10,
			MasterConfig:       map[string]string{"writefile": "result.txt"},
		}
		bootstrap.SetTaskBuilder(taskBuilder)

		bootstrap.AddLinkage("Parents" : topo.NewTreeTopologyOfParent(2, ntask))
		bootstrap.AddLinkage("Children" : topo.NewTreeTopologyOfChildren(2, ntask))
		bootstrap.Start()
	default:
		log.Fatal("Please choose a type: (c) controller, (t) task")
	}
}
Beispiel #2
0
// TestRequestDataEpochMismatch creates a scenario where a data request happens
// with two different epochs. In this case the server should apply back
// pressure, and the requesting client should be notified and return an error.
//
// The test is currently skipped, and the epoch-mismatch assertion at the
// bottom is still commented out.
func TestRequestDataEpochMismatch(t *testing.T) {
	t.Skip("TODO")
	job := "TestRequestDataEpochMismatch"
	etcdURLs := []string{"http://localhost:4001"}
	ctl := controller.New(job, etcd.NewClient(etcdURLs), 1, []string{"Parents", "Children"})
	// Abort right away if the etcd layout cannot be created; nothing below
	// can work without it.
	if err := ctl.InitEtcdLayout(); err != nil {
		t.Fatalf("initEtcdLayout failed: %v", err)
	}
	defer ctl.DestroyEtcdLayout()

	fw := &framework{
		name:     job,
		etcdURLs: etcdURLs,
		ln:       createListener(t),
	}
	// The latch is handed to the task builder; the test waits on it before
	// it touches the framework.
	var wg sync.WaitGroup
	fw.SetTaskBuilder(&testableTaskBuilder{
		setupLatch: &wg,
	})
	fw.AddLinkage("Parents", topo.NewTreeTopologyOfParent(1, 1))
	fw.AddLinkage("Children", topo.NewTreeTopologyOfChildren(1, 1))
	wg.Add(1)
	go fw.Start()
	wg.Wait()
	defer fw.ShutdownJob()

	addr, err := etcdutil.GetAddress(fw.etcdClient, job, fw.GetTaskID())
	if err != nil {
		t.Fatalf("GetAddress failed: %v", err)
	}
	// addr is only consumed by the disabled assertion below; the blank
	// assignment keeps it referenced without a self-assignment.
	_ = addr
	// _, err = frameworkhttp.RequestData(addr, "Parents", "req", 0, fw.GetTaskID(), 10, fw.GetLogger())
	// if err != frameworkhttp.ErrReqEpochMismatch {
	// 	t.Fatalf("error want = %v, but get = %v", frameworkhttp.ErrReqEpochMismatch, err)
	// }
}
// TestRegressionFramework runs the regression job end to end: it starts the
// controller, launches one driveWithTreeTopo goroutine per task, and compares
// the values received on GDataChan with the expected sequence.
func TestRegressionFramework(t *testing.T) {
	var (
		etcdURLs        = []string{"http://localhost:4001"}
		job             = "framework_regression_test"
		numOfTasks      = uint64(15)
		numOfIterations = uint64(10)
	)

	// The controller goes first so that the task directories exist in etcd.
	ctl := controller.New(job, etcd.NewClient(etcdURLs), numOfTasks, []string{"Parents", "Children"})
	ctl.Start()

	// A single builder is shared by every task launched below.
	taskBuilder := &regression.SimpleTaskBuilder{
		GDataChan:          make(chan int32, 11),
		NumberOfIterations: numOfIterations,
	}
	for remaining := numOfTasks; remaining > 0; remaining-- {
		go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder)
	}

	// Collect numOfIterations+1 values, in arrival order.
	getData := make([]int32, numOfIterations+1)
	for idx := range getData {
		getData[idx] = <-taskBuilder.GDataChan
	}

	wantData := []int32{0, 105, 210, 315, 420, 525, 630, 735, 840, 945, 1050}
	for idx, want := range wantData {
		if got := getData[idx]; want != got {
			t.Errorf("#%d: data want = %d, get = %d\n", idx, want, got)
		}
	}

	ctl.WaitForJobDone()
	ctl.Stop()
}
Beispiel #4
0
// main is the entry point of the BWMF job binary. Depending on -job_type it
// runs either a task ("t") or the controller ("c") for the job named by
// -job_name, using the etcd servers listed in -etcd_urls.
//
// The task config file is opened and read for both job types, although only
// the task branch passes its contents on.
func main() {
	etcdUrlList := flag.String("etcd_urls", "", "ETCD server lists, sep by a comma.")
	jobName := flag.String("job_name", "bwmf", "Job name in etcd path.")
	jobType := flag.String("job_type", "c", "Job type, either 'c' for controller or 't' for task.")
	numTasks := flag.Int("num_tasks", 1, "Num of tasks.")
	taskConfigFile := flag.String("task_config", "", "Path to task config json file.")

	flag.Parse()

	if *jobName == "" {
		log.Fatal("Job name is required.")
	}
	// A non-positive count would silently wrap to a huge value in the uint64
	// conversions below, so reject it up front.
	if *numTasks <= 0 {
		log.Fatalf("Num of tasks must be positive, got %d.", *numTasks)
	}

	crd, oErr := filesystem.NewLocalFSClient().OpenReadCloser(*taskConfigFile)
	if oErr != nil {
		log.Fatalf("Failed opening task config file. %s", oErr)
	}
	confData, rdErr := ioutil.ReadAll(crd)
	// Close explicitly instead of deferring: log.Fatal exits the process
	// without running deferred calls.
	// NOTE(review): assumes OpenReadCloser returns an io.ReadCloser — confirm.
	if cErr := crd.Close(); cErr != nil {
		log.Printf("Failed closing task config file. %s", cErr)
	}
	if rdErr != nil {
		log.Fatalf("Failed reading task config. %s", rdErr)
	}
	log.Printf("conf data: %s", confData)

	if *etcdUrlList == "" {
		log.Fatal("Please specify the etcd server urls.")
	}
	etcdURLs := strings.Split(*etcdUrlList, ",")
	log.Println("etcd urls: ", etcdURLs)

	topoMaster := topo.NewFullTopologyOfMaster(uint64(*numTasks))
	topoNeighbors := topo.NewFullTopologyOfNeighbor(uint64(*numTasks))

	switch *jobType {
	case "t":
		bootstrap := framework.NewBootStrap(*jobName, etcdURLs, createListener(), nil)
		taskBuilder := &bwmf.BWMFTaskBuilder{
			NumOfTasks: uint64(*numTasks),
			ConfBytes:  confData,
		}
		bootstrap.SetTaskBuilder(taskBuilder)
		bootstrap.AddLinkage("Master", topoMaster)
		bootstrap.AddLinkage("Neighbors", topoNeighbors)
		log.Println("Starting task..")
		bootstrap.Start()
	case "c":
		// Named ctl so the local does not shadow the controller package.
		ctl := controller.New(*jobName, etcd.NewClient(etcdURLs), uint64(*numTasks), []string{"Master", "Neighbors"})
		ctl.Start()
		log.Println("Controller started.")
		ctl.WaitForJobDone()
		ctl.Stop()
	default:
		// The flag is registered as "job_type"; the message must name it exactly.
		log.Fatal("Please choose a type via '-job_type': (c) controller, (t) task")
	}
}
Beispiel #5
0
// TestBWMF runs a two-task BWMF job: it generates the test data, starts the
// controller, launches one drive goroutine per task with freshly built
// "Master" and "Neighbors" topologies, and waits for the job to finish.
func TestBWMF(t *testing.T) {
	var (
		etcdURLs   = []string{"http://localhost:4001"}
		job        = "bwmf_basic_test"
		numOfTasks = uint64(2)
	)

	generateTestData(t)

	ctl := controller.New(job, etcd.NewClient(etcdURLs), numOfTasks, []string{"Neighbors", "Master"})
	ctl.Start()

	// JSON task configuration handed verbatim to the task builder.
	conf := []byte(`{
			        "OptConf": {
						"Sigma":0.01,
						"Alpha":1,
						"Beta":0.1,
						"GradTol":1e-06,
						"FixedCnt": 200000,
					    "NumIters":4,
					    "DimLatent":2
					},
					"IOConf":  {
						"Fs":"local",
						"IDPath":"../.tmp/row_shard.dat",
						"ITPath":"../.tmp/column_shard.dat",
						"ODPath":"../.tmp/dShard.dat",
						"OTPath":"../.tmp/tShard.dat"
					}
				}`)
	tb := &bwmf.BWMFTaskBuilder{NumOfTasks: numOfTasks, ConfBytes: conf}

	for remaining := numOfTasks; remaining > 0; remaining-- {
		// Each task gets its own topology instances.
		linkages := map[string]taskgraph.Topology{
			"Master":    topo.NewFullTopologyOfMaster(numOfTasks),
			"Neighbors": topo.NewFullTopologyOfNeighbor(numOfTasks),
		}
		go drive(t, job, etcdURLs, tb, linkages)
	}

	ctl.WaitForJobDone()
	ctl.Stop()
}
Beispiel #6
0
// TestMasterSetEpochFailure checks that when the master task fails at
// SetEpoch:
// 1. a new bootstrap is created to take over;
// 2. it continues what is left;
// 3. the job finishes with the same result.
func TestMasterSetEpochFailure(t *testing.T) {
	var (
		job             = "TestMasterSetEpochFailure"
		etcdURLs        = []string{"http://localhost:4001"}
		numOfTasks      = uint64(15)
		numOfIterations = uint64(10)
	)

	// The controller goes first so that the task directories exist in etcd.
	ctl := controller.New(job, etcd.NewClient(etcdURLs), numOfTasks, []string{"Parents", "Children"})
	ctl.Start()

	failingMaster := map[string]string{
		"SetEpoch":  "fail",
		"failepoch": "1",
		"faillevel": "100",
	}
	taskBuilder := &regression.SimpleTaskBuilder{
		GDataChan:          make(chan int32, 11),
		NodeProducer:       make(chan bool, 1),
		MasterConfig:       failingMaster,
		NumberOfIterations: numOfIterations,
	}
	for remaining := numOfTasks; remaining > 0; remaining-- {
		go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder)
	}
	if replace := <-taskBuilder.NodeProducer; replace {
		// Clear the failure config so the replacement master does not fail.
		taskBuilder.MasterConfig = nil
		log.Println("Starting a new node")
		go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder)
	}

	// Collect numOfIterations+1 values, in arrival order.
	getData := make([]int32, numOfIterations+1)
	for idx := range getData {
		getData[idx] = <-taskBuilder.GDataChan
	}

	wantData := []int32{0, 105, 210, 315, 420, 525, 630, 735, 840, 945, 1050}
	for idx, want := range wantData {
		if got := getData[idx]; want != got {
			t.Errorf("#%d: data want = %d, get = %d", idx, want, got)
		}
	}
	ctl.WaitForJobDone()
	ctl.Stop()
}
Beispiel #7
0
// testSlaveFailure runs the 15-task regression job under the given job name
// with slaveConfig passed to the task builder, and verifies that the values
// received on GDataChan still match the expected sequence.
//
// slaveConfig is presumably a failure-injection setting for non-master tasks;
// verify against regression.SimpleTaskBuilder.
func testSlaveFailure(t *testing.T, job string, slaveConfig map[string]string) {
	etcdURLs := []string{"http://localhost:4001"}
	numOfTasks := uint64(15)
	numOfIterations := uint64(10)

	// The controller starts first to set up the task directories in etcd.
	// Named ctl so the local does not shadow the controller package.
	ctl := controller.New(job, etcd.NewClient(etcdURLs), numOfTasks, []string{"Parents", "Children"})
	ctl.Start()
	// This deferred call is the only Stop, so the controller is stopped
	// exactly once on every return path.
	defer ctl.Stop()

	taskBuilder := &regression.SimpleTaskBuilder{
		GDataChan:          make(chan int32, 11),
		NodeProducer:       make(chan bool, 1),
		SlaveConfig:        slaveConfig,
		NumberOfIterations: numOfIterations,
	}
	// Every value received on NodeProducer launches one more node. The loop
	// below requests the initial numOfTasks nodes; presumably tasks request
	// replacements through the same channel — confirm in the task builder.
	go func() {
		for range taskBuilder.NodeProducer {
			log.Println("Starting a new node")
			go driveWithTreeTopo(t, job, etcdURLs, numOfTasks, taskBuilder)
		}
	}()
	for i := uint64(0); i < numOfTasks; i++ {
		taskBuilder.NodeProducer <- true
	}

	wantData := []int32{0, 105, 210, 315, 420, 525, 630, 735, 840, 945, 1050}
	getData := make([]int32, numOfIterations+1)
	for i := uint64(0); i <= numOfIterations; i++ {
		getData[i] = <-taskBuilder.GDataChan
	}

	for i := range wantData {
		if wantData[i] != getData[i] {
			t.Errorf("#%d: data want = %d, get = %d", i, wantData[i], getData[i])
		}
	}
	ctl.WaitForJobDone()
	// Closing the channel lets the launcher goroutine above exit.
	close(taskBuilder.NodeProducer)
}
Beispiel #8
0
// TestFrameworkFlagMeta tests the meta-flagging workflow of the framework
// implementation. It uses a scenario with two nodes: 0 as parent, 1 as child.
// When the parent flags meta to its children, and the child to its parents,
// the helper user task captures what arrives on the other side; the test
// checks that it was passed through the framework correctly and unmodified.
func TestFrameworkFlagMeta(t *testing.T) {
	appName := "TestFrameworkFlagMeta"
	etcdURLs := []string{"http://localhost:4001"}

	// The controller is only used to set up (and later tear down) the etcd layout.
	ctl := controller.New(appName, etcd.NewClient(etcdURLs), 2, []string{"Parents", "Children"})
	if err := ctl.InitEtcdLayout(); err != nil {
		t.Fatalf("initEtcdLayout failed: %v", err)
	}
	defer ctl.DestroyEtcdLayout()

	// Two tasks on two simulated nodes, each with its own listener.
	newNode := func() *framework {
		return &framework{
			name:     appName,
			etcdURLs: etcdURLs,
			ln:       createListener(t),
		}
	}
	f0, f1 := newNode(), newNode()

	pDataChan := make(chan *tDataBundle, 1)
	cDataChan := make(chan *tDataBundle, 1)
	var wg sync.WaitGroup
	taskBuilder := &testableTaskBuilder{
		cDataChan:  cDataChan,
		pDataChan:  pDataChan,
		setupLatch: &wg,
	}
	for _, node := range []*framework{f0, f1} {
		node.SetTaskBuilder(taskBuilder)
		node.AddLinkage("Parents", topo.NewTreeTopologyOfParent(2, 2))
		node.AddLinkage("Children", topo.NewTreeTopologyOfChildren(2, 2))
	}

	// wg is the latch handed to the task builder; wait for both tasks.
	wg.Add(2)
	go f0.Start()
	go f1.Start()
	wg.Wait()
	// Make f0 the framework that ended up with task ID 0 (the parent).
	if f0.GetTaskID() != 0 {
		f0, f1 = f1, f0
	}

	defer f0.ShutdownJob()

	cases := []struct {
		cMeta string
		pMeta string
	}{
		{"parent", "child"},
		{"ParamReady", "GradientReady"},
	}

	ctx := context.WithValue(context.Background(), epochKey, uint64(0))
	for i, tc := range cases {
		// Parent (0) flags its children; the bundle shows up on the child (1) side.
		f0.FlagMeta(ctx, "Children", tc.cMeta)
		got := <-cDataChan
		want := &tDataBundle{id: 0, meta: tc.cMeta}
		if !reflect.DeepEqual(got, want) {
			t.Errorf("#%d: data bundle want = %v, get = %v", i, want, got)
		}

		// Child (1) flags its parents; the bundle shows up on the parent (0) side.
		f1.FlagMeta(ctx, "Parents", tc.pMeta)
		got = <-pDataChan
		want = &tDataBundle{id: 1, meta: tc.pMeta}
		if !reflect.DeepEqual(got, want) {
			t.Errorf("#%d: data bundle want = %v, get = %v", i, want, got)
		}
	}
}
Beispiel #9
0
// TestFrameworkDataRequest tests the data-request workflow between two
// simulated nodes: 0 as parent, 1 as child. Each side requests data from the
// other, and the test checks the bundle captured by the helper user task.
func TestFrameworkDataRequest(t *testing.T) {
	appName := "framework_test_datarequest"
	etcdURLs := []string{"http://localhost:4001"}

	// The controller is only used to set up (and later tear down) the etcd layout.
	ctl := controller.New(appName, etcd.NewClient(etcdURLs), 2, []string{"Parents", "Children"})
	if err := ctl.InitEtcdLayout(); err != nil {
		t.Fatalf("initEtcdLayout failed: %v", err)
	}
	defer ctl.DestroyEtcdLayout()

	// Two tasks on two simulated nodes, each with its own listener.
	newNode := func() *framework {
		return &framework{
			name:     appName,
			etcdURLs: etcdURLs,
			ln:       createListener(t),
		}
	}
	f0, f1 := newNode(), newNode()

	pDataChan := make(chan *tDataBundle, 1)
	cDataChan := make(chan *tDataBundle, 1)
	var wg sync.WaitGroup
	taskBuilder := &testableTaskBuilder{
		cDataChan:  cDataChan,
		pDataChan:  pDataChan,
		setupLatch: &wg,
	}
	for _, node := range []*framework{f0, f1} {
		node.SetTaskBuilder(taskBuilder)
		node.AddLinkage("Parents", topo.NewTreeTopologyOfParent(2, 2))
		node.AddLinkage("Children", topo.NewTreeTopologyOfChildren(2, 2))
	}

	// wg is the latch handed to the task builder; wait for both tasks.
	wg.Add(2)
	go f0.Start()
	go f1.Start()
	wg.Wait()
	// Make f0 the framework that ended up with task ID 0 (the parent).
	if f0.GetTaskID() != 0 {
		f0, f1 = f1, f0
	}

	defer f0.ShutdownJob()
	ctx := context.WithValue(context.Background(), epochKey, uint64(0))

	// Parent (0) requests the gradient from the child (1).
	f0.DataRequest(ctx, 1, "/proto.Regression/GetGradient", nil)
	got := <-pDataChan
	want := &tDataBundle{
		id:     1,
		method: "/proto.Regression/GetGradient",
		output: &pb.Gradient{1},
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("data bundle want = %v, get = %v", want, got)
	}

	// Child (1) requests the parameter from the parent (0).
	f1.DataRequest(ctx, 0, "/proto.Regression/GetParameter", nil)
	got = <-cDataChan
	want = &tDataBundle{
		id:     0,
		method: "/proto.Regression/GetParameter",
		output: &pb.Parameter{1},
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("data bundle want = %v, get = %v", want, got)
	}
}