Example #1
0
// runTicker logs T.MemAlloced() and the current goroutine count each time the
// manager's ticker fires.
//
// NOTE(review): time.Ticker.Stop does not close the ticker channel, so this
// loop does not end when the ticker is stopped; it simply stops receiving.
func (this Manager) runTicker() {
	defer T.Un(T.Trace(""))

	for range this.ticker.C {
		mem, routines := T.MemAlloced(), runtime.NumGoroutine()
		this.Println("mem:", mem, "goroutines:", routines)
	}
}
Example #2
0
// WaitForCompletion blocks until the final reduce result arrives on
// chWorkersDone, or until a one hour timeout elapses, then stops the ticker.
//
// It returns the received result, or the zero value of mr.KeyValue when the
// wait timed out. It panics if chWorkersDone is found closed.
func (this *Manager) WaitForCompletion() (r mr.KeyValue) {
	defer T.Un(T.Trace(""))

	// We may get here before the channel has been created, so yield until it
	// exists.
	// NOTE(review): this read is not synchronized with the goroutine that
	// assigns chWorkersDone; confirm whether a proper handshake is needed.
	for this.chWorkersDone == nil {
		runtime.Gosched()
	}

	select {
	case reduceResult, ok := <-this.chWorkersDone:
		if !ok {
			panic("unkown error")
		}
		r = reduceResult

		// The result has been delivered, so the channels can be closed.
		close(this.chWorkersDone)
		if this.chProgress != nil {
			close(this.chProgress)
		}
	case <-time.After(time.Hour):
		// Timed out after one hour. The channels are deliberately left open:
		// goroutines may still send on them, and a send on a closed channel
		// panics.
	}

	// stop the ticker
	if this.ticker != nil {
		this.ticker.Stop()
	}
	return r
}
Example #3
0
// OutputGroup prints the receiver as a text table on standard output.
//
// The table starts with the group title and a dashed rule, followed by a
// header line (entry count plus one column per value key, the sort column
// marked with '*') and then one line per key, sorted by sortCol descending.
//
// printer must implement KeyLengther; it supplies the width of every key
// column for the group. An empty sortCol selects the first value column.
// When top is positive and smaller than the number of keys, only the first
// top rows are printed.
func (kv KeyValue) OutputGroup(printer Printer, group, sortCol string, top int) {
	defer T.Un(T.Trace(""))

	// print group title
	fmt.Println(group)
	fmt.Println(strings.Repeat("-", OUTPUT_GROUP_HEADER_LEN))

	// output the aggregate columns title
	oneVal := kv.OneValue().(KeyValue)
	valKeys := oneVal.Keys()
	keyLengths := printer.(KeyLengther).KeyLengths(group)
	var keyLen int // total width of the key columns
	for _, l := range keyLengths {
		keyLen += l
	}
	fmt.Printf("%*d#", keyLen-1, len(kv))
	// default sort column
	if sortCol == "" {
		sortCol = valKeys[0].(string)
	}
	for _, x := range valKeys {
		if x == sortCol {
			x = x.(string) + "*"
		}
		fmt.Printf("%*s", OUTPUT_VAL_WIDTH, x)
	}

	// title done; use fmt so the line break lands on standard output with
	// the rest of the table (the builtin println writes to standard error)
	fmt.Println()

	// sort by column
	s := NewSort(kv)
	s.SortCol(sortCol)
	s.Sort(SORT_BY_COL, SORT_ORDER_DESC)
	sortedKeys := s.Keys()
	if top > 0 && top < len(sortedKeys) {
		sortedKeys = sortedKeys[:top]
	}

	// output each key's values per line
	for _, sk := range sortedKeys {
		mapKey := sk.(GroupKey)
		// the keys
		for i, k := range mapKey.Keys() {
			width := keyLengths[i]
			// truncate so at least one padding column separates the keys;
			// a non-positive width cannot be truncated to, so print as is
			if width > 0 && len(k) >= width {
				k = k[:width-1]
			}
			fmt.Printf("%*s", width, k)
		}

		// the values
		val := kv[sk].(KeyValue)
		for _, k := range valKeys {
			fmt.Printf("%*.1f", OUTPUT_VAL_WIDTH, val[k])
		}

		fmt.Println()
	}
}
Example #4
0
// collectWorkers gathers the output of all workers: the map data (through
// shuffle) and each worker's line counters. Once every worker has reported,
// it closes the three channels, launches the reducer, exports the result and
// hands it over on chWorkersDone.
//
// chRateLimit receives one token per finished worker.
// chInMap is passed to shuffle, which consumes it.
// chInWorker delivers one message per finished worker.
func (this *Manager) collectWorkers(chRateLimit chan bool, chInMap chan mr.KeyValue, chInWorker chan Worker) {
	defer T.Un(T.Trace(""))

	this.Println("collectWorkers started")

	shuffledKvs := this.shuffle(chInMap)

	var doneWorkers int
	for doneWorkers != this.workersCount() {
		worker, ok := <-chInWorker // each worker sends 1 msg to this chan
		if !ok {
			// this should never happen, workers can't close this chan
			this.Fatal("worker chan closed")
			continue
		}

		doneWorkers++
		total := this.workersCount()
		// convert before dividing so the percentage keeps its fraction
		this.Printf("%s workers done: %d/%d %.1f%%\n", worker.Kind(), doneWorkers,
			total, 100*float64(doneWorkers)/float64(total))

		this.RawLines += worker.RawLines
		this.ValidLines += worker.ValidLines

		chRateLimit <- true // give up the slot
	}

	// all workers done, so close the channels
	close(chInMap)
	close(chInWorker)
	close(chRateLimit)

	this.invokeGc()

	// mappers must complete before reducers can begin
	worker := this.GetOneWorker()
	kvs := <-shuffledKvs
	this.Println(worker.Kind(), "worker Shuffled")
	reduceResult := kvs.LaunchReducer(worker)
	this.Println(worker.Kind(), "worker Reduced")

	this.invokeGc()

	// enter into output phase
	// export final result, possibly export to db
	this.Println(worker.Kind(), "worker start to Output...")
	fmt.Println() // separated from progress bar
	reduceResult.ExportResult(worker, "", "", worker.TopN())

	// WaitForCompletion will wait for this
	this.chWorkersDone <- reduceResult
}
Example #5
0
// NewNoopWorker allocates a NoopWorker, points its self field back at the new
// value and initializes it with the given manager, name, file name and
// sequence number.
func NewNoopWorker(manager *Manager, name, filename string, seq uint16) IWorker {
	defer T.Un(T.Trace(""))

	w := &NoopWorker{}
	w.self = w
	w.init(manager, name, filename, seq)
	return w
}
Example #6
0
// NewUniWorker allocates a UniWorker, points its self field back at the new
// value and initializes it with the given manager, name, file name and
// sequence number.
func NewUniWorker(manager *Manager, name, filename string, seq uint16) IWorker {
	defer T.Un(T.Trace(""))

	w := &UniWorker{}
	w.self = w // the self reference must be set before init runs
	w.init(manager, name, filename, seq)
	return w
}
Example #7
0
// ExportResult exports the receiver, whose keys are the mappers' output keys
// and whose values are the reducer output values (KeyValue).
//
// A grouped receiver is exported per group (optionally restricted to group,
// sorted by sortCol); otherwise the non-grouped export is used. top is passed
// through to whichever exporter runs.
func (this KeyValue) ExportResult(printer Printer, group, sortCol string, top int) {
	defer T.Un(T.Trace(""))

	if this.Groupped() {
		this.exportForGroupped(printer, group, sortCol, top)
		return
	}

	this.exportForNonGrouped(printer, top)
}
Example #8
0
// dumpMemProfile writes a heap profile to the file named pf.
// An empty pf disables the dump.
//
// It panics when the file cannot be created, written or closed, matching the
// existing handling of the create error.
func dumpMemProfile(pf string) {
	defer T.Un(T.Trace(""))

	if pf == "" {
		return
	}

	f, err := os.Create(pf)
	if err != nil {
		panic(err)
	}

	if err := pprof.WriteHeapProfile(f); err != nil {
		f.Close() // best effort, the write error is the one worth reporting
		panic(err)
	}
	// a failed close can mean the profile was not fully written
	if err := f.Close(); err != nil {
		panic(err)
	}
}
Example #9
0
// exportForGroupped outputs the receiver group by group through OutputGroup.
// When group is non-empty only the group with that name is output; sortCol
// and top are forwarded unchanged.
func (this KeyValue) exportForGroupped(printer Printer, group, sortCol string, top int) {
	defer T.Un(T.Trace(""))

	wantAll := group == ""
	for _, name := range this.Groups() {
		if !wantAll && name != group {
			continue
		}

		// build a kv holding just this group and print it
		this.newByGroup(name).OutputGroup(printer, name, sortCol, top)
		// NOTE(review): the builtin println writes to standard error, not
		// standard output; confirm that is intended for this separator.
		println()
	}
}
Example #10
0
// exportForNonGrouped sorts the receiver by value in descending order and
// passes each key with its KeyValue to the printer's Printr method.
// printer must implement Printrer. When top is positive and smaller than the
// number of keys, only the first top keys are processed.
func (this KeyValue) exportForNonGrouped(printer Printer, top int) {
	defer T.Un(T.Trace(""))

	sorter := NewSort(this)
	sorter.Sort(SORT_BY_VALUE, SORT_ORDER_DESC) // value, descending

	keys := sorter.keys
	if n := len(keys); top > 0 && top < n {
		keys = keys[:top]
	}

	for i := range keys {
		key := keys[i]
		// Printr's return value is discarded; per the original note it is an
		// SQL DML statement, usually 'insert into'.
		_ = printer.(Printrer).Printr(key, this[key].(KeyValue))
	}
}
Example #11
0
// run reads this worker's input line by line, hands every valid line to the
// concrete worker's Map and finally sends a copy of the worker on
// chOutWorker.
//
// Number of writes each worker makes to each channel:
// chOutProgress: N (one per PROGRESS_LINES_STEP raw lines; skipped when nil)
// chOutMap: 1 for each parsed line
// chOutWorker: 1
func (this *Worker) run(chOutProgress chan<- int, chOutMap chan<- mr.KeyValue, chOutWorker chan<- Worker) {
	defer T.Un(T.Trace(""))

	this.StartAt = time.Now()

	var input *stream.Stream
	if this.manager.option.filemode {
		input = stream.NewStream(stream.PLAIN_FILE, this.filename)
	} else {
		input = stream.NewStream(stream.EXEC_PIPE, LZOP_CMD, LZOP_OPTION, this.filename)
	}
	// fail on an input that cannot be opened, as initExternalMapper does
	if err := input.Open(); err != nil {
		this.Fatal(err)
	}
	defer input.Close()

	this.Printf("%s worker[%d] opened %s, start to Map...\n", this.kind, this.seq, this.BaseFilename())

	for {
		line, err := input.Reader().ReadString(EOL)
		if err != nil {
			if err != io.EOF {
				log.Fatal(err)
			}

			// NOTE(review): a final line without a trailing EOL arrives here
			// together with io.EOF and is dropped; confirm this is acceptable.
			break
		}

		this.RawLines++
		if chOutProgress != nil && this.RawLines%PROGRESS_LINES_STEP == 0 {
			// report progress
			chOutProgress <- PROGRESS_LINES_STEP
		}

		if !this.self.IsLineValid(line) {
			continue
		}

		this.ValidLines++

		// run map for this line
		// for pipe stream flush to work, we can't strip EOL
		this.self.Map(line, chOutMap)
	}
	this.EndAt = time.Now()

	chOutWorker <- *this
	this.Printf("%s worker[%d] %s done, parsed: %d/%d, duration: %v\n", this.kind, this.seq, this.BaseFilename(),
		this.ValidLines, this.RawLines, this.Duration())
}
Example #12
0
// displaySummary reports how many lines were parsed, in how many files and
// how fast, writing the same one-line summary to logger and to standard
// error.
//
// start is when the job began. files, rawLines and validLines are the
// totals to report. The valid percentage is reported as 0 when rawLines is
// not positive.
func displaySummary(logger *log.Logger, start time.Time, files, rawLines, validLines int) {
	defer T.Un(T.Trace(""))

	delta := time.Since(start)

	// avoid printing NaN when nothing was read
	var validPercent float64
	if rawLines > 0 {
		validPercent = 100 * float64(validLines) / float64(rawLines)
	}

	summary := fmt.Sprintf("Parsed %s/%s(%.4f%%) lines in %d files within %s [%.1f lines per second]\n",
		size.Comma(int64(validLines)),
		size.Comma(int64(rawLines)),
		validPercent,
		files,
		delta,
		float64(rawLines)/delta.Seconds())

	// render to both log and stderr; the summary is final text, so it is
	// printed verbatim rather than reused as a format string
	logger.Print(summary)
	fmt.Fprint(os.Stderr, summary)
}
Example #13
0
// initExternalMapper opens the external mapper configured in the manager's
// options as an EXEC_PIPE stream and stores the stream's reader and writer
// in mapReader and mapWriter.
//
// It returns the opened stream, or nil when no external mapper is
// configured. An open failure is reported through this.Fatal.
func (this *Worker) initExternalMapper() *stream.Stream {
	defer T.Un(T.Trace(""))

	cmd := this.manager.option.mapper
	if cmd == "" {
		return nil
	}

	pipe := stream.NewStream(stream.EXEC_PIPE, cmd)
	if err := pipe.Open(); err != nil {
		this.Fatal(err)
	}
	this.Printf("external mapper stream opened: %s\n", cmd)

	this.mapReader = pipe.Reader()
	this.mapWriter = pipe.Writer()
	return pipe
}
Example #14
0
// Submit accepts the job and starts it: it creates the channels and the
// workers, then launches the signal trap, the optional ticker and progress
// goroutines, the collector and the worker launcher.
//
// A panic raised in Submit itself is recovered and returned as err; panics
// inside the goroutines it starts are not covered by that recover.
func (this *Manager) Submit() (err error) {
	defer T.Un(T.Trace(""))

	// turn a panic into the returned error
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		if e, ok := r.(error); ok {
			err = e
			return
		}
		err = fmt.Errorf("manager: %v", r)
	}()

	this.Println("submitted job accepted")

	chMap := make(chan mr.KeyValue, this.workersCount()*LINE_CHANBUF_PER_WORKER)
	chWorker := make(chan Worker, this.workersCount())
	this.chWorkersDone = make(chan mr.KeyValue)

	// the workers have to exist before anything is launched
	this.newWorkers()

	// TODO
	go this.trapSignal()

	if this.ticker != nil {
		go this.runTicker()
	}

	if this.option.progress {
		this.chProgress = make(chan int, PROGRESS_CHAN_BUF)
		go this.showProgress()
	}

	// the collector gathers the output of all workers
	chRateLimit := this.initRateLimit()
	go this.collectWorkers(chRateLimit, chMap, chWorker)

	// the launcher starts the workers in chunks
	go this.launchWorkers(chRateLimit, chMap, chWorker)

	return nil
}
Example #15
0
// SafeRun runs the worker with panic logging: a panic raised while running
// is logged through the worker's logger and then raised again.
// In debug mode the worker is printed to standard error first. An external
// mapper stream, when one is configured, is closed when SafeRun returns.
func (this *Worker) SafeRun(chOutProgress chan<- int, chOutMap chan<- mr.KeyValue, chOutWorker chan<- Worker) {
	defer T.Un(T.Trace(""))

	// log the panic, then let it propagate
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		this.Println("panic recovered:", r)
		panic(r)
	}()

	if this.manager.option.debug {
		fmt.Fprintln(os.Stderr, this)
	}

	mapper := this.initExternalMapper()
	if mapper != nil {
		defer mapper.Close()
	}

	this.run(chOutProgress, chOutMap, chOutWorker)
}
Example #16
0
// trapSignal registers for caredSignals and starts a goroutine that reacts
// to them: a signal listed in skippedSignals is logged and ignored, any
// other signal triggers Shutdown.
//
// The goroutine keeps listening after an ignored signal, so a later
// non-skipped signal still shuts the manager down.
func (this Manager) trapSignal() {
	defer T.Un(T.Trace(""))

	ch := make(chan Signal, 10)

	// register the given channel to receive notifications of the specified signals
	signal.Notify(ch, caredSignals...)

	go func() {
		for sig := range ch {
			name := strings.ToUpper(sig.String())
			fmt.Fprintf(Stderr, "%s signal received...\n", name)

			skipped := false
			for _, skip := range skippedSignals {
				if skip == sig {
					skipped = true
					break
				}
			}
			if skipped {
				this.Printf("%s signal ignored\n", name)
				continue
			}

			// not skipped
			fmt.Fprintf(Stderr, "prepare to shutdown...")
			this.Shutdown()
			return
		}
	}()
}
Example #17
0
// NewManager builds a Manager from option.
//
// A positive option.tick creates a ticker with that period in milliseconds.
// When option.rpc is set the RPC server is started; a startup failure is
// reported through Fatal.
func NewManager(option *Option) *Manager {
	defer T.Un(T.Trace(""))

	m := &Manager{}
	if tick := option.tick; tick > 0 {
		m.ticker = time.NewTicker(time.Duration(tick) * time.Millisecond)
	}
	m.Logger = newLogger(option)
	m.logLevel = DefaultLogLevel
	m.lock = &sync.Mutex{}
	m.option = option

	m.Println("manager created")

	if !m.option.rpc {
		return m
	}

	if e := netapi.StartServer(); e != nil {
		m.Fatal(e)
	}
	m.Println("RPC server startup at", netapi.ADDRS)

	return m
}
Example #18
0
// initialize prepares the process from the parsed command line.
//
// It prints the version and exits with status 0 when requested, prints err
// and exits with status 1 when flag parsing failed, sets GOMAXPROCS to
// NumCPU()/2+1 unless the maxprocsenv environment variable is set, and
// starts CPU profiling when a profile file is configured.
//
// It panics when the CPU profile cannot be created or started.
func initialize(option *dlog.Option, err error) {
	defer T.Un(T.Trace(""))

	// The version request is checked before err, so it is honored even when
	// parsing reported an error. The nil guard keeps a failed parse that
	// yields no option from crashing here instead of printing err below.
	if option != nil && option.Version() {
		fmt.Fprintf(os.Stderr, "%s %s %s %s\n", "dlogmon", VERSION,
			runtime.GOOS, runtime.GOARCH)
		os.Exit(0)
	}

	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// enable gc trace
	// this will not work, the only way is to setenv before invoke me
	os.Setenv("GOGCTRACE", "1")

	// parallel level
	if os.Getenv(maxprocsenv) == "" {
		parallel := runtime.NumCPU()/2 + 1
		runtime.GOMAXPROCS(parallel)
		fmt.Fprintf(os.Stderr, "Parallel CPU(core): %d / %d, Concurrent workers: %d\n", parallel,
			runtime.NumCPU(), option.Nworkers)
	}
	fmt.Fprintln(os.Stderr, option.Timespan)

	// cpu profile
	if path := option.Cpuprofile(); path != "" {
		f, err := os.Create(path)
		if err != nil {
			panic(err)
		}

		// fail as loudly as for the create error instead of silently
		// running without a profile
		if err := pprof.StartCPUProfile(f); err != nil {
			f.Close()
			panic(err)
		}
	}
}
Example #19
0
// main parses the command line, submits the job to a new manager, waits for
// the final result, prints the summary and optionally enters the interactive
// shell.
func main() {
	// cli options
	option, err := dlog.ParseFlags()
	initialize(option, err)

	// construct the manager
	manager := dlog.NewManager(option)
	// mutex pass through
	T.SetLock(manager.GetLock())

	defer T.Un(T.Trace(""))

	// cpu profile
	if option.Cpuprofile() != "" {
		defer pprof.StopCPUProfile()
	}

	// timing all the jobs up
	start := time.Now()

	manager.Println("about to submit jobs")
	go func() {
		// A failed submission must not be dropped silently: without it the
		// wait below has nothing to wait for.
		if err := manager.Submit(); err != nil {
			manager.Fatal(err)
		}
	}()

	// mem profile
	dumpMemProfile(option.Memprofile())

	manager.Println("waiting for completion...")
	kvResult := manager.WaitForCompletion()

	displaySummary(manager.Logger, start,
		manager.FilesCount(), manager.RawLines, manager.ValidLines)

	if option.Shell {
		cliCmdloop(manager.GetOneWorker(), kvResult)
	}
}
Example #20
0
// GetOneWorker returns the first entry of the manager's workers.
// TODO: return any worker of the same type.
//
// NOTE(review): workers[0] is indexed without a length check; confirm callers
// only use this after the workers have been created.
func (this *Manager) GetOneWorker() IWorker {
	defer T.Un(T.Trace(""))

	first := this.workers[0]
	return first
}