func (this Manager) runTicker() {
	defer T.Un(T.Trace(""))

	for range this.ticker.C {
		this.Println("mem:", T.MemAlloced(), "goroutines:", runtime.NumGoroutine())
	}
}
// Wait for all the dlog goroutines to finish and collect the final result.
// Must run after collectWorkers() has finished.
func (this *Manager) WaitForCompletion() (r mr.KeyValue) {
	defer T.Un(T.Trace(""))

	// we may get here before Submit() has created the channel,
	// so yield until it exists
	for this.chWorkersDone == nil {
		runtime.Gosched()
	}

	select {
	case reduceResult, ok := <-this.chWorkersDone:
		if !ok {
			panic("unknown error")
		}
		r = reduceResult
	case <-time.After(time.Hour):
		// timeout after 1 hour; just a demo usage of timeout
	}

	close(this.chWorkersDone)
	if this.chProgress != nil {
		close(this.chProgress)
	}

	// stop the ticker
	if this.ticker != nil {
		this.ticker.Stop()
	}

	return
}
func (kv KeyValue) OutputGroup(printer Printer, group, sortCol string, top int) {
	defer T.Un(T.Trace(""))

	// print the group title
	fmt.Println(group)
	fmt.Println(strings.Repeat("-", OUTPUT_GROUP_HEADER_LEN))

	// output the aggregate column titles
	oneVal := kv.OneValue().(KeyValue)
	valKeys := oneVal.Keys()
	keyLengths := printer.(KeyLengther).KeyLengths(group)
	var keyLen int // total width of the key placeholders
	for _, l := range keyLengths {
		keyLen += l
	}
	fmt.Printf("%*d#", keyLen-1, len(kv))

	// default sort column
	if sortCol == "" {
		sortCol = valKeys[0].(string)
	}
	for _, x := range valKeys {
		if x == sortCol {
			x = x.(string) + "*" // mark the sort column
		}
		fmt.Printf("%*s", OUTPUT_VAL_WIDTH, x)
	}
	// title done
	println()

	// sort by column
	s := NewSort(kv)
	s.SortCol(sortCol)
	s.Sort(SORT_BY_COL, SORT_ORDER_DESC)
	sortedKeys := s.Keys()
	if top > 0 && top < len(sortedKeys) {
		sortedKeys = sortedKeys[:top]
	}

	// output each key's values, one line per key
	for _, sk := range sortedKeys {
		mapKey := sk.(GroupKey)

		// the keys
		for i, k := range mapKey.Keys() {
			if len(k) >= keyLengths[i] {
				k = k[:keyLengths[i]-1] // truncate to fit the column
			}
			fmt.Printf("%*s", keyLengths[i], k)
		}

		// the values
		val := kv[sk].(KeyValue)
		for _, k := range valKeys {
			fmt.Printf("%*.1f", OUTPUT_VAL_WIDTH, val[k])
		}

		println()
	}
}
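// OutputGroup leans on fmt's dynamic-width verbs: in "%*d", "%*s" and "%*.1f"
// the '*' consumes an extra int argument as the field width at run time.
// A minimal standalone sketch of that idiom (the values here are made up):
package main

import "fmt"

func main() {
	width := 12
	fmt.Printf("%*s|\n", width, "host")   // right-aligned in a 12-char column: "        host|"
	fmt.Printf("%*.1f|\n", width, 3.1415) // width plus precision:              "         3.1|"
}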
// Collect the workers' output,
// including map data and per-worker summary
func (this *Manager) collectWorkers(chRateLimit chan bool, chInMap chan mr.KeyValue, chInWorker chan Worker) {
	defer T.Un(T.Trace(""))

	this.Println("collectWorkers started")

	shuffledKvs := this.shuffle(chInMap)

	var doneWorkers int
	for doneWorkers < this.workersCount() {
		select {
		case worker, ok := <-chInWorker: // each worker sends exactly 1 msg to this chan
			if !ok {
				// this can never happen: workers don't close this chan
				this.Fatal("worker chan closed")
			}

			doneWorkers++
			this.Printf("%s workers done: %d/%d %.1f%%\n",
				worker.Kind(), doneWorkers, this.workersCount(),
				100*float64(doneWorkers)/float64(this.workersCount()))
			this.RawLines += worker.RawLines
			this.ValidLines += worker.ValidLines

			chRateLimit <- true // return the slot so the next worker can start
		}
	}

	// all workers done, so close the channels
	close(chInMap)
	close(chInWorker)
	close(chRateLimit)

	this.invokeGc()

	// mappers must complete before reducers can begin
	worker := this.GetOneWorker()
	kvs := <-shuffledKvs
	this.Println(worker.Kind(), "worker Shuffled")
	reduceResult := kvs.LaunchReducer(worker)
	this.Println(worker.Kind(), "worker Reduced")

	this.invokeGc()

	// enter the output phase:
	// export the final result, possibly to a db
	this.Println(worker.Kind(), "worker start to Output...")
	fmt.Println() // separate from the progress bar
	reduceResult.ExportResult(worker, "", "", worker.TopN())

	// WaitForCompletion will wait for this
	this.chWorkersDone <- reduceResult
}
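// chRateLimit acts as a counting semaphore: the launcher takes a token before
// starting a worker, and collectWorkers returns a token whenever a worker
// finishes, so at most cap(chRateLimit) workers run concurrently. A minimal
// self-contained sketch of the pattern, assuming that token polarity
// (maxConcurrent and the job bodies are illustrative, not the dlogmon API):
package main

import (
	"fmt"
	"sync"
)

func main() {
	const maxConcurrent = 3

	// pre-fill the semaphore with one token per allowed concurrent job
	rateLimit := make(chan bool, maxConcurrent)
	for i := 0; i < maxConcurrent; i++ {
		rateLimit <- true
	}

	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		<-rateLimit // take a token; blocks while maxConcurrent jobs are in flight
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			fmt.Println("job", id, "running")
			rateLimit <- true // return the token, as collectWorkers does
		}(i)
	}
	wg.Wait()
}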
// Constructor of NoopWorker
func NewNoopWorker(manager *Manager, name, filename string, seq uint16) IWorker {
	defer T.Un(T.Trace(""))

	this := new(NoopWorker)
	this.self = this
	this.init(manager, name, filename, seq)
	return this
}
// Constructor of UniWorker
func NewUniWorker(manager *Manager, name, filename string, seq uint16) IWorker {
	defer T.Un(T.Trace(""))

	this := new(UniWorker)
	this.self = this // don't forget this: it lets the embedded Worker dispatch back to UniWorker
	this.init(manager, name, filename, seq)
	return this
}
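// The `self` field emulates virtual dispatch: the embedded base Worker calls
// this.self.Map(...) / this.self.IsLineValid(...), so the concrete worker's
// overrides are reached even from base-type methods. A minimal sketch of the
// idiom (Base/Derived/Greeter are illustrative names, not dlogmon types):
package main

import "fmt"

type Greeter interface{ Name() string }

type Base struct {
	self Greeter // points back at the outermost value
}

func (b *Base) Name() string { return "base" }
func (b *Base) Hello()       { fmt.Println("hello,", b.self.Name()) }

type Derived struct{ Base }

func (d *Derived) Name() string { return "derived" }

func main() {
	d := new(Derived)
	d.self = d // same wiring as `this.self = this` in the constructors
	d.Hello()  // prints "hello, derived", not "hello, base"
}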
// ExportResult exports the final result: the keys are the mappers' output keys,
// the values are the reducer's output values (KeyValue)
func (this KeyValue) ExportResult(printer Printer, group, sortCol string, top int) {
	defer T.Un(T.Trace(""))

	if !this.Groupped() {
		this.exportForNonGrouped(printer, top)
	} else {
		this.exportForGroupped(printer, group, sortCol, top)
	}
}
// mem profile
func dumpMemProfile(pf string) {
	defer T.Un(T.Trace(""))

	if pf != "" {
		f, err := os.Create(pf)
		if err != nil {
			panic(err)
		}

		pprof.WriteHeapProfile(f)
		f.Close()
	}
}
func (this KeyValue) exportForGroupped(printer Printer, group, sortCol string, top int) {
	defer T.Un(T.Trace(""))

	for _, grp := range this.Groups() {
		if group != "" && grp != group {
			continue
		}

		kvGroup := this.newByGroup(grp) // a new kv holding just this group
		kvGroup.OutputGroup(printer, grp, sortCol, top)
		println()
	}
}
func (this KeyValue) exportForNonGrouped(printer Printer, top int) {
	defer T.Un(T.Trace(""))

	s := NewSort(this)
	s.Sort(SORT_BY_VALUE, SORT_ORDER_DESC) // sort by value desc
	sortedKeys := s.keys
	if top > 0 && top < len(sortedKeys) {
		sortedKeys = sortedKeys[:top]
	}

	for _, k := range sortedKeys {
		// Printr returns a sql dml statement, usually 'insert into'
		_ = printer.(Printrer).Printr(k, this[k].(KeyValue))
	}
}
// Number of writes each worker makes to each chan:
//   chOutProgress: N
//   chOutMap: 1 for each parsed line
//   chOutWorker: 1
func (this *Worker) run(chOutProgress chan<- int, chOutMap chan<- mr.KeyValue, chOutWorker chan<- Worker) {
	defer T.Un(T.Trace(""))

	this.StartAt = time.Now()

	var input *stream.Stream
	if this.manager.option.filemode {
		input = stream.NewStream(stream.PLAIN_FILE, this.filename)
	} else {
		input = stream.NewStream(stream.EXEC_PIPE, LZOP_CMD, LZOP_OPTION, this.filename)
	}

	input.Open()
	defer input.Close()

	this.Printf("%s worker[%d] opened %s, start to Map...\n", this.kind, this.seq, this.BaseFilename())

	for {
		line, err := input.Reader().ReadString(EOL)
		if err != nil {
			if err != io.EOF {
				log.Fatal(err)
			}
			break
		}

		this.RawLines++
		if chOutProgress != nil && this.RawLines%PROGRESS_LINES_STEP == 0 {
			// report progress
			chOutProgress <- PROGRESS_LINES_STEP
		}

		if !this.self.IsLineValid(line) {
			continue
		}

		this.ValidLines++

		// run Map for this line
		// for pipe stream flush to work, we can't strip the EOL
		this.self.Map(line, chOutMap)
	}

	this.EndAt = time.Now()

	chOutWorker <- *this

	this.Printf("%s worker[%d] %s done, parsed: %d/%d, duration: %v\n",
		this.kind, this.seq, this.BaseFilename(), this.ValidLines, this.RawLines, this.Duration())
}
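// The per-line contract run() relies on: IsLineValid filters raw lines, and
// Map emits key/value pairs into chOutMap for each parsed line. A purely
// illustrative pair (not dlogmon's own mappers) counting lines per log level,
// with a stand-in KeyValue type:
package main

import (
	"fmt"
	"strings"
)

type KeyValue map[interface{}]interface{}

func isLineValid(line string) bool {
	return strings.Contains(line, " ") // keep only lines that look parseable
}

func mapLine(line string, out chan<- KeyValue) {
	level := strings.Fields(line)[0] // e.g. "ERROR", "INFO"
	out <- KeyValue{level: 1}
}

func main() {
	out := make(chan KeyValue, 2)
	for _, line := range []string{"ERROR disk full", "INFO started"} {
		if isLineValid(line) {
			mapLine(line, out)
		}
	}
	close(out)
	for kv := range out {
		fmt.Println(kv)
	}
}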
func displaySummary(logger *log.Logger, start time.Time, files, rawLines, validLines int) {
	defer T.Un(T.Trace(""))

	delta := time.Since(start)
	summary := fmt.Sprintf("Parsed %s/%s(%.4f%%) lines in %d files within %s [%.1f lines per second]\n",
		size.Comma(int64(validLines)), size.Comma(int64(rawLines)),
		100*float64(validLines)/float64(rawLines),
		files, delta,
		float64(rawLines)/delta.Seconds())

	// render to both log and stderr
	logger.Print(summary)
	fmt.Fprint(os.Stderr, summary) // Fprint, not Fprintf: summary contains a literal '%'
}
func (this *Worker) initExternalMapper() *stream.Stream {
	defer T.Un(T.Trace(""))

	mapper := this.manager.option.mapper
	if mapper != "" {
		es := stream.NewStream(stream.EXEC_PIPE, mapper) // named es to avoid shadowing the stream package
		if err := es.Open(); err != nil {
			this.Fatal(err)
		}
		this.Printf("external mapper stream opened: %s\n", mapper)

		this.mapReader = es.Reader()
		this.mapWriter = es.Writer()
		return es
	}

	return nil
}
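// stream.EXEC_PIPE presumably wraps a child process with its stdin/stdout
// exposed as a writer/reader pair. A minimal sketch of the same idea using
// only os/exec ("tr" is an arbitrary stand-in for an external mapper command):
package main

import (
	"bufio"
	"fmt"
	"os/exec"
)

func main() {
	cmd := exec.Command("tr", "a-z", "A-Z")
	stdin, _ := cmd.StdinPipe()
	stdout, _ := cmd.StdoutPipe()
	if err := cmd.Start(); err != nil {
		panic(err)
	}

	// write a line to the external mapper, then read its transformed output
	fmt.Fprintln(stdin, "hello external mapper")
	stdin.Close()

	line, _ := bufio.NewReader(stdout).ReadString('\n')
	fmt.Print(line) // HELLO EXTERNAL MAPPER
	cmd.Wait()
}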
// Submit the job and start it
func (this *Manager) Submit() (err error) {
	defer T.Un(T.Trace(""))

	// be safe: turn any panic into the returned error
	defer func() {
		if r := recover(); r != nil {
			var ok bool
			if err, ok = r.(error); !ok {
				err = fmt.Errorf("manager: %v", r)
			}
		}
	}()

	this.Println("submitted job accepted")

	chMap := make(chan mr.KeyValue, this.workersCount()*LINE_CHANBUF_PER_WORKER)
	chWorker := make(chan Worker, this.workersCount())
	this.chWorkersDone = make(chan mr.KeyValue)

	// create workers first
	this.newWorkers()

	// TODO
	go this.trapSignal()

	if this.ticker != nil {
		go this.runTicker()
	}

	if this.option.progress {
		this.chProgress = make(chan int, PROGRESS_CHAN_BUF)
		go this.showProgress()
	}

	// collect all the workers' output
	chRateLimit := this.initRateLimit()
	go this.collectWorkers(chRateLimit, chMap, chWorker)

	// launch the workers in chunks
	go this.launchWorkers(chRateLimit, chMap, chWorker)

	return
}
func (this *Worker) SafeRun(chOutProgress chan<- int, chOutMap chan<- mr.KeyValue, chOutWorker chan<- Worker) {
	defer T.Un(T.Trace(""))

	// recover so the panic is logged before being re-raised
	defer func() {
		if err := recover(); err != nil {
			this.Println("panic recovered:", err)
			panic(err)
		}
	}()

	if this.manager.option.debug {
		fmt.Fprintln(os.Stderr, this)
	}

	if mapper := this.initExternalMapper(); mapper != nil {
		defer mapper.Close()
	}

	this.run(chOutProgress, chOutMap, chOutWorker)
}
func (this Manager) trapSignal() {
	defer T.Un(T.Trace(""))

	ch := make(chan os.Signal, 10)
	// register the channel to receive notifications of the specified signals
	signal.Notify(ch, caredSignals...)

	go func() {
		sig := <-ch
		fmt.Fprintf(os.Stderr, "%s signal received...\n", strings.ToUpper(sig.String()))

		for _, skip := range skippedSignals {
			if skip == sig {
				this.Printf("%s signal ignored\n", strings.ToUpper(sig.String()))
				return
			}
		}

		// not skipped
		fmt.Fprintln(os.Stderr, "prepare to shutdown...")
		this.Shutdown()
	}()
}
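// A minimal standalone version of this trap, using only os/signal
// (SIGINT/SIGTERM stand in for caredSignals):
package main

import (
	"fmt"
	"os"
	"os/signal"
	"syscall"
)

func main() {
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)

	sig := <-ch // blocks until one of the registered signals arrives
	fmt.Fprintf(os.Stderr, "%s received, shutting down...\n", sig)
}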
// Manager constructor
func NewManager(option *Option) *Manager {
	defer T.Un(T.Trace(""))

	this := new(Manager)
	if option.tick > 0 {
		this.ticker = time.NewTicker(time.Millisecond * time.Duration(option.tick))
	}
	this.Logger = newLogger(option)
	this.option = option
	this.lock = new(sync.Mutex)
	this.logLevel = DefaultLogLevel

	this.Println("manager created")

	if this.option.rpc {
		if e := netapi.StartServer(); e != nil {
			this.Fatal(e)
		}
		this.Println("RPC server startup at", netapi.ADDRS)
	}

	return this
}
func initialize(option *dlog.Option, err error) {
	defer T.Un(T.Trace(""))

	if option.Version() {
		fmt.Fprintf(os.Stderr, "%s %s %s %s\n", "dlogmon", VERSION, runtime.GOOS, runtime.GOARCH)
		os.Exit(0)
	}
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// enable gc trace
	// setting it here has no effect: the runtime reads it at startup,
	// so it must be exported before this process is invoked
	os.Setenv("GOGCTRACE", "1")

	// parallel level
	if os.Getenv(maxprocsenv) == "" {
		parallel := runtime.NumCPU()/2 + 1
		runtime.GOMAXPROCS(parallel)
		fmt.Fprintf(os.Stderr, "Parallel CPU(core): %d / %d, Concurrent workers: %d\n",
			parallel, runtime.NumCPU(), option.Nworkers)
	}

	fmt.Fprintln(os.Stderr, option.Timespan)

	// cpu profile
	if option.Cpuprofile() != "" {
		f, err := os.Create(option.Cpuprofile())
		if err != nil {
			panic(err)
		}
		pprof.StartCPUProfile(f)
	}
}
func main() {
	// cli options
	option, err := dlog.ParseFlags()
	initialize(option, err)

	// construct the manager
	manager := dlog.NewManager(option)

	// pass the mutex through to the tracer
	T.SetLock(manager.GetLock())
	defer T.Un(T.Trace(""))

	// cpu profile
	if option.Cpuprofile() != "" {
		defer pprof.StopCPUProfile()
	}

	// time all the jobs
	start := time.Now()

	manager.Println("about to submit jobs")
	go manager.Submit()

	// mem profile
	dumpMemProfile(option.Memprofile())

	manager.Println("waiting for completion...")
	kvResult := manager.WaitForCompletion()

	displaySummary(manager.Logger, start, manager.FilesCount(), manager.RawLines, manager.ValidLines)

	if option.Shell {
		cliCmdloop(manager.GetOneWorker(), kvResult)
	}
}
// Get any worker of the same type TODO
func (this *Manager) GetOneWorker() IWorker {
	defer T.Un(T.Trace(""))

	return this.workers[0]
}