// Reduce sums the co-occurrence counts collected for a key and emits the
// total when it exceeds the threshold.
func (this *FileWorker) Reduce(key interface{}, values []interface{}) (kv mr.KeyValue) {
    const threshold = 0
    var occurrence = stats.StatsSum(mr.ConvertAnySliceToFloat(values))
    if occurrence > threshold {
        kv = mr.NewKeyValue()
        kv[NIL_KEY] = occurrence
    }
    return
}
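// mr.ConvertAnySliceToFloat is used throughout this file but defined in the
// mr package; a minimal sketch of the assumed contract (coercing the
// []interface{} values gathered for a key into []float64), not the verified
// implementation:
//
//     func ConvertAnySliceToFloat(vals []interface{}) []float64 {
//         out := make([]float64, 0, len(vals))
//         for _, v := range vals {
//             switch x := v.(type) {
//             case float64:
//                 out = append(out, x)
//             case int:
//                 out = append(out, float64(x))
//             }
//         }
//         return out
//     }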
func (this *AmfWorker) Map(line string, out chan<- mr.KeyValue) {
    req := new(amfRequest)
    req.parseLine(line)

    kv := mr.NewKeyValue()
    key := mr.NewKey(req.class, req.method, req.uri)
    kv[key] = 1
    kv.Emit(out)
}
func (this *UniWorker) Reduce(key interface{}, values []interface{}) (kv mr.KeyValue) {
    // Here we don't care about the key; we only care about the values.
    aggregate := stats.StatsSum(mr.ConvertAnySliceToFloat(values))
    if aggregate > 100 {
        kv = mr.NewKeyValue()
        kv[NIL_KEY] = aggregate
    }
    return
}
func (this *UniWorker) Map(line string, out chan<- mr.KeyValue) {
    line = line[:len(line)-1] // strip the trailing newline
    line = strings.Replace(line, "+", " ", 2)
    kv := mr.NewKeyValue()
    rs := re.ReplaceAll([]byte(line), []byte{})
    line = string(rs)
    line = trimAllRune(line, []rune{'>', '~', ';', '.', '-', '*'})
    words := strings.Split(line, " ")
    for _, w := range words {
        w = strings.Trim(w, " ")
        if w == "" {
            continue // skip empties left behind by collapsed separators
        }
        k := mr.NewKey(w)
        kv[k] = 1
    }
    kv.Emit(out)
}
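// trimAllRune is called by the Map funcs above and below but defined
// elsewhere in this package; a plausible sketch of its contract (an
// assumption, not the verified helper) is to delete every occurrence of the
// given runes from the string:
//
//     func trimAllRune(s string, runes []rune) string {
//         for _, r := range runes {
//             s = strings.Replace(s, string(r), "", -1)
//         }
//         return s
//     }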
// Map emits a count of 1 for every ordered pair of terms that co-occur on a
// valid line.
func (this *FileWorker) Map(line string, out chan<- mr.KeyValue) {
    kv := mr.NewKeyValue()
    line = trimAllRune(line, []rune{'=', ':', '+', '.', '-'})
    line = strings.Trim(line, " ")
    if len(line) == 0 {
        return
    }
    terms := strings.Split(line, " ")
    for i, term := range terms {
        for j := i + 1; j < len(terms); j++ {
            coOccurrence := mr.NewKey(strings.TrimSpace(term), strings.TrimSpace(terms[j]))
            kv[coOccurrence] = 1
        }
    }
    kv.Emit(out)
}
func (this *KxiWorker) Map(line string, out chan<- mr.KeyValue) {
    var streamResult StreamResult
    if streamResult = this.Worker.streamedResult(line); streamResult.Empty() {
        return
    }

    type record struct {
        Url     string  `json:"u"`
        Rid     string  `json:"i"`
        Service string  `json:"s"`
        Time    float64 `json:"t"`
        Sql     string  `json:"q"`
    }
    rec := new(record)
    if err := streamResult.Decode(rec); err != nil {
        panic(err)
    }

    if this.manager.option.debug {
        fmt.Fprintf(os.Stderr, "DEBUG<= %s %s %s %f %s\n",
            rec.Url, rec.Rid, rec.Service, rec.Time, rec.Sql)
    }

    kv := mr.NewKeyValue()
    kg1 := mr.NewGroupKey(GROUP_URL_SERV, rec.Url, rec.Service)
    kg2 := mr.NewGroupKey(GROUP_URL_RID, rec.Url, rec.Rid)
    kg3 := mr.NewGroupKey(GROUP_KXI, rec.Service)
    kv[kg1] = rec.Time
    kv[kg2] = rec.Time
    kv[kg3] = rec.Time
    if rec.Sql != "" {
        kg4 := mr.NewGroupKey(GROUP_URL_SQL, rec.Url, rec.Sql)
        kv[kg4] = rec.Time
    }
    kg5 := mr.NewGroupKey(GROUP_URL, rec.Url)
    kv[kg5] = rec.Rid // key is url, val is rid(string)
    kv.Emit(out)
}
// The key is already sorted.
func (this *KxiWorker) Reduce(key interface{}, values []interface{}) (kv mr.KeyValue) {
    kv = mr.NewKeyValue()
    switch key.(mr.GroupKey).Group() {
    case GROUP_URL_SERV:
        vals := mr.ConvertAnySliceToFloat(values)
        kv[TIME_ALL] = stats.StatsSum(vals)
        kv[TIME_MAX] = stats.StatsMax(vals)
        kv[TIME_TOP] = stats.StatsSumTopN(vals, topsum)
        kv[TIME_AVG] = stats.StatsMean(vals)
        kv[TIME_STD] = stats.StatsSampleStandardDeviationCoefficient(vals)
        kv[CALL_ALL] = float64(stats.StatsCount(vals))
    case GROUP_KXI:
        vals := mr.ConvertAnySliceToFloat(values)
        kv[TIME_ALL] = stats.StatsSum(vals)
        kv[TIME_MIN] = stats.StatsMin(vals)
        kv[TIME_TOP] = stats.StatsSumTopN(vals, topsum)
        kv[TIME_MAX] = stats.StatsMax(vals)
        kv[TIME_AVG] = stats.StatsMean(vals)
        kv[TIME_STD] = stats.StatsSampleStandardDeviationCoefficient(vals)
        kv[CALL_ALL] = float64(stats.StatsCount(vals))
    case GROUP_URL_RID:
        vals := mr.ConvertAnySliceToFloat(values)
        kv[CALL_ALL] = float64(stats.StatsCount(vals))
        kv[TIME_ALL] = stats.StatsSum(vals)
    case GROUP_URL_SQL:
        vals := mr.ConvertAnySliceToFloat(values)
        kv[CALL_ALL] = float64(stats.StatsCount(vals))
        kv[TIME_MAX] = stats.StatsMax(vals)
        kv[TIME_AVG] = stats.StatsMean(vals)
    case GROUP_URL:
        vals := mr.ConvertAnySliceToString(values) // rids of this url
        c := stats.NewCounter(vals)
        kv[REQ_ALL] = float64(len(c))
    }
    return
}
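// stats.StatsSumTopN(vals, topsum) above is taken to mean "sum of the topsum
// largest samples"; a sketch of that assumed contract (illustrative only, not
// the stats package's actual code):
//
//     func StatsSumTopN(vals []float64, n int) float64 {
//         sorted := append([]float64(nil), vals...)
//         sort.Sort(sort.Reverse(sort.Float64Slice(sorted)))
//         if n > len(sorted) {
//             n = len(sorted)
//         }
//         return StatsSum(sorted[:n])
//     }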
func (this *NoopWorker) Map(line string, out chan<- mr.KeyValue) {
    out <- mr.NewKeyValue()
}
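// From the Map/Reduce signatures above, the contract these workers satisfy
// appears to be the following (inferred from this file, not copied from the
// mr package):
//
//     type Worker interface {
//         Map(line string, out chan<- mr.KeyValue)
//         Reduce(key interface{}, values []interface{}) mr.KeyValue
//     }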