Exemple #1
0
// Reduce converts values with mr.ConvertAnySliceToFloat, feeds them to
// stats.StatsSum and, when the result is greater than zero, returns a
// KeyValue holding that result under NIL_KEY. In every other case the
// returned KeyValue is left at its zero value. The key is not inspected.
func (this *FileWorker) Reduce(key interface{}, values []interface{}) (kv mr.KeyValue) {
	const threshold = 0

	counts := mr.ConvertAnySliceToFloat(values)
	total := stats.StatsSum(counts)
	if !(total > threshold) {
		// Expressed as a negated ">" so that every result that is not
		// strictly greater than the threshold ends up here.
		return kv
	}

	kv = mr.NewKeyValue()
	kv[NIL_KEY] = total
	return kv
}
Exemple #2
0
// Map lets an amfRequest parse the line, then emits one KeyValue in which
// the key built from the request's class, method and uri is mapped to 1.
func (this *AmfWorker) Map(line string, out chan<- mr.KeyValue) {
	var req amfRequest
	req.parseLine(line)

	kv := mr.NewKeyValue()
	kv[mr.NewKey(req.class, req.method, req.uri)] = 1
	kv.Emit(out)
}
Exemple #3
0
// Reduce ignores the key and looks only at the values: they are converted
// with mr.ConvertAnySliceToFloat and handed to stats.StatsSum. When the
// result is greater than 100 it is returned under NIL_KEY; otherwise the
// returned KeyValue is left at its zero value.
func (this *UniWorker) Reduce(key interface{}, values []interface{}) (kv mr.KeyValue) {
	// Results that do not exceed this bound are not reported.
	const lowerBound = 100

	floats := mr.ConvertAnySliceToFloat(values)
	sum := stats.StatsSum(floats)
	if !(sum > lowerBound) {
		// Negated ">" on purpose: anything that is not strictly greater
		// than the bound is dropped.
		return kv
	}

	kv = mr.NewKeyValue()
	kv[NIL_KEY] = sum
	return kv
}
Exemple #4
0
// Map tokenizes one input line and emits a KeyValue in which the key built
// from each token is mapped to 1.
//
// The line is prepared in this order: its last byte is dropped, the first
// two "+" are replaced by spaces, every match of the package-level pattern
// re is removed, and the result is passed through trimAllRune together with
// the runes > ~ ; . - *. The prepared text is then split on single spaces,
// so consecutive spaces yield empty tokens, which are emitted as well.
//
// An empty line produces no output.
func (this *UniWorker) Map(line string, out chan<- mr.KeyValue) {
	if len(line) == 0 {
		// Nothing to tokenize, and slicing off the last byte below would
		// panic on an empty string.
		return
	}

	// Drop the final byte (presumably the line terminator; confirm with the caller).
	line = line[:len(line)-1]
	line = strings.Replace(line, "+", " ", 2)
	// NOTE(review): assumes re is a *regexp.Regexp, so the string variant
	// can replace the former []byte round trip.
	line = re.ReplaceAllString(line, "")
	line = trimAllRune(line, []rune{'>', '~', ';', '.', '-', '*'})

	kv := mr.NewKeyValue()
	for _, w := range strings.Split(line, " ") {
		kv[mr.NewKey(w)] = 1
	}

	kv.Emit(out)
}
Exemple #5
0
// Map emits one KeyValue describing which terms occur together on a line.
//
// The line is passed through trimAllRune with the runes = : + . - and then
// trimmed of surrounding spaces; if nothing is left, Map returns without
// emitting. Otherwise the text is split on single spaces, every term is
// whitespace-trimmed, and for each pair of terms where the first one appears
// earlier on the line the key built from the two terms is mapped to 1.
func (this *FileWorker) Map(line string, out chan<- mr.KeyValue) {
	cleaned := trimAllRune(line, []rune{'=', ':', '+', '.', '-'})
	cleaned = strings.Trim(cleaned, "  ")
	if cleaned == "" {
		return
	}

	// Trim every term once up front so the pairing loop works on final values.
	terms := strings.Split(cleaned, " ")
	for i := range terms {
		terms[i] = strings.TrimSpace(terms[i])
	}

	kv := mr.NewKeyValue()
	for i := 0; i < len(terms); i++ {
		for _, later := range terms[i+1:] {
			kv[mr.NewKey(terms[i], later)] = 1
		}
	}

	kv.Emit(out)
}
Exemple #6
0
// Map turns one input line into a KeyValue of grouped measurements.
//
// The line is first handed to this.Worker.streamedResult; an empty result
// ends the call without emitting anything. The result is then decoded into a
// record (url, request id, service, time, sql). A decode error panics.
//
// The record's time is stored under the group keys GROUP_URL_SERV
// (url, service), GROUP_URL_RID (url, rid), GROUP_KXI (service) and, only
// when the sql field is non-empty, GROUP_URL_SQL (url, sql). The GROUP_URL
// (url) entry is different: its value is the request id string, not the time.
func (this *KxiWorker) Map(line string, out chan<- mr.KeyValue) {
	var result StreamResult = this.Worker.streamedResult(line)
	if result.Empty() {
		return
	}

	type record struct {
		Url     string  `json:"u"`
		Rid     string  `json:"i"`
		Service string  `json:"s"`
		Time    float64 `json:"t"`
		Sql     string  `json:"q"`
	}
	var rec record
	if err := result.Decode(&rec); err != nil {
		panic(err)
	}

	if this.manager.option.debug {
		fmt.Fprintf(os.Stderr, "DEBUG<= %s %s %s %f %s\n",
			rec.Url, rec.Rid, rec.Service, rec.Time, rec.Sql)
	}

	kv := mr.NewKeyValue()

	// Timing entries: the value is always the record's time.
	kv[mr.NewGroupKey(GROUP_URL_SERV, rec.Url, rec.Service)] = rec.Time
	kv[mr.NewGroupKey(GROUP_URL_RID, rec.Url, rec.Rid)] = rec.Time
	kv[mr.NewGroupKey(GROUP_KXI, rec.Service)] = rec.Time
	if rec.Sql != "" {
		kv[mr.NewGroupKey(GROUP_URL_SQL, rec.Url, rec.Sql)] = rec.Time
	}

	// Request entry: keyed by url, the value is the request id (a string).
	kv[mr.NewGroupKey(GROUP_URL, rec.Url)] = rec.Rid

	kv.Emit(out)
}
Exemple #7
0
// Reduce fills a KeyValue with statistics chosen by the group of the key.
//
// key must hold an mr.GroupKey; the type assertion panics otherwise. For the
// timing groups the values are converted with mr.ConvertAnySliceToFloat, for
// GROUP_URL with mr.ConvertAnySliceToString. A group that matches none of the
// cases yields the KeyValue exactly as mr.NewKeyValue returned it.
//
// Note carried over from the original author: the key is already sorted.
func (this *KxiWorker) Reduce(key interface{}, values []interface{}) (kv mr.KeyValue) {
	kv = mr.NewKeyValue()

	group := key.(mr.GroupKey).Group()
	switch group {
	case GROUP_URL_SERV:
		times := mr.ConvertAnySliceToFloat(values)
		kv[TIME_ALL] = stats.StatsSum(times)
		kv[TIME_MAX] = stats.StatsMax(times)
		kv[TIME_TOP] = stats.StatsSumTopN(times, topsum)
		kv[TIME_AVG] = stats.StatsMean(times)
		kv[TIME_STD] = stats.StatsSampleStandardDeviationCoefficient(times)
		kv[CALL_ALL] = float64(stats.StatsCount(times))

	case GROUP_KXI:
		// Same figures as GROUP_URL_SERV, plus the minimum.
		times := mr.ConvertAnySliceToFloat(values)
		kv[TIME_ALL] = stats.StatsSum(times)
		kv[TIME_MIN] = stats.StatsMin(times)
		kv[TIME_TOP] = stats.StatsSumTopN(times, topsum)
		kv[TIME_MAX] = stats.StatsMax(times)
		kv[TIME_AVG] = stats.StatsMean(times)
		kv[TIME_STD] = stats.StatsSampleStandardDeviationCoefficient(times)
		kv[CALL_ALL] = float64(stats.StatsCount(times))

	case GROUP_URL_RID:
		times := mr.ConvertAnySliceToFloat(values)
		kv[CALL_ALL] = float64(stats.StatsCount(times))
		kv[TIME_ALL] = stats.StatsSum(times)

	case GROUP_URL_SQL:
		times := mr.ConvertAnySliceToFloat(values)
		kv[CALL_ALL] = float64(stats.StatsCount(times))
		kv[TIME_MAX] = stats.StatsMax(times)
		kv[TIME_AVG] = stats.StatsMean(times)

	case GROUP_URL:
		// The values here are the request ids seen for this url.
		rids := mr.ConvertAnySliceToString(values)
		kv[REQ_ALL] = float64(len(stats.NewCounter(rids)))
	}

	return kv
}
Exemple #8
0
// Map does not look at the line; it sends the result of a single
// mr.NewKeyValue call on out.
func (this *NoopWorker) Map(line string, out chan<- mr.KeyValue) {
	placeholder := mr.NewKeyValue()
	out <- placeholder
}