func TestFlattenFlatMap(t *testing.T) {
    m := data.Map{
        "abc": data.Float(123),
        "def": data.Float(456),
        "ghi": data.Float(789),
    }

    Convey("Given a flat data.Map", t, func() {
        Convey("when flattening it", func() {
            a := []*kv{}
            err := Flatten(m, func(k string, x float32) {
                a = append(a, &kv{
                    key:   k,
                    value: x,
                })
            })

            Convey("it should succeed.", func() {
                So(err, ShouldBeNil)

                Convey("and the flattened slice should be converted correctly.", func() {
                    So(len(a), ShouldEqual, len(m))
                    for _, e := range a {
                        mValue, ok := m[e.key]
                        So(ok, ShouldBeTrue)
                        So(e.value, ShouldEqual, mValue)
                    }
                })
            })
        })
    })
}
func (s *source) Parse(line string, lineNo int) (data.Map, error) {
    line = strings.TrimSpace(line)
    if line == "" {
        // Skip blank lines. Note that strings.Split never returns an empty
        // slice for a non-empty separator, so the line itself has to be
        // checked before splitting.
        return nil, perline.Pass
    }
    fields := strings.Split(line, ",")
    if len(fields) != len(schema) {
        return nil, fmt.Errorf("invalid csv line at %s:%d", s.path, lineNo)
    }

    fv := data.Map{}
    for i := range schema {
        if schema[i].isString {
            if schema[i].name != "label" {
                // key names on Jubatus
                key := fmt.Sprintf("%s$%s@str#bin/bin", schema[i].name, fields[i])
                fv[key] = data.Float(1)
            }
        } else {
            x, err := strconv.ParseFloat(fields[i], 32)
            if err != nil {
                return nil, fmt.Errorf("invalid input at %s:%d", s.path, lineNo)
            }
            if x != 0 {
                // key names on Jubatus
                key := schema[i].name + "@num"
                fv[key] = data.Float(x)
            }
        }
    }
    return data.Map{
        "feature_vector": fv,
    }, nil
}
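// For illustration (hypothetical schema and values): a string column named
// "color" with the field "red" yields the key "color$red@str#bin/bin" with
// value 1, and a numeric column named "age" with the field "42" yields
// "age@num" with value 42. The "$value@str#bin/bin" and "@num" suffixes are
// the Jubatus feature-key naming convention the comments above refer to.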
func TestFlattenNestedArray(t *testing.T) {
    m := data.Map{
        "a": data.Array{data.Array{}, data.Array{data.Float(123), data.Float(456), data.Float(789)}},
    }

    Convey("Given a flat data.Map having a data.Array", t, func() {
        Convey("when flattening it", func() {
            a := []*kv{}
            err := Flatten(m, func(k string, x float32) {
                a = append(a, &kv{
                    key:   k,
                    value: x,
                })
            })

            Convey("it should succeed.", func() {
                So(err, ShouldBeNil)

                Convey("and the flattened slice should be converted correctly.", func() {
                    So(len(a), ShouldEqual, 3)
                })
            })
        })
    })
}
func TestFlattenNestedMap(t *testing.T) {
    m := data.Map{
        "a": data.Float(123),
        "b": data.Map{
            "c": data.Float(456),
            "d": data.Map{},
            "e": data.Float(789),
        },
        "f": data.Map{},
        "g": data.Float(1234),
        "h": data.Map{
            "i": data.Map{
                "j": data.Map{
                    "k": data.Map{
                        "l": data.Map{
                            "m": data.Map{
                                "n": data.Map{
                                    "o": data.Float(5678),
                                },
                            },
                        },
                    },
                },
            },
        },
    }
    // Keys of nested maps are joined with a NUL byte, and empty maps
    // contribute no keys at all.
    flattenM := data.Map{
        "a":      data.Float(123),
        "b\x00c": data.Float(456),
        "b\x00e": data.Float(789),
        "g":      data.Float(1234),
        "h\x00i\x00j\x00k\x00l\x00m\x00n\x00o": data.Float(5678),
    }

    Convey("Given a nested data.Map", t, func() {
        a := []*kv{}
        err := Flatten(m, func(k string, x float32) {
            a = append(a, &kv{
                key:   k,
                value: x,
            })
        })

        Convey("it should succeed.", func() {
            So(err, ShouldBeNil)

            Convey("and the flattened slice should be converted correctly.", func() {
                So(len(a), ShouldEqual, len(flattenM))
                for _, e := range a {
                    mValue, ok := flattenM[e.key]
                    So(ok, ShouldBeTrue)
                    So(e.value, ShouldEqual, mValue)
                }
            })
        })
    })
}
func TestPassiveAggressiveStateLoad(t *testing.T) {
    ctx := core.NewContext(nil)
    c := PassiveAggressiveStateCreator{}
    pas, err := c.CreateState(ctx, data.Map{
        "regularization_weight": data.Float(3.402823e+38), // ~math.MaxFloat32
        "sensitivity":           data.Float(0.1),
    })
    if err != nil {
        t.Fatal(err)
    }
    pa := pas.(*PassiveAggressiveState)

    for i := 0; i < 100; i++ {
        if err := pa.Write(ctx, &core.Tuple{
            Data: data.Map{
                "value": data.Float(i),
                "feature_vector": data.Map{
                    "n": data.Int(i),
                },
            },
        }); err != nil {
            t.Fatal(err)
        }
    }

    Convey("Given a trained PassiveAggressiveState", t, func() {
        Convey("when saving it", func() {
            buf := bytes.NewBuffer(nil)
            err := pa.Save(ctx, buf, data.Map{})

            Convey("it should succeed.", func() {
                So(err, ShouldBeNil)

                Convey("and the loaded state should be the same.", func() {
                    pa2, err := c.LoadState(ctx, buf, data.Map{})
                    So(err, ShouldBeNil)
                    So(pa2, ShouldResemble, pa)

                    fv := FeatureVector{
                        "n": data.Int(123),
                    }
                    v, err := pa.pa.Estimate(fv)
                    So(err, ShouldBeNil)
                    v2, err := pa2.(*PassiveAggressiveState).pa.Estimate(fv)
                    So(err, ShouldBeNil)
                    So(v2, ShouldResemble, v)
                })
            })
        })
    })
}
func Example() {
    v := data.Map{
        "labelA": data.Float(2.5),
        "labelB": data.Float(0.7),
        "labelC": data.Float(-1.2),
    }
    s, _ := Softmax(v)
    fmt.Printf("labelA: %0.5f\n", toFloat(s["labelA"]))
    fmt.Printf("labelB: %0.5f\n", toFloat(s["labelB"]))
    fmt.Printf("labelC: %0.5f\n", toFloat(s["labelC"]))
    // Output:
    // labelA: 0.84032
    // labelB: 0.13890
    // labelC: 0.02078
}
// WeightBinary creates a map having the weight of each word. The weight is 1
// if the word appears at least once, and 0 otherwise. Because feature vectors
// created by this function are sparse, all values in the resulting map are 1.
// In other words, instead of having 0 as a value, a key simply doesn't exist
// for a word that is not in the given array.
func WeightBinary(a []string) data.Map {
    res := data.Map{}
    for _, s := range a {
        res[s] = data.Float(1)
    }
    return res
}
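// Usage sketch (hypothetical input):
//
//	m := WeightBinary([]string{"go", "go", "gopher"})
//	// m == data.Map{"go": data.Float(1), "gopher": data.Float(1)}
//	// "go" appears twice but still weighs 1, and absent words have no key.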
// unigram builds a feature vector mapping each rune of s, as a string, to 1.
func unigram(s string) FeatureVector {
    fv := make(FeatureVector)
    for _, r := range s {
        fv[string(r)] = data.Float(1)
    }
    return fv
}
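// For example (hypothetical input), unigram("日本") yields
// FeatureVector{"日": data.Float(1), "本": data.Float(1)}: ranging over a
// string iterates runes, not bytes, so multi-byte characters form single keys.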
// scores computes, for each label, the inner product of the feature vector
// with that label's weight vector.
//
// jubatus::core::classifier::linear_classifier::classify_with_scores
func (s model) scores(v fVectorForScores) LScores {
    scores := make(LScores)
    for l, w := range s {
        var score float32
        for _, x := range v {
            score += x.value * w[x.dim].Weight
        }
        scores[string(l)] = data.Float(score)
    }
    return scores
}
func (s *SourceCreator) GenerateStream(ctx *core.Context, w core.Writer) error {
    device := new(Device)
    // devName := []string{"dev1", "dev2", "dev3", "dev4", "dev5"}
    // devProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05}
    devName := []string{"dev1", "dev2"}
    devProb := []float64{0.5, 0.5}

    // pickDev samples a device name according to the probabilities in
    // devProb by walking the cumulative distribution.
    pickDev := func() string {
        r := rand.Float64()
        for i, p := range devProb {
            if r < p {
                return devName[i]
            }
            r -= p
        }
        return devName[len(devName)-1]
    }

    // device.MakeDevice(pickDev())
    device.num = 0
    temp := &device.sensorData[0]
    humid := &device.sensorData[1]
    for {
        device.ID = pickDev()
        device.num++
        temp.MakeData("temp", 0, 30)
        humid.MakeData("humid", 0, 100)
        t := core.NewTuple(data.Map{
            "deviceID": data.String(device.ID),
            "num":      data.Int(device.num),
            "time":     data.Float(float64(time.Now().Second()) + float64(time.Now().Nanosecond())/1e+9),
            temp.ID:    data.Float(float64(temp.value)),
            humid.ID:   data.Float(float64(humid.value)),
        })
        if err := w.Write(ctx, t); err != nil {
            return err
        }
        time.Sleep(s.interval)
    }
}
func TestAROWStateSaveLoad(t *testing.T) {
    ctx := core.NewContext(nil)
    c := AROWStateCreator{}
    as, err := c.CreateState(ctx, data.Map{
        "regularization_weight": data.Float(0.001),
    })
    if err != nil {
        t.Fatal(err)
    }
    a := as.(*AROWState)

    labels := []data.String{"a", "b", "c", "d"}
    for i := 0; i < 100; i++ {
        if err := a.Write(ctx, &core.Tuple{
            Data: data.Map{
                "label": labels[i%len(labels)],
                "feature_vector": data.Map{
                    "n": data.Int(i),
                },
            },
        }); err != nil {
            t.Fatal(err)
        }
    }

    Convey("Given a trained AROWState", t, func() {
        Convey("when saving it", func() {
            buf := bytes.NewBuffer(nil)
            err := a.Save(ctx, buf, data.Map{})

            Convey("it should succeed.", func() {
                So(err, ShouldBeNil)

                Convey("and the loaded state should be the same.", func() {
                    a2, err := c.LoadState(ctx, buf, data.Map{})
                    So(err, ShouldBeNil)
                    // Because AROW contains sync.RWMutex, this assertion may
                    // fail if its implementation changes.
                    So(a2, ShouldResemble, a)

                    fv := FeatureVector(data.Map{"n": data.Int(10)})
                    s, err := a.arow.Classify(fv)
                    So(err, ShouldBeNil)
                    s2, err := a2.(*AROWState).arow.Classify(fv)
                    So(err, ShouldBeNil)
                    So(s2, ShouldResemble, s)
                })
            })
        })
    })
}
// WeightTF creates a map having a word as a key and its count (i.e. tf) as
// a value.
func WeightTF(a []string) data.Map {
    m := map[string]int{}
    for _, s := range a {
        m[s]++
    }
    res := data.Map{}
    for k, v := range m {
        res[k] = data.Float(v)
    }
    return res
}
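// Usage sketch (hypothetical input):
//
//	m := WeightTF([]string{"a", "b", "a"})
//	// m == data.Map{"a": data.Float(2), "b": data.Float(1)}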
// WeightLogTF creates a map having a word as a key and its log(1 + tf) as
// a value. This function is useful when some words appear too often for raw
// counts to work well, but a binary weight isn't sufficient either.
func WeightLogTF(a []string) data.Map {
    m := map[string]int{}
    for _, s := range a {
        m[s]++
    }
    res := data.Map{}
    for k, v := range m {
        res[k] = data.Float(math.Log(1 + float64(v)))
    }
    return res
}
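// Usage sketch (hypothetical input):
//
//	m := WeightLogTF([]string{"a", "a", "a"})
//	// m == data.Map{"a": data.Float(math.Log(4))} // ≈ 1.38629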
// Softmax applies the softmax function to the values of v: each value x is
// replaced with exp(x) divided by the sum of exp over all values. Computing
// exp(x - logSumExp) instead of dividing avoids overflow.
func Softmax(v data.Map) (data.Map, error) {
    ret := make(data.Map)
    if len(v) == 0 {
        return ret, nil
    }

    // copy values to an array to sort in logSumExp().
    values, err := mapToValues(v)
    if err != nil {
        return nil, err
    }

    lse := logSumExp(values)
    for k, x := range v {
        val, err := data.AsFloat(x)
        if err != nil {
            return nil, err
        }
        ret[k] = data.Float(math.Exp(val - lse))
    }
    return ret, nil
}
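// A minimal sketch of what a logSumExp helper computes (the actual helper
// used above isn't shown here and may differ; the comment in Softmax suggests
// it sorts the values, presumably to sum them from smallest to largest for
// accuracy). Subtracting the maximum before exponentiating keeps math.Exp
// from overflowing. It assumes a non-empty slice, which Softmax guarantees by
// returning early for empty maps.
func logSumExpSketch(values []float64) float64 {
    max := values[0]
    for _, v := range values[1:] {
        if v > max {
            max = v
        }
    }
    sum := 0.0
    for _, v := range values {
        sum += math.Exp(v - max)
    }
    // log(Σ exp(v)) == max + log(Σ exp(v - max))
    return max + math.Log(sum)
}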
func (s *source) Parse(line string, lineNo int) (data.Map, error) {
    line = strings.TrimSpace(line)
    if line == "" {
        // Skip blank lines. strings.Split never returns an empty slice for a
        // non-empty separator, so the line itself has to be checked.
        return nil, perline.Pass
    }
    fields := strings.Split(line, " ")
    label := fields[0]
    fv := make(data.Map)
    for _, field := range fields[1:] {
        ix := strings.Index(field, ":")
        if ix < 0 {
            return nil, fmt.Errorf("invalid libsvm format at %s:%d", s.path, lineNo)
        }
        v, err := strconv.ParseFloat(field[ix+1:], 32)
        if err != nil {
            return nil, fmt.Errorf("%v at %s:%d", err, s.path, lineNo)
        }
        fv[field[:ix]] = data.Float(v) / 255
    }
    return data.Map{
        "label":          data.String(label),
        "feature_vector": fv,
    }, nil
}
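// For example, the libsvm-style line "5 12:128 301:255" (hypothetical values)
// parses to the label "5" and the feature vector {"12": 128.0/255, "301": 1}:
// each index:value pair becomes a key, with values scaled into [0, 1] by the
// division by 255, which suggests this source is meant for 8-bit pixel data
// such as MNIST.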
func (r *source) GenerateStream(ctx *core.Context, w core.Writer) error {
    // The feature names are Japanese: rent (10k yen), walking time from the
    // station (min), floor area (m*m), age of the building (years), and the
    // number of floors. They're kept as-is because they become feature keys.
    numFieldNames := []string{
        "家賃(万円)",
        "駅からの徒歩時間 (分)",
        "専有面積 (m*m)",
        "築年数 (年)",
        "階数",
    }
    defer r.file.Close()

    if r.training {
        for {
            line, err := r.readLine()
            if err != nil {
                if err == io.EOF {
                    return nil
                }
                return err
            }
            if line == "" || line[0] == '#' {
                continue
            }
            fields := strings.Split(line, ", ")
            if len(fields) != 6 {
                panic("invalid number of fields in a training line: " + line)
            }
            value, err := data.ToFloat(data.String(fields[0]))
            if err != nil {
                panic(err)
            }
            fv := make(data.Map)
            for i := 1; i < len(numFieldNames); i++ {
                x, err := data.ToFloat(data.String(fields[i]))
                if err != nil {
                    panic(err)
                }
                fv[numFieldNames[i]] = data.Float(x)
            }
            fv[fields[len(fields)-1]] = data.Float(1)
            now := time.Now()
            if err := w.Write(ctx, &core.Tuple{
                Data: data.Map{
                    "value":          data.Float(value),
                    "feature_vector": fv,
                },
                Timestamp:     now,
                ProcTimestamp: now,
            }); err != nil {
                return err
            }
        }
    }

    // Prediction mode: read "key: value" lines until the aspect is found,
    // then emit a single tuple with the assembled feature vector.
    fv := make(data.Map)
    i := 1
    for {
        line, err := r.readLine()
        if err != nil {
            if err == io.EOF {
                return nil
            }
            return err
        }
        if line == "" || line[0] == '#' {
            continue
        }
        fields := strings.Split(line, ":")
        if len(fields) != 2 {
            panic("invalid key-value line: " + line)
        }
        for j := range fields {
            fields[j] = strings.TrimSpace(fields[j])
        }
        if i < len(numFieldNames) {
            x, err := data.ToFloat(data.String(fields[1]))
            if err != nil {
                panic(err)
            }
            fv[numFieldNames[i]] = data.Float(x)
            i++
        } else {
            if fields[0] != "aspect" {
                panic("unexpected field: " + fields[0])
            }
            aspect := strings.Trim(fields[1], "\"")
            fv[aspect] = data.Float(1)
            break
        }
    }
    now := time.Now()
    if err := w.Write(ctx, &core.Tuple{
        Data: data.Map{
            "feature_vector": fv,
        },
        Timestamp:     now,
        ProcTimestamp: now,
    }); err != nil {
        return err
    }
    return nil
}
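// The expected inputs, with hypothetical values: a training line is six
// comma-separated fields,
//
//	12.3, 10, 40.5, 15, 2, 南向き
//
// i.e. the rent, the four remaining numeric features in numFieldNames order,
// and finally the aspect. Prediction input is one "key: value" pair per line;
// the first four keys are not checked (their values are taken in
// numFieldNames order), and the final line must be `aspect: "..."`.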