Example #1
0
// TestFlattenFlatMap checks that Flatten visits every key of a map with no
// nesting and reports each value unchanged.
func TestFlattenFlatMap(t *testing.T) {
	input := data.Map{
		"abc": data.Float(123),
		"def": data.Float(456),
		"ghi": data.Float(789),
	}

	Convey("Given a flat data.Map", t, func() {
		Convey("when flatten it", func() {
			var got []*kv
			err := Flatten(input, func(key string, val float32) {
				got = append(got, &kv{key: key, value: val})
			})

			Convey("it should succeed.", func() {
				So(err, ShouldBeNil)

				Convey("and the flatten slice should be converted correctly.", func() {
					So(len(got), ShouldEqual, len(input))
					for _, entry := range got {
						want, found := input[entry.key]
						So(found, ShouldBeTrue)
						So(entry.value, ShouldEqual, want)
					}
				})
			})
		})
	})
}
Example #2
0
// Parse converts one comma-separated CSV line into a data.Map holding a
// "feature_vector" entry keyed with Jubatus-style feature names. Blank lines
// are skipped by returning perline.Pass; malformed lines yield an error that
// includes the source path and line number.
func (s *source) Parse(line string, lineNo int) (data.Map, error) {
	line = strings.TrimSpace(line)
	// strings.Split with a non-empty separator never returns an empty slice
	// (an empty input yields [""]), so the former `len(fields) == 0` check
	// was dead code; test the line itself to skip blank input.
	if line == "" {
		return nil, perline.Pass
	}
	fields := strings.Split(line, ",")
	if len(fields) != len(schema) {
		return nil, fmt.Errorf("invalid csv line at %s:%d", s.path, lineNo)
	}

	fv := data.Map{}
	for i := range schema {
		if schema[i].isString {
			if schema[i].name != "label" {
				// key names on Jubatus
				key := fmt.Sprintf("%s$%s@str#bin/bin", schema[i].name, fields[i])
				fv[key] = data.Float(1)
			}
		} else {
			x, err := strconv.ParseFloat(fields[i], 32)
			if err != nil {
				return nil, fmt.Errorf("invalid input at %s:%d", s.path, lineNo)
			}
			// Zero-valued features are omitted to keep the vector sparse.
			if x != 0 {
				// key names on Jubatus
				key := schema[i].name + "@num"
				fv[key] = data.Float(x)
			}
		}
	}
	return data.Map{
		"feature_vector": fv,
	}, nil
}
Example #3
0
// TestFlattenNestedArray checks that Flatten descends into nested data.Array
// values and reports exactly one entry per leaf float.
func TestFlattenNestedArray(t *testing.T) {
	input := data.Map{
		"a": data.Array{data.Array{}, data.Array{data.Float(123), data.Float(456), data.Float(789)}},
	}

	Convey("Given a flat data.Map having a data.Array", t, func() {
		Convey("when flatten it", func() {
			var got []*kv
			err := Flatten(input, func(key string, val float32) {
				got = append(got, &kv{key: key, value: val})
			})

			Convey("it should succeed.", func() {
				So(err, ShouldBeNil)

				Convey("and the flatten slice should be converted correctly.", func() {
					// Only the three leaf floats should be reported.
					So(len(got), ShouldEqual, 3)
				})
			})
		})
	})
}
Example #4
0
// TestFlattenNestedMap checks that Flatten joins nested map keys with a NUL
// byte, drops empty sub-maps, and handles deep nesting.
func TestFlattenNestedMap(t *testing.T) {
	input := data.Map{
		"a": data.Float(123),
		"b": data.Map{
			"c": data.Float(456),
			"d": data.Map{},
			"e": data.Float(789),
		},
		"f": data.Map{},
		"g": data.Float(1234),
		"h": data.Map{
			"i": data.Map{
				"j": data.Map{
					"k": data.Map{
						"l": data.Map{
							"m": data.Map{
								"n": data.Map{
									"o": data.Float(5678),
								},
							},
						},
					},
				},
			},
		},
	}
	// Expected flattened form: nested keys joined by "\x00", empty maps gone.
	want := data.Map{
		"a":      data.Float(123),
		"b\x00c": data.Float(456),
		"b\x00e": data.Float(789),
		"g":      data.Float(1234),
		"h\x00i\x00j\x00k\x00l\x00m\x00n\x00o": data.Float(5678),
	}

	Convey("Given a nested data.Map", t, func() {
		var got []*kv
		err := Flatten(input, func(key string, val float32) {
			got = append(got, &kv{key: key, value: val})
		})

		Convey("it should succeed.", func() {
			So(err, ShouldBeNil)

			Convey("and the flatten slice should be converted correctly.", func() {
				So(len(got), ShouldEqual, len(want))
				for _, entry := range got {
					expected, found := want[entry.key]
					So(found, ShouldBeTrue)
					So(entry.value, ShouldEqual, expected)
				}
			})
		})
	})
}
// TestPassiveAggressiveStateLoad trains a PassiveAggressiveState, saves it to
// a buffer, loads it back, and checks the restored state estimates the same
// value as the original.
func TestPassiveAggressiveStateLoad(t *testing.T) {
	ctx := core.NewContext(nil)
	creator := PassiveAggressiveStateCreator{}
	state, err := creator.CreateState(ctx, data.Map{
		"regularization_weight": data.Float(3.402823e+38),
		"sensitivity":           data.Float(0.1),
	})
	if err != nil {
		t.Fatal(err)
	}
	trained := state.(*PassiveAggressiveState)

	// Feed 100 training tuples so the saved model is non-trivial.
	for i := 0; i < 100; i++ {
		tup := &core.Tuple{
			Data: data.Map{
				"value": data.Float(i),
				"feature_vector": data.Map{
					"n": data.Int(i),
				},
			},
		}
		if err := trained.Write(ctx, tup); err != nil {
			t.Fatal(err)
		}
	}

	Convey("Given a trained PassiveAggressiveState", t, func() {
		Convey("when saving it", func() {
			buf := bytes.NewBuffer(nil)
			err := trained.Save(ctx, buf, data.Map{})

			Convey("it should succeed.", func() {
				So(err, ShouldBeNil)

				Convey("and the loaded state should be same.", func() {
					loaded, err := creator.LoadState(ctx, buf, data.Map{})
					So(err, ShouldBeNil)

					So(loaded, ShouldResemble, trained)

					fv := FeatureVector{
						"n": data.Int(123),
					}
					want, err := trained.pa.Estimate(fv)
					So(err, ShouldBeNil)
					got, err := loaded.(*PassiveAggressiveState).pa.Estimate(fv)
					So(err, ShouldBeNil)
					So(got, ShouldResemble, want)
				})
			})
		})
	})
}
Example #6
0
// Example demonstrates Softmax on a three-label score map; the outputs sum
// to 1 and preserve the score ordering.
func Example() {
	scores := data.Map{
		"labelA": data.Float(2.5),
		"labelB": data.Float(0.7),
		"labelC": data.Float(-1.2),
	}
	result, _ := Softmax(scores)
	// Print in a fixed order so the example output is deterministic.
	for _, label := range []string{"labelA", "labelB", "labelC"} {
		fmt.Printf("%s: %0.5f\n", label, toFloat(result[label]))
	}

	// Output:
	// labelA: 0.84032
	// labelB: 0.13890
	// labelC: 0.02078
}
Example #7
0
// WeightBinary creates a map having weights of each word. The weight is 1 if
// there's at least one word, or 0 otherwise. Because feature vectors created
// by this function is sparse, all values in resulting maps are 1. In other
// words, instead of having 0 as a value, a key doesn't exist for a word that
// is not in the given array.
func WeightBinary(a []string) data.Map {
	// Pre-size to the upper bound; duplicates simply overwrite with the same 1.
	weights := make(data.Map, len(a))
	for _, word := range a {
		weights[word] = data.Float(1)
	}
	return weights
}
Example #8
0
// unigram builds a character-level feature vector: each distinct rune of s
// becomes a key with weight 1.
func unigram(s string) FeatureVector {
	fv := FeatureVector{}
	for _, ch := range s {
		fv[string(ch)] = data.Float(1)
	}
	return fv
}
Example #9
0
// jubatus::core::classifier::linear_classifier::classify_with_scores
func (s model) scores(v fVectorForScores) LScores {
	scores := make(LScores)
	for l, w := range s {
		var score float32
		for _, x := range v {
			score += x.value * w[x.dim].Weight
		}
		scores[string(l)] = data.Float(score)
	}
	return scores
}
Example #10
0
// GenerateStream emits an endless stream of synthetic sensor tuples, one per
// s.interval, until w.Write fails. Each tuple carries a randomly chosen
// device ID, a monotonically increasing sequence number, the current
// sub-minute time in seconds, and fresh temperature/humidity readings.
func (s *SourceCreator) GenerateStream(ctx *core.Context, w core.Writer) error {
	device := new(Device)
	// Alternative 5-device configuration kept for reference:
	//	devName := []string{"dev1", "dev2", "dev3", "dev4", "dev5"}
	//	devProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05}
	devName := []string{"dev1", "dev2"}
	devProb := []float64{0.5, 0.5}
	// pickDev draws a device name according to the devProb distribution.
	pickDev := func() string {
		r := rand.Float64()
		for i, p := range devProb {
			if r < p {
				return devName[i]
			}
			r -= p
		}
		// Fallback in case floating-point rounding leaves r >= sum(devProb).
		return devName[len(devName)-1]
	}

	device.num = 0
	temp := &device.sensorData[0]
	humid := &device.sensorData[1]

	for {
		device.ID = pickDev()
		device.num++
		temp.MakeData("temp", 0, 30)
		humid.MakeData("humid", 0, 100)

		t := core.NewTuple(data.Map{
			"deviceID": data.String(device.ID),
			"num":      data.Int(device.num),
			"time":     data.Float(float64(time.Now().Second()) + float64(time.Now().Nanosecond())/1e+9),
			temp.ID:    data.Float(float64(temp.value)),
			humid.ID:   data.Float(float64(humid.value)),
		})
		if err := w.Write(ctx, t); err != nil {
			return err
		}
		time.Sleep(s.interval)
	}
}
Example #11
0
// TestAROWStateSaveLoad trains an AROWState over four labels, round-trips it
// through Save/LoadState, and checks the restored model classifies the same.
func TestAROWStateSaveLoad(t *testing.T) {
	ctx := core.NewContext(nil)
	creator := AROWStateCreator{}
	state, err := creator.CreateState(ctx, data.Map{
		"regularization_weight": data.Float(0.001),
	})
	if err != nil {
		t.Fatal(err)
	}
	trained := state.(*AROWState)

	labels := []data.String{"a", "b", "c", "d"}
	// Cycle through the labels to train a non-trivial classifier.
	for i := 0; i < 100; i++ {
		tup := &core.Tuple{
			Data: data.Map{
				"label": labels[i%len(labels)],
				"feature_vector": data.Map{
					"n": data.Int(i),
				},
			},
		}
		if err := trained.Write(ctx, tup); err != nil {
			t.Fatal(err)
		}
	}

	Convey("Given a trained AROWState", t, func() {
		Convey("when saving it", func() {
			buf := bytes.NewBuffer(nil)
			err := trained.Save(ctx, buf, data.Map{})

			Convey("it should succeed.", func() {
				So(err, ShouldBeNil)

				Convey("and the loaded state should be same.", func() {
					loaded, err := creator.LoadState(ctx, buf, data.Map{})
					So(err, ShouldBeNil)

					// Because AROW contains sync.RWMutex, this assertion may
					// fail if its implementation changes.
					So(loaded, ShouldResemble, trained)

					fv := FeatureVector(data.Map{"n": data.Int(10)})
					want, err := trained.arow.Classify(fv)
					So(err, ShouldBeNil)
					got, err := loaded.(*AROWState).arow.Classify(fv)
					So(err, ShouldBeNil)
					So(got, ShouldResemble, want)
				})
			})
		})
	})
}
Example #12
0
// WeightTF creates a map having a word as a key and its count (i.e. tf) as
// a value.
func WeightTF(a []string) data.Map {
	m := map[string]int{}
	for _, s := range a {
		c := m[s]
		m[s] = c + 1
	}

	res := data.Map{}
	for k, v := range m {
		res[k] = data.Float(v)
	}
	return res
}
Example #13
0
// WeightLogTF creates a map having a word as a key and its log(1 + tf) as
// a value. This function is useful when some words appear too much but
// binary weight isn't sufficient.
func WeightLogTF(a []string) data.Map {
	m := map[string]int{}
	for _, s := range a {
		c := m[s]
		m[s] = c + 1
	}

	res := data.Map{}
	for k, v := range m {
		res[k] = data.Float(math.Log(1 + float64(v)))
	}
	return res
}
Example #14
0
// Softmax calculates softmax.
func Softmax(v data.Map) (data.Map, error) {
	ret := make(data.Map)

	if len(v) == 0 {
		return ret, nil
	}

	// copy values to an array to sort in logSumExp().
	values, err := mapToValues(v)
	if err != nil {
		return nil, err
	}

	lse := logSumExp(values)
	for k, x := range v {
		val, err := data.AsFloat(x)
		if err != nil {
			return nil, err
		}
		ret[k] = data.Float(math.Exp(val - lse))
	}
	return ret, nil
}
Example #15
0
// Parse converts one libsvm-format line ("label key:value key:value ...")
// into a data.Map with "label" and "feature_vector" entries. Blank lines are
// skipped by returning perline.Pass; malformed fields yield an error that
// includes the source path and line number.
func (s *source) Parse(line string, lineNo int) (data.Map, error) {
	line = strings.TrimSpace(line)
	// strings.Split with a non-empty separator never returns an empty slice
	// (an empty input yields [""]), so the former `len(fields) == 0` check
	// was dead code; test the line itself to skip blank input.
	if line == "" {
		return nil, perline.Pass
	}
	fields := strings.Split(line, " ")
	label := fields[0]
	fv := make(data.Map)
	for _, field := range fields[1:] {
		ix := strings.Index(field, ":")
		if ix < 0 {
			return nil, fmt.Errorf("invalid libsvm format at %s:%d", s.path, lineNo)
		}
		v, err := strconv.ParseFloat(field[ix+1:], 32)
		if err != nil {
			return nil, fmt.Errorf("%v at %s:%d", err, s.path, lineNo)
		}
		// Scales values by 255 — presumably pixel intensities normalized to
		// [0, 1]; TODO(review): confirm against the data source.
		fv[field[:ix]] = data.Float(v) / 255
	}
	return data.Map{
		"label":          data.String(label),
		"feature_vector": fv,
	}, nil
}
Example #16
0
// GenerateStream reads the source file and writes tuples to w.
//
// In training mode it streams every CSV data line as a tuple holding the
// target "value" and a "feature_vector". Otherwise it parses one
// "name: value"-per-line record into a single feature vector and emits it as
// one tuple. Returns nil on EOF, or the first read/write error.
func (r *source) GenerateStream(ctx *core.Context, w core.Writer) error {
	numFieldNames := []string{
		"家賃(万円)",
		"駅からの徒歩時間 (分)",
		"専有面積 (m*m)",
		"築年数 (年)",
		"階数",
	}

	defer r.file.Close()
	if r.training {
		for {
			line, err := r.readLine()
			if err != nil {
				if err == io.EOF {
					return nil
				}
				return err
			}
			// Guard the empty string before indexing line[0]; the original
			// code panicked with "index out of range" on blank lines.
			if line == "" || line[0] == '#' {
				continue
			}
			fields := strings.Split(line, ", ")
			if len(fields) != 6 {
				// TODO(review): replace this placeholder panic with a
				// descriptive returned error.
				panic("hoge")
			}
			value, err := data.ToFloat(data.String(fields[0]))
			if err != nil {
				panic(err)
			}
			fv := make(data.Map)
			// Fields 1..4 are the numeric features named in numFieldNames.
			for i := 1; i < len(numFieldNames); i++ {
				x, err := data.ToFloat(data.String(fields[i]))
				if err != nil {
					panic(err)
				}
				fv[numFieldNames[i]] = data.Float(x)
			}
			// The last field is a categorical value encoded as a binary feature.
			fv[fields[len(fields)-1]] = data.Float(1)
			now := time.Now()
			t := &core.Tuple{
				Data: data.Map{
					"value":          data.Float(value),
					"feature_vector": fv,
				},
				Timestamp:     now,
				ProcTimestamp: now,
			}
			// Propagate write failures instead of silently dropping them.
			if err := w.Write(ctx, t); err != nil {
				return err
			}
		}
	} else {
		fv := make(data.Map)
		i := 1
		for {
			line, err := r.readLine()
			if err != nil {
				if err == io.EOF {
					return nil
				}
				return err
			}
			if line == "" || line[0] == '#' {
				continue
			}
			fields := strings.Split(line, ":")
			if len(fields) != 2 {
				// TODO(review): replace this placeholder panic with a
				// descriptive returned error.
				panic("hoge")
			}
			for i := range fields {
				fields[i] = strings.TrimSpace(fields[i])
			}
			// The first len(numFieldNames)-1 records are numeric features;
			// the final record must be the categorical "aspect".
			if i < len(numFieldNames) {
				x, err := data.ToFloat(data.String(fields[1]))
				if err != nil {
					panic(err)
				}
				fv[numFieldNames[i]] = data.Float(x)
				i++
			} else {
				if fields[0] != "aspect" {
					panic(fields)
				}
				aspect := strings.Trim(fields[1], "\"")
				fv[aspect] = data.Float(1)
				break
			}
		}
		now := time.Now()
		t := &core.Tuple{
			Data: data.Map{
				"feature_vector": fv,
			},
			Timestamp:     now,
			ProcTimestamp: now,
		}
		// Propagate write failures instead of silently dropping them.
		if err := w.Write(ctx, t); err != nil {
			return err
		}
	}

	return nil
}