func (s *source) Emit(rset []fluentd_forwarder.FluentRecordSet) error { now := time.Now().UTC() for _, rs := range rset { for _, r := range rs.Records { t := &core.Tuple{ ProcTimestamp: now, Timestamp: time.Unix(int64(r.Timestamp), 0), } m, err := data.NewMap(r.Data) if err != nil { s.ctx.ErrLog(err).WithFields(logrus.Fields{ "source_type": s.ioParams.TypeName, "source_name": s.ioParams.Name, "data": r.Data, }).Error("Cannot create a data.Map from the data") continue } m[s.tagField] = data.String(rs.Tag) t.Data = m if err := s.w.Write(s.ctx, t); err != nil { s.ctx.ErrLog(err).WithFields(logrus.Fields{ "source_type": s.ioParams.TypeName, "source_name": s.ioParams.Name, }).Error("Cannot write a tuple") } } } return nil }
func TestLightLOFStaateSaveLoad(t *testing.T) { ctx := core.NewContext(nil) c := LightLOFStateCreator{} ls, err := c.CreateState(ctx, data.Map{ "nearest_neighbor_algorithm": data.String("minhash"), "hash_num": data.Int(64), "nearest_neighbor_num": data.Int(10), "reverse_nearest_neighbor_num": data.Int(30), }) if err != nil { t.Fatal(err) } l := ls.(*lightLOFState) for i := 0; i < 100; i++ { if err := l.Write(ctx, &core.Tuple{ Data: data.Map{ "feature_vector": data.Map{ "n": data.Int(i), }, }, }); err != nil { t.Fatal(err) } } Convey("Given a trained LightLOFState", t, func() { Convey("when saving it", func() { buf := bytes.NewBuffer(nil) err := l.Save(ctx, buf, data.Map{}) Convey("it should succeed.", func() { So(err, ShouldBeNil) Convey("and the loaded state should be same.", func() { l2, err := c.LoadState(ctx, buf, data.Map{}) So(err, ShouldBeNil) m := l.lightLOF m2 := l2.(*lightLOFState).lightLOF So(m2.nn, ShouldResemble, m.nn) So(m2.nnNum, ShouldEqual, m.nnNum) So(m2.rnnNum, ShouldEqual, m.rnnNum) So(m2.kdists, ShouldResemble, m.kdists) So(m2.lrds, ShouldResemble, m.lrds) So(m2.maxSize, ShouldEqual, m.maxSize) So(m2.rg, ShouldNotBeNil) fv := FeatureVector(data.Map{"n": data.Int(10)}) s, err := l.lightLOF.CalcScore(fv) So(err, ShouldBeNil) s2, err := l2.(*lightLOFState).lightLOF.CalcScore(fv) So(err, ShouldBeNil) So(s2, ShouldResemble, s) }) }) }) }) }
// GenerateStream generates a tuple having random sentences in its field with // information of a user. func (s *Sentences) GenerateStream(ctx *core.Context, w core.Writer) error { corpus := strings.Split(strings.Replace(`lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna aliqua ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum`, "\n", " ", -1), " ") users := []string{"isabella", "jacob", "sophia", "ethan", "emma"} usersProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05} pickName := func() string { r := rand.Float64() for i, p := range usersProb { if r < p { return users[i] } r -= p } return users[len(users)-1] } for { l := rand.Intn(5) + 5 text := make([]string, l) l-- for ; l >= 0; l-- { text[l] = corpus[rand.Intn(len(corpus))] } t := core.NewTuple(data.Map{ "name": data.String(pickName()), "text": data.String(strings.Join(text, " ")), }) if err := w.Write(ctx, t); err != nil { return err } time.Sleep(s.interval) } }
func (l *LoremSource) GenerateStream(ctx *core.Context, w core.Writer) error { for { var text []string for l := rand.Intn(5) + 5; l > 0; l-- { text = append(text, Lorem[rand.Intn(len(Lorem))]) } t := core.NewTuple(data.Map{ "text": data.String(strings.Join(text, " ")), }) if err := w.Write(ctx, t); err != nil { return err } time.Sleep(l.interval) } }
// Processe implements udf.UDSF.Process. It tokenizes a field of tuples. func (t *Tokenizer) Process(ctx *core.Context, tuple *core.Tuple, w core.Writer) error { var kwd []string if v, ok := tuple.Data[t.field]; !ok { return fmt.Errorf("the tuple doesn't have the required field: %v", t.field) } else if s, err := data.AsString(v); err != nil { return fmt.Errorf("'%v' field must be string: %v", t.field, err) } else { kwd = strings.Split(s, " ") } for _, k := range kwd { out := tuple.Copy() out.Data[t.field] = data.String(k) if err := w.Write(ctx, out); err != nil { return err } } return nil }
func (s *SourceCreator) GenerateStream(ctx *core.Context, w core.Writer) error { device := new(Device) // devName := []string{"dev1", "dev2", "dev3", "dev4", "dev5"} // devProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05} devName := []string{"dev1", "dev2"} devProb := []float64{0.5, 0.5} pickDev := func() string { r := rand.Float64() for i, p := range devProb { if r < p { return devName[i] } r -= p } return devName[len(devName)-1] } // device.MakeDevice(pickDev()) device.num = 0 temp := &device.sensorData[0] humid := &device.sensorData[1] for { device.ID = pickDev() device.num += 1 temp.MakeData("temp", 0, 30) humid.MakeData("humid", 0, 100) t := core.NewTuple(data.Map{ "deviceID": data.String(device.ID), "num": data.Int(device.num), "time": data.Float(float64(time.Now().Second()) + float64(time.Now().Nanosecond())/1e+9), temp.ID: data.Float(float64(temp.value)), humid.ID: data.Float(float64(humid.value)), }) if err := w.Write(ctx, t); err != nil { return err } time.Sleep(s.interval) } }
func (s *source) Parse(line string, lineNo int) (data.Map, error) { line = strings.TrimSpace(line) fields := strings.Split(line, " ") if len(fields) == 0 { return nil, perline.Pass } label := fields[0] fv := make(data.Map) for _, field := range fields[1:] { ix := strings.Index(field, ":") if ix < 0 { return nil, fmt.Errorf("invalid libsvm format at %s:%d", s.path, lineNo) } v, err := strconv.ParseFloat(field[ix+1:], 32) if err != nil { return nil, fmt.Errorf("%v at %s:%d", err, s.path, lineNo) } fv[field[:ix]] = data.Float(v) / 255 } return data.Map{ "label": data.String(label), "feature_vector": fv, }, nil }
func (r *source) GenerateStream(ctx *core.Context, w core.Writer) error { numFieldNames := []string{ "家賃(万円)", "駅からの徒歩時間 (分)", "専有面積 (m*m)", "築年数 (年)", "階数", } defer r.file.Close() if r.training { for { line, err := r.readLine() if err != nil { if err == io.EOF { return nil } return err } if line[0] == '#' { continue } fields := strings.Split(line, ", ") if len(fields) != 6 { panic("hoge") } value, err := data.ToFloat(data.String(fields[0])) if err != nil { panic(err) } fv := make(data.Map) for i := 1; i < len(numFieldNames); i++ { x, err := data.ToFloat(data.String(fields[i])) if err != nil { panic(err) } fv[numFieldNames[i]] = data.Float(x) } fv[fields[len(fields)-1]] = data.Float(1) now := time.Now() w.Write(ctx, &core.Tuple{ Data: data.Map{ "value": data.Float(value), "feature_vector": fv, }, Timestamp: now, ProcTimestamp: now, }) } } else { fv := make(data.Map) i := 1 for { line, err := r.readLine() if err != nil { if err == io.EOF { return nil } return err } if line == "" || line[0] == '#' { continue } fields := strings.Split(line, ":") if len(fields) != 2 { panic("hoge") } for i := range fields { fields[i] = strings.TrimSpace(fields[i]) } if i < len(numFieldNames) { x, err := data.ToFloat(data.String(fields[1])) if err != nil { panic(err) } fv[numFieldNames[i]] = data.Float(x) i++ } else { if fields[0] != "aspect" { panic(fields) } aspect := strings.Trim(fields[1], "\"") fv[aspect] = data.Float(1) break } } now := time.Now() w.Write(ctx, &core.Tuple{ Data: data.Map{ "feature_vector": fv, }, Timestamp: now, ProcTimestamp: now, }) } return nil }
func TestKeyFile(t *testing.T) { tempPath := func() string { f, err := ioutil.TempFile("", "twitter-plugin-test-key-file") if err != nil { t.Fatal("Cannot create a temp file:", err) } defer f.Close() _, err = f.WriteString(`consumer_key: abc consumer_secret: def access_token: ghi access_token_secret: jkl`) if err != nil { t.Fatal("Cannot write a key information to the temp file:", err) } return f.Name() }() defer os.Remove(tempPath) Convey("Given an api key file", t, func() { params := data.Map{ "key_file": data.String(tempPath), } Convey("when creating apiKey", func() { keys, err := getKeyParameters(params) So(err, ShouldBeNil) Convey("consumer_key should have the correct value", func() { So(keys.ConsumerKey, ShouldEqual, "abc") }) Convey("consumer_secret should have the correct value", func() { So(keys.ConsumerSecret, ShouldEqual, "def") }) Convey("access_token should have the correct value", func() { So(keys.AccessToken, ShouldEqual, "ghi") }) Convey("access_token_secret should have the correct value", func() { So(keys.AccessTokenSecret, ShouldEqual, "jkl") }) }) Convey("when creating a source from it", func() { _, err := CreatePublicStreamSource(nil, nil, params) Convey("it should succeed", func() { So(err, ShouldBeNil) }) }) Convey("when creating apiKey with other key parameters", func() { params, err := data.NewMap(map[string]interface{}{ "key_file": tempPath, "consumer_key": "_abc", "consumer_secret": "_def", "access_token": "_ghi", "access_token_secret": "_jkl", }) So(err, ShouldBeNil) keys, err := getKeyParameters(params) So(err, ShouldBeNil) Convey("key_file parameter should be preferred", func() { So(keys.ConsumerKey, ShouldEqual, "abc") So(keys.ConsumerSecret, ShouldEqual, "def") So(keys.AccessToken, ShouldEqual, "ghi") So(keys.AccessTokenSecret, ShouldEqual, "jkl") }) }) }) }