func (t *Ticker) Process(ctx *core.Context, tuple *core.Tuple, w core.Writer) error {
	var i int64
	for ; atomic.LoadInt32(&t.stopped) == 0; i++ {
		newTuple := core.NewTuple(data.Map{"tick": data.Int(i)})
		if err := w.Write(ctx, newTuple); err != nil {
			return err
		}
		time.Sleep(t.interval)
	}
	return nil
}
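// The loop above runs until t.stopped becomes non-zero, but the method that
// sets the flag is not shown in this listing. Below is a minimal sketch of a
// matching Terminate method; it assumes Ticker declares a `stopped int32`
// field (as Process implies) and that the udf.UDSF interface expects this
// signature. Treat it as an illustration rather than the original code.
func (t *Ticker) Terminate(ctx *core.Context) error {
	atomic.StoreInt32(&t.stopped, 1)
	return nil
}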
func (c *Collector) Process(ctx *core.Context, tuple *core.Tuple, w core.Writer) error {
	// Nanosecond()/1e+8 yields the current tenth of a second (0-9). A tuple is
	// forwarded only when that value differs from the one recorded for the
	// previously forwarded tuple, so at most one tuple passes per 100ms window.
	curTime := int(time.Now().Nanosecond() / 1e+8)
	if curTime == c.check {
		return nil
	}
	c.check = curTime
	if err := w.Write(ctx, tuple); err != nil {
		return err
	}
	// time.Sleep(c.interval)
	return nil
}
func (p *publicStream) GenerateStream(ctx *core.Context, w core.Writer) error {
	anaconda.SetConsumerKey(p.keys.ConsumerKey)
	anaconda.SetConsumerSecret(p.keys.ConsumerSecret)
	api := anaconda.NewTwitterApi(p.keys.AccessToken, p.keys.AccessTokenSecret)
	defer api.Close()

	s := api.PublicStreamSample(url.Values{})
	defer s.Stop()

	for twRaw := range s.C {
		tw, ok := twRaw.(anaconda.Tweet)
		if !ok { // only processes tweets
			continue
		}
		createdAt, err := tw.CreatedAtTime()
		if err != nil {
			ctx.ErrLog(err).WithField("node_type", core.NTSource).
				WithField("node_name", p.ioParams.Name).
				Error("Cannot parse created at")
			continue
		}

		// anaconda.Tweet needs to be converted to data.Map via JSON.
		js, err := json.Marshal(&tw)
		if err != nil {
			ctx.ErrLog(err).WithField("node_type", core.NTSource).
				WithField("node_name", p.ioParams.Name).
				Error("Cannot re-convert a tweet to JSON")
			continue
		}
		m := data.Map{}
		if err := json.Unmarshal(js, &m); err != nil {
			ctx.ErrLog(err).WithField("node_type", core.NTSource).
				WithField("node_name", p.ioParams.Name).
				Error("Cannot parse converted JSON")
			continue
		}

		t := core.NewTuple(m)
		t.Timestamp = createdAt
		if err := w.Write(ctx, t); err != nil {
			return err
		}
	}
	return nil
}
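// GenerateStream above relies on fields that are not part of this listing:
// p.keys holds the Twitter OAuth credentials and p.ioParams carries the node
// name used in error logs. A minimal sketch of the struct under those
// assumptions (the keys type and its field layout are guesses based on usage
// above, not the original definition):
type oauthKeys struct {
	ConsumerKey       string
	ConsumerSecret    string
	AccessToken       string
	AccessTokenSecret string
}

type publicStream struct {
	keys     oauthKeys
	ioParams *bql.IOParams // provides the source node's name for logging
}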
func (l *LoremSource) GenerateStream(ctx *core.Context, w core.Writer) error {
	for {
		// Build a sentence of 5 to 9 randomly chosen lorem words.
		var text []string
		for n := rand.Intn(5) + 5; n > 0; n-- {
			text = append(text, Lorem[rand.Intn(len(Lorem))])
		}
		t := core.NewTuple(data.Map{
			"text": data.String(strings.Join(text, " ")),
		})
		if err := w.Write(ctx, t); err != nil {
			return err
		}
		time.Sleep(l.interval)
	}
}
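// GenerateStream above refers to a package-level word list Lorem and to
// l.interval, neither of which appears in this listing. A minimal sketch of
// those declarations, assuming Lorem is simply a slice of words and the
// interval is set when the source is created (both are assumptions, not the
// original definitions):
var Lorem = []string{
	"lorem", "ipsum", "dolor", "sit", "amet",
	"consectetur", "adipiscing", "elit", "sed", "do",
}

type LoremSource struct {
	interval time.Duration // pause between emitted tuples
}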
// Process implements udf.UDSF.Process. It tokenizes a field of tuples.
func (t *Tokenizer) Process(ctx *core.Context, tuple *core.Tuple, w core.Writer) error {
	var kwd []string
	if v, ok := tuple.Data[t.field]; !ok {
		return fmt.Errorf("the tuple doesn't have the required field: %v", t.field)
	} else if s, err := data.AsString(v); err != nil {
		return fmt.Errorf("'%v' field must be a string: %v", t.field, err)
	} else {
		kwd = strings.Split(s, " ")
	}

	// Emit one copy of the input tuple per token, overwriting the field with
	// the token.
	for _, k := range kwd {
		out := tuple.Copy()
		out.Data[t.field] = data.String(k)
		if err := w.Write(ctx, out); err != nil {
			return err
		}
	}
	return nil
}
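// The Tokenizer struct itself is not shown in this listing. A minimal sketch,
// assuming the only state it needs is the name of the field to tokenize and
// that udf.UDSF also requires a Terminate method with this signature (both
// assumptions, not the original code):
type Tokenizer struct {
	field string // name of the tuple field whose value is split on spaces
}

// Terminate implements udf.UDSF.Terminate. The tokenizer holds no resources,
// so there is nothing to clean up.
func (t *Tokenizer) Terminate(ctx *core.Context) error {
	return nil
}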
func (s *SourceCreator) GenerateStream(ctx *core.Context, w core.Writer) error {
	device := new(Device)
	// devName := []string{"dev1", "dev2", "dev3", "dev4", "dev5"}
	// devProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05}
	devName := []string{"dev1", "dev2"}
	devProb := []float64{0.5, 0.5}

	// pickDev draws a device name according to the probabilities in devProb.
	pickDev := func() string {
		r := rand.Float64()
		for i, p := range devProb {
			if r < p {
				return devName[i]
			}
			r -= p
		}
		return devName[len(devName)-1]
	}

	// device.MakeDevice(pickDev())
	device.num = 0
	temp := &device.sensorData[0]
	humid := &device.sensorData[1]
	for {
		device.ID = pickDev()
		device.num++
		temp.MakeData("temp", 0, 30)
		humid.MakeData("humid", 0, 100)
		t := core.NewTuple(data.Map{
			"deviceID": data.String(device.ID),
			"num":      data.Int(device.num),
			"time": data.Float(float64(time.Now().Second()) +
				float64(time.Now().Nanosecond())/1e+9),
			temp.ID:  data.Float(float64(temp.value)),
			humid.ID: data.Float(float64(humid.value)),
		})
		if err := w.Write(ctx, t); err != nil {
			return err
		}
		time.Sleep(s.interval)
	}
}
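// Device and its sensorData entries are not defined in this listing. The code
// above only relies on an ID, a counter, and two sensor slots whose MakeData
// method fills in an ID and a random value within a range. A sketch under
// those assumptions (field names and the MakeData behavior are inferred from
// usage above, not the original definitions):
type sensor struct {
	ID    string
	value float64
}

// MakeData assigns the sensor ID and draws a random value in [min, max).
func (s *sensor) MakeData(id string, min, max float64) {
	s.ID = id
	s.value = min + rand.Float64()*(max-min)
}

type Device struct {
	ID         string
	num        int64
	sensorData [2]sensor
}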
// GenerateStream generates tuples, each carrying a random sentence in its text
// field together with the name of a user.
func (s *Sentences) GenerateStream(ctx *core.Context, w core.Writer) error {
	corpus := strings.Split(strings.Replace(`lorem ipsum dolor sit amet consectetur adipiscing elit
sed do eiusmod tempor incididunt ut labore et dolore magna aliqua ut enim ad
minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea
commodo consequat duis aute irure dolor in reprehenderit in voluptate velit
esse cillum dolore eu fugiat nulla pariatur excepteur sint occaecat cupidatat
non proident sunt in culpa qui officia deserunt mollit anim id est laborum`, "\n", " ", -1), " ")

	users := []string{"isabella", "jacob", "sophia", "ethan", "emma"}
	usersProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05}

	// pickName draws a user name according to the probabilities in usersProb.
	pickName := func() string {
		r := rand.Float64()
		for i, p := range usersProb {
			if r < p {
				return users[i]
			}
			r -= p
		}
		return users[len(users)-1]
	}

	for {
		// Build a sentence of 5 to 9 randomly chosen words from the corpus.
		l := rand.Intn(5) + 5
		text := make([]string, l)
		l--
		for ; l >= 0; l-- {
			text[l] = corpus[rand.Intn(len(corpus))]
		}
		t := core.NewTuple(data.Map{
			"name": data.String(pickName()),
			"text": data.String(strings.Join(text, " ")),
		})
		if err := w.Write(ctx, t); err != nil {
			return err
		}
		time.Sleep(s.interval)
	}
}
func (s *Source) GenerateStream(ctx *core.Context, w core.Writer) error {
	r, c, err := s.bufio.Reader()
	if err != nil {
		return err
	}
	defer c.Close()

	for lineNo := s.firstLineNo; ; lineNo++ {
		line, err := r.ReadString('\n')
		line = strings.TrimSpace(line)
		if err != nil {
			if err != io.EOF {
				return err
			}
			// EOF with nothing left on the line means the input is exhausted;
			// otherwise the final, unterminated line is still processed below.
			if len(line) == 0 {
				return nil
			}
		}

		data, err := s.lp.Parse(line, lineNo)
		if err != nil {
			if err == Pass {
				// The parser asked to skip this line.
				continue
			}
			return err
		}

		now := time.Now()
		err = w.Write(ctx, &core.Tuple{
			Data:          data,
			Timestamp:     now,
			ProcTimestamp: now,
		})
		if err != nil {
			return err
		}
	}
}
func (r *source) GenerateStream(ctx *core.Context, w core.Writer) error {
	// Numeric feature names, kept in Japanese so they match the data files:
	// rent (10k yen), walking time from the station (min), exclusive floor
	// area (m*m), age of the building (years), and floor number.
	numFieldNames := []string{
		"家賃(万円)",
		"駅からの徒歩時間 (分)",
		"専有面積 (m*m)",
		"築年数 (年)",
		"階数",
	}
	defer r.file.Close()

	if r.training {
		// Training mode: each non-comment line holds the target value, the
		// numeric features, and the aspect as comma-separated fields.
		for {
			line, err := r.readLine()
			if err != nil {
				if err == io.EOF {
					return nil
				}
				return err
			}
			if line == "" || line[0] == '#' {
				continue
			}
			fields := strings.Split(line, ", ")
			if len(fields) != 6 {
				panic("a training line must have exactly 6 fields")
			}
			value, err := data.ToFloat(data.String(fields[0]))
			if err != nil {
				panic(err)
			}
			fv := make(data.Map)
			for i := 1; i < len(numFieldNames); i++ {
				x, err := data.ToFloat(data.String(fields[i]))
				if err != nil {
					panic(err)
				}
				fv[numFieldNames[i]] = data.Float(x)
			}
			// The last field is the aspect; encode it as a binary feature.
			fv[fields[len(fields)-1]] = data.Float(1)

			now := time.Now()
			w.Write(ctx, &core.Tuple{
				Data: data.Map{
					"value":          data.Float(value),
					"feature_vector": fv,
				},
				Timestamp:     now,
				ProcTimestamp: now,
			})
		}
	} else {
		// Prediction mode: the input is a sequence of "name: value" lines,
		// numeric features first and the aspect last, forming a single
		// feature vector.
		fv := make(data.Map)
		i := 1
		for {
			line, err := r.readLine()
			if err != nil {
				if err == io.EOF {
					return nil
				}
				return err
			}
			if line == "" || line[0] == '#' {
				continue
			}
			fields := strings.Split(line, ":")
			if len(fields) != 2 {
				panic("a prediction line must be a 'name: value' pair")
			}
			for j := range fields {
				fields[j] = strings.TrimSpace(fields[j])
			}
			if i < len(numFieldNames) {
				x, err := data.ToFloat(data.String(fields[1]))
				if err != nil {
					panic(err)
				}
				fv[numFieldNames[i]] = data.Float(x)
				i++
			} else {
				if fields[0] != "aspect" {
					panic(fields)
				}
				aspect := strings.Trim(fields[1], "\"")
				fv[aspect] = data.Float(1)
				break
			}
		}

		now := time.Now()
		w.Write(ctx, &core.Tuple{
			Data: data.Map{
				"feature_vector": fv,
			},
			Timestamp:     now,
			ProcTimestamp: now,
		})
	}
	return nil
}