Пример #1
0
// Emit converts every record of every record set in rset into a tuple and
// writes it to s.w.
//
// A tuple's Timestamp is built from the record's Timestamp taken as whole
// seconds, and its ProcTimestamp is the UTC time captured once at the start of
// the call. The tag of the enclosing record set is stored in the tuple data
// under s.tagField.
//
// Records whose data cannot be converted into a data.Map are logged and
// skipped. Write failures are logged and processing continues with the next
// record. Emit always returns nil.
func (s *source) Emit(rset []fluentd_forwarder.FluentRecordSet) error {
	procTime := time.Now().UTC()
	for _, set := range rset {
		tag := data.String(set.Tag)
		for _, rec := range set.Records {
			payload, err := data.NewMap(rec.Data)
			if err != nil {
				s.ctx.ErrLog(err).WithFields(logrus.Fields{
					"source_type": s.ioParams.TypeName,
					"source_name": s.ioParams.Name,
					"data":        rec.Data,
				}).Error("Cannot create a data.Map from the data")
				continue
			}
			payload[s.tagField] = tag

			tuple := &core.Tuple{
				Data:          payload,
				Timestamp:     time.Unix(int64(rec.Timestamp), 0),
				ProcTimestamp: procTime,
			}
			err = s.w.Write(s.ctx, tuple)
			if err == nil {
				continue
			}
			s.ctx.ErrLog(err).WithFields(logrus.Fields{
				"source_type": s.ioParams.TypeName,
				"source_name": s.ioParams.Name,
			}).Error("Cannot write a tuple")
		}
	}
	return nil
}
Пример #2
0
// TestLightLOFStaateSaveLoad feeds 100 tuples into a freshly created
// lightLOFState, saves the state into an in-memory buffer, loads it back, and
// checks that the restored model matches the original field by field and
// produces the same score for a sample feature vector.
func TestLightLOFStaateSaveLoad(t *testing.T) {
	ctx := core.NewContext(nil)
	creator := LightLOFStateCreator{}
	params := data.Map{
		"nearest_neighbor_algorithm":   data.String("minhash"),
		"hash_num":                     data.Int(64),
		"nearest_neighbor_num":         data.Int(10),
		"reverse_nearest_neighbor_num": data.Int(30),
	}
	state, err := creator.CreateState(ctx, params)
	if err != nil {
		t.Fatal(err)
	}
	trained := state.(*lightLOFState)

	// Train the state before entering the Convey tree so that it is built
	// only once.
	for i := 0; i < 100; i++ {
		sample := &core.Tuple{
			Data: data.Map{
				"feature_vector": data.Map{
					"n": data.Int(i),
				},
			},
		}
		if err := trained.Write(ctx, sample); err != nil {
			t.Fatal(err)
		}
	}

	Convey("Given a trained LightLOFState", t, func() {
		Convey("when saving it", func() {
			saved := bytes.NewBuffer(nil)
			saveErr := trained.Save(ctx, saved, data.Map{})

			Convey("it should succeed.", func() {
				So(saveErr, ShouldBeNil)

				Convey("and the loaded state should be same.", func() {
					loaded, err := creator.LoadState(ctx, saved, data.Map{})
					So(err, ShouldBeNil)
					restored := loaded.(*lightLOFState)

					want := trained.lightLOF
					got := restored.lightLOF
					So(got.nn, ShouldResemble, want.nn)
					So(got.nnNum, ShouldEqual, want.nnNum)
					So(got.rnnNum, ShouldEqual, want.rnnNum)
					So(got.kdists, ShouldResemble, want.kdists)
					So(got.lrds, ShouldResemble, want.lrds)
					So(got.maxSize, ShouldEqual, want.maxSize)
					So(got.rg, ShouldNotBeNil)

					// Both models must score the same input identically.
					fv := FeatureVector(data.Map{"n": data.Int(10)})
					wantScore, err := trained.lightLOF.CalcScore(fv)
					So(err, ShouldBeNil)
					gotScore, err := restored.lightLOF.CalcScore(fv)
					So(err, ShouldBeNil)
					So(gotScore, ShouldResemble, wantScore)
				})
			})
		})
	})
}
Пример #3
0
// GenerateStream endlessly writes tuples to w, each carrying a "name" field
// with a user name and a "text" field with 5 to 9 randomly chosen corpus words
// joined by single spaces. User names are drawn according to the weights in
// usersProb. After each successful write the method sleeps for s.interval.
//
// It returns only when a write fails, passing that error through.
func (s *Sentences) GenerateStream(ctx *core.Context, w core.Writer) error {
	flat := strings.Replace(`lorem ipsum dolor sit amet
consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore
magna aliqua ut enim ad minim veniam quis nostrud exercitation ullamco laboris
nisi ut aliquip ex ea commodo consequat duis aute irure dolor in reprehenderit
in voluptate velit esse cillum dolore eu fugiat nulla pariatur excepteur sint
occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim
id est laborum`, "\n", " ", -1)
	corpus := strings.Split(flat, " ")

	users := []string{"isabella", "jacob", "sophia", "ethan", "emma"}
	usersProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05}

	// pickName walks the weights, subtracting each one from a uniform random
	// number until the remainder falls inside the current weight.
	pickName := func() string {
		rest := rand.Float64()
		for idx := range usersProb {
			weight := usersProb[idx]
			if rest < weight {
				return users[idx]
			}
			rest -= weight
		}
		// Fall back to the last user if rounding leaves a remainder.
		return users[len(users)-1]
	}

	for {
		words := make([]string, rand.Intn(5)+5)
		// Fill from the back to the front.
		for pos := len(words) - 1; pos >= 0; pos-- {
			words[pos] = corpus[rand.Intn(len(corpus))]
		}

		tuple := core.NewTuple(data.Map{
			"name": data.String(pickName()),
			"text": data.String(strings.Join(words, " ")),
		})
		err := w.Write(ctx, tuple)
		if err != nil {
			return err
		}

		time.Sleep(s.interval)
	}
}
Пример #4
0
// GenerateStream endlessly writes tuples to w whose "text" field holds 5 to 9
// entries picked at random from Lorem and joined by single spaces. After each
// successful write it sleeps for l.interval.
//
// It returns only when a write fails, passing that error through.
func (l *LoremSource) GenerateStream(ctx *core.Context, w core.Writer) error {
	for {
		count := rand.Intn(5) + 5
		words := make([]string, 0, count)
		for len(words) < count {
			words = append(words, Lorem[rand.Intn(len(Lorem))])
		}

		tuple := core.NewTuple(data.Map{
			"text": data.String(strings.Join(words, " ")),
		})
		err := w.Write(ctx, tuple)
		if err != nil {
			return err
		}

		time.Sleep(l.interval)
	}
}
Пример #5
0
// Process implements udf.UDSF.Process. It splits the string stored in the
// field named by t.field on single spaces and, for every resulting piece,
// writes a copy of the input tuple in which that field is replaced by the
// piece.
//
// An error is returned when the tuple lacks the field, when the field cannot
// be read as a string, or when a write fails; in the last case the remaining
// pieces are not written.
func (t *Tokenizer) Process(ctx *core.Context, tuple *core.Tuple, w core.Writer) error {
	raw, found := tuple.Data[t.field]
	if !found {
		return fmt.Errorf("the tuple doesn't have the required field: %v", t.field)
	}
	text, err := data.AsString(raw)
	if err != nil {
		return fmt.Errorf("'%v' field must be string: %v", t.field, err)
	}

	for _, token := range strings.Split(text, " ") {
		emitted := tuple.Copy()
		emitted.Data[t.field] = data.String(token)
		if werr := w.Write(ctx, emitted); werr != nil {
			return werr
		}
	}
	return nil
}
Пример #6
0
// GenerateStream endlessly writes simulated sensor tuples to w.
//
// Every iteration picks a device name according to devProb, increments the
// running counter device.num, regenerates the two sensor readings, and writes
// a tuple with the fields "deviceID", "num", "time", plus one field per
// sensor keyed by that sensor's ID. After each successful write the method
// sleeps for s.interval.
//
// The "time" field is the current second within the minute plus the
// fractional part of that second; it is not an absolute timestamp.
//
// It returns only when a write fails, passing that error through.
func (s *SourceCreator) GenerateStream(ctx *core.Context, w core.Writer) error {
	device := new(Device)
	// Alternative five-device distribution (disabled):
	//	devName := []string{"dev1", "dev2", "dev3", "dev4", "dev5"}
	//	devProb := []float64{0.4, 0.3, 0.15, 0.1, 0.05}
	devName := []string{"dev1", "dev2"}
	devProb := []float64{0.5, 0.5}

	// pickDev walks the weights, subtracting each one from a uniform random
	// number until the remainder falls inside the current weight.
	pickDev := func() string {
		r := rand.Float64()
		for i, p := range devProb {
			if r < p {
				return devName[i]
			}
			r -= p
		}
		// Fall back to the last device if rounding leaves a remainder.
		return devName[len(devName)-1]
	}

	//	device.MakeDevice(pickDev())
	device.num = 0
	temp := &device.sensorData[0]
	humid := &device.sensorData[1]

	for {
		device.ID = pickDev()
		device.num++
		// NOTE(review): MakeData presumably sets the sensor's ID and a random
		// value within the given bounds — confirm against its definition.
		temp.MakeData("temp", 0, 30)
		humid.MakeData("humid", 0, 100)

		// Read the clock once so the second and nanosecond parts belong to the
		// same instant. Two separate time.Now() calls can straddle a second
		// boundary and make the reported time jump back by almost a second.
		now := time.Now()
		t := core.NewTuple(data.Map{
			"deviceID": data.String(device.ID),
			"num":      data.Int(device.num),
			"time":     data.Float(float64(now.Second()) + float64(now.Nanosecond())/1e+9),
			temp.ID:    data.Float(float64(temp.value)),
			humid.ID:   data.Float(float64(humid.value)),
		})
		if err := w.Write(ctx, t); err != nil {
			return err
		}
		time.Sleep(s.interval)
	}
}
Пример #7
0
// Parse converts one line of libsvm-style text ("label key:value key:value
// ...") into a data.Map with two entries: "label", the first token as a
// string, and "feature_vector", a map from each key to its value divided by
// 255.
//
// Tokens are separated by any run of whitespace, so repeated spaces and
// leading or trailing whitespace are tolerated. A line with no tokens returns
// perline.Pass. A token without ':' or with an unparsable value returns an
// error that names s.path and lineNo.
//
// NOTE(review): the division by 255 presumably scales 8-bit values into
// [0, 1] — confirm against the data set this source is used with.
func (s *source) Parse(line string, lineNo int) (data.Map, error) {
	// strings.Fields returns an empty slice for a blank line, unlike
	// strings.Split, which always returns at least one (possibly empty)
	// element and would make the check below unreachable.
	fields := strings.Fields(line)
	if len(fields) == 0 {
		return nil, perline.Pass
	}
	label := fields[0]
	fv := make(data.Map, len(fields)-1)
	for _, field := range fields[1:] {
		// Everything before the first ':' is the key, the rest is the value.
		ix := strings.Index(field, ":")
		if ix < 0 {
			return nil, fmt.Errorf("invalid libsvm format at %s:%d", s.path, lineNo)
		}
		v, err := strconv.ParseFloat(field[ix+1:], 32)
		if err != nil {
			return nil, fmt.Errorf("%v at %s:%d", err, s.path, lineNo)
		}
		fv[field[:ix]] = data.Float(v) / 255
	}
	return data.Map{
		"label":          data.String(label),
		"feature_vector": fv,
	}, nil
}
Пример #8
0
// GenerateStream reads the file held by r line by line and writes the
// resulting tuples to w. The file is closed when the method returns.
//
// In training mode (r.training is true) every data line must consist of six
// ", "-separated fields: the target value, four numerical features, and a
// final field that is used verbatim as a feature key with the value 1. One
// tuple with "value" and "feature_vector" is written per line until EOF.
//
// Otherwise the file is read as "key: value" lines. The values of the first
// four data lines become the numerical features in order (their keys are not
// checked), and the next data line must have the key "aspect"; its value, with
// surrounding double quotes removed, is used as a feature key with the value
// 1. A single tuple holding only "feature_vector" is then written. If EOF is
// reached before the "aspect" line, nothing is written and nil is returned.
//
// Blank lines and lines starting with '#' are skipped in both modes.
// Malformed input and failed writes are reported through the returned error.
func (r *source) GenerateStream(ctx *core.Context, w core.Writer) error {
	// Index 0 is never used as a feature key; both loops below start at 1.
	numFieldNames := []string{
		"家賃(万円)",       // rent (10,000 yen)
		"駅からの徒歩時間 (分)", // walking time from the station (minutes)
		"専有面積 (m*m)",   // floor area (m*m)
		"築年数 (年)",      // age of the building (years)
		"階数",           // floor number
	}

	defer r.file.Close()
	if r.training {
		for {
			line, err := r.readLine()
			if err != nil {
				if err == io.EOF {
					return nil
				}
				return err
			}
			// Check for an empty line before indexing to avoid a panic.
			if line == "" || line[0] == '#' {
				continue
			}
			fields := strings.Split(line, ", ")
			if len(fields) != len(numFieldNames)+1 {
				return fmt.Errorf("invalid training data: want %d fields but got %d: %q",
					len(numFieldNames)+1, len(fields), line)
			}
			value, err := data.ToFloat(data.String(fields[0]))
			if err != nil {
				return fmt.Errorf("invalid value %q: %v", fields[0], err)
			}
			fv := make(data.Map)
			for i := 1; i < len(numFieldNames); i++ {
				x, err := data.ToFloat(data.String(fields[i]))
				if err != nil {
					return fmt.Errorf("invalid value %q for %q: %v", fields[i], numFieldNames[i], err)
				}
				fv[numFieldNames[i]] = data.Float(x)
			}
			// The last field is categorical and is encoded as key -> 1.
			fv[fields[len(fields)-1]] = data.Float(1)
			now := time.Now()
			if err := w.Write(ctx, &core.Tuple{
				Data: data.Map{
					"value":          data.Float(value),
					"feature_vector": fv,
				},
				Timestamp:     now,
				ProcTimestamp: now,
			}); err != nil {
				return err
			}
		}
	}

	fv := make(data.Map)
	next := 1 // index into numFieldNames of the next numerical feature
	for {
		line, err := r.readLine()
		if err != nil {
			if err == io.EOF {
				return nil
			}
			return err
		}
		if line == "" || line[0] == '#' {
			continue
		}
		fields := strings.Split(line, ":")
		if len(fields) != 2 {
			return fmt.Errorf("invalid line, want \"key: value\": %q", line)
		}
		key := strings.TrimSpace(fields[0])
		val := strings.TrimSpace(fields[1])

		if next < len(numFieldNames) {
			x, err := data.ToFloat(data.String(val))
			if err != nil {
				return fmt.Errorf("invalid value %q for %q: %v", val, numFieldNames[next], err)
			}
			fv[numFieldNames[next]] = data.Float(x)
			next++
			continue
		}

		// All numerical features are read; the categorical one must follow.
		if key != "aspect" {
			return fmt.Errorf("invalid key: want \"aspect\" but got %q", key)
		}
		fv[strings.Trim(val, "\"")] = data.Float(1)
		break
	}

	now := time.Now()
	return w.Write(ctx, &core.Tuple{
		Data: data.Map{
			"feature_vector": fv,
		},
		Timestamp:     now,
		ProcTimestamp: now,
	})
}
Пример #9
0
// TestKeyFile writes a temporary API key file and checks three things: the
// keys are read from the file named by the "key_file" parameter, a public
// stream source can be created from such parameters, and the file's values
// take precedence over keys passed directly as parameters.
func TestKeyFile(t *testing.T) {
	const keyFileBody = `consumer_key: abc
consumer_secret: def
access_token: ghi
access_token_secret: jkl`

	// The file is written and closed inside the closure; only its path is
	// needed afterwards.
	keyFilePath := func() string {
		tmp, err := ioutil.TempFile("", "twitter-plugin-test-key-file")
		if err != nil {
			t.Fatal("Cannot create a temp file:", err)
		}
		defer tmp.Close()

		if _, err := tmp.WriteString(keyFileBody); err != nil {
			t.Fatal("Cannot write a key information to the temp file:", err)
		}
		return tmp.Name()
	}()
	defer os.Remove(keyFilePath)

	Convey("Given an api key file", t, func() {
		params := data.Map{
			"key_file": data.String(keyFilePath),
		}

		Convey("when creating apiKey", func() {
			got, err := getKeyParameters(params)
			So(err, ShouldBeNil)

			Convey("consumer_key should have the correct value", func() {
				So(got.ConsumerKey, ShouldEqual, "abc")
			})

			Convey("consumer_secret should have the correct value", func() {
				So(got.ConsumerSecret, ShouldEqual, "def")
			})

			Convey("access_token should have the correct value", func() {
				So(got.AccessToken, ShouldEqual, "ghi")
			})

			Convey("access_token_secret should have the correct value", func() {
				So(got.AccessTokenSecret, ShouldEqual, "jkl")
			})
		})

		Convey("when creating a source from it", func() {
			_, createErr := CreatePublicStreamSource(nil, nil, params)

			Convey("it should succeed", func() {
				So(createErr, ShouldBeNil)
			})
		})

		Convey("when creating apiKey with other key parameters", func() {
			// Supply both the key file and conflicting inline keys.
			mixed, err := data.NewMap(map[string]interface{}{
				"key_file":            keyFilePath,
				"consumer_key":        "_abc",
				"consumer_secret":     "_def",
				"access_token":        "_ghi",
				"access_token_secret": "_jkl",
			})
			So(err, ShouldBeNil)
			got, err := getKeyParameters(mixed)
			So(err, ShouldBeNil)

			Convey("key_file parameter should be preferred", func() {
				So(got.ConsumerKey, ShouldEqual, "abc")
				So(got.ConsumerSecret, ShouldEqual, "def")
				So(got.AccessToken, ShouldEqual, "ghi")
				So(got.AccessTokenSecret, ShouldEqual, "jkl")
			})
		})
	})
}