Beispiel #1
1
// MustNewWinPdhCollector builds a win_pdh_collector from opts.
//
// Every query with a non-empty metric is registered on the PDH handle via
// AddEnglishCounter and indexed in map_queries by its query string. Queries
// with an empty metric are logged and skipped.
func MustNewWinPdhCollector(name, prefix string, opts config.Config_win_pdh_collector) *win_pdh_collector {
	collector := &win_pdh_collector{
		name:        name,
		enabled:     true,
		prefix:      prefix,
		interval:    opts.Interval.MustDuration(time.Second),
		config:      opts,
		hPdh:        pdh.NewPdhCollector(),
		map_queries: make(map[string]config.Config_win_pdh_query),
	}

	for _, query := range opts.Queries {
		if query.Metric == "" {
			logging.Errorf("Error Phd Collector metric is empty: %# v", pretty.Formatter(query))
			continue
		}

		collector.hPdh.AddEnglishCounter(query.Query)

		// A query without tags of its own starts from a copy of newcore.AddTags.
		if query.Tags == nil {
			query.Tags = newcore.AddTags.Copy()
		}

		// The counter path is exposed as a "query" tag when requested either
		// collector-wide or on the individual query.
		wantQueryTag := opts.Query_to_tag == true || query.Query_to_tag == true
		if wantQueryTag {
			query.Tags["query"] = query.Query
		}

		collector.map_queries[query.Query] = query
	}

	logging.Tracef("MustNewWinPdhCollector:opts.Queries: %# v", pretty.Formatter(opts.Queries))
	logging.Tracef("MustNuewWinPdhCollector c.map_queries: %# v", pretty.Formatter(collector.map_queries))
	return collector
}
Beispiel #2
0
// protect wraps h so that, when API security is enabled in config.CoreConf for
// the given trigger ("read" or "write"), a request is only forwarded to h
// after it passes all of these checks:
//
//   - the "hostname" query parameter, lower-cased, equals newcore.GetHostname()
//   - the "time" query parameter parses via utils.UTCTimeFromUnixStr and is at
//     most expire away from the current time, in either direction
//   - the HICKWALL_ADMIN_SIGN header is base64 and unsigner.Unsign accepts it
//     as the signature of hostname+time
//
// A failed check is answered with HTTP status 500 and h is not called. When
// the trigger is not secured, h is called directly.
func protect(h httprouter.Handle, expire time.Duration, trigger string) httprouter.Handle {
	return func(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {

		secure := false
		if trigger == "read" && config.CoreConf.Secure_api_read {
			secure = true
		} else if trigger == "write" && config.CoreConf.Secure_api_write {
			secure = true
		}
		logging.Infof("trigger: %s, secure: %v, write: %v, read: %v\n", trigger, secure, config.CoreConf.Secure_api_write, config.CoreConf.Secure_api_read)

		if !secure {
			h(w, r, ps)
			return
		}

		params := r.URL.Query()

		hostname := params.Get("hostname")
		if strings.ToLower(hostname) != newcore.GetHostname() {
			logging.Errorf("hostname mismatch: %v", hostname)
			http.Error(w, "hostname mismatch", 500)
			return
		}

		time_str := params.Get("time")
		tm, err := utils.UTCTimeFromUnixStr(time_str)
		if err != nil {
			logging.Errorf("invalid time: %v", time_str)
			http.Error(w, "Invalid Time", 500)
			return
		}

		// Reject timestamps outside the window on both sides. Checking only
		// "too old" would let a signed request carrying a far-future timestamp
		// stay valid (and replayable) until that time finally passes.
		age := time.Since(tm)
		if age > expire || age < -expire {
			logging.Errorf("expired request: %v", age)
			http.Error(w, "expired request", 500)
			return
		}

		// The request must carry the signature of hostname+time in the
		// HICKWALL_ADMIN_SIGN header.
		load_unsigner()

		signed_str := r.Header.Get("HICKWALL_ADMIN_SIGN")
		signed, err := base64.StdEncoding.DecodeString(signed_str)
		if err != nil {
			logging.Error("cannot decode sign")
			http.Error(w, "cannot decode sign", 500)
			return
		}

		toSign := fmt.Sprintf("%s%s", hostname, time_str)
		logging.Trace("unsign started")
		err = unsigner.Unsign([]byte(toSign), signed)
		logging.Trace("unsign finished")
		if err != nil {
			logging.Errorf("-> invalid signature: %v <-", string(signed))
			http.Error(w, "invalid signature", 500)
			return
		}

		h(w, r, ps)
	}
}
Beispiel #3
0
// CollectOnce resolves c.config.Target, calls the pinger's Run
// c.config.Packets times and reports the collected round trip times.
//
// The emitted data points are <Metric>.time_min, .time_max, .time_avg,
// .time_mdev (0 when the sample standard deviation is NaN), .ip (the resolved
// address) and .lost_pct. Round trip times are recorded in whole milliseconds.
//
// When the target cannot be resolved, the result carries the error and no
// data points. Next is always now + c.interval.
func (c ping_collector) CollectOnce() newcore.CollectResult {
	var (
		md       newcore.MultiDataPoint
		d        stats.Stats
		p        = fastping.NewPinger()
		rtt_chan = make(chan float64)
	)

	ip, err := net.ResolveIPAddr("ip4:icmp", c.config.Target)
	if err != nil {
		logging.Errorf("ping_collector: DNS resolve error: %v", err)
		return newcore.CollectResult{
			Collected: nil,
			Next:      time.Now().Add(c.interval),
			Err:       fmt.Errorf("ping_collector: DNS resolve error: %v", err),
		}
	}

	p.MaxRTT = c.timeout
	p.AddIPAddr(ip)
	p.OnRecv = func(addr *net.IPAddr, rtt time.Duration) {
		rtt_chan <- float64(rtt.Nanoseconds() / 1000 / 1000)
	}

	go func() {
		for i := 0; i < c.config.Packets; i++ {
			// Use a goroutine-local error instead of writing to the captured
			// outer err from a second goroutine.
			if runErr := p.Run(); runErr != nil {
				logging.Errorf("ping_collector run err: %v", runErr)
			}
		}
		close(rtt_chan)
	}()

	// Drain until the sender goroutine closes the channel.
	for rtt := range rtt_chan {
		d.Update(rtt)
	}

	md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_min"), d.Min(), c.tags, "", "", ""))
	md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_max"), d.Max(), c.tags, "", "", ""))
	md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_avg"), d.Mean(), c.tags, "", "", ""))

	std := d.SampleStandardDeviation()
	if math.IsNaN(std) {
		std = 0
	}
	md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_mdev"), std, c.tags, "", "", ""))
	md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "ip"), ip.IP.String(), c.tags, "", "", ""))

	// Convert before dividing: integer division truncates the ratio so that
	// any partial loss is reported as 0. The guard avoids a division by zero
	// when no packets are configured.
	var lost_pct float64
	if c.config.Packets > 0 {
		lost_pct = float64(c.config.Packets-d.Count()) / float64(c.config.Packets) * 100
	}
	md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "lost_pct"), lost_pct, c.tags, "", "", ""))

	return newcore.CollectResult{
		Collected: md,
		Next:      time.Now().Add(c.interval),
		Err:       nil,
	}
}
Beispiel #4
0
// QueryWmiFields runs query through the package-level wmi_service
// ("ExecQuery") and returns one map per result row, holding the string form
// (fmt "%v") of every requested field.
//
// fields must name the properties explicitly; a single "*" is rejected with an
// error. A field that cannot be read from a row is logged and left out of that
// row's map. Failure of ExecQuery, of reading Count, or of ItemIndex aborts
// the whole query with an error.
func QueryWmiFields(query string, fields []string) ([]map[string]string, error) {

	if len(fields) == 1 && fields[0] == "*" {
		logging.Errorf("`select * ` not supported, need to address fields explicitly.")
		return nil, fmt.Errorf("`select * ` not supported, need to address fields explicitly.")
	}

	resultRaw, err := oleutil.CallMethod(wmi_service, "ExecQuery", query)
	if err != nil {
		logging.Error("ExecQuery Failed: ", err)
		return nil, fmt.Errorf("ExecQuery Failed: %v", err)
	}
	result := resultRaw.ToIDispatch()
	defer result.Release()

	countVar, err := oleutil.GetProperty(result, "Count")
	if err != nil {
		logging.Errorf("Get result count Failed: %v", err)
		return nil, fmt.Errorf("Get result count Failed: %v", err)
	}
	count := int(countVar.Val)

	resultMap := []map[string]string{}

	for i := 0; i < count; i++ {
		itemRaw, err := oleutil.CallMethod(result, "ItemIndex", i)
		if err != nil {
			return nil, fmt.Errorf("ItemIndex Failed: %v", err)
		}
		item := itemRaw.ToIDispatch()

		itemMap := make(map[string]string, len(fields))
		for _, field := range fields {
			prop, err := oleutil.GetProperty(item, field)
			if err != nil {
				// Report through the logger rather than stdout.
				logging.Errorf("cannot find field in SWbemObject: %v", err)
				continue
			}
			itemMap[field] = fmt.Sprintf("%v", prop.Value())
		}

		// Release the row now: a defer inside the loop would keep every row
		// alive until the function returns. All values were already copied
		// into strings above.
		item.Release()

		resultMap = append(resultMap, itemMap)
		logging.Tracef("wmi query result: %+v", itemMap)
	}
	logging.Tracef("wmi query result count: %d", len(resultMap))
	return resultMap, nil
}
Beispiel #5
0
// CollectOnce reads all registered PDH counters once and converts every
// successfully collected value whose query is known in c.map_queries into a
// data point carrying that query's metric name and tags.
//
// Counter errors are logged, except those whose text mentions the
// `\Process(hickwall)\Working Set - Private` counter, which are dropped
// silently. The result never carries an error; Next is now + c.interval.
func (c *win_pdh_collector) CollectOnce() newcore.CollectResult {
	logging.Debug("win_pdh_collector.CollectOnce Started")

	var items newcore.MultiDataPoint

	for _, pd := range c.hPdh.CollectData() {
		if pd.Err != nil {
			if !strings.Contains(pd.Err.Error(), `\Process(hickwall)\Working Set - Private`) {
				// The format verb was missing, so the error was never
				// rendered as part of the message.
				logging.Errorf("win_pdh_collector ERROR: %v", pd.Err)
			}
			continue
		}

		query, ok := c.map_queries[pd.Query]
		if !ok {
			continue
		}
		logging.Tracef("query: %+v, \n %+v", query.Metric, query)
		items = append(items, newcore.NewDP(c.prefix, query.Metric.Clean(), pd.Value, query.Tags, "", "", ""))
	}

	logging.Debugf("win_pdh_collector.CollectOnce Finished. count: %d", len(items))
	return newcore.CollectResult{
		Collected: items,
		Next:      time.Now().Add(c.interval),
		Err:       nil,
	}
}
Beispiel #6
0
// new_core_from_file loads the runtime config from files and applies it with
// UpdateRunningCore.
//
// It returns the loaded config together with any error from loading or from
// updating the running core. On success the loaded config is returned with a
// nil error.
func new_core_from_file() (*config.RuntimeConfig, error) {
	logging.Debug("NewCoreFromFile")
	rconf, err := config.LoadRuntimeConfigFromFiles()
	if err != nil {
		logging.Errorf("NewCoreFromFile: Failed to load RuntimeConfig from files: %v", err)
		return rconf, err
	}
	logging.Debug("NewCoreFromFile: load config from file finished.")

	if err = UpdateRunningCore(rconf); err != nil {
		logging.Errorf("NewCoreFromFile: Failed to create running core: %v", err)
		return rconf, err
	}

	logging.Debug("NewCoreFromFile finished witout error")
	// Hand back the config that is now running; returning nil here made the
	// result useless exactly in the case where it is valid.
	return rconf, nil
}
// new_hashed_reg_response_from_json decodes dump as JSON into a
// hashed_registry_response. On a decode failure the raw dump is logged and the
// error produced by logging.SErrorf is returned.
func new_hashed_reg_response_from_json(dump []byte) (*hashed_registry_response, error) {
	resp := new(hashed_registry_response)
	if err := json.Unmarshal(dump, resp); err != nil {
		logging.Errorf("dump: %s", string(dump))
		return nil, logging.SErrorf("failed to unmarshal HashedRegistryResponse: %v", err)
	}
	return resp, nil
}
Beispiel #8
0
// runService runs the primary service through svc.Run with a serviceHandler
// and logs the error if that fails. Panics are handled by
// utils.Recover_and_log. isDebug is currently not used.
func runService(isDebug bool) {
	defer utils.Recover_and_log()
	logging.Debug("runService")

	// Keep the error local to this call; the function declares no err of its
	// own, so a plain assignment would depend on (and mutate) outer state.
	if err := svc.Run(command.PrimaryService.Name(), &serviceHandler{}); err != nil {
		logging.Errorf("runService: failed: %v\r\n", err)
	}
}
// new_core_from_registry obtains registry information and then starts the
// core from etcd via new_core_from_etcd.
//
// The registry response is taken from load_reg_response when that succeeds.
// Otherwise the configured registry urls are tried round robin with
// do_registry, waiting Registry_delay_on_error (default one minute) after each
// failure, until one of them answers without error.
//
// A nil stop channel or an empty registry url list is reported through
// logging.Panic.
func new_core_from_registry(stop chan error) {
	logging.Debug("new_core_from_registry started")
	if stop == nil {
		logging.Panic("stop chan is nil")
	}

	if len(config.CoreConf.Registry_urls) <= 0 {
		logging.Panic("RegistryURLs is empty!!")
	}

	resp, err := load_reg_response()
	if err != nil {
		logging.Errorf("we don't have a valid registry info cached.")
		retry := time.After(0)

		// Lay the registry urls out on a ring so they can be walked round robin.
		urls := ring.New(len(config.CoreConf.Registry_urls))
		for i := 0; i < urls.Len(); i++ {
			urls.Value = config.CoreConf.Registry_urls[i]
			urls = urls.Next()
		}

		for {
			<-retry

			urls = urls.Next()
			resp, err = do_registry(urls.Value.(string))
			if err == nil {
				logging.Info("we are registry we got a valid registry response.")
				break
			}

			logging.Errorf("failed to registry: %v", err)
			retry = time.After(newcore.Interval(config.CoreConf.Registry_delay_on_error).MustDuration(time.Minute))
		}
	}

	// TODO: handle error here. like etcd_machines are not working.
	// At this point a valid registry response is available: fetch the config
	// and start running.
	new_core_from_etcd(resp.EtcdMachines, resp.EtcdConfigPath, stop)
}
Beispiel #10
0
// connect creates an async sarama producer for the configured broker list and
// stores it in b.producer. A goroutine is started that logs everything
// arriving on the producer's Errors channel until that channel is closed.
// An error is returned when the producer cannot be created.
func (b *kafkaBackend) connect() error {
	brokers := b.conf.Broker_list

	p, err := sarama.NewAsyncProducer(brokers, b.kconf)
	if err != nil {
		logging.Errorf("failed to start producer: %v, %v", err, brokers)
		return fmt.Errorf("failed to start producer: %v, %v", err, brokers)
	}

	go func() {
		logging.Debug("consuming from producer.Errors()")
		for perr := range p.Errors() {
			logging.Errorf("producer error: %v", perr)
		}
		logging.Debug("producer.Errors() closed")
	}()

	logging.Infof("created new producer: %v", brokers)

	// Keep the producer for later use.
	b.producer = p
	return nil
}
Beispiel #11
0
// UseConfigCreateSubscription creates one kafka subscription for every non-nil
// entry of rconf.Client.Subscribe_kafka.
//
// Subscription names must be unique: a repeated name, or a failure of
// NewKafkaSubscription, aborts with an error and no subscriptions are
// returned.
func UseConfigCreateSubscription(rconf *config.RuntimeConfig) ([]newcore.Subscription, error) {
	var (
		subs []newcore.Subscription
		seen = make(map[string]bool)
	)

	for _, conf := range rconf.Client.Subscribe_kafka {
		if conf == nil {
			continue
		}

		if seen[conf.Name] {
			logging.Errorf("duplicated kafka subscribe name are not allowed: %s", conf.Name)
			return nil, fmt.Errorf("duplicated kafka subscribe name are not allowed: %s", conf.Name)
		}
		seen[conf.Name] = true

		sub, err := NewKafkaSubscription(*conf)
		if err != nil {
			logging.Errorf("failed to create kafka subscription: %v", err)
			return nil, fmt.Errorf("failed to create kafka subscription: %v", err)
		}
		subs = append(subs, sub)
	}

	return subs, nil
}
Beispiel #12
0
// query runs a WMI query through c.service ("ExecQuery") and returns one map
// per result row, holding the string form (fmt "%v") of every requested field.
//
// A field that cannot be read from a row is logged and left out of that row's
// map. A nil c.service, or a failure of ExecQuery, of reading Count, or of
// ItemIndex, is returned as an error.
func (c *win_wmi_collector) query(query string, fields []string) ([]map[string]string, error) {
	if c.service == nil {
		logging.Error("win_wmi_collector c.service is nil")
		return nil, fmt.Errorf("win_wmi_collector c.service is nil")
	}

	resultRaw, err := oleutil.CallMethod(c.service, "ExecQuery", query)
	if err != nil {
		logging.Error("ExecQuery Failed: ", err)
		return nil, fmt.Errorf("ExecQuery Failed: %v", err)
	}
	result := resultRaw.ToIDispatch()
	defer result.Release()

	countVar, err := oleutil.GetProperty(result, "Count")
	if err != nil {
		logging.Error("Get result count Failed: ", err)
		return nil, fmt.Errorf("Get result count Failed: %v", err)
	}
	count := int(countVar.Val)

	resultMap := []map[string]string{}

	for i := 0; i < count; i++ {
		itemRaw, err := oleutil.CallMethod(result, "ItemIndex", i)
		if err != nil {
			return nil, fmt.Errorf("ItemIndex Failed: %v", err)
		}
		item := itemRaw.ToIDispatch()

		itemMap := make(map[string]string, len(fields))
		for _, field := range fields {
			prop, err := oleutil.GetProperty(item, field)
			if err != nil {
				logging.Errorf("cannot find field in SWbemObject: %v", err)
				continue
			}
			itemMap[field] = fmt.Sprintf("%v", prop.Value())
		}

		// Release the row now: a defer inside the loop would keep every row
		// alive until the function returns. All values were already copied
		// into strings above.
		item.Release()

		resultMap = append(resultMap, itemMap)
		logging.Tracef("wmi query result: %+v", itemMap)
	}
	logging.Tracef("wmi query result count: %d", len(resultMap))
	return resultMap, nil
}
// loop is the event loop of the subscription. It
//
//   - (re)opens the consumers through c.consume whenever there is no input
//     channel, retrying one second after a failure,
//   - takes batches from the input channel and offers the most recent one on
//     c.updates,
//   - on a close request closes c.updates, answers the request with nil and
//     returns.
func (c *kafka_subscription) loop() {
	logging.Info("loop started")

	var (
		batch   newcore.MultiDataPoint
		updates chan newcore.MultiDataPoint // nil while there is nothing to send
		source  <-chan newcore.MultiDataPoint
		reopen  <-chan time.Time
	)

	for {
		// No input and no reopen attempt scheduled: schedule one right away.
		if source == nil && reopen == nil {
			logging.Info("input == nil, try_open_consumer_tick == nil")
			reopen = time.After(0)
		}

		select {
		case <-reopen:
			logging.Info("try_open_consumer_tick")
			opened, err := c.consume()
			if opened == nil || err != nil {
				logging.Errorf("failed to create consumers: %v", err)
				reopen = time.After(time.Second)
			} else {
				reopen = nil
			}
			source = opened

		case md := <-source:
			batch = md[:]
			updates = c.updates // enable the send case

		case updates <- batch:
			batch, updates = nil, nil // sent; disable the send case again

		case errc := <-c.closing:
			// clean up collector resource.
			close(c.updates)
			errc <- nil
			return
		}
	}
}
Beispiel #14
0
// UpdateRunningCore replaces the running core with one created from rconf.
//
// A new core is created first, then the current core is closed
// unconditionally, so a config that fails to produce a core still stops the
// old one, and only then is the creation error, if any, returned. On success
// the new core and rconf become the_core and the_rconf.
//
// If pprof is enabled in rconf and not yet being served, an http pprof
// listener is started on rconf.Client.Pprof_listen (":6060" when empty). It is
// never stopped again by this function.
//
// A nil rconf is rejected with an error.
func UpdateRunningCore(rconf *config.RuntimeConfig) error {
	logging.Debug("UpdateRunningCore")
	if rconf == nil {
		return fmt.Errorf("rconf is nil")
	}
	core, _, err := create_running_core_hooked(rconf, false)

	// http pprof
	// https://github.com/golang/go/issues/4674
	// we can only open http pprof, cannot close it.
	if !pprof_serving && rconf.Client.Pprof_enabled == true {
		if rconf.Client.Pprof_listen == "" {
			rconf.Client.Pprof_listen = ":6060"
		}

		// Set the flag before the goroutine is started: when it was set inside
		// the goroutine, a second call arriving before the goroutine got
		// scheduled would start another listener. The address is copied so the
		// goroutine does not read rconf later.
		pprof_serving = true
		listen := rconf.Client.Pprof_listen
		go func() {
			logging.Infof("http pprof is listen and served on: %v", listen)
			err := http.ListenAndServe(listen, nil)
			logging.Errorf("pprof ListenAndServe Error: %v", err)
			pprof_serving = false
		}()
	}

	// if registry give us an empty config. agent should also reflect this change.
	close_core()

	if err != nil {
		return err
	}

	the_core = core
	the_rconf = rconf
	logging.Debug("UpdateRunningCore Finished")
	return nil
}
Beispiel #15
0
// CollectOnce runs every configured WMI query and turns the returned records
// into data points.
//
// For each record and each metric definition of the query, the metric key and
// the tags are expanded against the record; the value is taken from the field
// named by Value_from, or from Default when that field is missing and a
// default is configured.
//
// When a query returns no records, metrics with a Default are emitted with
// that default, but only if neither the metric key nor any tag value contains
// a template ("{{"), since there is no record to expand it with.
//
// Queries that fail are skipped. The result never carries an error; Next is
// now + c.interval.
func (c *win_wmi_collector) CollectOnce() (res newcore.CollectResult) {
	var items newcore.MultiDataPoint

	for _, query := range c.config.Queries {

		fields := c.get_fields_of_query(query)

		results, err := c.query(query.Query, fields)
		if err != nil {
			continue
		}

		if len(results) > 0 {
			for _, record := range results {
				for _, item := range query.Metrics {

					metric, err := c.c_win_wmi_parse_metric_key(string(item.Metric), record)
					if err != nil {
						logging.Errorf("CollectOnce: %v", err)
						continue
					}

					tags, err := c.c_win_wmi_parse_tags(item.Tags, record)
					if err != nil {
						logging.Errorf("CollectOnce: %v", err)
						continue
					}

					tags = newcore.AddTags.Copy().Merge(query.Tags).Merge(tags)

					if value, ok := record[item.Value_from]; ok {
						items = append(items, newcore.NewDP(c.prefix, metric, value, tags, "", "", ""))
					} else if item.Default != "" {
						items = append(items, newcore.NewDP(c.prefix, metric, item.Default, tags, "", "", ""))
					}
				}
			}
			continue
		}

		// No data: fall back to the configured defaults.
	default_metrics:
		for _, item := range query.Metrics {
			if item.Default == "" {
				continue
			}

			// no templating support if no data got
			if strings.Contains(string(item.Metric), "{{") {
				continue
			}
			for _, value := range item.Tags {
				if strings.Contains(value, "{{") {
					// Skip the whole metric. A plain continue here would only
					// move on to the next tag and the metric would still be
					// emitted with an unexpanded template in its tags.
					continue default_metrics
				}
			}

			tags := newcore.AddTags.Copy().Merge(query.Tags).Merge(item.Tags)
			items = append(items, newcore.NewDP(c.prefix, item.Metric.Clean(), item.Default, tags, "", "", ""))
		}
	} // for queries

	for _, dp := range items {
		logging.Tracef("wmi DataPoint -> %+v", dp)
	}
	return newcore.CollectResult{
		Collected: items,
		Next:      time.Now().Add(c.interval),
		Err:       nil,
	}
}
// Write converts a v0.9 style batch of points into v0.8.8 series and writes
// them with millisecond time precision.
//
// Each point becomes its own series with a single row:
//
//   - the series name is produced by newcore.FlatMetricKeyAndTags from the
//     point's measurement and tags, using c.flat_tpl
//   - the first column is "time" (the point's time in milliseconds), followed
//     by one column per field of the point
//
// For comparison, the two payload shapes:
//
//	v0.9.0-rc7 [ { Name: "a", Timestamp: "1", Fields: {"f1": "v1", "f2": "v2"}, Precision: "s" } ]
//
//	v0.8.8     [ { "name": "log_lines",
//	               "columns": ["time", "sequence_number", "line"],
//	               "points": [ [1400425947368, 1, "this line is first"],
//	                           [1400425947368, 2, "and this is second"] ] } ]
//
// The returned response is always nil. The error is the one from flattening a
// name (nothing is written in that case) or from the write itself.
func (c *InfluxdbClient_v088) Write(bp client090.BatchPoints) (*client090.Response, error) {
	var series []*client088.Series

	for _, p := range bp.Points {
		name, err := newcore.FlatMetricKeyAndTags(c.flat_tpl, p.Measurement, p.Tags)
		if err != nil {
			logging.Error("FlatMetricKeyAndTags Failed!", err)
			return nil, err
		}

		s := client088.Series{}
		s.Name = name

		// The row is built in step with the columns: time first, then fields.
		row := []interface{}{}
		s.Columns = append(s.Columns, "time")
		row = append(row, p.Time.UnixNano()/1000000)

		for key, value := range p.Fields {
			s.Columns = append(s.Columns, key)
			row = append(row, value)
		}

		s.Points = append(s.Points, row)

		logging.Tracef("influxdb --> %+v", s)

		series = append(series, &s)
	}

	err := c.client.WriteSeriesWithTimePrecision(series, "ms")
	if err == nil {
		logging.Trace("InfluxdbClient_v088.Write Done No Error")
	} else {
		logging.Errorf("InfluxdbClient_v088.Write.WriteSeriesWithTimePrecision Error: %v", err)
	}

	return nil, err
}
// consume (re)creates one partition consumer per partition of the configured
// topic and returns a channel on which the batches of consumed data points
// are delivered.
//
// It connects first when there is no master consumer yet, closes all
// previously created partition consumers, and loads the persisted offsets.
// Each partition is consumed from its stored offset (0 when nothing is stored
// or the stored value is negative) by its own goroutine, which
//
//   - collects parsed data points until max_batch_size is reached or a flush
//     tick arrives while points are pending, then hands the batch to the
//     returned channel,
//   - records its last seen offset in c.state on every flush tick; only the
//     goroutine of the first partition saves the state to disk,
//   - exits when the consumer's Messages or Errors channel is closed.
//
// An error is returned when connecting, listing the partitions or creating a
// partition consumer fails.
func (c *kafka_subscription) consume() (<-chan newcore.MultiDataPoint, error) {
	logging.Info("consume")

	var out = make(chan newcore.MultiDataPoint)
	var err error
	var consumers []sarama.PartitionConsumer
	if c.master == nil {
		err = c.connect()
		if err != nil {
			return nil, err
		}
	}

	for _, old := range c.consumers {
		old.Close()
	}
	c.consumers = nil

	partitions, err := c.master.Partitions(c.opts.Topic)
	if err != nil {
		return nil, fmt.Errorf("Cannot get partitions: %v", err)
	}
	logging.Infof("partitions: %v", partitions)

	err = c.state.Load()
	if err != nil {
		logging.Errorf("failed to load kafka state: %v", err)
	} else {
		logging.Infof("state: %+v", c.state.State())
	}

	flush_offset := true

	for _, part := range partitions {
		offset := int64(0)
		if c.state.Length() > 0 {
			offset = c.state.Offset(c.opts.Topic, part)
			if offset < 0 {
				offset = 0
			}
		}
		consumer, err := c.master.ConsumePartition(c.opts.Topic, part, offset)
		if err != nil {
			logging.Criticalf("Cannot consumer partition: %d, %v", part, err)
			return nil, fmt.Errorf("Cannot consumer partition: %d, %v", part, err)
		}
		logging.Infof("created consumer: %v", consumer)

		consumers = append(consumers, consumer)

		// offset is handed over as the starting value: a goroutine-local
		// offset starting at 0 would overwrite the stored offset with 0 on the
		// first flush tick whenever no message had been consumed yet.
		go func(flush_offset bool, topic string, part int32, offset int64, out chan newcore.MultiDataPoint, consumer sarama.PartitionConsumer) {
			logging.Infof("start goroutine to consume: part: %d,  %v", part, &consumer)

			var items newcore.MultiDataPoint
			var flush_tick = time.Tick(c.flush_interval)
			var _out chan newcore.MultiDataPoint
			var startConsume <-chan *sarama.ConsumerMessage
			var flushing bool

			for {
				if (flushing && len(items) > 0) || len(items) >= c.max_batch_size {
					_out = out         // enable output branch
					startConsume = nil // disable consuming branch
				} else if len(items) < c.max_batch_size {
					startConsume = consumer.Messages() // enable consuming branch
					_out = nil                         // disable output branch
				}

				select {
				case message, ok := <-startConsume:
					if !ok {
						// The consumer was closed. A receive from the closed
						// channel yields a nil message, which must not be
						// dereferenced.
						logging.Infof("consumer.Messages closed: part:%d", part)
						return
					}
					offset = message.Offset
					dp, err := newcore.NewDPFromJson(message.Value)
					if err != nil {
						// Drop what could not be parsed instead of forwarding
						// an invalid data point. The offset has already moved
						// past this message.
						logging.Tracef("[ERROR]failed to parse datapoint: %v", err)
						continue
					}
					logging.Tracef("kafka dp --> %v", dp)
					items = append(items, dp)
				case <-flush_tick:
					flushing = true
					// every part consumer will record offset with interval
					c.state.Update(topic, part, offset)

					// only 1 goroutine will save state to disk
					if flush_offset && c.state.Changed() == true {
						logging.Tracef("flusing to disk: part: %d, offset: %d", part, offset)
						c.state.Save()
					}
				case _out <- items:
					items = nil                        // clear items
					_out = nil                         // disable output branch
					startConsume = consumer.Messages() // enable consuming branch
					flushing = false                   // disable flusing
				case err, ok := <-consumer.Errors():
					if !ok {
						// Closed error channel: the consumer is gone. Leaving
						// avoids spinning on a channel that is always ready.
						logging.Infof("consumer.Errors closed: part:%d", part)
						return
					}
					logging.Infof("consumer.Errors: part:%d,  %v", part, err)
				}
			}
		}(flush_offset, c.opts.Topic, part, offset, out, consumer)

		flush_offset = false // only 1st goroutine is responsible for flushing state back into disk
	}
	c.consumers = consumers
	return out, nil
}
Beispiel #18
0
// loop periodically runs the collector, sends the collected batches on
// s.updates, and exits when a close request arrives on s.closing.
//
// CollectOnce is run asynchronously in its own goroutine so that a slow
// collector does not block delivery of batches that are already pending. A new
// collection is only scheduled while the collector is enabled, no collection
// is running and fewer than s.maxPending batches are waiting.
//
// After a collection the next run is scheduled at the time the collector asked
// for, but not sooner than minimal_next_interval from now. When the collector
// reported an error, its result is dropped and the next run is scheduled
// s.delay_on_error from now.
func (s sub) loop() {
	var (
		collectDone  chan CollectResult // if non-nil, CollectOnce is running
		pending      []MultiDataPoint
		next         time.Time
		err          error
		first        MultiDataPoint
		updates      chan MultiDataPoint
		startCollect <-chan time.Time
		collectDelay time.Duration
		now          = time.Now()
	)

	for {
		startCollect = nil
		first = nil
		updates = nil
		// Start from "no delay" on every pass. Without the reset, the delay
		// computed on an earlier pass was reused once next had already passed,
		// postponing an overdue collection even further.
		collectDelay = 0

		if now = time.Now(); next.After(now) {
			collectDelay = next.Sub(now)
		}

		if s.collector.IsEnabled() && collectDone == nil && len(pending) < s.maxPending {
			startCollect = time.After(collectDelay) // enable collect case
		}

		if len(pending) > 0 {
			first = pending[0]
			updates = s.updates // enable send case
		}

		select {
		case <-startCollect:
			collectDone = make(chan CollectResult, 1) // enable CollectOnce

			// TODO: add unittest for this.
			// collectOnce should be call async, otherwise, will block consuming result.
			// TODO: leaking param c
			go func() {
				logging.Tracef("running collector.CollectOnce: %s", s.collector.Name())
				res := s.collector.CollectOnce()
				collectDone <- res
				logging.Debugf("finished collector.CollectOnce: %s, count: %d", s.collector.Name(), len(res.Collected))
			}()
		case result := <-collectDone:
			collectDone = nil

			next, err = result.Next, result.Err
			if err != nil {
				// sub default delay if error happens while collecting data
				//TODO: add unittest for delay_on_error. delay_on_error vs collector.interval ???
				logging.Errorf("ERROR: collector(%s) error: %v", s.collector.Name(), err)
				next = time.Now().Add(s.delay_on_error)
				break // leaves the select; the failed result is not queued
			}

			//TODO: add unittest
			if next.Sub(time.Now()) < minimal_next_interval {
				next = time.Now().Add(minimal_next_interval)
			}

			if result.Collected != nil {
				// don't consuming nil collected result.
				pending = append(pending, result.Collected)
			}
		case errc := <-s.closing:
			// clean up collector resource.
			errc <- s.collector.Close()
			close(s.updates)
			return
		case updates <- first:
			pending = pending[1:]
		}
	}
}
// GetSystemInfo collects basic facts about the machine.
//
// Domain, processor counts and physical memory come from the WMI class
// Win32_ComputerSystem; OS caption, version and architecture come from
// Win32_OperatingSystem. Name is newcore.GetHostname(), and the IPv4 list
// comes from utils.Ipv4List.
//
// An error is returned when a WMI query fails, when a query does not return
// exactly one row, or when a numeric field cannot be parsed; info is then only
// partially filled. Failing to get the IPv4 list is logged but is not an
// error.
func GetSystemInfo() (SystemInfo, error) {
	var info = SystemInfo{}
	cs_info, err := wmi.QueryWmi("SELECT Name, Domain, NumberOfLogicalProcessors, NumberOfProcessors, TotalPhysicalMemory FROM Win32_ComputerSystem")
	logging.Tracef("err: %v, cs_info: %v", err, cs_info)
	if err != nil {
		return info, err
	}
	if len(cs_info) != 1 {
		return info, fmt.Errorf("invalid query result: %v", cs_info)
	}
	cs_info_m := cs_info[0]

	info.Name = newcore.GetHostname()

	if string_value, ok := cs_info_m["Domain"]; ok {
		info.Domain = string_value
	}

	if string_value, ok := cs_info_m["NumberOfLogicalProcessors"]; ok {
		int_value, err := strconv.Atoi(string_value)
		if err != nil {
			return info, err
		}
		info.NumberOfLogicalProcessors = int_value
	}

	if string_value, ok := cs_info_m["NumberOfProcessors"]; ok {
		int_value, err := strconv.Atoi(string_value)
		if err != nil {
			return info, err
		}
		info.NumberOfProcessors = int_value
	}

	if string_value, ok := cs_info_m["TotalPhysicalMemory"]; ok {
		// The value is a byte count. Parse it as 64 bit: on a 32 bit build
		// strconv.Atoi fails with a range error for 2 GiB of memory or more.
		// The count in KiB fits an int again.
		total_bytes, err := strconv.ParseInt(string_value, 10, 64)
		if err != nil {
			return info, err
		}
		info.TotalPhsycialMemoryKb = int(total_bytes / 1024)
	}

	os_info, err := wmi.QueryWmi("Select Caption, CSDVersion, OSArchitecture, Version From Win32_OperatingSystem")
	logging.Tracef("err: %v, os_info: %v", err, os_info)
	if err != nil {
		return info, err
	}
	if len(os_info) != 1 {
		return info, fmt.Errorf("invalid query result: %v", os_info)
	}
	os_info_m := os_info[0]

	if string_value, ok := os_info_m["Caption"]; ok {
		info.OS = string_value
	}

	csdversion := ""
	if string_value, ok := os_info_m["CSDVersion"]; ok {
		csdversion = string_value
	}

	if string_value, ok := os_info_m["Version"]; ok {
		version := string_value
		info.OSVersion = fmt.Sprintf("%s - %s", csdversion, version)
	}

	// Anything other than exactly "64-bit" is reported as 32.
	if string_value, ok := os_info_m["OSArchitecture"]; ok {
		if string_value == "64-bit" {
			info.Architecture = 64
		} else {
			info.Architecture = 32
		}
	}

	//FIXME: we may not be able to get ip list.
	ipv4list, err := utils.Ipv4List()
	if err != nil {
		logging.Errorf("failed to get ipv4 list: %v", err)
	} else {
		info.IPv4 = ipv4list
	}

	return info, nil
}
Beispiel #20
0
// main is the entry point of the hickwall command.
//
// With at least one command line argument the cli app dispatches to the
// matching command (service management, version, daemon, config). Without
// arguments the process runs as a service when it is not in an interactive
// session; in an interactive session the cli app is run with the bare
// argument list.
func main() {
	defer utils.Recover_and_log()
	logging.Debug("hickwall main ------- sub packages init process finished")

	app := cli.NewApp()
	app.Name = "hickwall"
	app.Usage = "collect metrics effortlessly."
	app.Version = fmt.Sprintf("%s - %s", Version, Build)

	commands := []cli.Command{
		//TODO: configuration test, reload
		// {
		// 	Name:      "config",
		// 	ShortName: "",
		// 	Usage:     "config",
		// 	Subcommands: []cli.Command{
		// 		{
		// 			Name:      "test",
		// 			ShortName: "",
		// 			Usage:     "test",
		// 			Action:    command.CmdConfigTest,
		// 		},
		// 		{
		// 			Name:      "reload",
		// 			ShortName: "",
		// 			Usage:     "reload",
		// 			Action:    command.CmdConfigReload,
		// 		},
		// 	},
		// },
		{
			Name:      "service",
			ShortName: "s",
			Usage:     "service",
			Subcommands: []cli.Command{
				{
					Name:      "status",
					ShortName: "s",
					Usage:     "status",
					Action:    command.CmdServiceStatus,
				},
				{
					Name:   "statuscode",
					Usage:  "statuscode(internal use only.)",
					Action: command.CmdServiceStatusCode,
				},
				{
					Name:      "install",
					ShortName: "i",
					Usage:     "install service",
					Action:    command.CmdServiceInstall,
				},
				{
					Name:      "remove",
					ShortName: "d",
					Usage:     "remove service",
					Action:    command.CmdServiceRemove,
				},
				{
					Name:      "start",
					ShortName: "g",
					Usage:     "start service.",
					Action:    command.CmdServiceStart,
				},
				{
					Name:      "stop",
					ShortName: "x",
					Usage:     "stop service.",
					Action:    command.CmdServiceStop,
				},
				{
					Name:      "restart",
					ShortName: "n",
					Usage:     "restart service",
					Action:    command.CmdServiceRestart,
				},
			},
		},
		{
			Name:      "version",
			ShortName: "v",
			Usage:     "show version info",
			Action: func(c *cli.Context) {
				fmt.Printf("%s version: %s\n", app.Name, app.Version)
			},
		},
		{
			Name:      "daemon",
			ShortName: "d",
			Usage:     "run as daemon",
			Action: func(c *cli.Context) {
				run(false, false)
			},
		},
		{
			Name:   "config",
			Usage:  "show config info",
			Action: command.CmdShowConfig,
		},
	}
	app.Commands = commands

	// Explicit command given: let the cli app handle it.
	if len(os.Args) >= 2 {
		logging.Debug("executing commands")
		app.Run(os.Args)
		return
	}

	isIntSess, err := servicelib.IsAnInteractiveSession()
	if err != nil {
		logging.Errorf("failed to determine if we are running in an interactive session or not: %v", err)
		return
	}

	if !isIntSess {
		logging.Debug("running as service")
		run(false, true)
		return
	}

	// Interactive session without arguments: print help.
	app.Run(os.Args)
}
Beispiel #21
0
// loop is the main goroutine of the fanout.
//
// It starts one consuming goroutine per entry of f.chan_pubs, then forwards
// every batch received from the subscription to each pending channel that
// holds fewer than maxPending batches; for a full channel a warning is logged
// and the batch is not delivered to that backend.
//
// On a close request the consuming goroutine of every backend is stopped and
// the backend is closed, waiting at most one second per backend. The request
// is then answered with nil. Finally the subscription is closed, again
// waiting at most one second.
//
// NOTE(review): when the subscription's update channel is closed, f.Close() is
// called from this goroutine. Confirm that Close does not wait for a request
// on f.closing to be served, since only this loop receives from it.
func (f *fanout) loop() {
	logging.Debug("fanout.loop() started")
	var (
		startConsuming <-chan MultiDataPoint
	)

	startConsuming = f.sub.Updates()

	for idx := range f.chan_pubs {
		closing := make(chan chan error)
		f.closing_list = append(f.closing_list, closing)
		go f.cosuming(idx, closing)
	}

main_loop:
	for {
		select {
		case md, opening := <-startConsuming:
			if !opening {
				f.Close()
				break main_loop
			}
			for idx, p := range f.pending {
				if len(p) < maxPending {
					p <- md
				} else {
					logging.Warnf("fanout.loop.main_loop: pending channel is jamming: bkname: %s\n", f.bks[idx].Name())
				}
			}
		case errc := <-f.closing:
			startConsuming = nil // stop consuming from sub

			for idx, bk := range f.bks {
				// closing consuming of each backend
				consuming_errc := make(chan error)
				f.closing_list[idx] <- consuming_errc
				<-consuming_errc

				// Close the backend in the background. The result channel is
				// buffered so that the goroutine can still finish after the
				// timeout below gave up waiting; with an unbuffered channel it
				// would block forever on the send. bk is copied so the
				// goroutine does not observe a later iteration's value.
				bk := bk
				closed := make(chan error, 1)
				go func() {
					closed <- bk.Close()
				}()

				select {
				case <-closed:
				case <-time.After(time.Second):
					logging.Errorf("backend(%s) is blocking the fanout closing process!\n", bk.Name())
				}
			}
			logging.Debug("fanout.loop() closed all consuming backends")
			errc <- nil
			break main_loop
		}
	}

	logging.Debug("fanout.loop() exit main_loop")

	// Buffered for the same reason as above: the closing goroutine must be
	// able to finish after a timeout.
	closing_sub := make(chan error, 1)
	go func() {
		closing_sub <- f.sub.Close()
	}()

	select {
	case <-closing_sub:
		logging.Debug("fanout.loop() returned")
	case <-time.After(time.Second):
		logging.Errorf("Subscription(%s) is blocking the fanout closing process! forced return with timeout\n", f.sub.Name())
	}
}
// WatchRuntimeConfFromEtcd starts a goroutine that fetches the runtime config
// stored at etcd_path and then watches it for changes. Every config, and every
// failure to get the initial one, is delivered on the returned channel.
//
// Until a first config has been fetched, the fetch is retried every second.
// If fetching fails while a config cached at CONF_CACHE_PATH could be loaded,
// that cached config is delivered once instead of the error. After a
// successful fetch the goroutine switches to watching; watch errors and
// configs that fail to parse are logged and the watch is restarted after a
// second.
//
// The goroutine ends after something is received from stop. Stop is only
// noticed between operations, not while a watch call is in progress. The
// returned channel is never closed.
//
// It panics when stop is nil.
func WatchRuntimeConfFromEtcd(etcd_machines []string, etcd_path string, stop chan error) <-chan RespConfig {
	logging.Info("WatchRuntimeConfFromEtcd Started")
	var (
		out            = make(chan RespConfig, 1)
		sleep_duration = time.Second
	)

	if stop == nil {
		panic("stop chan is nil")
	}

	go func() {
		var (
			the_first_time = true
			watching       = false
			chGetConf      <-chan time.Time
			chWaching      <-chan time.Time
		)

		client := etcd.NewClient(etcd_machines)

		// A missing or unreadable cache simply means there is no fallback.
		cached_conf, _ := LoadRuntimeConfFromPath(CONF_CACHE_PATH)

		watch_stop := make(chan bool, 0)

	loop:
		for {
			if !watching && chGetConf == nil {
				if the_first_time {
					chGetConf = time.After(0)
				} else {
					chGetConf = time.After(sleep_duration)
				}
			}

			if watching && chWaching == nil {
				chWaching = time.After(sleep_duration)
			}

			select {
			case <-stop:
				logging.Info("stop watching etcd.")
				// client.Watch is only ever called from this goroutine and has
				// returned by the time this case runs, so a blocking send on
				// the unbuffered watch_stop would wait forever. Signal only if
				// something is actually receiving.
				select {
				case watch_stop <- true:
				default:
				}
				logging.Info("watching etcd stopped.")
				break loop
			case <-chGetConf:
				the_first_time = false
				chGetConf = nil

				tmp_conf, err := getRuntimeConfFromEtcd(client, etcd_path)
				if err != nil {
					if cached_conf != nil {
						// if failed to get config from etcd but we have a cached copy. then use
						// this cached version first.
						out <- RespConfig{cached_conf, nil}
						cached_conf = nil // cached copy only need to emit once.
					} else {
						out <- RespConfig{nil, logging.SErrorf("failed to getRuntimeConfFromEtcd: %v", err)}
					}
				} else {
					out <- RespConfig{tmp_conf, nil}
					watching = true
				}
			case <-chWaching:
				chWaching = nil
				logging.Infof("watching etcd remote config: %s, %s", etcd_machines, etcd_path)
				resp, err := client.Watch(etcd_path, 0, false, nil, watch_stop)
				if err != nil {
					logging.Errorf("watching etcd error: %v", err)
					break // leaves the select; the watch is rescheduled above
				}

				r := bytes.NewReader([]byte(resp.Node.Value))
				tmp_conf, err := ReadRuntimeConfig(r)
				if err != nil {
					logging.Errorf("watching etcd. changes detected but faild to parse config: %v", err)
					break
				}

				logging.Infof("a new config is comming")
				out <- RespConfig{tmp_conf, nil}
			}
		}
	}()
	return out
}
Beispiel #23
0
// runAsPrimaryService is the service handler for the primary service.
//
// It reports StartPending and then Running (accepting Stop and Shutdown) on
// changes, starts hickwall, and then serves control requests from r until a
// Stop or Shutdown request arrives. At that point it reports StopPending and
// returns; hickwall.Stop runs via defer on the way out.
//
// args is not used. Both named results are never assigned, so they are always
// returned as their zero values, including when hickwall fails to start.
func runAsPrimaryService(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) {
	logging.Debug("runAsPrimaryService started")

	defer utils.Recover_and_log()

	const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown
	changes <- svc.Status{State: svc.StartPending}
	changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted}

	//http://localhost:6060/debug/pprof/
	// utils.HttpPprofServe(6060)

	//	after := time.After(time.Duration(8) * time.Minute)
	// f, _ := os.Create("d:\\cpu-" + strconv.Itoa(pid) + ".pprof")
	// pprof.StartCPUProfile(f)
	// defer pprof.StopCPUProfile()

	//	cfg := profile.Config{
	//		MemProfile:     true,
	//		ProfilePath:    "./pprofs/", // store profiles in current directory
	//		NoShutdownHook: true,        // do not hook SIGINT
	//	}
	//	p := profile.Start(&cfg)
	//
	//	defer p.Stop()

	// utils.StartCPUProfile()
	// defer utils.StopCPUProfile()

	// go func() {
	// 	for {
	// 		<-time.After(time.Second * time.Duration(15))
	// 		debug.FreeOSMemory()
	// 	}
	// }()

	if err := hickwall.Start(); err != nil {
		// NOTE(review): Critical is called with a format verb here; confirm
		// whether it is printf-style or whether a Criticalf variant should be used.
		logging.Critical("Failed To Start hickwall: %v", err)
		return
	}
	defer hickwall.Stop()

	logging.Debug("service event handling loop started ")
	// major loop for signal processing.
loop:
	for {
		c := <-r
		switch c.Cmd {
		case svc.Interrogate:
			changes <- c.CurrentStatus
			// testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
			time.Sleep(100 * time.Millisecond)
			changes <- c.CurrentStatus
		case svc.Stop, svc.Shutdown:
			break loop
		default:
			// Log the command code itself rather than the whole request struct.
			logging.Errorf("unexpected control request #%d", c.Cmd)
		}
	}
	changes <- svc.Status{State: svc.StopPending}
	logging.Debug("runAsPrimaryService stopped")
	return
}
Beispiel #24
0
// loop is the file backend's event loop. It runs until a request arrives on
// b.closing.
//
// While b.output is nil, consuming from b.updates is disabled and the loop
// tries to open the file: once asynchronously, and if that fails, again on
// every tick of a one-second ticker until b.openFile succeeds. While b.output
// is set, every data point received from b.updates is marshalled to JSON and
// written to b.output as a single line.
//
// On close the loop acknowledges with a nil error on the received channel,
// closes b.updates and returns.
func (b *fileBackend) loop() {
	var (
		startConsuming     <-chan newcore.MultiDataPoint
		try_open_file_once chan bool
		try_open_file_tick <-chan time.Time
		retry_ticker       *time.Ticker
		buf                = bytes.NewBuffer(make([]byte, 0, 1024))
	)
	startConsuming = b.updates
	logging.Debugf("filebackend.loop started")

	for {
		if b.output == nil && try_open_file_once == nil && try_open_file_tick == nil {
			startConsuming = nil // disable consuming

			// One-slot buffer: the goroutine below can always deliver its result
			// and exit, even if the loop has already returned via b.closing.
			result := make(chan bool, 1)
			try_open_file_once = result

			// try to open file the first time async.
			go func() {
				err := b.openFile()

				if b.output != nil && err == nil {
					result <- true
				} else {
					logging.Errorf("filebackend trying to open file but failed: %s", err)
					result <- false
				}
			}()
		}

		select {
		case md := <-startConsuming:
			if b.output != nil {
				for _, p := range md {
					res, err := p.MarshalJSON()
					if err != nil {
						// Skip points that cannot be encoded instead of writing
						// an empty line for them.
						logging.Errorf("filebackend failed to marshal data point: %v", err)
						continue
					}
					buf.Write(res)
					buf.WriteByte('\n')
					b.output.Write(buf.Bytes())
					buf.Reset()
				}
			}

		case opened := <-try_open_file_once:
			try_open_file_once = nil // disable this branch
			if !opened {
				// failed open it the first time,
				// then we try to open file with time interval, until opened successfully.
				logging.Error("open the first time failed, try to open with interval of 1s")
				retry_ticker = time.NewTicker(time.Second * 1)
				try_open_file_tick = retry_ticker.C
			} else {
				logging.Debugf("file opened the first time.")
				startConsuming = b.updates
			}
		case <-try_open_file_tick:
			// try to open with interval
			err := b.openFile()
			if b.output != nil && err == nil {
				// finally opened: release the ticker and resume consuming.
				retry_ticker.Stop()
				retry_ticker = nil
				try_open_file_tick = nil
				startConsuming = b.updates
			} else {
				logging.Errorf("filebackend trying to open file but failed: %s", err)
			}
		case errc := <-b.closing:
			logging.Debug("filebackend.loop closing")
			startConsuming = nil // stop consuming
			if retry_ticker != nil {
				retry_ticker.Stop()
			}
			errc <- nil
			close(b.updates)
			logging.Debug("filebackend.loop stopped")
			return
		}
	}
}
Beispiel #25
0
// loop is the influxdb backend's event loop. It runs until a request arrives
// on b.closing.
//
// While b.output is nil, consuming from b.updates is disabled and the loop
// tries to create the client via b.newInfluxdbClientFromConf: once
// asynchronously, and if that fails, again on every tick of a one-second
// ticker. While b.output is set, every MultiDataPoint received from b.updates
// is converted into one batch and written through b.output; write errors are
// logged and the batch is not retried.
//
// On close the loop acknowledges with a nil error on the received channel,
// closes b.updates and returns.
func (b *influxdbBackend) loop() {
	var (
		startConsuming         <-chan newcore.MultiDataPoint
		try_create_client_once chan bool
		try_create_client_tick <-chan time.Time
		retry_ticker           *time.Ticker
	)
	startConsuming = b.updates
	logging.Debug("influxdb backend loop started ")

	for {
		if b.output == nil && try_create_client_once == nil && try_create_client_tick == nil {
			startConsuming = nil // disable consuming

			// One-slot buffer: the goroutine below can always deliver its result
			// and exit, even if the loop has already returned via b.closing.
			result := make(chan bool, 1)
			try_create_client_once = result

			// try to create influxdb client the first time async.
			go func() {
				result <- b.newInfluxdbClientFromConf() == nil
			}()
		}

		//TODO: Flush_interval and Max_batch_size
		select {
		case md := <-startConsuming:
			if b.output != nil {
				points := make([]client.Point, 0, len(md))
				for _, p := range md {
					points = append(points, client.Point{
						Measurement: p.Metric.Clean(),
						Time:        p.Timestamp,
						Fields: map[string]interface{}{
							"value": utils.Convert(p.Value),
						},
						Tags: p.Tags, //TODO: Tags
					})
				}
				write := client.BatchPoints{
					Database:        b.conf.Database,
					RetentionPolicy: b.conf.RetentionPolicy,
					Points:          points,
				}

				//FIXME: connection timeout?
				resp, err := b.output.Write(write)
				if err != nil {
					logging.Errorf("failed to write into influxdb: %v, %+v", err, resp)
				}
			}
		case opened := <-try_create_client_once:
			try_create_client_once = nil // disable this branch
			if !opened {
				// the first attempt failed: keep retrying on an interval until
				// the client is created successfully.
				logging.Debug("open the first time failed, try to open with interval of 1s")
				retry_ticker = time.NewTicker(time.Second * 1)
				try_create_client_tick = retry_ticker.C
			} else {
				startConsuming = b.updates
			}
		case <-try_create_client_tick:
			// retry creating the client on each tick
			err := b.newInfluxdbClientFromConf()
			if b.output != nil && err == nil {
				// finally created: release the ticker and resume consuming.
				retry_ticker.Stop()
				retry_ticker = nil
				try_create_client_tick = nil
				startConsuming = b.updates
			} else {
				// NOTE(review): Critical is called with a format verb here; confirm
				// whether it is printf-style or whether a Criticalf variant should be used.
				logging.Critical("influxdb backend trying to open file but failed: %s", err)
			}
		case errc := <-b.closing:
			logging.Debug("influxdb backend .loop closing")
			startConsuming = nil // stop consuming
			if retry_ticker != nil {
				retry_ticker.Stop()
			}
			errc <- nil
			close(b.updates)
			logging.Debug("influxdb backend .loop stopped")
			return
		}
	}
}