func close_core() {
	logging.Debugf("closing the core")
	if the_core != nil {
		the_core.Close()
	}
	the_core = nil
	the_rconf = nil
	logging.Debugf("the_core now closed")
}
func (c *win_pdh_collector) CollectOnce() newcore.CollectResult {
	logging.Debug("win_pdh_collector.CollectOnce Started")

	var items newcore.MultiDataPoint
	for _, pd := range c.hPdh.CollectData() {
		if pd.Err == nil {
			query, ok := c.map_queries[pd.Query]
			if ok {
				logging.Tracef("query: %+v, \n %+v", query.Metric, query)
				items = append(items, newcore.NewDP(c.prefix, query.Metric.Clean(), pd.Value, query.Tags, "", "", ""))
			}
		} else if !strings.Contains(pd.Err.Error(), `\Process(hickwall)\Working Set - Private`) {
			logging.Errorf("win_pdh_collector ERROR: %v", pd.Err)
		}
	}
	logging.Debugf("win_pdh_collector.CollectOnce Finished. count: %d", len(items))

	return newcore.CollectResult{
		Collected: items,
		Next:      time.Now().Add(c.interval),
		Err:       nil,
	}
}
func MustNewKafkaBackend(name string, bconf *config.Transport_kafka) *kafkaBackend {
	logging.Infof("MustNewKafkaBackend: %+v", bconf)

	_kconf := sarama.NewConfig()
	_kconf.Net.DialTimeout = newcore.Interval(bconf.Dail_timeout).MustDuration(time.Second * 5)
	_kconf.Net.WriteTimeout = newcore.Interval(bconf.Write_timeout).MustDuration(time.Second * 1)
	_kconf.Net.ReadTimeout = time.Second * 10
	_kconf.Net.KeepAlive = newcore.Interval(bconf.Keepalive).MustDuration(time.Second * 30)

	if bconf.Ack_timeout_ms <= 0 {
		_kconf.Producer.Timeout = time.Millisecond * 100
	} else {
		_kconf.Producer.Timeout = time.Millisecond * time.Duration(bconf.Ack_timeout_ms)
	}

	if bconf.Flush_frequency_ms <= 0 {
		_kconf.Producer.Flush.Frequency = time.Millisecond * 100
	} else {
		_kconf.Producer.Flush.Frequency = time.Millisecond * time.Duration(bconf.Flush_frequency_ms)
	}

	switch strings.ToLower(bconf.Compression_codec) {
	case "none":
		_kconf.Producer.Compression = sarama.CompressionNone
	case "gzip":
		_kconf.Producer.Compression = sarama.CompressionGZIP // compress messages
	case "snappy":
		_kconf.Producer.Compression = sarama.CompressionSnappy // compress messages
	default:
		_kconf.Producer.Compression = sarama.CompressionNone
	}

	switch strings.ToLower(bconf.Required_acks) {
	case "no_response":
		_kconf.Producer.RequiredAcks = sarama.NoResponse
	case "wait_for_local":
		_kconf.Producer.RequiredAcks = sarama.WaitForLocal
	case "wait_for_all":
		_kconf.Producer.RequiredAcks = sarama.WaitForAll
	default:
		_kconf.Producer.RequiredAcks = sarama.NoResponse
	}

	logging.Debugf("kafka conf: %+v", _kconf)

	s := &kafkaBackend{
		name:    name,
		closing: make(chan chan error),
		updates: make(chan newcore.MultiDataPoint),
		conf:    bconf,  // backend config
		kconf:   _kconf, // sarama config
	}

	go s.loop()
	return s
}
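// Hypothetical usage sketch for MustNewKafkaBackend. The field names are the
// ones referenced above; the values and the backend name are illustrative
// only. A real config would also carry the broker list and the dial/write/
// keepalive intervals, which are omitted here because their types are not
// shown in this excerpt.
func example_new_kafka_backend() *kafkaBackend {
	bconf := &config.Transport_kafka{
		Topic_id:           "hickwall.metrics", // illustrative topic
		Compression_codec:  "snappy",           // "none" | "gzip" | "snappy"
		Required_acks:      "wait_for_local",   // "no_response" | "wait_for_local" | "wait_for_all"
		Ack_timeout_ms:     200,
		Flush_frequency_ms: 100,
	}
	return MustNewKafkaBackend("kafka-1", bconf)
}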
func UseConfigCreateCollectors(rconf *config.RuntimeConfig) ([]newcore.Collector, error) {
	var clrs []newcore.Collector
	var prefixs = make(map[string]bool)

	if rconf == nil {
		return nil, fmt.Errorf("rconf is nil")
	}

	for gid, group := range rconf.Groups {
		logging.Debugf("gid: %d, prefix: %s\n", gid, group.Prefix)

		if len(group.Prefix) <= 0 {
			return nil, fmt.Errorf("group (idx:%d) prefix is empty.", gid)
		}
		if _, exists := prefixs[group.Prefix]; exists {
			return nil, fmt.Errorf("duplicated group prefix: %s", group.Prefix)
		}
		prefixs[group.Prefix] = true

		for cid, conf := range group.Collector_ping {
			c := MustNewPingCollectors(gen_collector_name(gid, cid, "ping"), group.Prefix, conf)
			clrs = append(clrs, c...)
		}

		// NOTE: executing arbitrary commands is too risky.
		// for cid, conf := range group.Collector_cmd {
		// 	c := NewCmdCollector(gen_collector_name(gid, cid, "cmd"), group.Prefix, conf)
		// 	clrs = append(clrs, c)
		// }
	}

	logging.Debugf("rconf.Client.Metric_Enabled: %v, rconf.Client.Metric_Interval: %v",
		rconf.Client.Metric_Enabled, rconf.Client.Metric_Interval)

	if rconf.Client.Metric_Enabled {
		clrs = append(clrs, MustNewHickwallCollector(rconf.Client.Metric_Interval))
	}

	return clrs, nil
}
func UseConfigCreateCollectors(rconf *config.RuntimeConfig) ([]newcore.Collector, error) {
	var clrs []newcore.Collector
	var prefixs = make(map[string]bool)

	for gid, group := range rconf.Groups {
		logging.Infof("gid: %d, prefix: %s\n", gid, group.Prefix)

		if len(group.Prefix) <= 0 {
			return nil, fmt.Errorf("group (idx:%d) prefix is empty.", gid)
		}
		if _, exists := prefixs[group.Prefix]; exists {
			return nil, fmt.Errorf("duplicated group prefix: %s", group.Prefix)
		}
		prefixs[group.Prefix] = true

		for cid, conf := range group.Collector_ping {
			pings := MustNewPingCollectors(gen_collector_name(gid, cid, "ping"), group.Prefix, conf)
			clrs = append(clrs, pings...)
		}

		for cid, conf := range group.Collector_win_pdh {
			c := windows.MustNewWinPdhCollector(gen_collector_name(gid, cid, "pdh"), group.Prefix, conf)
			clrs = append(clrs, c)
		}

		for cid, conf := range group.Collector_win_wmi {
			c := windows.MustNewWinWmiCollector(gen_collector_name(gid, cid, "wmi"), group.Prefix, conf)
			clrs = append(clrs, c)
		}

		if group.Collector_win_sys != nil {
			cs := windows.MustNewWinSysCollectors(gen_collector_name(gid, 0, "win_sys"), group.Prefix, group.Collector_win_sys)
			clrs = append(clrs, cs...)
		}
	}

	logging.Debugf("rconf.Client.Metric_Enabled: %v, rconf.Client.Metric_Interval: %v",
		rconf.Client.Metric_Enabled, rconf.Client.Metric_Interval)

	if rconf.Client.Metric_Enabled {
		clrs = append(clrs, MustNewHickwallCollector(rconf.Client.Metric_Interval))
		clrs = append(clrs, windows.MustNewWinHickwallMemCollector(rconf.Client.Metric_Interval, rconf.Client.Tags))
	}

	return clrs, nil
}
func (c *kafka_subscription) connect() error {
	logging.Info("connect")

	sconfig := sarama.NewConfig()
	logging.Debugf("broker list: %v", c.opts.Broker_list)

	master, err := sarama.NewConsumer(c.opts.Broker_list, sconfig)
	if err != nil {
		return fmt.Errorf("Cannot connect to kafka: %v", err)
	}
	c.master = master
	return nil
}
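// A hedged sketch of how the consumer created by connect() might be used to
// read a single partition. The real consume loop of kafka_subscription is not
// part of this excerpt; the topic, partition and offset below are
// illustrative.
func consume_partition_sketch(master sarama.Consumer, topic string) error {
	pc, err := master.ConsumePartition(topic, 0, sarama.OffsetNewest)
	if err != nil {
		return fmt.Errorf("cannot consume partition: %v", err)
	}
	defer pc.Close()

	for msg := range pc.Messages() {
		logging.Tracef("consumed offset %d: %s", msg.Offset, msg.Value)
	}
	return nil
}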
func LoadCoreConfig() error {
	data, err := ioutil.ReadFile(CORE_CONF_FILEPATH)
	if err != nil {
		return fmt.Errorf("failed to read core config: %v", err)
	}

	CoreConf = CoreConfig{}
	// We can load the config with yaml directly, only because the core config
	// structure is very simple and flat.
	err = yaml.Unmarshal(data, &CoreConf)
	if err != nil {
		return fmt.Errorf("unable to unmarshal yaml: %v", err)
	}

	if CoreConf.Rss_limit_mb <= 0 {
		CoreConf.Rss_limit_mb = 50 // default rss limit
	}
	if CoreConf.Listen_port <= 0 {
		CoreConf.Listen_port = 3031
	}
	if CoreConf.Hostname != "" {
		newcore.SetHostname(CoreConf.Hostname)
	}

	logging.SetLevel(CoreConf.Log_level)

	if CoreConf.Enable_http_api && (CoreConf.Secure_api_read || CoreConf.Secure_api_write) {
		// the server public key must be loadable when the secure api is enabled.
		_, err := utils.LoadPublicKeyFromPath(CoreConf.Server_pub_key_path)
		if err != nil {
			logging.Criticalf("unable to load server public key while the secure api is enabled: %s", err)
		}
	}

	logging.Debugf("SHARED_DIR: %s\n", SHARED_DIR)
	logging.Debugf("LOG_DIR: %s\n", LOG_DIR)
	logging.Debugf("LOG_FILEPATH: %s\n", LOG_FILEPATH)
	logging.Debugf("CORE_CONF_FILEPATH: %s\n", CORE_CONF_FILEPATH)
	logging.Debugf("CONF_FILEPATH: %s\n", CONF_FILEPATH)
	logging.Debugf("REGISTRY_FILEPATH: %s\n", REGISTRY_FILEPATH)
	logging.Debugf("CONF_GROUP_DIRECTORY: %s\n", CONF_GROUP_DIRECTORY)
	logging.Debugf("CoreConfig: %+v\n", CoreConf)
	logging.Debug("CoreConfig Loaded ==============================================")

	core_conf_loaded = true
	return nil
}
func serveSysInfo(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
	// pretty.Println(r)
	logging.Debugf("api /sys_info called from %s, query: %s", r.RemoteAddr, r.URL.RawQuery)

	sys_info, err := GetSystemInfo()
	if err != nil {
		http.Error(w, fmt.Sprintf("failed to get sys info: %v", err), 500)
		return
	}

	dump, err := json.Marshal(sys_info)
	if err != nil {
		http.Error(w, fmt.Sprintf("cannot marshal response: %v", err), 500)
		return
	}

	fmt.Fprint(w, string(dump))
}
func (f *fanout) cosuming(idx int, closing chan chan error) {
	var (
		first   MultiDataPoint
		pub     chan<- MultiDataPoint
		pending <-chan MultiDataPoint
	)

	logging.Tracef("fanout.consuming: -started- idx: %d, closing: 0x%X\n", idx, closing)

	for {
		if pending == nil && pub == nil {
			pending = f.pending[idx] // enable read from pending chan
		}

		logging.Tracef("fanout.consuming -1- idx: %d, first: %x, pending: %x, pub: %x\n", idx, &first, pending, pub)

		select {
		case first = <-pending:
			logging.Tracef("fanout.consuming -2- idx: %d, first: %x, pending: %x, pub: %x\n", idx, &first, pending, pub)
			pending = nil          // disable read from pending chan
			pub = f.chan_pubs[idx] // enable send to pub chan
		case pub <- first:
			logging.Debugf("fanout.consuming -3- send data Finished: %s idx: %d, sent cnt: %d, pub: %x\n",
				f.bks[idx].Name(), idx, len(first), pub)
			pub = nil   // disable send to pub chan
			first = nil // clear first
		case errc := <-closing:
			logging.Tracef("fanout.consuming -4.Start- closing idx: %d, first: %x, pending: %x, pub: %x\n",
				idx, &first, pending, pub)
			pending = nil          // disable read from pending chan
			pub = nil              // disable send to pub chan
			f.chan_pubs[idx] = nil // nil pub channel
			f.pending[idx] = nil   // nil pending channel
			errc <- nil            // respond to the closing request
			logging.Tracef("fanout.consuming -4.End- closing idx: %d, first: %x, pending: %x, pub: %x\n",
				idx, &first, pending, pub)
			return
		}
	}
}
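// fanout.cosuming (and the backend loops below) lean on a Go property: a send
// on or a receive from a nil channel blocks forever, so inside a select a nil
// channel variable simply disables that case until it is re-armed. A minimal,
// self-contained sketch of the same enable/disable dance follows; the names
// and the int payload are illustrative only.
func nil_channel_select_sketch(in <-chan int, out chan<- int, closing chan chan error) {
	var (
		pending = in       // non-nil: the receive case is enabled
		pub     chan<- int // nil: the send case is disabled
		first   int
	)
	for {
		select {
		case first = <-pending:
			pending = nil // stop reading until the held value is forwarded
			pub = out     // enable the send case
		case pub <- first:
			pub = nil    // disable the send case again
			pending = in // resume reading
		case errc := <-closing:
			errc <- nil
			return
		}
	}
}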
func serveRegistryRevoke(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
	logging.Debugf("api /registry/revoke called from %s", r.RemoteAddr)

	// stop hickwall first
	err := Stop()
	if err != nil {
		http.Error(w, "Failed to Stop agent", 500)
		return
	}

	// delete the registration file
	err = os.Remove(config.REGISTRY_FILEPATH)
	if err != nil {
		http.Error(w, "Failed to Delete Registration File", 500)
		return
	}

	// restart hickwall so it goes through the registration process again.
	err = Start()
	if err != nil {
		http.Error(w, "Failed to Start agent", 500)
		return
	}
	logging.Info("agent started again.")
}
// create_running_core_hooked creates the topology of our running core:
//
//	(collector -> subscription)s -> merged subscription -> fanout -> publications(backends)
//
func create_running_core_hooked(rconf *config.RuntimeConfig, ishook bool) (newcore.PublicationSet, *newcore.HookBackend, error) {
	var hook *newcore.HookBackend
	var subs []newcore.Subscription
	var heartbeat_exists bool

	if rconf == nil {
		return nil, nil, fmt.Errorf("RuntimeConfig is nil")
	}

	// create backends --------------------------------------------------------------------
	bks, err := backends.UseConfigCreateBackends(rconf)
	if err != nil {
		return nil, nil, err
	}
	if len(bks) <= 0 {
		return nil, nil, fmt.Errorf("no backends configured. program will do nothing.")
	}
	for _, bk := range bks {
		logging.Debugf("loaded backend: %s", bk.Name())
		logging.Tracef("loaded backend: %s -> %+v", bk.Name(), bk)
	}

	// create collectors ------------------------------------------------------------------
	clrs, err := collectors.UseConfigCreateCollectors(rconf)
	if err != nil {
		return nil, nil, err
	}

	// make sure the heartbeat collector is always created.
	for _, c := range clrs {
		if c.Name() == "heartbeat" {
			heartbeat_exists = true
		}
	}
	if !heartbeat_exists {
		clrs = append(clrs, collectors.NewHeartBeat(rconf.Client.HeartBeat_Interval))
	}

	// create collector subscriptions.
	for _, c := range clrs {
		subs = append(subs, newcore.Subscribe(c, nil))
	}

	// create other subscriptions, such as the kafka consumer.
	_subs, err := collectors.UseConfigCreateSubscription(rconf)
	if err != nil {
		return nil, nil, err
	}
	subs = append(subs, _subs...)

	for _, s := range subs {
		logging.Debugf("loaded subscription: %s", s.Name())
		logging.Tracef("loaded subscription: %s -> %+v", s.Name(), s)
	}

	merged := newcore.Merge(subs...)

	if ishook {
		// The only reason to create a hooked running core is unit testing;
		// it's not a good idea otherwise.
		hook = newcore.NewHookBackend()
		bks = append(bks, hook)
		fset := newcore.FanOut(merged, bks...)
		return fset, hook, nil
	}

	fset := newcore.FanOut(merged, bks...)
	return fset, nil, nil
}
func (b *kafkaBackend) loop() {
	var (
		startConsuming    <-chan newcore.MultiDataPoint
		try_connect_first chan bool
		try_connect_tick  <-chan time.Time
	)
	startConsuming = b.updates
	logging.Info("kafkaBackend.loop started")

	for {
		if b.producer == nil && try_connect_first == nil && try_connect_tick == nil {
			startConsuming = nil // disable consuming
			try_connect_first = make(chan bool)
			logging.Debug("trying to connect to kafka the first time.")

			// try to connect to kafka the first time, asynchronously.
			go func() {
				err := b.connect()
				if b.producer != nil && err == nil {
					logging.Debugf("connect kafka first time OK: %v", b.producer)
					try_connect_first <- true
				} else {
					logging.Criticalf("connect to kafka failed: %s", err)
					try_connect_first <- false
				}
			}()
		}
		if startConsuming != nil {
			logging.Trace("kafkaBackend consuming started")
		}

		select {
		case md := <-startConsuming:
			for idx, p := range md {
				b.producer.Input() <- &sarama.ProducerMessage{
					Topic: b.conf.Topic_id,
					Key:   sarama.StringEncoder(p.Metric),
					Value: p,
				}
				_d, _ := p.Encode()
				logging.Tracef("kafka producer ---> %d, %s", idx, _d)
			}
			logging.Debugf("kafkaBackend consuming finished: count: %d", len(md))
		case connected := <-try_connect_first:
			try_connect_first = nil // disable this branch
			if !connected {
				// the first connection attempt failed, so retry on a 1s
				// ticker until we connect successfully.
				logging.Critical("connect first time failed, try to connect with interval of 1s")
				try_connect_tick = time.Tick(time.Second * 1)
			} else {
				logging.Debug("kafka connected the first time.")
				startConsuming = b.updates
			}
		case <-try_connect_tick:
			// try to connect on the ticker interval
			err := b.connect()
			if b.producer != nil && err == nil {
				// finally connected.
				try_connect_tick = nil
				startConsuming = b.updates
			} else {
				logging.Criticalf("kafka backend trying to connect but failed: %s", err)
			}
		case errc := <-b.closing:
			logging.Info("kafkaBackend.loop closing")
			startConsuming = nil // stop consuming
			errc <- nil
			close(b.updates)
			logging.Info("kafkaBackend.loop closed")
			return
		}
	}
}
func (b *fileBackend) loop() {
	var (
		startConsuming     <-chan newcore.MultiDataPoint
		try_open_file_once chan bool
		try_open_file_tick <-chan time.Time
		buf                = bytes.NewBuffer(make([]byte, 0, 1024))
	)
	startConsuming = b.updates
	logging.Debugf("filebackend.loop started")

	for {
		if b.output == nil && try_open_file_once == nil && try_open_file_tick == nil {
			startConsuming = nil // disable consuming
			try_open_file_once = make(chan bool)

			// try to open the file the first time, asynchronously.
			go func() {
				err := b.openFile()
				if b.output != nil && err == nil {
					// log.Println("openFile first time OK", b.output)
					try_open_file_once <- true
				} else {
					logging.Errorf("filebackend trying to open file but failed: %s", err)
					try_open_file_once <- false
				}
			}()
		}

		select {
		case md := <-startConsuming:
			for _, p := range md {
				if b.output != nil {
					res, _ := p.MarshalJSON()
					buf.Write(res)
					buf.Write([]byte("\n"))
					b.output.Write(buf.Bytes())
					buf.Reset()
				}
			}
		case opened := <-try_open_file_once:
			try_open_file_once = nil // disable this branch
			if !opened {
				// the first open attempt failed, so retry on a 1s ticker
				// until the file is opened successfully.
				logging.Error("open the first time failed, try to open with interval of 1s")
				try_open_file_tick = time.Tick(time.Second * 1)
			} else {
				logging.Debugf("file opened the first time.")
				startConsuming = b.updates
			}
		case <-try_open_file_tick:
			// try to open on the ticker interval
			err := b.openFile()
			if b.output != nil && err == nil {
				// finally opened.
				try_open_file_tick = nil
				startConsuming = b.updates
			} else {
				logging.Errorf("filebackend trying to open file but failed: %s", err)
			}
		case errc := <-b.closing:
			logging.Debug("filebackend.loop closing")
			startConsuming = nil // stop consuming
			errc <- nil
			close(b.updates)
			logging.Debug("filebackend.loop stopped")
			return
		}
	}
}
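// kafkaBackend.loop and fileBackend.loop share the same lazy-connect shape:
// consumption stays disabled (the channel is nil) until a first asynchronous
// attempt succeeds, otherwise a 1-second ticker keeps retrying. A stripped
// down, self-contained sketch of just that pattern; connect, updates and the
// int payload are illustrative stand-ins.
func retry_connect_sketch(connect func() error, updates <-chan int, closing chan chan error) {
	var (
		consuming <-chan int       // nil while not connected
		firstTry  chan bool        // result of the one-off first attempt
		retryTick <-chan time.Time // retry ticker after a failed first attempt
	)
	for {
		if consuming == nil && firstTry == nil && retryTick == nil {
			firstTry = make(chan bool)
			go func() {
				err := connect()
				firstTry <- err == nil
			}()
		}
		select {
		case v := <-consuming:
			_ = v // a real backend would publish the batch here
		case ok := <-firstTry:
			firstTry = nil
			if ok {
				consuming = updates // connected: start consuming
			} else {
				retryTick = time.Tick(time.Second) // keep retrying
			}
		case <-retryTick:
			if connect() == nil {
				retryTick = nil
				consuming = updates
			}
		case errc := <-closing:
			errc <- nil
			return
		}
	}
}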
// loop periodically fetches Items, sends them on s.updates, and exits when
// Close is called. CollectOnce runs asynchronously.
func (s sub) loop() {
	var (
		collectDone  chan CollectResult // if non-nil, CollectOnce is running
		pending      []MultiDataPoint
		next         time.Time
		err          error
		first        MultiDataPoint
		updates      chan MultiDataPoint
		startCollect <-chan time.Time
		collectDelay time.Duration
		now          = time.Now()
	)

	for {
		startCollect = nil
		first = nil
		updates = nil

		if now = time.Now(); next.After(now) {
			collectDelay = next.Sub(now)
		}

		if s.collector.IsEnabled() && collectDone == nil && len(pending) < s.maxPending {
			startCollect = time.After(collectDelay) // enable collect case
		}

		if len(pending) > 0 {
			first = pending[0]
			updates = s.updates // enable send case
		}

		select {
		case <-startCollect:
			collectDone = make(chan CollectResult, 1) // enable CollectOnce

			// TODO: add a unit test for this.
			// CollectOnce must run asynchronously; otherwise it would block
			// consuming the results.
			go func() {
				// defer func() {
				// 	if r := recover(); r != nil {
				// 		logging.Criticalf("---------- Recovered -------%v", r)
				// 	}
				// }()
				logging.Tracef("running collector.CollectOnce: %s", s.collector.Name())
				res := s.collector.CollectOnce()
				collectDone <- res
				logging.Debugf("finished collector.CollectOnce: %s, count: %d", s.collector.Name(), len(res.Collected))
			}()
		case result := <-collectDone:
			// logging.Info("result := <- collectDone", result)
			collectDone = nil
			next, err = result.Next, result.Err
			if err != nil {
				// apply the default delay if an error happens while collecting data.
				// TODO: add a unit test for delay_on_error. delay_on_error vs collector.interval?
				logging.Errorf("ERROR: collector(%s) error: %v", s.collector.Name(), err)
				next = time.Now().Add(s.delay_on_error)
				break
			}
			// TODO: add a unit test.
			if next.Sub(time.Now()) < minimal_next_interval {
				next = time.Now().Add(minimal_next_interval)
			}
			if result.Collected != nil {
				// don't queue a nil collected result.
				pending = append(pending, result.Collected)
			}
		case errc := <-s.closing:
			// clean up collector resources.
			errc <- s.collector.Close()
			close(s.updates)
			return
		case updates <- first:
			pending = pending[1:]
		}
	}
}
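// A minimal sketch of a collector that satisfies the scheduling contract used
// by sub.loop above: CollectOnce returns the collected points together with
// the time of the next run. The interface is assumed from the methods called
// in these snippets (Name, IsEnabled, Close, CollectOnce); the real Collector
// interface may carry more.
type dummy_collector struct {
	name     string
	interval time.Duration
}

func (d *dummy_collector) Name() string    { return d.name }
func (d *dummy_collector) IsEnabled() bool { return true }
func (d *dummy_collector) Close() error    { return nil }

func (d *dummy_collector) CollectOnce() CollectResult {
	return CollectResult{
		Collected: nil, // a real collector would append its data points here
		Next:      time.Now().Add(d.interval),
		Err:       nil,
	}
}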