func MustNewWinPdhCollector(name, prefix string, opts config.Config_win_pdh_collector) *win_pdh_collector {
    c := &win_pdh_collector{
        name:        name,
        enabled:     true,
        prefix:      prefix,
        interval:    opts.Interval.MustDuration(time.Second),
        config:      opts,
        hPdh:        pdh.NewPdhCollector(),
        map_queries: make(map[string]config.Config_win_pdh_query),
    }

    for _, q := range opts.Queries {
        if q.Metric == "" {
            logging.Errorf("Error Pdh Collector metric is empty: %# v", pretty.Formatter(q))
            continue
        }

        c.hPdh.AddEnglishCounter(q.Query)

        if q.Tags == nil {
            q.Tags = newcore.AddTags.Copy()
        }

        if opts.Query_to_tag || q.Query_to_tag {
            q.Tags["query"] = q.Query
        }

        c.map_queries[q.Query] = q
    }
    logging.Tracef("MustNewWinPdhCollector:opts.Queries: %# v", pretty.Formatter(opts.Queries))
    logging.Tracef("MustNewWinPdhCollector c.map_queries: %# v", pretty.Formatter(c.map_queries))
    return c
}
func protect(h httprouter.Handle, expire time.Duration, trigger string) httprouter.Handle {
    return func(w http.ResponseWriter, r *http.Request, ps httprouter.Params) {
        secure := false
        if trigger == "read" && config.CoreConf.Secure_api_read {
            secure = true
        } else if trigger == "write" && config.CoreConf.Secure_api_write {
            secure = true
        }

        logging.Infof("trigger: %s, secure: %v, write: %v, read: %v\n",
            trigger, secure, config.CoreConf.Secure_api_write, config.CoreConf.Secure_api_read)

        if secure {
            hostname := r.URL.Query().Get("hostname")
            if strings.ToLower(hostname) != newcore.GetHostname() {
                logging.Errorf("hostname mismatch: %v", hostname)
                http.Error(w, "hostname mismatch", 500)
                return
            }

            time_str := r.URL.Query().Get("time")
            tm, err := utils.UTCTimeFromUnixStr(time_str)
            if err != nil {
                logging.Errorf("invalid time: %v", time_str)
                http.Error(w, "Invalid Time", 500)
                return
            }

            if time.Now().Sub(tm) > expire {
                // expired request
                logging.Errorf("expired request: %v", time.Now().Sub(tm))
                http.Error(w, "expired request", 500)
                return
            }

            // We need to verify the request: it should carry a signature of
            // this agent's hostname in the HICKWALL_ADMIN_SIGN header.
            load_unsigner()

            signed_str := r.Header.Get("HICKWALL_ADMIN_SIGN")
            signed, err := base64.StdEncoding.DecodeString(signed_str)
            if err != nil {
                logging.Error("cannot decode sign")
                http.Error(w, "cannot decode sign", 500)
                return
            }

            toSign := fmt.Sprintf("%s%s", hostname, time_str)
            logging.Trace("unsign started")
            err = unsigner.Unsign([]byte(toSign), signed)
            logging.Trace("unsign finished")
            if err != nil {
                logging.Errorf("-> invalid signature: %v <-", string(signed))
                http.Error(w, "invalid signature", 500)
                return
            }
        }

        h(w, r, ps)
    }
}
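// For reference, a caller of an endpoint wrapped by protect() has to supply the
// agent hostname and a unix timestamp as query parameters, plus a base64-encoded
// signature of hostname+time in the HICKWALL_ADMIN_SIGN header. The sketch below
// shows how such a request could be built; signRequest and the signer variable
// are hypothetical stand-ins for whatever private-key signer corresponds to
// unsigner.Unsign above, which is not shown in this file.
func signRequest(req *http.Request, hostname string) error {
    time_str := fmt.Sprintf("%d", time.Now().Unix())

    q := req.URL.Query()
    q.Set("hostname", hostname)
    q.Set("time", time_str)
    req.URL.RawQuery = q.Encode()

    // Sign exactly the same string that protect() verifies: hostname + time_str.
    signed, err := signer.Sign([]byte(hostname + time_str)) // hypothetical signer
    if err != nil {
        return err
    }
    req.Header.Set("HICKWALL_ADMIN_SIGN", base64.StdEncoding.EncodeToString(signed))
    return nil
}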
func (c ping_collector) CollectOnce() newcore.CollectResult {
    var (
        md       newcore.MultiDataPoint
        d        stats.Stats
        p        = fastping.NewPinger()
        rtt_chan = make(chan float64)
    )

    ip, err := net.ResolveIPAddr("ip4:icmp", c.config.Target)
    if err != nil {
        logging.Errorf("ping_collector: DNS resolve error: %v", err)
        return newcore.CollectResult{
            Collected: nil,
            Next:      time.Now().Add(c.interval),
            Err:       fmt.Errorf("ping_collector: DNS resolve error: %v", err),
        }
    }

    p.MaxRTT = c.timeout
    p.AddIPAddr(ip)
    p.OnRecv = func(addr *net.IPAddr, rtt time.Duration) {
        // report rtt in milliseconds
        rtt_chan <- float64(rtt.Nanoseconds()) / 1000 / 1000
    }

    go func() {
        for i := 0; i < c.config.Packets; i++ {
            err = p.Run()
            if err != nil {
                logging.Errorf("ping_collector run err: %v", err)
            }
        }
        close(rtt_chan)
    }()

    for rtt := range rtt_chan {
        d.Update(rtt)
    }

    md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_min"), d.Min(), c.tags, "", "", ""))
    md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_max"), d.Max(), c.tags, "", "", ""))
    md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_avg"), d.Mean(), c.tags, "", "", ""))

    std := d.SampleStandardDeviation()
    if math.IsNaN(std) {
        std = 0
    }
    md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "time_mdev"), std, c.tags, "", "", ""))
    md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "ip"), ip.IP.String(), c.tags, "", "", ""))

    // use floating-point division; integer division would truncate the loss ratio.
    lost_pct := float64(c.config.Packets-d.Count()) / float64(c.config.Packets) * 100
    md = append(md, newcore.NewDP(c.prefix, fmt.Sprintf("%s.%s", c.config.Metric, "lost_pct"), lost_pct, c.tags, "", "", ""))

    return newcore.CollectResult{
        Collected: md,
        Next:      time.Now().Add(c.interval),
        Err:       nil,
    }
}
func QueryWmiFields(query string, fields []string) ([]map[string]string, error) {
    if len(fields) == 1 && fields[0] == "*" {
        logging.Errorf("`select * ` not supported, need to address fields explicitly.")
        return nil, fmt.Errorf("`select * ` not supported, need to address fields explicitly.")
    }

    resultRaw, err := oleutil.CallMethod(wmi_service, "ExecQuery", query)
    if err != nil {
        logging.Error("ExecQuery Failed: ", err)
        return nil, fmt.Errorf("ExecQuery Failed: %v", err)
    }
    result := resultRaw.ToIDispatch()
    defer result.Release()

    countVar, err := oleutil.GetProperty(result, "Count")
    if err != nil {
        logging.Errorf("Get result count Failed: %v", err)
        return nil, fmt.Errorf("Get result count Failed: %v", err)
    }
    count := int(countVar.Val)

    resultMap := []map[string]string{}

    for i := 0; i < count; i++ {
        itemMap := make(map[string]string)

        itemRaw, err := oleutil.CallMethod(result, "ItemIndex", i)
        if err != nil {
            return nil, fmt.Errorf("ItemIndex Failed: %v", err)
        }

        item := itemRaw.ToIDispatch()
        defer item.Release()

        for _, field := range fields {
            asString, err := oleutil.GetProperty(item, field)
            if err == nil {
                itemMap[field] = fmt.Sprintf("%v", asString.Value())
            } else {
                logging.Errorf("cannot read field %s: %v", field, err)
            }
        }

        resultMap = append(resultMap, itemMap)
        logging.Tracef("wmi query result: %+v", itemMap)
    }
    logging.Tracef("wmi query result count: %d", len(resultMap))
    return resultMap, nil
}
func (c *win_pdh_collector) CollectOnce() newcore.CollectResult {
    logging.Debug("win_pdh_collector.CollectOnce Started")

    var items newcore.MultiDataPoint

    for _, pd := range c.hPdh.CollectData() {
        if pd.Err == nil {
            query, ok := c.map_queries[pd.Query]
            if ok {
                logging.Tracef("query: %+v, \n %+v", query.Metric, query)
                items = append(items, newcore.NewDP(c.prefix, query.Metric.Clean(), pd.Value, query.Tags, "", "", ""))
            }
        } else {
            if !strings.Contains(pd.Err.Error(), `\Process(hickwall)\Working Set - Private`) {
                logging.Errorf("win_pdh_collector ERROR: %v", pd.Err)
            }
        }
    }

    logging.Debugf("win_pdh_collector.CollectOnce Finished. count: %d", len(items))
    return newcore.CollectResult{
        Collected: items,
        Next:      time.Now().Add(c.interval),
        Err:       nil,
    }
}
func new_core_from_file() (*config.RuntimeConfig, error) {
    logging.Debug("NewCoreFromFile")

    rconf, err := config.LoadRuntimeConfigFromFiles()
    if err != nil {
        logging.Errorf("NewCoreFromFile: Failed to load RuntimeConfig from files: %v", err)
        return rconf, err
    }
    logging.Debug("NewCoreFromFile: load config from file finished.")

    err = UpdateRunningCore(rconf)
    if err != nil {
        logging.Errorf("NewCoreFromFile: Failed to create running core: %v", err)
        return rconf, err
    }
    logging.Debug("NewCoreFromFile finished without error")
    return rconf, nil
}
func new_hashed_reg_response_from_json(dump []byte) (*hashed_registry_response, error) {
    var hr hashed_registry_response
    err := json.Unmarshal(dump, &hr)
    if err != nil {
        logging.Errorf("dump: %s", string(dump))
        return nil, logging.SErrorf("failed to unmarshal HashedRegistryResponse: %v", err)
    }
    return &hr, nil
}
func runService(isDebug bool) {
    defer utils.Recover_and_log()
    logging.Debug("runService")

    err := svc.Run(command.PrimaryService.Name(), &serviceHandler{})
    if err != nil {
        logging.Errorf("runService: failed: %v\r\n", err)
    }
}
func new_core_from_registry(stop chan error) {
    logging.Debug("new_core_from_registry started")
    if stop == nil {
        logging.Panic("stop chan is nil")
    }

    if len(config.CoreConf.Registry_urls) <= 0 {
        logging.Panic("RegistryURLs is empty!!")
    }

    resp, err := load_reg_response()
    if err != nil {
        logging.Errorf("we don't have a valid registry info cached.")

        next := time.After(0)

        // round robin registry machines
        r := ring.New(len(config.CoreConf.Registry_urls))
        for i := 0; i < r.Len(); i++ {
            r.Value = config.CoreConf.Registry_urls[i]
            r = r.Next()
        }

    registry_loop:
        for {
            select {
            case <-next:
                r = r.Next()
                resp, err = do_registry(r.Value.(string))
                if err == nil {
                    logging.Info("registered: we got a valid registry response.")
                    break registry_loop
                } else {
                    logging.Errorf("failed to register: %v", err)
                }
                next = time.After(newcore.Interval(config.CoreConf.Registry_delay_on_error).MustDuration(time.Minute))
            }
        }
    }

    // TODO: handle error here, e.g. etcd_machines are not working.
    // Here we have a valid registry response: fetch the config and start to run.
    new_core_from_etcd(resp.EtcdMachines, resp.EtcdConfigPath, stop)
}
func (b *kafkaBackend) connect() error {
    producer, err := sarama.NewAsyncProducer(b.conf.Broker_list, b.kconf)
    if err != nil {
        logging.Errorf("failed to start producer: %v, %v", err, b.conf.Broker_list)
        return fmt.Errorf("failed to start producer: %v, %v", err, b.conf.Broker_list)
    }

    go func() {
        logging.Debug("consuming from producer.Errors()")
        for err := range producer.Errors() {
            logging.Errorf("producer error: %v", err)
        }
        logging.Debug("producer.Errors() closed")
    }()

    logging.Infof("created new producer: %v", b.conf.Broker_list)

    // save producer reference
    b.producer = producer
    return nil
}
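// The sketch below shows, roughly, how the producer saved by connect() could be
// fed from a MultiDataPoint batch. It is an illustration only: the backend's real
// publishing path is not shown here, and the topic parameter and per-point JSON
// encoding are assumptions, not fields taken from this file.
func publishBatchSketch(producer sarama.AsyncProducer, topic string, md newcore.MultiDataPoint) {
    for _, p := range md {
        body, err := p.MarshalJSON()
        if err != nil {
            logging.Errorf("failed to marshal datapoint: %v", err)
            continue
        }
        // AsyncProducer accepts messages on Input(); delivery errors surface on
        // producer.Errors(), which connect() already drains in a goroutine.
        producer.Input() <- &sarama.ProducerMessage{
            Topic: topic,
            Value: sarama.ByteEncoder(body),
        }
    }
}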
func UseConfigCreateSubscription(rconf *config.RuntimeConfig) ([]newcore.Subscription, error) {
    var subs []newcore.Subscription

    kafka_sub_names := make(map[string]bool)

    for _, conf := range rconf.Client.Subscribe_kafka {
        if conf != nil {
            if _, ok := kafka_sub_names[conf.Name]; ok {
                logging.Errorf("duplicated kafka subscription names are not allowed: %s", conf.Name)
                return nil, fmt.Errorf("duplicated kafka subscription names are not allowed: %s", conf.Name)
            }
            kafka_sub_names[conf.Name] = true

            sub, err := NewKafkaSubscription(*conf)
            if err != nil {
                logging.Errorf("failed to create kafka subscription: %v", err)
                return nil, fmt.Errorf("failed to create kafka subscription: %v", err)
            }
            subs = append(subs, sub)
        }
    }
    return subs, nil
}
func (c *win_wmi_collector) query(query string, fields []string) ([]map[string]string, error) {
    if c.service == nil {
        logging.Error("win_wmi_collector c.service is nil")
        return nil, fmt.Errorf("win_wmi_collector c.service is nil")
    }

    resultRaw, err := oleutil.CallMethod(c.service, "ExecQuery", query)
    if err != nil {
        logging.Error("ExecQuery Failed: ", err)
        return nil, fmt.Errorf("ExecQuery Failed: %v", err)
    }
    result := resultRaw.ToIDispatch()
    defer result.Release()

    countVar, err := oleutil.GetProperty(result, "Count")
    if err != nil {
        logging.Error("Get result count Failed: ", err)
        return nil, fmt.Errorf("Get result count Failed: %v", err)
    }
    count := int(countVar.Val)

    resultMap := []map[string]string{}

    for i := 0; i < count; i++ {
        itemMap := make(map[string]string)

        itemRaw, err := oleutil.CallMethod(result, "ItemIndex", i)
        if err != nil {
            return nil, fmt.Errorf("ItemIndex Failed: %v", err)
        }

        item := itemRaw.ToIDispatch()
        defer item.Release()

        for _, field := range fields {
            asString, err := oleutil.GetProperty(item, field)
            if err == nil {
                itemMap[field] = fmt.Sprintf("%v", asString.Value())
            } else {
                logging.Errorf("cannot find field in SWbemObject: %v", err)
            }
        }

        resultMap = append(resultMap, itemMap)
        logging.Tracef("wmi query result: %+v", itemMap)
    }
    logging.Tracef("wmi query result count: %d", len(resultMap))
    return resultMap, nil
}
func (c *kafka_subscription) loop() {
    logging.Info("loop started")

    var items newcore.MultiDataPoint
    // var tick = time.Tick(c.interval)
    var output chan newcore.MultiDataPoint
    var try_open_consumer_tick <-chan time.Time
    // var tick_reconsume chan time.Time
    var input <-chan newcore.MultiDataPoint
    // var err error

    for {
        if input == nil && try_open_consumer_tick == nil {
            logging.Info("input == nil, try_open_consumer_tick == nil")
            try_open_consumer_tick = time.After(0)
        }

        select {
        case <-try_open_consumer_tick:
            logging.Info("try_open_consumer_tick")
            _input, err := c.consume()
            if _input != nil && err == nil {
                try_open_consumer_tick = nil
            } else {
                logging.Errorf("failed to create consumers: %v", err)
                try_open_consumer_tick = time.After(time.Second)
            }
            input = _input
        case md := <-input:
            items = md[:]
            output = c.updates
        case output <- items:
            items = nil
            output = nil
        case errc := <-c.closing:
            // clean up collector resource.
            output = nil
            close(c.updates)
            errc <- nil
            return
        }
    }
}
// UpdateRunningCore updates the running core with the provided RuntimeConfig.
func UpdateRunningCore(rconf *config.RuntimeConfig) error {
    logging.Debug("UpdateRunningCore")

    if rconf == nil {
        return fmt.Errorf("rconf is nil")
    }

    core, _, err := create_running_core_hooked(rconf, false)

    // http pprof
    // https://github.com/golang/go/issues/4674
    // We can only start http pprof; we cannot stop it afterwards.
    if !pprof_serving && rconf.Client.Pprof_enabled {
        if rconf.Client.Pprof_listen == "" {
            rconf.Client.Pprof_listen = ":6060"
        }
        go func() {
            pprof_serving = true
            logging.Infof("http pprof is listening and served on: %v", rconf.Client.Pprof_listen)
            err := http.ListenAndServe(rconf.Client.Pprof_listen, nil)
            logging.Errorf("pprof ListenAndServe Error: %v", err)
            pprof_serving = false
        }()
    }

    // If the registry gives us an empty config, the agent should reflect that change as well.
    close_core()

    if err != nil {
        return err
    }

    // close_core()

    the_core = core
    the_rconf = rconf
    logging.Debug("UpdateRunningCore Finished")
    return nil
}
func (c *win_wmi_collector) CollectOnce() (res newcore.CollectResult) {
    var items newcore.MultiDataPoint

    for _, query := range c.config.Queries {
        fields := c.get_fields_of_query(query)

        results, err := c.query(query.Query, fields)
        if err != nil {
            continue
        }

        if len(results) > 0 {
            for _, record := range results {
                for _, item := range query.Metrics {
                    metric, err := c.c_win_wmi_parse_metric_key(string(item.Metric), record)
                    if err != nil {
                        logging.Errorf("CollectOnce: %v", err)
                        continue
                    }
                    tags, err := c.c_win_wmi_parse_tags(item.Tags, record)
                    if err != nil {
                        logging.Errorf("CollectOnce: %v", err)
                        continue
                    }
                    tags = newcore.AddTags.Copy().Merge(query.Tags).Merge(tags)

                    if value, ok := record[item.Value_from]; ok {
                        items = append(items, newcore.NewDP(c.prefix, metric, value, tags, "", "", ""))
                    } else if item.Default != "" {
                        items = append(items, newcore.NewDP(c.prefix, metric, item.Default, tags, "", "", ""))
                    }
                }
            }
        } else {
            for _, item := range query.Metrics {
                if item.Default == "" {
                    continue
                }

                // No templating support when the query returned no data:
                // skip metrics whose key or tags still contain a template placeholder.
                if strings.Contains(string(item.Metric), "{{") {
                    continue
                }
                templated_tags := false
                for _, value := range item.Tags {
                    if strings.Contains(value, "{{") {
                        templated_tags = true
                        break
                    }
                }
                if templated_tags {
                    continue
                }

                tags := newcore.AddTags.Copy().Merge(query.Tags).Merge(item.Tags)
                items = append(items, newcore.NewDP(c.prefix, item.Metric.Clean(), item.Default, tags, "", "", ""))
            }
        }
    } // for queries

    for _, dp := range items {
        logging.Tracef("wmi DataPoint -> %+v", dp)
    }

    return newcore.CollectResult{
        Collected: items,
        Next:      time.Now().Add(c.interval),
        Err:       nil,
    }
}
func (c *InfluxdbClient_v088) Write(bp client090.BatchPoints) (*client090.Response, error) {
    // logging.Debug("InfluxdbClient_v088.Write")

    // v0.9.0-rc7 [
    //   {
    //     Name: "a",
    //     Timestamp: "1",
    //     Fields: {"f1": "v1", "f2": "v2"},
    //     Precision: "s"
    //   }
    // ]
    //
    // v0.8.8 [
    //   {
    //     "name": "log_lines",
    //     "columns": ["time", "sequence_number", "line"],
    //     "points": [
    //       [1400425947368, 1, "this line is first"],
    //       [1400425947368, 2, "and this is second"]
    //     ]
    //   }
    // ]

    var series []*client088.Series

    for _, p := range bp.Points {
        s := client088.Series{}

        // s.Name = p.Name
        name, err := newcore.FlatMetricKeyAndTags(c.flat_tpl, p.Measurement, p.Tags)
        if err != nil {
            logging.Error("FlatMetricKeyAndTags Failed!", err)
            return nil, err
        }
        s.Name = name

        point := []interface{}{}

        // time, first
        s.Columns = append(s.Columns, "time")
        point = append(point, p.Time.UnixNano()/1000000)

        // then others
        for key, value := range p.Fields {
            s.Columns = append(s.Columns, key)
            point = append(point, value)
        }

        s.Points = append(s.Points, point)
        logging.Tracef("influxdb --> %+v", s)
        series = append(series, &s)
    }

    // pretty.Println(series)

    err := c.client.WriteSeriesWithTimePrecision(series, "ms")
    if err != nil {
        logging.Errorf("InfluxdbClient_v088.Write.WriteSeriesWithTimePrecision Error: %v", err)
    } else {
        logging.Trace("InfluxdbClient_v088.Write Done No Error")
    }

    return nil, err
}
func (c *kafka_subscription) consume() (<-chan newcore.MultiDataPoint, error) {
    logging.Info("consume")

    var out = make(chan newcore.MultiDataPoint)
    var err error
    var consumers []sarama.PartitionConsumer

    if c.master == nil {
        err = c.connect()
        if err != nil {
            return nil, err
        }
    }

    for _, old := range c.consumers {
        old.Close()
    }
    c.consumers = nil

    partitions, err := c.master.Partitions(c.opts.Topic)
    if err != nil {
        return nil, fmt.Errorf("Cannot get partitions: %v", err)
    }
    logging.Infof("partitions: %v", partitions)

    err = c.state.Load()
    if err != nil {
        logging.Errorf("failed to load kafka state: %v", err)
    } else {
        logging.Infof("state: %+v", c.state.State())
    }

    flush_offset := true

    for _, part := range partitions {
        offset := int64(0)
        if c.state.Length() > 0 {
            offset = c.state.Offset(c.opts.Topic, part)
            if offset < 0 {
                offset = 0
            }
        }

        consumer, err := c.master.ConsumePartition(c.opts.Topic, part, offset)
        if err != nil {
            logging.Criticalf("Cannot consume partition: %d, %v", part, err)
            return nil, fmt.Errorf("Cannot consume partition: %d, %v", part, err)
        }
        logging.Infof("created consumer: %v", consumer)

        consumers = append(consumers, consumer)

        go func(flush_offset bool, topic string, part int32, out chan newcore.MultiDataPoint, consumer sarama.PartitionConsumer) {
            logging.Infof("start goroutine to consume: part: %d, %v", part, &consumer)

            var items newcore.MultiDataPoint
            var flush_tick = time.Tick(c.flush_interval)
            var _out chan newcore.MultiDataPoint
            var startConsume <-chan *sarama.ConsumerMessage
            var flushing bool
            var offset int64

            for {
                if (flushing && len(items) > 0) || len(items) >= c.max_batch_size {
                    _out = out         // enable output branch
                    startConsume = nil // disable consuming branch
                } else if len(items) < c.max_batch_size {
                    startConsume = consumer.Messages() // enable consuming branch
                    _out = nil                         // disable output branch
                }

                select {
                case message := <-startConsume:
                    offset = message.Offset

                    dp, err := newcore.NewDPFromJson(message.Value)
                    if err != nil {
                        logging.Tracef("[ERROR]failed to parse datapoint: %v", err)
                        continue
                    }
                    logging.Tracef("kafka dp --> %v", dp)
                    items = append(items, dp)
                case <-flush_tick:
                    flushing = true
                    // every partition consumer records its offset with this interval
                    c.state.Update(topic, part, offset)

                    // only one goroutine saves the state to disk
                    if flush_offset && c.state.Changed() {
                        logging.Tracef("flushing to disk: part: %d, offset: %d", part, offset)
                        c.state.Save()
                    }
                case _out <- items:
                    items = nil                        // clear items
                    _out = nil                         // disable output branch
                    startConsume = consumer.Messages() // enable consuming branch
                    flushing = false                   // disable flushing
                case err := <-consumer.Errors():
                    logging.Infof("consumer.Errors: part:%d, %v", part, err)
                }
            }
        }(flush_offset, c.opts.Topic, part, out, consumer)

        flush_offset = false // only the 1st goroutine is responsible for flushing state back to disk
    }
    c.consumers = consumers
    return out, nil
}
// loop periodically fetches Items, sends them on s.updates, and exits
// when Close is called. It runs CollectOnce asynchronously.
func (s sub) loop() {
    var (
        collectDone  chan CollectResult // if non-nil, CollectOnce is running
        pending      []MultiDataPoint
        next         time.Time
        err          error
        first        MultiDataPoint
        updates      chan MultiDataPoint
        startCollect <-chan time.Time
        collectDelay time.Duration
        now          = time.Now()
    )

    for {
        startCollect = nil
        first = nil
        updates = nil

        if now = time.Now(); next.After(now) {
            collectDelay = next.Sub(now)
        }

        if s.collector.IsEnabled() && collectDone == nil && len(pending) < s.maxPending {
            startCollect = time.After(collectDelay) // enable collect case
        }

        if len(pending) > 0 {
            first = pending[0]
            updates = s.updates // enable send case
        }

        select {
        case <-startCollect:
            collectDone = make(chan CollectResult, 1) // enable CollectOnce

            // TODO: add unittest for this.
            // CollectOnce should be called asynchronously; otherwise it would block consuming results.
            // TODO: leaking param c
            go func() {
                // defer func() {
                // 	if r := recover(); r != nil {
                // 		logging.Criticalf("---------- Recovered -------%v", r)
                // 	}
                // }()
                logging.Tracef("running collector.CollectOnce: %s", s.collector.Name())
                res := s.collector.CollectOnce()
                collectDone <- res
                logging.Debugf("finished collector.CollectOnce: %s, count: %d", s.collector.Name(), len(res.Collected))
            }()
        case result := <-collectDone:
            // logging.Info("result := <- collectDone", result)
            collectDone = nil

            next, err = result.Next, result.Err
            if err != nil {
                // apply the default delay if an error happened while collecting data
                //TODO: add unittest for delay_on_error. delay_on_error vs collector.interval ???
                logging.Errorf("ERROR: collector(%s) error: %v", s.collector.Name(), err)
                next = time.Now().Add(s.delay_on_error)
                break
            }

            //TODO: add unittest
            if next.Sub(time.Now()) < minimal_next_interval {
                next = time.Now().Add(minimal_next_interval)
            }

            if result.Collected != nil {
                // don't consume nil collected results.
                pending = append(pending, result.Collected)
            }
        case errc := <-s.closing:
            // clean up collector resource.
            errc <- s.collector.Close()
            close(s.updates)
            return
        case updates <- first:
            pending = pending[1:]
        }
    }
}
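// The loop above (and the backend loops below) all rely on the same Go idiom:
// a send or receive on a nil channel blocks forever, so setting a channel
// variable to nil disables that select case, and assigning a real channel
// re-enables it. The sketch below is a minimal, self-contained illustration of
// that pattern; demoLoop and its parameters are illustrative only and are not
// part of this codebase.
func demoLoop(in <-chan int, updates chan<- int, closing <-chan chan error) {
    var (
        pending []int
        out     chan<- int // nil disables the send case
        first   int
    )
    for {
        out = nil
        if len(pending) > 0 {
            // The value to send must be picked before the select: send expressions
            // are evaluated even when their case is not the one chosen.
            first = pending[0]
            out = updates // enable the send case only when there is buffered work
        }
        select {
        case v, ok := <-in: // consume upstream values
            if !ok {
                return
            }
            pending = append(pending, v)
        case out <- first: // disabled while out == nil
            pending = pending[1:]
        case errc := <-closing:
            errc <- nil
            return
        }
    }
}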
func GetSystemInfo() (SystemInfo, error) {
    var info = SystemInfo{}

    cs_info, err := wmi.QueryWmi("SELECT Name, Domain, NumberOfLogicalProcessors, NumberOfProcessors, TotalPhysicalMemory FROM Win32_ComputerSystem")
    logging.Tracef("err: %v, cs_info: %v", err, cs_info)
    if err != nil {
        return info, err
    }
    if len(cs_info) != 1 {
        return info, fmt.Errorf("invalid query result: %v", cs_info)
    }

    cs_info_m := cs_info[0]

    info.Name = newcore.GetHostname()

    if string_value, ok := cs_info_m["Domain"]; ok {
        info.Domain = string_value
    }

    if string_value, ok := cs_info_m["NumberOfLogicalProcessors"]; ok {
        int_value, err := strconv.Atoi(string_value)
        if err != nil {
            return info, err
        }
        info.NumberOfLogicalProcessors = int_value
    }

    if string_value, ok := cs_info_m["NumberOfProcessors"]; ok {
        int_value, err := strconv.Atoi(string_value)
        if err != nil {
            return info, err
        }
        info.NumberOfProcessors = int_value
    }

    if string_value, ok := cs_info_m["TotalPhysicalMemory"]; ok {
        int_value, err := strconv.Atoi(string_value)
        if err != nil {
            return info, err
        }
        info.TotalPhsycialMemoryKb = int_value / 1024
    }

    os_info, err := wmi.QueryWmi("Select Caption, CSDVersion, OSArchitecture, Version From Win32_OperatingSystem")
    logging.Tracef("err: %v, os_info: %v", err, os_info)
    if err != nil {
        return info, err
    }
    if len(os_info) != 1 {
        return info, fmt.Errorf("invalid query result: %v", os_info)
    }

    os_info_m := os_info[0]

    if string_value, ok := os_info_m["Caption"]; ok {
        info.OS = string_value
    }

    csdversion := ""
    if string_value, ok := os_info_m["CSDVersion"]; ok {
        csdversion = string_value
    }

    if string_value, ok := os_info_m["Version"]; ok {
        info.OSVersion = fmt.Sprintf("%s - %s", csdversion, string_value)
    }

    if string_value, ok := os_info_m["OSArchitecture"]; ok {
        if string_value == "64-bit" {
            info.Architecture = 64
        } else {
            info.Architecture = 32
        }
    }

    //FIXME: we may not be able to get the ip list.
    ipv4list, err := utils.Ipv4List()
    if err != nil {
        logging.Errorf("failed to get ipv4 list: %v", err)
        // return info, err
    } else {
        info.IPv4 = ipv4list
    }

    return info, nil
}
func main() {
    defer utils.Recover_and_log()
    logging.Debug("hickwall main ------- sub packages init process finished")

    app := cli.NewApp()
    app.Name = "hickwall"
    app.Usage = "collect metrics effortlessly."
    app.Version = fmt.Sprintf("%s - %s", Version, Build)

    app.Commands = []cli.Command{
        //TODO: configuration test, reload
        // {
        // 	Name:      "config",
        // 	ShortName: "",
        // 	Usage:     "config",
        // 	Subcommands: []cli.Command{
        // 		{
        // 			Name:      "test",
        // 			ShortName: "",
        // 			Usage:     "test",
        // 			Action:    command.CmdConfigTest,
        // 		},
        // 		{
        // 			Name:      "reload",
        // 			ShortName: "",
        // 			Usage:     "reload",
        // 			Action:    command.CmdConfigReload,
        // 		},
        // 	},
        // },
        {
            Name:      "service",
            ShortName: "s",
            Usage:     "service",
            Subcommands: []cli.Command{
                {
                    Name:      "status",
                    ShortName: "s",
                    Usage:     "status",
                    Action:    command.CmdServiceStatus,
                },
                {
                    Name:   "statuscode",
                    Usage:  "statuscode(internal use only.)",
                    Action: command.CmdServiceStatusCode,
                },
                {
                    Name:      "install",
                    ShortName: "i",
                    Usage:     "install service",
                    Action:    command.CmdServiceInstall,
                },
                {
                    Name:      "remove",
                    ShortName: "d",
                    Usage:     "remove service",
                    Action:    command.CmdServiceRemove,
                },
                {
                    Name:      "start",
                    ShortName: "g",
                    Usage:     "start service.",
                    Action:    command.CmdServiceStart,
                },
                {
                    Name:      "stop",
                    ShortName: "x",
                    Usage:     "stop service.",
                    Action:    command.CmdServiceStop,
                },
                {
                    Name:      "restart",
                    ShortName: "n",
                    Usage:     "restart service",
                    Action:    command.CmdServiceRestart,
                },
            },
        },
        {
            Name:      "version",
            ShortName: "v",
            Usage:     "show version info",
            Action: func(c *cli.Context) {
                fmt.Printf("%s version: %s\n", app.Name, app.Version)
            },
        },
        {
            Name:      "daemon",
            ShortName: "d",
            Usage:     "run as daemon",
            Action: func(c *cli.Context) {
                run(false, false)
            },
        },
        {
            Name:   "config",
            Usage:  "show config info",
            Action: command.CmdShowConfig,
        },
    }

    if len(os.Args) >= 2 {
        logging.Debug("executing commands")
        app.Run(os.Args)
    } else {
        isIntSess, err := servicelib.IsAnInteractiveSession()
        if err != nil {
            logging.Errorf("failed to determine if we are running in an interactive session or not: %v", err)
            return
        }
        if !isIntSess {
            logging.Debug("running as service")
            run(false, true)
            return
        }
        // print help here.
        app.Run(os.Args)
    }
    return
}
func (f *fanout) loop() {
    logging.Debug("fanout.loop() started")
    var (
        startConsuming <-chan MultiDataPoint
    )
    startConsuming = f.sub.Updates()

    for idx := range f.chan_pubs {
        closing := make(chan chan error)
        f.closing_list = append(f.closing_list, closing)
        go f.cosuming(idx, closing)
    }

main_loop:
    for {
        select {
        case md, opening := <-startConsuming:
            if !opening {
                f.Close()
                break main_loop
            }
            for idx, p := range f.pending {
                if len(p) < maxPending {
                    p <- md
                } else {
                    logging.Warnf("fanout.loop.main_loop: pending channel is jamming: bkname: %s\n", f.bks[idx].Name())
                }
            }
        case errc := <-f.closing:
            startConsuming = nil // stop consuming from sub

            for idx, bk := range f.bks {
                bk := bk // capture bk for the goroutine below

                // close consuming of each backend
                consuming_errc := make(chan error)
                f.closing_list[idx] <- consuming_errc
                <-consuming_errc

                // close backend.
                go func() {
                    consuming_errc <- bk.Close()
                }()

                timeout := time.After(time.Duration(1) * time.Second)
            wait_bk_close:
                for {
                    select {
                    case <-consuming_errc:
                        break wait_bk_close
                    case <-timeout:
                        logging.Errorf("backend(%s) is blocking the fanout closing process!\n", bk.Name())
                        break wait_bk_close
                    }
                }
            }
            logging.Debug("fanout.loop() closed all consuming backends")
            errc <- nil
            break main_loop
        }
    }

    logging.Debug("fanout.loop() exit main_loop")

    timeout := time.After(time.Duration(1) * time.Second)
    closing_sub := make(chan error)
    go func() {
        closing_sub <- f.sub.Close()
    }()

    for {
        select {
        case <-closing_sub:
            logging.Debug("fanout.loop() returned")
            return
        case <-timeout:
            logging.Errorf("Subscription(%s) is blocking the fanout closing process! forced return with timeout\n", f.sub.Name())
            return
        }
    }
}
func WatchRuntimeConfFromEtcd(etcd_machines []string, etcd_path string, stop chan error) <-chan RespConfig {
    logging.Info("WatchRuntimeConfFromEtcd Started")
    var (
        out            = make(chan RespConfig, 1)
        sleep_duration = time.Second
        // sleep_duration = time.Second * 5
    )

    if stop == nil {
        panic("stop chan is nil")
    }

    go func() {
        var (
            the_first_time = true
            watching       = false
            chGetConf      <-chan time.Time
            chWatching     <-chan time.Time
        )

        client := etcd.NewClient(etcd_machines)

        cached_conf, _ := LoadRuntimeConfFromPath(CONF_CACHE_PATH)

        watch_stop := make(chan bool)

    loop:
        for {
            if !watching && chGetConf == nil {
                if the_first_time {
                    chGetConf = time.After(0)
                } else {
                    chGetConf = time.After(sleep_duration)
                }
            }

            if watching && chWatching == nil {
                chWatching = time.After(sleep_duration)
            }

            select {
            case <-stop:
                logging.Info("stop watching etcd.")
                watch_stop <- true
                logging.Info("watching etcd stopped.")
                break loop
            case <-chGetConf:
                the_first_time = false
                chGetConf = nil

                tmp_conf, err := getRuntimeConfFromEtcd(client, etcd_path)
                if err != nil {
                    if cached_conf != nil {
                        // If we failed to get the config from etcd but we have a cached copy,
                        // use the cached version first; it only needs to be emitted once.
                        out <- RespConfig{cached_conf, nil}
                        cached_conf = nil
                    } else {
                        out <- RespConfig{nil, logging.SErrorf("failed to getRuntimeConfFromEtcd: %v", err)}
                    }
                } else {
                    out <- RespConfig{tmp_conf, nil}
                    watching = true
                }
            case <-chWatching:
                chWatching = nil
                logging.Infof("watching etcd remote config: %s, %s", etcd_machines, etcd_path)

                resp, err := client.Watch(etcd_path, 0, false, nil, watch_stop)
                if err != nil {
                    logging.Errorf("watching etcd error: %v", err)
                    break
                }

                r := bytes.NewReader([]byte(resp.Node.Value))
                tmp_conf, err := ReadRuntimeConfig(r)
                if err != nil {
                    logging.Errorf("watching etcd: changes detected but failed to parse config: %v", err)
                    break
                }

                logging.Infof("a new config is coming")
                out <- RespConfig{tmp_conf, nil}
            }
        }
    }()
    return out
}
func runAsPrimaryService(args []string, r <-chan svc.ChangeRequest, changes chan<- svc.Status) (ssec bool, errno uint32) {
    logging.Debug("runAsPrimaryService started")

    defer utils.Recover_and_log()

    const cmdsAccepted = svc.AcceptStop | svc.AcceptShutdown
    changes <- svc.Status{State: svc.StartPending}
    changes <- svc.Status{State: svc.Running, Accepts: cmdsAccepted}

    //http://localhost:6060/debug/pprof/
    // utils.HttpPprofServe(6060)

    // after := time.After(time.Duration(8) * time.Minute)
    // f, _ := os.Create("d:\\cpu-" + strconv.Itoa(pid) + ".pprof")
    // pprof.StartCPUProfile(f)
    // defer pprof.StopCPUProfile()

    // cfg := profile.Config{
    // 	MemProfile:     true,
    // 	ProfilePath:    "./pprofs/", // store profiles in current directory
    // 	NoShutdownHook: true,        // do not hook SIGINT
    // }
    // p := profile.Start(&cfg)
    // defer p.Stop()

    // utils.StartCPUProfile()
    // defer utils.StopCPUProfile()

    // go func() {
    // 	for {
    // 		<-time.After(time.Second * time.Duration(15))
    // 		debug.FreeOSMemory()
    // 	}
    // }()

    err := hickwall.Start()
    if err != nil {
        logging.Criticalf("Failed To Start hickwall: %v", err)
        return
    } else {
        defer hickwall.Stop()
    }

    logging.Debug("service event handling loop started")

    // major loop for signal processing.
loop:
    for {
        select {
        case c := <-r:
            switch c.Cmd {
            case svc.Interrogate:
                changes <- c.CurrentStatus
                // testing deadlock from https://code.google.com/p/winsvc/issues/detail?id=4
                time.Sleep(100 * time.Millisecond)
                changes <- c.CurrentStatus
            case svc.Stop, svc.Shutdown:
                break loop
            default:
                logging.Errorf("unexpected control request #%d", c.Cmd)
            }
        }
    }

    changes <- svc.Status{State: svc.StopPending}
    logging.Debug("runAsPrimaryService stopped")
    return
}
func (b *fileBackend) loop() {
    var (
        startConsuming     <-chan newcore.MultiDataPoint
        try_open_file_once chan bool
        try_open_file_tick <-chan time.Time
        buf                = bytes.NewBuffer(make([]byte, 0, 1024))
    )
    startConsuming = b.updates
    logging.Debugf("filebackend.loop started")

    for {
        if b.output == nil && try_open_file_once == nil && try_open_file_tick == nil {
            startConsuming = nil // disable consuming
            try_open_file_once = make(chan bool)

            // try to open the file the first time, asynchronously.
            go func() {
                err := b.openFile()
                if b.output != nil && err == nil {
                    // log.Println("openFile first time OK", b.output)
                    try_open_file_once <- true
                } else {
                    logging.Errorf("filebackend trying to open file but failed: %s", err)
                    try_open_file_once <- false
                }
            }()
        }

        select {
        case md := <-startConsuming:
            for _, p := range md {
                if b.output != nil {
                    res, _ := p.MarshalJSON()
                    buf.Write(res)
                    buf.Write([]byte("\n"))
                    b.output.Write(buf.Bytes())
                    buf.Reset()
                }
            }
        case opened := <-try_open_file_once:
            try_open_file_once = nil // disable this branch
            if !opened {
                // Failed to open it the first time; retry with a time interval
                // until the file is opened successfully.
                logging.Error("open the first time failed, try to open with interval of 1s")
                try_open_file_tick = time.Tick(time.Second * 1)
            } else {
                logging.Debugf("file opened the first time.")
                startConsuming = b.updates
            }
        case <-try_open_file_tick:
            // try to open with interval
            err := b.openFile()
            if b.output != nil && err == nil {
                // finally opened.
                try_open_file_tick = nil
                startConsuming = b.updates
            } else {
                logging.Errorf("filebackend trying to open file but failed: %s", err)
            }
        case errc := <-b.closing:
            logging.Debug("filebackend.loop closing")
            startConsuming = nil // stop consuming
            errc <- nil
            close(b.updates)
            logging.Debug("filebackend.loop stopped")
            return
        }
    }
}
func (b *influxdbBackend) loop() {
    var (
        startConsuming         <-chan newcore.MultiDataPoint
        try_create_client_once chan bool
        try_create_client_tick <-chan time.Time
    )
    startConsuming = b.updates
    logging.Debug("influxdb backend loop started")

    for {
        if b.output == nil && try_create_client_once == nil && try_create_client_tick == nil {
            startConsuming = nil // disable consuming
            try_create_client_once = make(chan bool)

            // try to create the influxdb client the first time, asynchronously.
            go func() {
                err := b.newInfluxdbClientFromConf()
                if err == nil {
                    try_create_client_once <- true
                } else {
                    try_create_client_once <- false
                }
            }()
        }

        //TODO: Flush_interval and Max_batch_size

        select {
        case md := <-startConsuming:
            if b.output != nil {
                points := []client.Point{}
                for _, p := range md {
                    // logging.Debug(p.Metric.Clean())
                    // logging.Debug(utils.Convert(p.Value))
                    points = append(points, client.Point{
                        Measurement: p.Metric.Clean(),
                        Time:        p.Timestamp,
                        Fields: map[string]interface{}{
                            "value": utils.Convert(p.Value),
                        },
                        Tags: p.Tags, //TODO: Tags
                    })
                }

                write := client.BatchPoints{
                    Database:        b.conf.Database,
                    RetentionPolicy: b.conf.RetentionPolicy,
                    Points:          points,
                }

                // logging.Debugf("write: count: %d", len(md))

                //FIXME: connection timeout?
                resp, err := b.output.Write(write)
                if err != nil {
                    logging.Errorf("failed to write into influxdb: %v, %+v", err, resp)
                }
            }
        case opened := <-try_create_client_once:
            try_create_client_once = nil // disable this branch
            if !opened {
                // Failed to create the client the first time; retry with a time
                // interval until a client is created successfully.
                logging.Debug("creating the client the first time failed, retry with interval of 1s")
                try_create_client_tick = time.Tick(time.Second * 1)
            } else {
                startConsuming = b.updates
            }
        case <-try_create_client_tick:
            // retry with interval
            err := b.newInfluxdbClientFromConf()
            if b.output != nil && err == nil {
                // finally created.
                try_create_client_tick = nil
                startConsuming = b.updates
            } else {
                logging.Criticalf("influxdb backend trying to create client but failed: %s", err)
            }
        case errc := <-b.closing:
            // fmt.Println("errc <- b.closing")
            logging.Debug("influxdb backend .loop closing")
            startConsuming = nil // stop consuming
            errc <- nil
            close(b.updates)
            logging.Debug("influxdb backend .loop stopped")
            return
        }
    }
}