func Test_strategy_etcd_LoadConfigStrategyEtcd_Nil(t *testing.T) {
	logging.SetLevel("debug")
	defer close_core()

	stopCh := make(chan error)

	for idx, tcase := range nil_core_tests {
		request_cnt := 0
		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			request_cnt += 1
			logging.Infof("case: %d, -- we got request: cnt:%d ------------\n", idx, request_cnt)
			// pretty.Println(r)

			iswait := r.URL.Query().Get("wait")
			if strings.ToLower(iswait) == "true" {
				// watch, long polling
				logging.Info("watching")
				// time.Sleep(time.Second * 1)
			} else {
				logging.Info("getting")
			}

			v, _ := _fack_etcd_respose(1, tcase)
			fmt.Printf("case: %d, content: %s\n", idx, v)
			fmt.Fprintln(w, v)
		}))

		go new_core_from_etcd([]string{ts.URL}, "/config/host/DST54869.yml", stopCh)

		tick := time.After(time.Second * 1)
		timeout := time.After(time.Second * 2)

	main_loop:
		for {
			select {
			case <-tick:
				stopCh <- nil
				if the_core != nil {
					t.Error("the_core has been created with an invalid configuration!")
					return
				}
				t.Log("test case succeeded")
				ts.Close()
				break main_loop
			case <-timeout:
				t.Errorf("timed out. something is blocking")
				return
			}
		}
	}
}
func serve_api() {
	router := httprouter.New()
	router.GET("/sys_info", protect_read(serveSysInfo, time.Second))
	router.DELETE("/registry/revoke", protect_write(serveRegistryRevoke, time.Second))
	// router.GET("/registry", serveRegistryGet)
	// router.POST("/registry/accept", serveRegistryAccept)
	// router.PUT("/registry/renew", serveRegistryRenew)
	// router.PUT("/config/renew", serveConfigRenew)

	addr := ":3031"
	if config.CoreConf.Listen_port > 0 {
		addr = fmt.Sprintf(":%d", config.CoreConf.Listen_port)
	}

	logging.Infof("api served at: %s", addr)

	api_srv_running = true
	err := http.ListenAndServe(addr, router)
	api_srv_running = false

	if err != nil {
		logging.Criticalf("api server is not running!: %v", err)
	} else {
		logging.Info("api server stopped")
	}
}
func (c *kafka_subscription) loop() {
	logging.Info("loop started")

	var items newcore.MultiDataPoint
	// var tick = time.Tick(c.interval)
	var output chan newcore.MultiDataPoint
	var try_open_consumer_tick <-chan time.Time
	// var tick_reconsume chan time.Time
	var input <-chan newcore.MultiDataPoint
	// var err error

	for {
		if input == nil && try_open_consumer_tick == nil {
			logging.Info("input == nil, try_open_consumer_tick == nil")
			try_open_consumer_tick = time.After(0)
		}

		select {
		case <-try_open_consumer_tick:
			logging.Info("try_open_consumer_tick")
			_input, err := c.consume()
			if _input != nil && err == nil {
				try_open_consumer_tick = nil
			} else {
				logging.Errorf("failed to create consumers: %v", err)
				try_open_consumer_tick = time.After(time.Second)
			}
			input = _input
		case md := <-input:
			items = md[:]
			output = c.updates
			// fmt.Println
		case output <- items:
			items = nil
			output = nil
		case errc := <-c.closing:
			// clean up collector resources.
			output = nil
			close(c.updates)
			errc <- nil
			return
		}
	}
}
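// Note: loop() relies on the nil-channel select idiom: sending on or receiving from a
// nil channel blocks forever, so setting a case's channel to nil disables that branch
// until it is re-armed. A minimal, self-contained sketch of the same idiom (the names
// below are illustrative only and not part of this package):
//
//	func pump(in <-chan int, out chan<- int) {
//		var pending []int
//		for {
//			var send chan<- int // nil: output branch disabled
//			var next int
//			if len(pending) > 0 {
//				send = out // data buffered: enable the output branch
//				next = pending[0]
//			}
//			select {
//			case v := <-in:
//				pending = append(pending, v)
//			case send <- next: // selected only when send != nil
//				pending = pending[1:]
//			}
//		}
//	}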
func (c *kafka_subscription) connect() error {
	logging.Info("connect")

	sconfig := sarama.NewConfig()
	logging.Debugf("broker list: %v", c.opts.Broker_list)

	master, err := sarama.NewConsumer(c.opts.Broker_list, sconfig)
	if err != nil {
		return fmt.Errorf("Cannot connect to kafka: %v", err)
	}
	c.master = master
	return nil
}
func Start() error {
	// start the api server once.
	if !api_srv_running && config.CoreConf.Enable_http_api {
		go serve_api()
	}

	if Running() {
		return logging.SError("one core is already running. stop it first!")
	}

	logging.Info("Starting the core.")

	switch config.ValidStrategy(config.CoreConf.Config_strategy) {
	case config.ETCD:
		logging.Info("use etcd config strategy")
		if len(config.CoreConf.Etcd_machines) <= 0 {
			return logging.SCritical("EtcdMachines is empty!!")
		}
		if config.CoreConf.Etcd_path == "" {
			return logging.SCritical("EtcdPath is empty!!")
		}
		go new_core_from_etcd(config.CoreConf.Etcd_machines, config.CoreConf.Etcd_path, done)
	case config.REGISTRY:
		logging.Info("use registry config strategy")
		if len(config.CoreConf.Registry_urls) <= 0 {
			return logging.SCritical("RegistryURLS is empty!!")
		}
		go new_core_from_registry(done)
	default:
		logging.Info("[default] use file config strategy")
		_, err := new_core_from_file()
		if err != nil {
			return logging.SError(err)
		}
	}
	return nil
}
func Stop() error {
	if Running() {
		switch config.CoreConf.Config_strategy {
		case config.ETCD:
			logging.Trace("Stopping etcd strategy")
			done <- nil
		case config.REGISTRY:
			logging.Trace("Stopping registry strategy")
		default:
			logging.Trace("Stopping default file strategy")
			close_core()
		}
	}
	logging.Info("core stopped")
	return nil
}
func new_core_from_registry(stop chan error) {
	logging.Debug("new_core_from_registry started")
	if stop == nil {
		logging.Panic("stop chan is nil")
	}

	if len(config.CoreConf.Registry_urls) <= 0 {
		logging.Panic("RegistryURLs is empty!!")
	}

	resp, err := load_reg_response()
	if err != nil {
		logging.Errorf("we don't have a valid registry info cached.")

		next := time.After(0)

		// round robin over the registry machines
		r := ring.New(len(config.CoreConf.Registry_urls))
		for i := 0; i < r.Len(); i++ {
			r.Value = config.CoreConf.Registry_urls[i]
			r = r.Next()
		}

	registry_loop:
		for {
			select {
			case <-next:
				r = r.Next()
				resp, err = do_registry(r.Value.(string))
				if err == nil {
					logging.Info("we got a valid registry response.")
					break registry_loop
				} else {
					logging.Errorf("failed to register: %v", err)
				}
				next = time.After(newcore.Interval(config.CoreConf.Registry_delay_on_error).MustDuration(time.Minute))
			}
		}
	}

	// TODO: handle errors here, e.g. etcd_machines that are not reachable.
	// At this point we have a valid registry response: fetch the config and start to run.
	new_core_from_etcd(resp.EtcdMachines, resp.EtcdConfigPath, stop)
}
func serveRegistryRevoke(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
	logging.Debugf("api /registry/revoke called from %s", r.RemoteAddr)

	err := Stop() // stop hickwall first
	if err != nil {
		http.Error(w, "Failed to Stop agent", 500)
		return
	}

	// delete the registration file
	err = os.Remove(config.REGISTRY_FILEPATH)
	if err != nil {
		http.Error(w, "Failed to Delete Registration File", 500)
		return
	}

	err = Start() // restart hickwall, if we can pass the registration process.
	if err != nil {
		http.Error(w, "Failed to Start agent", 500)
		return
	}

	logging.Info("agent started again.")
}
func (c *kafka_subscription) consume() (<-chan newcore.MultiDataPoint, error) {
	logging.Info("consume")

	var out = make(chan newcore.MultiDataPoint)
	var err error
	var consumers []sarama.PartitionConsumer

	if c.master == nil {
		err = c.connect()
		if err != nil {
			return nil, err
		}
	}

	// close any previously created partition consumers before re-consuming.
	for _, pc := range c.consumers {
		pc.Close()
	}
	c.consumers = nil

	partitions, err := c.master.Partitions(c.opts.Topic)
	if err != nil {
		return nil, fmt.Errorf("Cannot get partitions: %v", err)
	}
	logging.Infof("partitions: %v", partitions)

	err = c.state.Load()
	if err != nil {
		logging.Errorf("failed to load kafka state: %v", err)
	} else {
		logging.Infof("state: %+v", c.state.State())
	}

	flush_offset := true

	for _, part := range partitions {
		offset := int64(0)
		if c.state.Length() > 0 {
			offset = c.state.Offset(c.opts.Topic, part)
			if offset < 0 {
				offset = 0
			}
		}

		consumer, err := c.master.ConsumePartition(c.opts.Topic, part, offset)
		if err != nil {
			logging.Criticalf("Cannot consume partition: %d, %v", part, err)
			return nil, fmt.Errorf("Cannot consume partition: %d, %v", part, err)
		}
		logging.Infof("created consumer: %v", consumer)
		consumers = append(consumers, consumer)

		go func(flush_offset bool, topic string, part int32, out chan newcore.MultiDataPoint, consumer sarama.PartitionConsumer) {
			logging.Infof("start goroutine to consume: part: %d, %v", part, &consumer)

			var items newcore.MultiDataPoint
			var flush_tick = time.Tick(c.flush_interval)
			var _out chan newcore.MultiDataPoint
			var startConsume <-chan *sarama.ConsumerMessage
			var flushing bool
			var offset int64

			for {
				if (flushing == true && len(items) > 0) || len(items) >= c.max_batch_size {
					_out = out         // enable the output branch
					startConsume = nil // disable the consuming branch
				} else if len(items) < c.max_batch_size {
					startConsume = consumer.Messages() // enable the consuming branch
					_out = nil                         // disable the output branch
				}

				select {
				case message := <-startConsume:
					offset = message.Offset
					dp, err := newcore.NewDPFromJson(message.Value)
					if err != nil {
						logging.Tracef("[ERROR] failed to parse datapoint: %v", err)
					}
					logging.Tracef("kafka dp --> %v", dp)
					items = append(items, dp)
				case <-flush_tick:
					flushing = true
					// every partition consumer records its offset at this interval
					c.state.Update(topic, part, offset)
					// only 1 goroutine saves the state to disk
					if flush_offset == true && c.state.Changed() == true {
						logging.Tracef("flushing to disk: part: %d, offset: %d", part, offset)
						c.state.Save()
					}
				case _out <- items:
					items = nil                        // clear items
					_out = nil                         // disable the output branch
					startConsume = consumer.Messages() // enable the consuming branch
					flushing = false                   // disable flushing
				case err := <-consumer.Errors():
					logging.Infof("consumer.Errors: part:%d, %v", part, err)
				}
			}
		}(flush_offset, c.opts.Topic, part, out, consumer)

		flush_offset = false // only the 1st goroutine is responsible for flushing state back to disk
	}
	c.consumers = consumers
	return out, nil
}
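// A minimal usage sketch for consume() (illustrative only; "sub" is a hypothetical
// *kafka_subscription value, and error handling is elided):
//
//	out, err := sub.consume()
//	if err != nil {
//		logging.Criticalf("consume failed: %v", err)
//		return
//	}
//	for md := range out {
//		// each md is a newcore.MultiDataPoint batch, emitted either when it reaches
//		// max_batch_size or when a flush_interval tick marks the batch as flushable.
//		logging.Debugf("got a batch of %d datapoints", len(md))
//	}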
func WatchRuntimeConfFromEtcd(etcd_machines []string, etcd_path string, stop chan error) <-chan RespConfig {
	logging.Info("WatchRuntimeConfFromEtcd Started")

	var (
		out            = make(chan RespConfig, 1)
		sleep_duration = time.Second
		// sleep_duration = time.Second * 5
	)

	if stop == nil {
		panic("stop chan is nil")
	}

	go func() {
		var (
			the_first_time = true
			watching       = false
			chGetConf      <-chan time.Time
			chWaching      <-chan time.Time
		)

		client := etcd.NewClient(etcd_machines)

		cached_conf, _ := LoadRuntimeConfFromPath(CONF_CACHE_PATH)

		watch_stop := make(chan bool, 0)

	loop:
		for {
			if watching == false && chGetConf == nil {
				if the_first_time == true {
					chGetConf = time.After(0)
				} else {
					chGetConf = time.After(sleep_duration)
				}
			}

			if watching == true && chWaching == nil {
				chWaching = time.After(sleep_duration)
			}

			select {
			case <-stop:
				logging.Info("stop watching etcd.")
				watch_stop <- true
				logging.Info("watching etcd stopped.")
				break loop
			case <-chGetConf:
				the_first_time = false
				chGetConf = nil

				tmp_conf, err := getRuntimeConfFromEtcd(client, etcd_path)
				if err != nil {
					if cached_conf != nil {
						// if we fail to get the config from etcd but have a cached copy,
						// use the cached version first.
						out <- RespConfig{cached_conf, nil}
						cached_conf = nil // the cached copy only needs to be emitted once.
					} else {
						out <- RespConfig{nil, logging.SErrorf("failed to getRuntimeConfFromEtcd: %v", err)}
					}
				} else {
					out <- RespConfig{tmp_conf, nil}
					watching = true
				}
			case <-chWaching:
				chWaching = nil
				logging.Infof("watching etcd remote config: %s, %s", etcd_machines, etcd_path)

				resp, err := client.Watch(etcd_path, 0, false, nil, watch_stop)
				if err != nil {
					logging.Errorf("watching etcd error: %v", err)
					break
				}

				r := bytes.NewReader([]byte(resp.Node.Value))
				tmp_conf, err := ReadRuntimeConfig(r)
				if err != nil {
					logging.Errorf("watching etcd: changes detected but failed to parse config: %v", err)
					break
				}

				logging.Infof("a new config is coming")
				out <- RespConfig{tmp_conf, nil}
			}
		}
	}()
	return out
}
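// A minimal usage sketch for WatchRuntimeConfFromEtcd (illustrative; the endpoint,
// path, and consumption loop below are assumptions, not part of this file):
//
//	stop := make(chan error)
//	configs := WatchRuntimeConfFromEtcd([]string{"http://127.0.0.1:2379"}, "/config/host/example.yml", stop)
//	for resp := range configs {
//		// resp carries either a freshly parsed runtime config (cached copy first,
//		// then live etcd updates) or an error describing why the fetch failed;
//		// apply the config or log the error here.
//		handleConfig(resp) // hypothetical handler
//	}
//	// sending on stop terminates the watcher goroutine:
//	// stop <- nil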
func (b *kafkaBackend) loop() {
	var (
		startConsuming    <-chan newcore.MultiDataPoint
		try_connect_first chan bool
		try_connect_tick  <-chan time.Time
	)
	startConsuming = b.updates
	logging.Info("kafkaBackend.loop started")

	for {
		if b.producer == nil && try_connect_first == nil && try_connect_tick == nil {
			startConsuming = nil // disable consuming

			try_connect_first = make(chan bool)
			logging.Debug("trying to connect to kafka for the first time.")

			// trying to connect to kafka for the first time
			go func() {
				err := b.connect()
				if b.producer != nil && err == nil {
					logging.Debugf("connect kafka first time OK: %v", b.producer)
					try_connect_first <- true
				} else {
					logging.Criticalf("connect to kafka failed %s", err)
					try_connect_first <- false
				}
			}()
		}
		if startConsuming != nil {
			logging.Trace("kafkaBackend consuming started")
		}

		select {
		case md := <-startConsuming:
			for idx, p := range md {
				b.producer.Input() <- &sarama.ProducerMessage{
					Topic: b.conf.Topic_id,
					Key:   sarama.StringEncoder(p.Metric),
					Value: p,
				}
				_d, _ := p.Encode()
				logging.Tracef("kafka producer ---> %d, %s", idx, _d)
			}
			logging.Debugf("kafkaBackend consuming finished: count: %d", len(md))
		case connected := <-try_connect_first:
			try_connect_first = nil // disable this branch
			if !connected {
				// failed to connect the first time,
				// so retry on a timer until we connect successfully.
				logging.Critical("connect first time failed, try to connect with interval of 1s")
				try_connect_tick = time.Tick(time.Second * 1)
			} else {
				logging.Debug("kafka connected the first time.")
				startConsuming = b.updates
			}
		case <-try_connect_tick:
			// try to connect on the tick interval
			err := b.connect()
			if b.producer != nil && err == nil {
				// finally connected.
				try_connect_tick = nil
				startConsuming = b.updates
			} else {
				logging.Criticalf("kafka backend trying to connect but failed: %s", err)
			}
		case errc := <-b.closing:
			logging.Info("kafkaBackend.loop closing")
			startConsuming = nil // stop consuming
			errc <- nil
			close(b.updates)
			logging.Info("kafkaBackend.loop closed")
			return
		}
	}
}