Example #1
0
// Test_strategy_etcd_LoadConfigStrategyEtcd_Nil runs new_core_from_etcd against
// a local HTTP test server once per entry of nil_core_tests and asserts that
// the_core is still nil about one second later, i.e. that none of these
// configurations resulted in a core being created.
//
// The watcher is stopped through stopCh after each case. Handing over the stop
// signal is bounded by a two second deadline (measured from the start of the
// case) so that a watcher which no longer listens on stopCh fails the test
// instead of hanging it.
func Test_strategy_etcd_LoadConfigStrategyEtcd_Nil(t *testing.T) {
	logging.SetLevel("debug")
	defer close_core()

	stopCh := make(chan error)

	for idx, tcase := range nil_core_tests {
		// Pin per-iteration copies: the handler closure below is invoked from
		// the server's goroutines and must not observe a later iteration.
		idx, tcase := idx, tcase

		request_cnt := 0
		ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			request_cnt += 1
			logging.Infof("case: %d, -- we got request: cnt:%d ------------\n", idx, request_cnt)

			iswait := r.URL.Query().Get("wait")
			if strings.ToLower(iswait) == "true" {
				// watch request (long polling on a real server); answered immediately here.
				logging.Info("watching")
			} else {
				logging.Info("getting")
			}

			// The error is deliberately ignored: the body is served as-is.
			v, _ := _fack_etcd_respose(1, tcase)
			fmt.Printf("case: %d, content: %s\n", idx, v)

			fmt.Fprintln(w, v)
		}))

		go new_core_from_etcd([]string{ts.URL}, "/config/host/DST54869.yml", stopCh)
		tick := time.After(time.Second * 1)
		timeout := time.After(time.Second * 2)

		// Give the watcher one second to (wrongly) build a core.
		<-tick

		// Hand over the stop signal, but never wait past the deadline.
		select {
		case stopCh <- nil:
		case <-timeout:
			ts.Close()
			t.Errorf("timed out. somethings is blocking")
			return
		}

		// Release the server on every path, not only on success.
		ts.Close()

		if the_core != nil {
			t.Error("the_core has been created with invalid configuration!")
			return
		}
		t.Log("test case successed")
	}
}
Example #2
0
// serve_api registers the HTTP API routes and serves them until the listener
// fails. It blocks for the lifetime of the server and keeps api_srv_running
// set to true while http.ListenAndServe is running.
//
// The listen address is ":3031" unless config.CoreConf.Listen_port is positive,
// in which case that port is used.
func serve_api() {
	mux := httprouter.New()
	mux.GET("/sys_info", protect_read(serveSysInfo, time.Second))
	mux.DELETE("/registry/revoke", protect_write(serveRegistryRevoke, time.Second))

	// Routes that are currently disabled:
	//	GET  /registry         serveRegistryGet
	//	POST /registry/accept  serveRegistryAccept
	//	PUT  /registry/renew   serveRegistryRenew
	//	PUT  /config/renew     serveConfigRenew

	listen := ":3031"
	if port := config.CoreConf.Listen_port; port > 0 {
		listen = fmt.Sprintf(":%d", port)
	}
	logging.Infof("api served at: %s", listen)

	api_srv_running = true
	serveErr := http.ListenAndServe(listen, mux)
	api_srv_running = false

	if serveErr == nil {
		logging.Info("api server stopped")
		return
	}
	logging.Criticalf("api server is not running!: %v", serveErr)
}
Example #3
0
// loop is the subscription's event loop. It
//
//   - (re)opens the kafka consumers through c.consume, immediately at first and
//     then once per second after a failure,
//   - moves each batch received from the consumers onto c.updates,
//   - on a request from c.closing closes c.updates, acknowledges with nil and
//     returns.
//
// At most one batch is in flight: while a batch is waiting to be delivered on
// c.updates the consumer channel is not read. This prevents a newly arrived
// batch from overwriting (and thereby dropping) the one still pending.
func (c *kafka_subscription) loop() {
	logging.Info("loop started")

	var (
		items                  newcore.MultiDataPoint        // batch waiting for delivery
		has_items              bool                          // true while items must still be sent
		source                 <-chan newcore.MultiDataPoint // channel returned by consume, nil until opened
		try_open_consumer_tick <-chan time.Time              // fires when consume should be (re)tried
	)

	for {
		if source == nil && try_open_consumer_tick == nil {
			logging.Info("input == nil, try_open_consumer_tick == nil")
			try_open_consumer_tick = time.After(0)
		}

		// Exactly one of the two data branches is enabled per iteration; a nil
		// channel disables its select case.
		var (
			input  <-chan newcore.MultiDataPoint
			output chan newcore.MultiDataPoint
		)
		if has_items {
			output = c.updates
		} else {
			input = source
		}

		select {
		case <-try_open_consumer_tick:
			logging.Info("try_open_consumer_tick")
			opened, err := c.consume()
			if opened != nil && err == nil {
				try_open_consumer_tick = nil
			} else {
				logging.Errorf("failed to create consumers: %v", err)
				try_open_consumer_tick = time.After(time.Second)
			}
			source = opened
		case md := <-input:
			items = md
			has_items = true
		case output <- items:
			items = nil
			has_items = false
		case errc := <-c.closing:
			// clean up collector resource.
			close(c.updates)
			errc <- nil
			return
		}
	}
}
Example #4
0
// connect creates a sarama consumer for c.opts.Broker_list using a default
// sarama configuration and stores it in c.master. On failure c.master is left
// untouched and the wrapped error is returned.
func (c *kafka_subscription) connect() error {
	logging.Info("connect")

	cfg := sarama.NewConfig()
	brokers := c.opts.Broker_list
	logging.Debugf("broker list: %v", brokers)

	consumer, err := sarama.NewConsumer(brokers, cfg)
	if err == nil {
		c.master = consumer
		return nil
	}
	return fmt.Errorf("Cannot connect to kafka: %v", err)
}
Example #5
0
func Start() error {

	// start api serve once.
	if !api_srv_running && config.CoreConf.Enable_http_api {
		go serve_api()
	}

	if Running() == true {
		return logging.SError("one core is already running. stop it first!")
	}
	logging.Info("Starting the core.")

	switch config.ValidStrategy(config.CoreConf.Config_strategy) {
	case config.ETCD:
		logging.Info("use etcd config strategy")
		if len(config.CoreConf.Etcd_machines) <= 0 {
			return logging.SCritical("EtcdMachines is empty!!")
		}
		if config.CoreConf.Etcd_path == "" {
			return logging.SCritical("EtcdPath is empty!!")
		}
		go new_core_from_etcd(config.CoreConf.Etcd_machines, config.CoreConf.Etcd_path, done)
	case config.REGISTRY:
		logging.Info("use registry config strategy")
		if len(config.CoreConf.Registry_urls) <= 0 {
			return logging.SCritical("RegistryURLS is empty!!")
		}
		go new_core_from_registry(done)
	default:
		logging.Info("[default] use file config strategy")
		_, err := new_core_from_file()
		if err != nil {
			return logging.SError(err)
		}
	}
	return nil
}
Example #6
0
// Stop shuts down the running core, if there is one, and always returns nil.
//
// The strategy is resolved through config.ValidStrategy, exactly as Start does,
// so both functions agree on which strategy is active.
//
//   - ETCD and REGISTRY: a nil is sent on the package-level done channel. Start
//     passes this channel to new_core_from_etcd / new_core_from_registry, and
//     the registry loader forwards it to new_core_from_etcd, so both strategies
//     are stopped through the same signal. The send blocks until the loader
//     receives it.
//   - anything else (file strategy): the core is closed directly.
func Stop() error {
	if Running() {
		switch config.ValidStrategy(config.CoreConf.Config_strategy) {
		case config.ETCD:
			logging.Trace("Stopping etcd strategy")
			done <- nil
		case config.REGISTRY:
			logging.Trace("Stopping registry strategy")
			// The registry strategy ends up running the etcd loader with the
			// same done channel, so it is stopped the same way.
			done <- nil
		default:
			logging.Trace("Stopping default file strategy")
			close_core()
		}
	}
	logging.Info("core stopped")
	return nil
}
Example #7
0
// new_core_from_registry obtains a registry response and then hands over to
// new_core_from_etcd with the etcd machines and config path from that response.
//
// A response returned by load_reg_response is used directly. Otherwise the
// configured registry URLs are tried round robin, with the first attempt made
// immediately and a pause of config.CoreConf.Registry_delay_on_error (falling
// back to one minute) after every failure, until one of them succeeds.
//
// stop is only forwarded to new_core_from_etcd; it is not consulted while the
// registration attempts are being retried.
func new_core_from_registry(stop chan error) {
	logging.Debug("new_core_from_registry started")
	if stop == nil {
		logging.Panic("stop chan is nil")
	}

	urls := config.CoreConf.Registry_urls
	if len(urls) <= 0 {
		logging.Panic("RegistryURLs is empty!!")
	}

	resp, err := load_reg_response()
	if err != nil {
		logging.Errorf("we don't have a valid registry info cached.")

		// round robin registry machines
		r := ring.New(len(urls))
		for _, u := range urls {
			r.Value = u
			r = r.Next()
		}

		wait := time.After(0)
		for {
			<-wait

			// Advance before use: the ring is positioned on the first URL
			// after being filled, so the first attempt goes to the next one.
			r = r.Next()
			resp, err = do_registry(r.Value.(string))
			if err == nil {
				logging.Info("we are registry we got a valid registry response.")
				break
			}

			logging.Errorf("failed to registry: %v", err)
			wait = time.After(newcore.Interval(config.CoreConf.Registry_delay_on_error).MustDuration(time.Minute))
		}
	}

	// TODO: handle error here. like etcd_machines are not working.
	// here we got a valid registry info. get config and start to run.
	new_core_from_etcd(resp.EtcdMachines, resp.EtcdConfigPath, stop)
}
Example #8
0
// serveRegistryRevoke handles DELETE /registry/revoke: it stops the agent,
// deletes the registration file at config.REGISTRY_FILEPATH and starts the
// agent again.
//
// A registration file that does not exist is treated as already revoked, so
// the agent is still restarted instead of being left stopped. Any other
// failure is answered with 500 Internal Server Error and aborts the sequence
// at that step. On success nothing is written, which yields the default 200
// response.
func serveRegistryRevoke(w http.ResponseWriter, r *http.Request, _ httprouter.Params) {
	logging.Debugf("api /registry/revoke called from %s", r.RemoteAddr)

	// stop hickwall first
	if err := Stop(); err != nil {
		http.Error(w, "Failed to Stop agent", http.StatusInternalServerError)
		return
	}

	// delete registration file; a missing file needs no removal.
	if err := os.Remove(config.REGISTRY_FILEPATH); err != nil && !os.IsNotExist(err) {
		logging.Errorf("failed to delete registration file: %v", err)
		http.Error(w, "Failed to Delete Registration File", http.StatusInternalServerError)
		return
	}

	// restart hickwall. if we can pass registration process.
	if err := Start(); err != nil {
		http.Error(w, "Failed to Start agent", http.StatusInternalServerError)
		return
	}
	logging.Info("agent started again.")
}
Example #9
0
// consume opens one partition consumer per partition of c.opts.Topic and
// returns the channel on which the batches read from them are delivered.
//
// Steps:
//   - connect to kafka if c.master is not set yet,
//   - close the consumers left over from a previous call,
//   - load the persisted offsets (a load failure is only logged),
//   - for every partition start consuming at the stored offset (0 when nothing
//     is stored or the stored value is negative) and spawn a goroutine that
//     batches messages onto the returned channel.
//
// Each goroutine emits a batch when it holds c.max_batch_size items, or on the
// next flush tick if it holds at least one. On every flush tick it records the
// offset of the last message it read; only the goroutine of the first
// partition writes the state to disk. A goroutine ends when its consumer's
// message or error channel is closed.
//
// If a partition cannot be opened, the consumers opened so far in this call
// are closed again and an error is returned.
//
// NOTE(review): the recorded offset is that of the last message read, not of
// the next one, and it is recorded before the batch is delivered — confirm
// that re-reading one message after a restart is the intended semantics.
func (c *kafka_subscription) consume() (<-chan newcore.MultiDataPoint, error) {
	logging.Info("consume")

	if c.master == nil {
		if err := c.connect(); err != nil {
			return nil, err
		}
	}

	for _, old := range c.consumers {
		old.Close()
	}
	c.consumers = nil

	partitions, err := c.master.Partitions(c.opts.Topic)
	if err != nil {
		return nil, fmt.Errorf("Cannot get partitions: %v", err)
	}
	logging.Infof("partitions: %v", partitions)

	// Without a usable state every partition simply starts at offset 0.
	if err := c.state.Load(); err != nil {
		logging.Errorf("failed to load kafka state: %v", err)
	} else {
		logging.Infof("state: %+v", c.state.State())
	}

	var (
		out          = make(chan newcore.MultiDataPoint)
		consumers    []sarama.PartitionConsumer
		flush_offset = true
	)

	for _, part := range partitions {
		offset := int64(0)
		if c.state.Length() > 0 {
			offset = c.state.Offset(c.opts.Topic, part)
			if offset < 0 {
				offset = 0
			}
		}
		consumer, err := c.master.ConsumePartition(c.opts.Topic, part, offset)
		if err != nil {
			logging.Criticalf("Cannot consumer partition: %d, %v", part, err)
			// Do not leak the consumers (and their goroutines) that were
			// already opened in this call.
			for _, opened := range consumers {
				opened.Close()
			}
			return nil, fmt.Errorf("Cannot consumer partition: %d, %v", part, err)
		}
		logging.Infof("created consumer: %v", consumer)

		consumers = append(consumers, consumer)

		go func(flush_offset bool, topic string, part int32, out chan newcore.MultiDataPoint, consumer sarama.PartitionConsumer) {
			logging.Infof("start goroutine to consume: part: %d,  %v", part, &consumer)

			// A non-positive interval leaves flush_tick nil, which disables
			// the flush branch (same as time.Tick would).
			var flush_tick <-chan time.Time
			if c.flush_interval > 0 {
				ticker := time.NewTicker(c.flush_interval)
				defer ticker.Stop()
				flush_tick = ticker.C
			}

			var (
				items        newcore.MultiDataPoint
				_out         chan newcore.MultiDataPoint
				startConsume <-chan *sarama.ConsumerMessage
				flushing     bool
				offset       int64
				consumed     bool // true once at least one message has been read
			)

			for {
				if (flushing && len(items) > 0) || len(items) >= c.max_batch_size {
					_out = out         // enable output branch
					startConsume = nil // disable consuming branch
				} else {
					startConsume = consumer.Messages() // enable consuming branch
					_out = nil                         // disable output branch
				}

				select {
				case message, ok := <-startConsume:
					if !ok {
						// consumer was closed; nothing more will arrive.
						logging.Infof("consumer closed, stop consuming: part: %d", part)
						return
					}
					offset = message.Offset
					consumed = true
					dp, err := newcore.NewDPFromJson(message.Value)
					if err != nil {
						// skip messages that are not valid datapoints.
						logging.Tracef("[ERROR]failed to parse datapoint: %v", err)
						continue
					}
					logging.Tracef("kafka dp --> %v", dp)
					items = append(items, dp)
				case <-flush_tick:
					flushing = true
					// every part consumer will record offset with interval,
					// but only once it has an offset of its own: recording
					// the zero value would overwrite the stored position.
					if consumed {
						c.state.Update(topic, part, offset)
					}

					// only 1 goroutine will save state to disk
					if flush_offset && c.state.Changed() {
						logging.Tracef("flusing to disk: part: %d, offset: %d", part, offset)
						c.state.Save()
					}
				case _out <- items:
					items = nil                        // clear items
					_out = nil                         // disable output branch
					startConsume = consumer.Messages() // enable consuming branch
					flushing = false                   // disable flusing
				case err, ok := <-consumer.Errors():
					if !ok {
						// consumer was closed; avoid spinning on the closed channel.
						logging.Infof("consumer closed, stop consuming: part: %d", part)
						return
					}
					logging.Infof("consumer.Errors: part:%d,  %v", part, err)
				}
			}
		}(flush_offset, c.opts.Topic, part, out, consumer)

		flush_offset = false // only 1st goroutine is responsible for flushing state back into disk
	}
	c.consumers = consumers
	return out, nil
}
Example #10
0
// WatchRuntimeConfFromEtcd starts a goroutine that fetches the runtime config
// stored under etcd_path and then keeps watching that key, emitting every
// config (or fetch error) on the returned channel.
//
// Behaviour of the goroutine:
//   - The first fetch happens immediately; after a failed fetch it is retried
//     once per second.
//   - When a fetch fails and a config cached at CONF_CACHE_PATH was loaded,
//     that cached config is emitted once; later failures are emitted as errors.
//   - After the first successful fetch it switches to watching: each change
//     that parses as a runtime config is emitted; watch and parse errors are
//     only logged. Between watch calls it pauses for one second.
//   - A value received from stop ends the goroutine. stop is only observed
//     between fetch/watch calls, not while one of them is in progress.
//
// The returned channel has a buffer of one and is never closed.
// It panics if stop is nil.
func WatchRuntimeConfFromEtcd(etcd_machines []string, etcd_path string, stop chan error) <-chan RespConfig {
	logging.Info("WatchRuntimeConfFromEtcd Started")
	var (
		out            = make(chan RespConfig, 1)
		sleep_duration = time.Second
	)

	if stop == nil {
		panic("stop chan is nil")
	}

	go func() {
		var (
			the_first_time = true
			watching       = false
			chGetConf      <-chan time.Time // armed while a fetch is due
			chWaching      <-chan time.Time // armed while a watch is due
		)

		client := etcd.NewClient(etcd_machines)

		// The error is ignored on purpose: without a cache cached_conf is
		// simply nil and fetch failures are reported as errors.
		cached_conf, _ := LoadRuntimeConfFromPath(CONF_CACHE_PATH)

		watch_stop := make(chan bool)

	loop:
		for {
			if !watching && chGetConf == nil {
				if the_first_time {
					chGetConf = time.After(0)
				} else {
					chGetConf = time.After(sleep_duration)
				}
			}

			if watching && chWaching == nil {
				chWaching = time.After(sleep_duration)
			}

			select {
			case <-stop:
				logging.Info("stop watching etcd.")
				// client.Watch runs synchronously in this goroutine, so no
				// watch can be in flight here and nothing would ever receive
				// from watch_stop. Close it instead of sending, which would
				// block this goroutine forever.
				close(watch_stop)
				logging.Info("watching etcd stopped.")
				break loop
			case <-chGetConf:
				the_first_time = false
				chGetConf = nil

				tmp_conf, err := getRuntimeConfFromEtcd(client, etcd_path)
				if err != nil {
					if cached_conf != nil {
						// if failed to get config from etcd but we have a cached copy. then use
						// this cached version first.
						out <- RespConfig{cached_conf, nil}
						cached_conf = nil // cached copy only need to emit once.
					} else {
						out <- RespConfig{nil, logging.SErrorf("failed to getRuntimeConfFromEtcd: %v", err)}
					}
				} else {
					out <- RespConfig{tmp_conf, nil}
					watching = true
				}
			case <-chWaching:
				chWaching = nil
				logging.Infof("watching etcd remote config: %s, %s", etcd_machines, etcd_path)
				resp, err := client.Watch(etcd_path, 0, false, nil, watch_stop)
				if err != nil {
					logging.Errorf("watching etcd error: %v", err)
					break // leaves the select only; the watch is re-armed above
				}

				r := bytes.NewReader([]byte(resp.Node.Value))
				tmp_conf, err := ReadRuntimeConfig(r)
				if err != nil {
					logging.Errorf("watching etcd. changes detected but faild to parse config: %v", err)
					break // leaves the select only; the watch is re-armed above
				}

				logging.Infof("a new config is comming")
				out <- RespConfig{tmp_conf, nil}
			}
		}
	}()
	return out
}
Example #11
0
// loop is the backend's event loop. It forwards every batch received on
// b.updates to the kafka producer, one producer message per datapoint, keyed
// by the datapoint's metric.
//
// While b.producer is nil, reading from b.updates is disabled. A first
// connection attempt runs in a background goroutine; if it fails, b.connect is
// retried from the loop once per second until it succeeds, after which reading
// from b.updates is enabled again.
//
// A request on b.closing is acknowledged with nil, then b.updates is closed
// and the loop returns.
func (b *kafkaBackend) loop() {
	var (
		startConsuming    <-chan newcore.MultiDataPoint
		try_connect_first chan bool
		try_connect_tick  <-chan time.Time
		reconnect_ticker  *time.Ticker // drives try_connect_tick; stopped once unused
	)
	startConsuming = b.updates
	logging.Info("kafkaBackend.loop started")

	for {
		if b.producer == nil && try_connect_first == nil && try_connect_tick == nil {
			startConsuming = nil // disable consuming

			// Buffered so the goroutine below can always deliver its result
			// and exit, even if the loop has been closed in the meantime.
			result := make(chan bool, 1)
			try_connect_first = result
			logging.Debug("trying to connect to kafka first time.")

			// trying to connect to kafka first time
			go func() {
				err := b.connect()
				if b.producer != nil && err == nil {
					logging.Debugf("connect kafka first time OK: %v", b.producer)
					result <- true
				} else {
					logging.Criticalf("connect to kafka failed %s", err)
					result <- false
				}
			}()
		}
		if startConsuming != nil {
			logging.Trace("kafkaBackend consuming started")
		}

		select {
		case md := <-startConsuming:
			for idx, p := range md {
				b.producer.Input() <- &sarama.ProducerMessage{
					Topic: b.conf.Topic_id,
					Key:   sarama.StringEncoder(p.Metric),
					Value: p,
				}
				// Encoded again for tracing only; an encode error just
				// yields an empty trace line.
				_d, _ := p.Encode()
				logging.Tracef("kafka producer ---> %d,  %s", idx, _d)
			}
			logging.Debugf("kafkaBackend consuming finished: count: %d", len(md))
		case connected := <-try_connect_first:
			try_connect_first = nil // disable this branch
			if !connected {
				// failed open it the first time,
				// then we try to open file with time interval, until connected successfully.
				logging.Critical("connect first time failed, try to connect with interval of 1s")
				reconnect_ticker = time.NewTicker(time.Second * 1)
				try_connect_tick = reconnect_ticker.C
			} else {
				logging.Debug("kafka connected the first time.")
				startConsuming = b.updates
			}
		case <-try_connect_tick:
			// try to connect with interval
			err := b.connect()
			if b.producer != nil && err == nil {
				// finally connected.
				reconnect_ticker.Stop()
				reconnect_ticker = nil
				try_connect_tick = nil
				startConsuming = b.updates
			} else {
				logging.Criticalf("kafka backend trying to connect but failed: %s", err)
			}
		case errc := <-b.closing:
			logging.Info("kafaBackend.loop closing")
			startConsuming = nil // stop comsuming
			if reconnect_ticker != nil {
				reconnect_ticker.Stop()
			}
			errc <- nil
			close(b.updates)
			logging.Info("kafaBackend.loop closed")
			return
		}
	}
}