Example #1
func (this *AccessLogger) doRotate() {
	var fname string
	_, err := os.Lstat(this.filename)
	if err == nil {
		// file exists; find an empty slot
		num := 1
		for ; err == nil && num <= 999; num++ {
			fname = this.filename + fmt.Sprintf(".%03d", num)
			_, err = os.Lstat(fname)
		}

		if err == nil {
			log.Error("Access logger unable to rotate, 30 years passed?")
			return
		}
	}

	this.fd.Close()
	this.fd = nil

	// fd must be closed before the rename: otherwise any fd.Write issued
	// after the rename would still land in the rotated file
	err = os.Rename(this.filename, fname)
	if err != nil {
		log.Error("rename %s->%s: %v", this.filename, fname, err)
		return
	}

	if this.fd, err = os.OpenFile(this.filename, os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0660); err != nil {
		log.Error("open(%s): %s", this.filename, err)
	}
}
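The slot-probing loop is easy to lift out and test on its own. Below is a minimal standalone sketch using only fmt and os from the standard library; nextRotateName is a hypothetical helper, not part of AccessLogger, and like the original it treats any Lstat error as a free slot.

// nextRotateName probes for the first free "<base>.NNN" rotation slot,
// mirroring the loop in doRotate above (hypothetical helper).
func nextRotateName(base string) (string, error) {
	for num := 1; num <= 999; num++ {
		fname := fmt.Sprintf("%s.%03d", base, num)
		if _, err := os.Lstat(fname); err != nil {
			// Lstat failed, most likely because the file does not exist
			return fname, nil
		}
	}
	return "", fmt.Errorf("all 999 rotation slots of %s are taken", base)
}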
Example #2
func (this *mysqlStore) Start() error {
	if err := this.refreshFromMysql(); err != nil {
		// refuse to start if mysql conn fails
		return fmt.Errorf("manager[%s]: %v", this.Name(), err)
	}

	go func() {
		ticker := time.NewTicker(this.cf.Refresh)
		defer ticker.Stop()

		for {
			select {
			case <-ticker.C:
				if err := this.refreshFromMysql(); err != nil {
					log.Error(err.Error())
				} else {
					log.Info("manager refreshed from mysql")
				}

			case <-this.refreshCh:
				if err := this.refreshFromMysql(); err != nil {
					log.Error(err.Error())
				} else {
					log.Info("manager forced to refresh from mysql")
				}

			case <-this.shutdownCh:
				log.Info("mysql manager stopped")
				return
			}
		}
	}()

	return nil
}
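The select over a ticker, a force-refresh channel, and a shutdown channel is the reusable core of Start. A minimal sketch of that pattern in isolation; refresher, ForceRefresh, and loop are hypothetical names, and error handling is elided.

// refresher is a stripped-down sketch of the loop in Start above.
type refresher struct {
	refreshCh  chan struct{}
	shutdownCh chan struct{}
}

// ForceRefresh asks the loop to reload immediately instead of
// waiting for the next tick.
func (r *refresher) ForceRefresh() { r.refreshCh <- struct{}{} }

func (r *refresher) loop(interval time.Duration, reload func() error) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			reload() // periodic refresh; errors elided in this sketch
		case <-r.refreshCh:
			reload() // forced refresh
		case <-r.shutdownCh:
			return
		}
	}
}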
Example #3
func (this *Client) Warmup() {
	var (
		sess *mgo.Session
		err  error
		t1   = time.Now()
	)
	for retries := 0; retries < 3; retries++ {
		for _, server := range this.selector.ServerList() {
			sess, err = this.getConn(server.Uri())
			if err != nil {
				log.Error("Warmup %v fail: %s", server.Uri(), err)
				break
			}
			this.putFreeConn(server.Uri(), sess)
		}

		if err == nil {
			break
		}
	}

	if err == nil {
		log.Trace("Mongodb warmup within %s: %+v",
			time.Since(t1), this.freeconns)
	} else {
		log.Error("Mongodb failed to warmup within %s: %s",
			time.Since(t1), err)
	}
}
Example #4
func (this *Ping) diagnose() {
	this.zkzone.ForSortedClusters(func(zkcluster *zk.ZkCluster) {
		registeredBrokers := zkcluster.RegisteredInfo().Roster
		for _, broker := range registeredBrokers {
			log.Debug("ping %s", broker.Addr())

			kfk, err := sarama.NewClient([]string{broker.Addr()}, sarama.NewConfig())
			if err != nil {
				log.Error("%25s %30s %s", broker.Addr(), broker.NamedAddr(), color.Red(err.Error()))

				continue
			}

			_, err = kfk.Topics() // Kafka has no ping API, so use Topics() as a ping
			if err != nil {
				log.Error("%25s %30s %s", broker.Addr(), broker.NamedAddr(), color.Red(err.Error()))
			} else if !this.problematicMode {
				log.Info("%25s %30s %s", broker.Addr(), broker.NamedAddr(), color.Green("ok"))
			}
			kfk.Close()
		}
	})
}
Example #5
func (q *queue) housekeeping() {
	defer func() {
		log.Trace("queue[%s] housekeeping done", q.ident())
		q.wg.Done()
	}()

	log.Trace("queue[%s] start housekeeping...", q.ident())

	purgeTick := time.NewTicker(q.purgeInterval)
	defer purgeTick.Stop()

	cursorChkpnt := time.NewTicker(time.Second)
	defer cursorChkpnt.Stop()

	for {
		select {
		case <-purgeTick.C:
			if err := q.Purge(); err != nil {
				log.Error("queue[%s] purge: %s", q.ident(), err)
			}

		case <-cursorChkpnt.C:
			if err := q.cursor.dump(); err != nil {
				log.Error("queue[%s] cursor checkpoint: %s", q.ident(), err)
			}

		case <-q.quit:
			return
		}
	}
}
Example #6
// InfluxDB starts an InfluxDB reporter that posts the metrics from the given registry at each interval.
// CREATE RETENTION POLICY two_hours ON food_data DURATION 2h REPLICATION 1 DEFAULT
// SHOW RETENTION POLICIES ON food_data
// CREATE CONTINUOUS QUERY cq_30m ON food_data BEGIN SELECT mean(website) AS mean_website,mean(phone) AS mean_phone INTO food_data."default".downsampled_orders FROM orders GROUP BY time(30m) END
func InfluxDB(hostname string, r metrics.Registry, interval time.Duration,
	url, database, username, password string, stop chan struct{}) {
	u, err := uurl.Parse(url)
	if err != nil {
		log.Error("unable to parse InfluxDB url %s. err=%v", url, err)
		return
	}

	rep := &reporter{
		reg:      r,
		interval: interval,
		stop:     stop,
		url:      *u,
		database: database,
		hostname: hostname,
		username: username,
		password: password,
	}
	if err := rep.makeClient(); err != nil {
		log.Error("unable to make InfluxDB client. err=%v", err)
		return
	}

	rep.run()
}
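A hedged usage sketch for the reporter: the metrics.Registry parameter suggests github.com/rcrowley/go-metrics, and the hostname, URL, database, and credentials below are placeholders.

// startReporting is a hypothetical caller in the same package: it posts
// metrics.DefaultRegistry to a local InfluxDB every 10s until stop is closed.
func startReporting(stop chan struct{}) {
	go InfluxDB("host-1", metrics.DefaultRegistry, 10*time.Second,
		"http://localhost:8086", "food_data", "user", "secret", stop)
}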
Example #7
func (this *Start) reloadHAproxy() (err error) {
	var cmd *exec.Cmd
	waitStartCh := make(chan struct{})
	if this.starting {
		log.Info("haproxy starting")
		cmd = exec.Command(this.command, "-f", configFile) // TODO use absolute path
		this.starting = false

		go func() {
			<-waitStartCh
			log.Info("haproxy started")
			if err := cmd.Wait(); err != nil {
				log.Error("haproxy: %v", err)
			}
		}()
	} else {
		shellScript := fmt.Sprintf("%s -f %s/%s -sf `cat %s/%s`",
			this.command, this.root, configFile, this.root, haproxyPidFile)
		log.Info("haproxy reloading: %s", shellScript)
		cmd = exec.Command("/bin/sh", "-c", shellScript)
		go func() {
			<-waitStartCh
			log.Info("haproxy reloaded")
			if err := cmd.Wait(); err != nil {
				log.Error("haproxy: %v", err)
			}
		}()
	}

	// NOTE: if Start fails, nothing is ever sent on waitStartCh, so the
	// goroutine spawned above is never released
	if err = cmd.Start(); err == nil {
		waitStartCh <- struct{}{}
	}

	return err
}
Example #8
func loadTemplates() {
	if config.faeTemplateFile != "" {
		body, err := ioutil.ReadFile(config.faeTemplateFile)
		if err != nil {
			log.Error("template[%s]: %s", config.faeTemplateFile, err)
		} else {
			faeTemplateContents = string(body)

			log.Info("template[%s] loaded", config.faeTemplateFile)
		}
	}

	if config.actorTemplateFile != "" {
		body, err := ioutil.ReadFile(config.actorTemplateFile)
		if err != nil {
			log.Error("template[%s]: %s", config.actorTemplateFile, err)
		} else {
			// the original assigned maintainTemplateContents here, an apparent
			// copy-paste slip; actorTemplateContents is the assumed intended target
			actorTemplateContents = string(body)

			log.Info("template[%s] loaded", config.actorTemplateFile)
		}
	}

	if config.maintainTemplateFile != "" {
		body, err := ioutil.ReadFile(config.maintainTemplateFile)
		if err != nil {
			log.Error("template[%s]: %s", config.maintainTemplateFile, err)
		} else {
			maintainTemplateContents = string(body)

			log.Info("template[%s] loaded", config.maintainTemplateFile)
		}
	}
}
Example #9
func (f *File) Attr(ctx context.Context, o *fuse.Attr) error {
	f.RLock()
	defer f.RUnlock()

	*o = f.attr

	// calculate size
	if !f.opened {
		if err := f.dir.reconnectKafkaIfNecessary(); err != nil {
			return err
		}

		latestOffset, err := f.dir.GetOffset(f.topic, f.partitionId, sarama.OffsetNewest)
		if err != nil {
			log.Error(err)

			return err
		}
		oldestOffset, err := f.dir.GetOffset(f.topic, f.partitionId, sarama.OffsetOldest)
		if err != nil {
			log.Error(err)

			return err
		}

		o.Size = uint64(latestOffset - oldestOffset)
	} else {
		o.Size = uint64(len(f.content))
	}

	log.Trace("File Attr, topic=%s, partitionId=%d, size=%d", f.topic, f.partitionId, o.Size)

	return nil
}
Example #10
func (this *Start) shutdown() {
	// kill haproxy
	log.Info("killing haproxy processes")

	f, e := os.Open(haproxyPidFile)
	if e != nil {
		log.Error("shutdown %v", e)
		return
	}
	defer f.Close()

	reader := bufio.NewReader(f)
	for {
		l, e := gio.ReadLine(reader)
		if e != nil {
			// EOF
			break
		}

		pid, _ := strconv.Atoi(string(l))
		p := &os.Process{
			Pid: pid,
		}
		if err := p.Kill(); err != nil {
			log.Error(err)
		} else {
			log.Info("haproxy[%d] terminated", pid)
		}
	}

	log.Info("removing %s", haproxyPidFile)
	os.Remove(haproxyPidFile)
}
Example #11
func (this *WatchSlowlog) updateRedisSlowlog(wg *sync.WaitGroup, host string, port int, tag string) {
	defer wg.Done()

	spec := redis.DefaultSpec().Host(host).Port(port)
	client, err := redis.NewSynchClientWithSpec(spec)
	if err != nil {
		log.Error("redis[%s:%d]: %v", host, port, err)
		return
	}
	defer client.Quit()

	n, err := client.SlowlogLen()
	if err != nil {
		log.Error("redis[%s:%d]: %v", host, port, err)
		return
	}

	if n == 0 {
		return
	}

	this.mu.Lock()
	this.slows[tag] = metrics.NewRegisteredGauge(tag+"redis.slowlog", nil)
	this.slows[tag].Update(n)
	this.mu.Unlock()
}
Example #12
func (this *haproxyMetrics) updateMetrics(records map[string]map[string]int64) (err error) {
	for svcName, svcCols := range records {
		// find svc
		svcMetrics, present := this.svcMetricsMap[svcName]
		if !present {
			log.Error("svcName[%s] not in svcMetricsMap[%#v]", svcName, this.svcMetricsMap)
			return ErrUnsupService
		}

		// update col values
		for colName, colVal := range svcCols {
			// find gauge
			gauge, present := svcMetrics.metricsMap[colName]
			if !present {
				log.Error("colName[%s] not in metricsMap[%#v]", colName, svcMetrics.metricsMap)
				return ErrMetricsNotFound
			}

			// update gauge
			gauge.Update(colVal)
		}
	}

	return
}
Example #13
// CallSOS will send SOS message to the zone wide kguard leader.
func (this *ZkZone) CallSOS(caller string, msg string) {
	log.Critical("SOS[%s] %s: sending...", caller, msg)

	// kguard leader might float, so refresh on each SOS message
	kguards, err := this.KguardInfos()
	if err != nil {
		log.Error("SOS[%s] %s: %v", caller, msg, err)
		return
	}

	leader := kguards[0]
	request := gorequest.New().Timeout(time.Second * 10)
	res, body, errs := request.Post(fmt.Sprintf("http://%s:%d", leader.Host, telemetry.SOSPort)).
		Set("User-Agent", fmt.Sprintf("sos-go-%s", gafka.BuildId)).
		Set(telemetry.SOSIdentHeader, caller).
		End()
	if len(errs) > 0 {
		log.Error("SOS[%s] %s: %+v", caller, msg, errs)
		return
	}

	if res.StatusCode != http.StatusAccepted {
		log.Error("SOS[%s] %s: HTTP %s %s", caller, msg, http.StatusText(res.StatusCode), body)
		return
	}

	log.Info("SOS[%s] %s: sent ok", caller, msg)
}
Example #14
// @rest POST /v1/jobs/:appid/:topic/:ver
func (this *manServer) createJobHandler(w http.ResponseWriter, r *http.Request, params httprouter.Params) {
	topic := params.ByName(UrlParamTopic)
	if !manager.Default.ValidateTopicName(topic) {
		log.Warn("illegal topic: %s", topic)

		writeBadRequest(w, "illegal topic")
		return
	}

	realIp := getHttpRemoteIp(r)

	if !this.throttleAddTopic.Pour(realIp, 1) {
		writeQuotaExceeded(w)
		return
	}

	hisAppid := params.ByName(UrlParamAppid)
	appid := r.Header.Get(HttpHeaderAppid)
	pubkey := r.Header.Get(HttpHeaderPubkey)
	ver := params.ByName(UrlParamVersion)
	if !manager.Default.AuthAdmin(appid, pubkey) {
		log.Warn("suspicious create job %s(%s) {appid:%s pubkey:%s topic:%s ver:%s}",
			r.RemoteAddr, realIp, appid, pubkey, topic, ver)

		writeAuthFailure(w, manager.ErrAuthenticationFail)
		return
	}

	cluster, found := manager.Default.LookupCluster(hisAppid)
	if !found {
		log.Error("create job %s(%s) {appid:%s topic:%s ver:%s} invalid appid",
			r.RemoteAddr, realIp, hisAppid, topic, ver)

		writeBadRequest(w, "invalid appid")
		return
	}

	log.Info("create job[%s] %s(%s) {appid:%s topic:%s ver:%s}",
		appid, r.RemoteAddr, realIp, hisAppid, topic, ver)

	rawTopic := manager.Default.KafkaTopic(hisAppid, topic, ver)
	if err := job.Default.CreateJobQueue(Options.AssignJobShardId, hisAppid, rawTopic); err != nil {
		log.Error("create job[%s] %s(%s) {shard:%d appid:%s topic:%s ver:%s} %v",
			appid, r.RemoteAddr, realIp, Options.AssignJobShardId, hisAppid, topic, ver, err)

		writeServerError(w, err.Error())
		return
	}

	if err := this.gw.zkzone.CreateJobQueue(rawTopic, cluster); err != nil {
		log.Error("app[%s] %s(%s) create job: {shard:%d appid:%s topic:%s ver:%s} %v",
			appid, r.RemoteAddr, realIp, Options.AssignJobShardId, hisAppid, topic, ver, err)

		writeServerError(w, err.Error())
		return
	}

	w.WriteHeader(http.StatusCreated)
	w.Write(ResponseOk)
}
Example #15
func (r *reporter) run() {
	intervalTicker := time.Tick(r.interval)
	//pingTicker := time.Tick(time.Second * 5)
	pingTicker := time.Tick(r.interval / 2)

	for {
		select {
		// TODO on shutdown, flush all metrics

		case <-r.stop:
			return

		case <-intervalTicker:
			if err := r.send(); err != nil {
				log.Error("unable to send metrics to InfluxDB. err=%v", err)
			}

		case <-pingTicker:
			_, _, err := r.client.Ping()
			if err != nil {
				log.Error("got error while sending a ping to InfluxDB, trying to recreate client. err=%v", err)

				if err = r.makeClient(); err != nil {
					log.Error("unable to make InfluxDB client. err=%v", err)
				}
			}
		}
	}
}
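One caveat: time.Tick never releases its underlying ticker. That is acceptable in a loop that lives as long as the process, as run does, but it leaks in shorter-lived code. Below is a sketch of the same loop shape with an explicitly stopped ticker; runWithTicker is hypothetical, not the original API.

// runWithTicker is a stoppable variant of the loop in run above:
// the ticker is freed when stop is closed.
func runWithTicker(interval time.Duration, stop <-chan struct{}, send func() error) {
	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	for {
		select {
		case <-stop:
			return
		case <-ticker.C:
			if err := send(); err != nil {
				log.Error("unable to send metrics to InfluxDB. err=%v", err)
			}
		}
	}
}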
Example #16
func (this *WatchActord) dueJobsWithin(topic string, timeSpan int64,
	now time.Time) (backlog int64, archive int64) {
	jobTable := jm.JobTable(topic)
	appid := manager.Default.TopicAppid(topic)
	aid := jm.App_id(appid)
	sql := fmt.Sprintf("SELECT count(job_id) FROM %s WHERE due_time<=?", jobTable)
	rows, err := this.mc.Query(jm.AppPool, jobTable, aid, sql, now.Unix()+timeSpan)
	if err != nil {
		log.Error("%s: %s", this.ident(), err)
		return
	}
	var n int
	for rows.Next() {
		rows.Scan(&n)
	}
	rows.Close()
	backlog += int64(n)

	archiveTable := jm.HistoryTable(topic)
	sql = fmt.Sprintf("SELECT count(job_id) FROM %s WHERE due_time>=?", archiveTable)
	rows, err = this.mc.Query(jm.AppPool, archiveTable, aid, sql, now.Unix()-timeSpan)
	if err != nil {
		log.Error("%s: %s", this.ident(), err)
		return
	}
	for rows.Next() {
		rows.Scan(&n)
	}
	rows.Close()
	archive += int64(n)

	return
}
Example #17
// TODO batch DELETE/INSERT for better performance.
func (this *JobExecutor) handleDueJobs(wg *sync.WaitGroup) {
	defer wg.Done()

	var (
		// zabbix maintains an in-memory delete queue
		// delete from history_uint where itemid=? and clock<min_clock
		sqlDeleteJob = fmt.Sprintf("DELETE FROM %s WHERE job_id=?", this.table)

		sqlInsertArchive = fmt.Sprintf("INSERT INTO %s(job_id,payload,ctime,due_time,etime,actor_id) VALUES(?,?,?,?,?,?)",
			jm.HistoryTable(this.topic))
		sqlReinject = fmt.Sprintf("INSERT INTO %s(job_id, payload, ctime, due_time) VALUES(?,?,?,?)", this.table)
	)
	for {
		select {
		case <-this.stopper:
			return

		case item := <-this.dueJobs:
			now := time.Now()
			affectedRows, _, err := this.mc.Exec(jm.AppPool, this.table, this.aid, sqlDeleteJob, item.JobId)
			if err != nil {
				log.Error("%s: %s", this.ident, err)
				continue
			}
			if affectedRows == 0 {
				// 2 possibilities:
				// - client Cancel job wins
				// - this handler is too slow and the job fetched twice in tick
				continue
			}

			log.Debug("%s land %s", this.ident, item)
			_, _, err = store.DefaultPubStore.SyncPub(this.cluster, this.topic, nil, item.Payload)
			if err != nil {
				err = hh.Default.Append(this.cluster, this.topic, nil, item.Payload)
			}
			if err != nil {
				// pub fails and hinted handoff also fails: reinject job back to mysql
				log.Error("%s: %s", this.ident, err)
				this.mc.Exec(jm.AppPool, this.table, this.aid, sqlReinject,
					item.JobId, item.Payload, item.Ctime, item.DueTime)
				continue
			}

			log.Debug("%s fired %s", this.ident, item)
			this.auditor.Trace(item.String())

			// mv job to archive table
			_, _, err = this.mc.Exec(jm.AppPool, this.table, this.aid, sqlInsertArchive,
				item.JobId, item.Payload, item.Ctime, item.DueTime, now.Unix(), this.parentId)
			if err != nil {
				log.Error("%s: %s", this.ident, err)
			} else {
				log.Debug("%s archived %s", this.ident, item)
			}
		}
	}
}
Example #18
// @rest GET /v1/partitions/:appid/:topic/:ver
func (this *manServer) partitionsHandler(w http.ResponseWriter, r *http.Request, params httprouter.Params) {
	topic := params.ByName(UrlParamTopic)
	hisAppid := params.ByName(UrlParamAppid)
	appid := r.Header.Get(HttpHeaderAppid)
	pubkey := r.Header.Get(HttpHeaderPubkey)
	ver := params.ByName(UrlParamVersion)
	realIp := getHttpRemoteIp(r)

	cluster, found := manager.Default.LookupCluster(hisAppid)
	if !found {
		log.Error("partitions[%s] %s(%s) {app:%s topic:%s ver:%s} invalid appid",
			appid, r.RemoteAddr, realIp, hisAppid, topic, ver)

		writeBadRequest(w, "invalid appid")
		return
	}

	if !manager.Default.AuthAdmin(appid, pubkey) {
		log.Warn("suspicious partitions call from %s(%s) {cluster:%s app:%s key:%s topic:%s ver:%s}",
			r.RemoteAddr, realIp, cluster, appid, pubkey, topic, ver)

		writeAuthFailure(w, manager.ErrAuthenticationFail)
		return
	}

	log.Info("partitions[%s] %s(%s) {cluster:%s app:%s topic:%s ver:%s}",
		appid, r.RemoteAddr, realIp, cluster, hisAppid, topic, ver)

	zkcluster := meta.Default.ZkCluster(cluster)
	if zkcluster == nil {
		log.Error("suspicious partitions call from %s(%s) {cluster:%s app:%s key:%s topic:%s ver:%s} undefined cluster",
			r.RemoteAddr, realIp, cluster, appid, pubkey, topic, ver)

		writeBadRequest(w, "undefined cluster")
		return
	}

	kfk, err := sarama.NewClient(zkcluster.BrokerList(), sarama.NewConfig())
	if err != nil {
		log.Error("cluster[%s] %v", zkcluster.Name(), err)

		writeServerError(w, err.Error())
		return
	}
	defer kfk.Close()

	partitions, err := kfk.Partitions(manager.Default.KafkaTopic(hisAppid, topic, ver))
	if err != nil {
		log.Error("cluster[%s] from %s(%s) {app:%s topic:%s ver:%s} %v",
			zkcluster.Name(), r.RemoteAddr, realIp, hisAppid, topic, ver, err)

		writeServerError(w, err.Error())
		return
	}

	w.Write([]byte(fmt.Sprintf(`{"num": %d}`, len(partitions))))
}
Example #19
func (this *WatchReplicas) report() (deadPartitions, outOfSyncPartitions int64) {
	this.Zkzone.ForSortedClusters(func(zkcluster *zk.ZkCluster) {
		brokerList := zkcluster.BrokerList()
		if len(brokerList) == 0 {
			log.Warn("cluster[%s] empty brokers", zkcluster.Name())
			return
		}

		kfk, err := sarama.NewClient(brokerList, sarama.NewConfig())
		if err != nil {
			log.Error("cluster[%s] %v", zkcluster.Name(), err)
			return
		}
		defer kfk.Close()

		topics, err := kfk.Topics()
		if err != nil {
			log.Error("cluster[%s] %v", zkcluster.Name(), err)
			return
		}

		for _, topic := range topics {
			alivePartitions, err := kfk.WritablePartitions(topic)
			if err != nil {
				log.Error("cluster[%s] topic:%s %v", zkcluster.Name(), topic, err)
				continue
			}
			partitions, err := kfk.Partitions(topic)
			if err != nil {
				log.Error("cluster[%s] topic:%s %v", zkcluster.Name(), topic, err)
				continue
			}

			// some partitions are dead
			if len(alivePartitions) != len(partitions) {
				deadPartitions += 1
			}

			for _, partitionID := range alivePartitions {
				replicas, err := kfk.Replicas(topic, partitionID)
				if err != nil {
					log.Error("cluster[%s] topic:%s partition:%d %v",
						zkcluster.Name(), topic, partitionID, err)
					continue
				}

				isr, _, _ := zkcluster.Isr(topic, partitionID)
				if len(isr) != len(replicas) {
					outOfSyncPartitions += 1
				}
			}
		}
	})

	return
}
Example #20
func fetchDashboardStats(statsUri string) (v map[string]map[string]int64) {
	v = make(map[string]map[string]int64)

	client := http.Client{Timeout: time.Second * 4}
	resp, err := client.Get(statsUri)
	if err != nil {
		log.Error("%s: %v", statsUri, err)
		return
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		log.Error("%s: got status %v", statsUri, resp.Status)
		return
	}

	reader := csv.NewReader(resp.Body)
	records, err := reader.ReadAll()
	if err != nil {
		log.Error("%s: %v", statsUri, err)
		return
	}

	theCols := make(map[int]string) // col:name
	for i, row := range records {
		if i == 0 {
			// header
			for j, col := range row {
				theCols[j] = col
			}
			continue
		}

		if row[1] != "BACKEND" || (row[0] != "pub" && row[0] != "sub" && row[0] != "man") {
			continue
		}

		v[row[0]] = make(map[string]int64)
		// j: avoid shadowing the row index i from the outer loop
		for j, col := range row {
			if _, present := colsMap[theCols[j]]; !present {
				// ignore unwanted metrics
				continue
			}
			if strings.HasPrefix(theCols[j], "#") || theCols[j] == "svname" {
				continue
			}

			n, _ := strconv.ParseInt(col, 10, 64)
			v[row[0]][theCols[j]] = n
		}
	}

	return
}
Example #21
// DELETE /v1/jobs/:topic/:ver?id=22323
func (this *pubServer) deleteJobHandler(w http.ResponseWriter, r *http.Request, params httprouter.Params) {
	appid := r.Header.Get(HttpHeaderAppid)
	topic := params.ByName(UrlParamTopic)
	ver := params.ByName(UrlParamVersion)
	realIp := getHttpRemoteIp(r)
	if err := manager.Default.OwnTopic(appid, r.Header.Get(HttpHeaderPubkey), topic); err != nil {
		log.Error("-job[%s] %s(%s) {topic:%s, ver:%s} %s",
			appid, r.RemoteAddr, realIp, topic, ver, err)

		writeAuthFailure(w, err)
		return
	}

	_, found := manager.Default.LookupCluster(appid)
	if !found {
		log.Error("-job[%s] %s(%s) {topic:%s, ver:%s} cluster not found",
			appid, r.RemoteAddr, realIp, topic, ver)

		writeBadRequest(w, "invalid appid")
		return
	}

	jobId := r.URL.Query().Get("id")
	if len(jobId) < 18 { // jobId e.g. 341647700585877504
		writeBadRequest(w, "invalid job id")
		return
	}

	if err := job.Default.Delete(appid, manager.Default.KafkaTopic(appid, topic, ver), jobId); err != nil {
		if err == job.ErrNothingDeleted {
			// race failed, actor worker wins
			log.Warn("-job[%s] %s(%s) {topic:%s, ver:%s jid:%s} %v",
				appid, r.RemoteAddr, realIp, topic, ver, jobId, err)

			w.WriteHeader(http.StatusConflict)
			w.Write([]byte{})
			return
		}

		log.Error("-job[%s] %s(%s) {topic:%s, ver:%s jid:%s} %v",
			appid, r.RemoteAddr, realIp, topic, ver, jobId, err)

		writeServerError(w, err.Error())
		return
	}

	if Options.AuditPub {
		this.auditor.Trace("-job[%s] %s(%s) {topic:%s ver:%s UA:%s jid:%s}",
			appid, r.RemoteAddr, realIp, topic, ver, r.Header.Get("User-Agent"), jobId)
	}

	w.Write(ResponseOk)
}
Example #22
// @rest DELETE /v1/manager/cache
func (this *manServer) refreshManagerHandler(w http.ResponseWriter, r *http.Request, params httprouter.Params) {
	appid := r.Header.Get(HttpHeaderAppid)
	pubkey := r.Header.Get(HttpHeaderPubkey)
	realIp := getHttpRemoteIp(r)

	if !manager.Default.AuthAdmin(appid, pubkey) {
		log.Warn("suspicious refresh call from %s(%s) {app:%s key:%s}",
			r.RemoteAddr, realIp, appid, pubkey)

		writeAuthFailure(w, manager.ErrAuthenticationFail)
		return
	}

	if !this.throttleAddTopic.Pour(realIp, 1) {
		writeQuotaExceeded(w)
		return
	}

	kateways, err := this.gw.zkzone.KatewayInfos()
	if err != nil {
		log.Error("refresh from %s(%s) %v", r.RemoteAddr, realIp, err)

		writeServerError(w, err.Error())
		return
	}

	// refresh locally
	manager.Default.ForceRefresh()

	// refresh zone wide
	allOk := true
	for _, kw := range kateways {
		if kw.Id != this.gw.id {
			// notify other kateways to refresh: avoid dead loop in the network
			if err := this.gw.callKateway(kw, "PUT", "v1/options/refreshdb/true"); err != nil {
				// don't retry, just log
				log.Error("refresh from %s(%s) %s@%s: %v", r.RemoteAddr, realIp, kw.Id, kw.Host, err)

				allOk = false
			}
		}
	}

	log.Info("refresh from %s(%s) all ok: %v", r.RemoteAddr, realIp, allOk)

	if !allOk {
		writeServerError(w, "cache partially refreshed")
		return
	}

	w.Write(ResponseOk)
}
Example #23
func (this *WatchInfluxServer) Run() {
	defer this.Wg.Done()

	if this.addr == "" {
		log.Warn("empty influxdb server addr, quit...")
		return
	}

	ticker := time.NewTicker(this.Tick)
	defer ticker.Stop()

	var (
		err                 error
		influxdbServerAlive = metrics.NewRegisteredGauge("influxdb.alive", nil)
	)

	for {
		select {
		case <-this.Stop:
			log.Info("influx.server stopped")
			return

		case <-ticker.C:
			if this.cli == nil {
				this.cli, err = client.NewHTTPClient(client.HTTPConfig{
					Addr:     this.addr,
					Username: "",
					Password: "",
				})
				if err != nil {
					log.Error("influxdb.server: %v", err)
					influxdbServerAlive.Update(0)
					continue
				}
				if this.cli == nil {
					log.Error("influxdb.server connected got nil cli")
					influxdbServerAlive.Update(0)
					continue
				}
			}

			_, _, err = this.cli.Ping(time.Second * 4)
			if err != nil {
				log.Error("influxdb.server: %v", err)
				influxdbServerAlive.Update(0)
			} else {
				influxdbServerAlive.Update(1)
			}
		}
	}
}
Example #24
func (this *WatchRedisInfo) updateRedisInfo(wg *sync.WaitGroup, host string, port int, tag string) {
	defer wg.Done()

	spec := redis.DefaultSpec().Host(host).Port(port)
	client, err := redis.NewSynchClientWithSpec(spec)
	if err != nil {
		log.Error("redis[%s:%d]: %v", host, port, err)
		atomic.AddInt64(&this.deadN, 1)
		return
	}
	defer client.Quit()

	infoMap, err := client.Info()
	if err != nil {
		log.Error("redis[%s:%d] info: %v", host, port, err)
		atomic.AddInt64(&this.deadN, 1)
		return
	}

	var keysN int64
	// db0:keys=15500,expires=15500,avg_ttl=27438570
	for key, value := range infoMap {
		if strings.HasPrefix(key, "db") && strings.Contains(value, "keys=") {
			keysN += extractKeysCount(value)
		}
	}

	conns, _ := strconv.ParseInt(infoMap["connected_clients"], 10, 64)
	blocked, _ := strconv.ParseInt(infoMap["blocked_clients"], 10, 64)
	mem, _ := strconv.ParseInt(infoMap["used_memory"], 10, 64)
	ops, _ := strconv.ParseInt(infoMap["instantaneous_ops_per_sec"], 10, 64)
	rejected, _ := strconv.ParseInt(infoMap["rejected_connections"], 10, 64)
	syncPartial, _ := strconv.ParseInt(infoMap["sync_partial_err"], 10, 64)
	rxKbps, _ := strconv.ParseFloat(infoMap["instantaneous_input_kbps"], 64)
	txKbps, _ := strconv.ParseFloat(infoMap["instantaneous_output_kbps"], 64)
	expiredKeys, _ := strconv.ParseInt(infoMap["expired_keys"], 10, 64)

	atomic.AddInt64(&this.syncPartialN, syncPartial)

	this.mu.Lock()
	this.keys[tag].Update(keysN)
	this.conns[tag].Update(conns)
	this.blocked[tag].Update(blocked)
	this.usedMem[tag].Update(mem)
	this.ops[tag].Update(ops)
	this.rejected[tag].Update(rejected)
	this.rxKbps[tag].Update(int64(rxKbps))
	this.txKbps[tag].Update(int64(txKbps))
	this.expiredKeys[tag].Update(expiredKeys)
	this.mu.Unlock()
}
Example #25
func (this *WatchSlowlog) Run() {
	defer this.Wg.Done()

	ticker := time.NewTicker(this.Tick)
	defer ticker.Stop()

	this.slows = make(map[string]metrics.Gauge, 10)

	for {
		select {
		case <-this.Stop:
			log.Info("redis.slowlog stopped")
			return

		case <-ticker.C:
			var wg sync.WaitGroup

			for _, hostPort := range this.Zkzone.AllRedis() {
				host, port, err := net.SplitHostPort(hostPort)
				if err != nil {
					log.Error("invalid redis instance: %s", hostPort)
					continue
				}

				nport, err := strconv.Atoi(port)
				if err != nil || nport < 0 {
					log.Error("invalid redis instance: %s", hostPort)
					continue
				}

				var ip string
			ips, err := net.LookupIP(host) // host in IP form is also OK, e.g. 10.1.1.1
				if err != nil {
					log.Error("redis host[%s] ip: %v", host, err)
				} else if len(ips) > 0 {
					ip = ips[0].String()
				}

				tag := telemetry.Tag(strings.Replace(host, ".", "_", -1), port, ip)

				wg.Add(1)
				go this.updateRedisSlowlog(&wg, host, nport, tag)
			}

			wg.Wait()
		}
	}
}
Example #26
func (this *WatchExec) Run() {
	defer this.Wg.Done()

	if this.confDir == "" {
		log.Warn("empty confd, external.exec disabled")
		return
	}

	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()

	if err := this.watchConfigDir(); err != nil {
		log.Error("%v", err)
		return
	}

	for {
		select {
		case <-this.Stop:
			log.Info("external.exec stopped")
			return

		case <-ticker.C:
			// tick is currently a no-op; the loop only waits for Stop
		}
	}
}
Example #27
// CbAppend appends raw data to an existing item.
func (this *FunServantImpl) CbAppend(ctx *rpc.Context, bucket string,
	key string, val []byte) (ex error) {
	const IDENT = "cb.append"
	if this.cb == nil {
		ex = ErrServantNotStarted
		return
	}

	profiler, err := this.getSession(ctx).startProfiler()
	if err != nil {
		ex = err
		return
	}

	svtStats.inc(IDENT)

	b, err := this.cb.GetBucket(bucket)
	if err != nil {
		ex = err
		return
	}

	ex = b.Append(key, val)
	if ex != nil {
		log.Error("Q=%s %s: %s %s", IDENT, ctx.String(), key, ex)
	}

	profiler.do(IDENT, ctx,
		"{b^%s k^%s v^%s}",
		bucket, key, string(val))

	return
}
Example #28
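// CbAdd stores raw data under key only when the key does not already
// exist (Couchbase add semantics); r reports whether the add took effect.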
func (this *FunServantImpl) CbAdd(ctx *rpc.Context, bucket string,
	key string, val []byte, expire int32) (r bool, ex error) {
	const IDENT = "cb.add"
	if this.cb == nil {
		ex = ErrServantNotStarted
		return
	}

	profiler, err := this.getSession(ctx).startProfiler()
	if err != nil {
		ex = err
		return
	}

	svtStats.inc(IDENT)

	b, err := this.cb.GetBucket(bucket)
	if err != nil {
		ex = err
		return
	}

	r, ex = b.AddRaw(key, int(expire), val)
	if ex != nil {
		log.Error("Q=%s %s: %s %s", IDENT, ctx.String(), key, ex)
	}

	profiler.do(IDENT, ctx,
		"{b^%s k^%s v^%s exp^%d} {r^%v}",
		bucket, key, string(val), expire, r)

	return
}
Example #29
func (this *Peer) discoverPeers() {
	defer func() {
		this.c.Close() // leave the multicast group
	}()

	var msg peerMessage
	reader := bufio.NewReader(this.c)
	for {
		// net.ListenMulticastUDP sets IP_MULTICAST_LOOP=0 by default,
		// so you never receive your own sent data when sender and
		// receiver run on the (logically) same IP host
		line, _, err := reader.ReadLine()
		if err != nil {
			log.Error(err)
			continue
		}

		if err := msg.unmarshal(line); err != nil {
			// Not our protocol, it may be SSDP or else
			continue
		}

		log.Debug("received peer: %+v", msg)

		neighborIp, present := msg["ip"]
		if !present {
			log.Info("Peer msg has no 'ip'")
			continue
		}

		this.refreshNeighbor(neighborIp.(string))
	}
}
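For context on the IP_MULTICAST_LOOP remark in the loop above, this is roughly how such a connection could be opened; joinPeerGroup is a hypothetical constructor, and the group address and port are placeholders.

// joinPeerGroup joins a multicast UDP group and returns the conn that a
// loop like discoverPeers could read from. net.ListenMulticastUDP typically
// leaves multicast loopback disabled, per the comment above.
func joinPeerGroup() (*net.UDPConn, error) {
	group := &net.UDPAddr{IP: net.ParseIP("239.255.0.1"), Port: 9999}
	return net.ListenMulticastUDP("udp4", nil, group)
}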
Example #30
func (this *mysqlStore) fetchTopicRecords(db *sql.DB) error {
	rows, err := db.Query("SELECT AppId,TopicName,Status FROM topics")
	if err != nil {
		return err
	}
	defer rows.Close()

	m := make(map[string]map[string]bool)
	var app appTopicRecord
	for rows.Next() {
		err = rows.Scan(&app.AppId, &app.TopicName, &app.Status)
		if err != nil {
			log.Error("mysql manager store: %v", err)
			continue
		}

		if _, present := m[app.AppId]; !present {
			m[app.AppId] = make(map[string]bool)
		}

		m[app.AppId][app.TopicName] = app.Status == "1"
	}

	this.appTopicsMap = m

	return nil
}