Пример #1
0
// Fetch will attempt to retreive a user from token cache
// If cacheHit == true and u == nil and err == nil then we KNOW they don't
// exist (and so we don't have to bother looking them up via login service)
func (c *memcacheCacher) Fetch(sessId string) (u *User, cacheHit bool, err error) {
	t := time.Now()
	u, hit, err := c.doFetch(sessId)
	instTiming("auth.cache.fetch", err, t)
	if hit {
		inst.Counter(1.0, "auth.cache.fetch.hit", 1)
	} else {
		inst.Counter(1.0, "auth.cache.fetch.miss", 1)
	}
	return u, hit, err
}
Пример #2
0
// Authorise tests auth
func (a *simpleAuthoriser) Authorise(req *Request) errors.Error {

	// If we require neither a role or a user, then there is no need to authorise
	if !a.requireUser && !a.requireRole {
		log.Debugf("Skipping auth from %s to %s, as neither user or role required", req.From(), req.Destination())
		return nil
	}

	// Otherwise, authorise this request
	scope := req.Auth()
	log.Tracef("Scope user: %v", scope.AuthUser())
	if a.requireUser && !scope.IsAuth() {
		return errors.Forbidden("com.hailocab.kernel.auth.notsignedin", fmt.Sprintf("Must be signed in to call this endpoint[endpoint=%s, service=%s, from=%s]",
			req.Endpoint(), req.Service(), req.From()), "201")
	}
	if a.requireRole {
		matchesRole := false
		for _, r := range a.roles {
			if scope.HasAccess(r) {
				matchesRole = true
				break
			}
		}
		if !matchesRole {
			if scope.HasTriedAuth() {
				return errors.Forbidden("com.hailocab.kernel.auth.badrole", fmt.Sprintf("Must be signed in to call this endpoint[endpoint=%s, service=%s, from=%s]",
					req.Endpoint(), req.Service(), req.From()), "201")
			}
			// Instrument when service to service auth fails
			inst.Counter(1.0, "auth.servicetoservice.failed", 1)
			return BadRoleError(req)
		}
	}
	return nil
}
Пример #3
0
// tokenConstrainedMiddleware limits the max concurrent requests handled per caller
func tokenConstrainedMiddleware(ep *Endpoint, h Handler) Handler {
	return func(req *Request) (proto.Message, errors.Error) {
		callerName := req.From()
		if callerName == "" {
			callerName = "unknown"
		}
		tokenBucketName := fmt.Sprintf("server.tokens.%s", callerName)
		reqsBucketName := fmt.Sprintf("server.inflightrequests.%s", callerName)
		tokC := tokensChan(callerName)

		select {
		case t := <-tokC:
			defer func() {
				atomic.AddUint64(&inFlightRequests, ^uint64(0)) // This is actually a subtraction
				tokC <- t                                       // Return the token to the pool
			}()

			nowInFlight := atomic.AddUint64(&inFlightRequests, 1) // Update active request counters
			inst.Gauge(1.0, tokenBucketName, len(tokC))
			inst.Gauge(1.0, reqsBucketName, int(nowInFlight))
			return h(req)
		case <-time.After(time.Duration(ep.Mean) * time.Millisecond):
			inst.Gauge(1.0, tokenBucketName, len(tokC))
			inst.Counter(1.0, "server.error.capacity", 1)

			return nil, errors.InternalServerError("com.hailocab.kernel.server.capacity",
				fmt.Sprintf("Server %v out of capacity", Name))
		}
	}
}
Пример #4
0
// instrumentedHandler wraps the handler to provide instrumentation
func instrumentedMiddleware(ep *Endpoint, h Handler) Handler {
	return func(req *Request) (rsp proto.Message, err errors.Error) {
		start := time.Now()
		// In a defer in case the handler panics
		defer func() {
			stats.Record(ep, err, time.Since(start))
			if err == nil {
				inst.Timing(1.0, "success."+ep.Name, time.Since(start))
				return
			}
			inst.Counter(1.0, fmt.Sprintf("server.error.%s", err.Code()), 1)
			switch err.Type() {
			case errors.ErrorBadRequest, errors.ErrorNotFound:
				// Ignore errors that are caused by clients
				// TODO: consider a new stat for clienterror?
				inst.Timing(1.0, "success."+ep.Name, time.Since(start))
				return
			default:
				inst.Timing(1.0, "error."+ep.Name, time.Since(start))
			}
		}()
		rsp, err = h(req)
		return rsp, err
	}
}
Пример #5
0
func newRegionLeader(lockNode string) *regionLeader {
	rl := &regionLeader{
		active:    true,
		lockNode:  lockNode,
		rescinded: make(chan struct{}),
	}
	// establish a watch to cleanup
	go func() {
		_, _, watch, err := zookeeper.GetW(rl.lockNode)
		if err != nil {
			rl.Rescind()
			return
		}
		<-watch
		log.Debugf("[Sync:RegionLeader] Watch triggered on '%v', will rescind leadership", rl.lockNode)
		inst.Counter(1.0, "sync.regionleader.remotely-rescinded")
		rl.Rescind()
		return
	}()

	// Register region leader
	mu.Lock()
	rls = append(rls, rl)
	mu.Unlock()

	return rl
}
Пример #6
0
// Rescind should be called to indicate you no longer wish to be the leader
func (rl *regionLeader) Rescind() {
	rl.cleanup.Do(func() {
		log.Debugf("[Sync:RegionLeader] Cleaning up leadership of '%v'...", rl.lockNode)
		close(rl.rescinded)
		// keep trying to delete the ZK node (to release leadership) until we're sure it doesn't exist
		for {
			err := zookeeper.Delete(rl.lockNode, -1)
			if err == nil || err == gozk.ErrNoNode {
				log.Debugf("[Sync:RegionLeader] Have deleted leadership node '%v'", rl.lockNode)
				inst.Counter(1.0, "sync.regionleader.rescinded")
				break
			}
			log.Warnf("[Sync:RegionLeader] Failed to cleanup/rescind leadership (will retry): %v", err)
			time.Sleep(cleanupDelay)
		}

		// Unregister region leader
		mu.Lock()
		for i := 0; i < len(rls); i++ {
			if rls[i] == rl {
				rls = append(rls[:i], rls[i+1:]...)
				break
			}
		}
		mu.Unlock()
	})
}
Пример #7
0
// Auth will pass the supplied details onto the login service in an attempt
// to authenticate a brand new session
func (s *realScope) Auth(mech, device string, creds map[string]string) error {
	t := time.Now()

	u, err := s.doAuth(mech, device, creds)

	instTiming("auth.auth", err, t)
	if s.IsAuth() {
		inst.Counter(0.01, "auth.authenticate.recovered", 1)
	} else {
		inst.Counter(1.0, "auth.authenticate.badCredentials", 1)
	}

	s.Lock()
	defer s.Unlock()

	s.authUser = u
	if err == nil || err == BadCredentialsError {
		s.triedAuth = true
	}

	return err
}
Пример #8
0
// RegionTimedLock attempts to achieve a regional lock on `id`, waiting for `waitFor` time in
// case of contention (before giving up) and reserving the lock for a maximum `holdFor`
// in the event of failing to Unlock()
func RegionTimedLock(id []byte, waitFor, holdFor time.Duration) (Lock, error) {
	// Ensure we are reaping
	once.Do(startReaper)

	if int64(holdFor) < int64(minRegionHoldFor) {
		return nil, ErrRegionHoldFor
	}

	lockId := string(id) // we use []byte for function signature compatibility with the global lock

	// Our locks are namespaced per service
	path, err := constructLockPath(regionLockNamespace, lockId)
	if err != nil {
		return nil, err
	}
	log.Tracef("[Sync:RegionTimedLock] Attempting to acquire '%s'; held for %v", path, holdFor)

	// Create new lock which we will be lock()ed
	lock := &regionLock{
		zkLock: zk.NewLock(path, gozk.WorldACL(gozk.PermAll)),
	}
	lock.zkLock.SetTTL(holdFor)
	lock.zkLock.SetTimeout(waitFor)
	// Acquire a lock
	startTime := time.Now()
	err = lock.zkLock.Lock()
	inst.Timing(1.0, "sync.regionlock.acquire", time.Since(startTime))
	defaultReaper.addPath(path) // only add path to reaper AFTER we've acquired the lock (or not)

	if err == nil {
		log.Tracef("[Sync:RegionTimedLock] Successfully acquired '%s'", path)
		inst.Counter(1.0, "sync.regionlock.acquire.success")
	} else {
		log.Errorf("[Sync:RegionTimedLock] Failed to acquire '%s': %s", path, err.Error())
		inst.Counter(1.0, "sync.regionlock.acquire.failure")
	}

	return lock, err
}
Пример #9
0
// RecoverSession will try to turn a sessId into a valid user/token, if possible
// error will be non-nil if something goes wrong during this process - if we
// can't find any valid user with this sessId that is *not* an error
// If there is an error, the current state of the scope *will not have been changed*
// If there is no error, then the state will be updated, either to the recovered
// user *or* to nil, if no user was recovered
func (s *realScope) RecoverSession(sessId string) error {
	t := time.Now()

	u, err := s.doRecoverSession(sessId)
	instTiming("auth.recoverSession", err, t)

	if s.IsAuth() {
		inst.Counter(0.01, "auth.recoverSession.recovered", 1)
	} else {
		inst.Counter(1.0, "auth.recoverSession.badCredentials", 1)
	}

	s.Lock()
	defer s.Unlock()

	s.authUser = u

	if err == nil {
		s.triedAuth = true
	}

	return err
}
Пример #10
0
// Query executes a query to fetch data from graphite - returning raw JSON-encoded bytes
// The supplied URL values will be augmented with hostname, port
func (conn *Connection) Query(path string, v url.Values) ([]byte, error) {
	hostname, scheme, port := conn.hostSchemePort()
	if port > 0 {
		hostname = fmt.Sprintf("%v:%v", hostname, port)
	}
	url := &url.URL{Scheme: scheme, Host: hostname, Path: path, RawQuery: v.Encode()}
	res, err := conn.doQuery(url)
	if err != nil {
		inst.Counter(0.1, "query.fail", 1)
		// try to unmangle transport -- this was to avoid an error we were having where
		// we have "failed to send on a closed socket" or somesuch
		// @imsnakes advises us that we should hope to fix this in the http client lib
		// However for now we'll just take this slightly heavy-handed approach
		conn.Lock()
		defer conn.Unlock()
		conn.transport.Close()
		conn.transport = conn.newTimeoutTransport()
		conn.client = &http.Client{Transport: conn.transport}
	} else {
		inst.Counter(0.1, "query.success", 1)
	}
	return res, err
}
Пример #11
0
// RegionLeader block indefinitely until this invocation has been elected the "leader" within the local operating region.
// It will then return a channel that will eventually be closed when leadership is rescinded.
func RegionLeader(id string) Leader {
	path := fmt.Sprintf(regionLeaderPath, id)
	prefix := path + "/lock-"
	var lockNode string

	for {
		// create our lock node -- retry until this is done, use exponential backoff
		// to add some delay between attempts
		b := backoff.NewExponentialBackOff()
		b.InitialInterval = backoffInitialInterval
		b.MaxInterval = backoffMaxInterval
		b.MaxElapsedTime = 0 // Never stop retrying

		backoff.RetryNotify(func() (err error) {
			log.Infof("[Sync:RegionLeader] Attepting to create ephemeral lock node for leadership election")
			lockNode, err = zookeeper.CreateProtectedEphemeralSequential(prefix, []byte{}, gozk.WorldACL(gozk.PermAll))

			return
		}, b, func(err error, d time.Duration) {
			if err == gozk.ErrNoNode {
				createParents(path)
			} else if err != nil {
				log.Warnf("[Sync:RegionLeader] ZooKeeper error creating ephemeral lock node for leadership election: %s. Waiting %s", err, d)
			}
		})

		err := waitForWinner(path, lockNode)
		if err != nil {
			// try to cleanup - then go again
			zookeeper.Delete(lockNode, -1)
			time.Sleep(time.Second)
			continue
		}

		// we are the leader
		break
	}

	log.Infof("[Sync:RegionLeader] Elected leader of '%v'", id)
	inst.Counter(1.0, "sync.regionleader.elected")

	return newRegionLeader(lockNode)
}
Пример #12
0
// doReq sends a request, with timeout options and retries, waits for response and returns it
func (c *client) doReq(req *Request, options ...Options) (*Response, errors.Error) {

	if circuitbreaker.Open(req.service, req.endpoint) {
		inst.Counter(1.0, fmt.Sprintf("client.error.%s.%s.circuitbroken", req.service, req.endpoint), 1)
		log.Warnf("Broken Circuit for %s.%s", req.service, req.endpoint)
		return nil, errors.CircuitBroken("com.hailocab.kernel.platform.circuitbreaker", "Circuit is open")
	}

	retries := c.defaults["retries"].(int)
	var timeout time.Duration
	timeoutSupplied := false
	if len(options) == 1 {
		if _, ok := options[0]["retries"]; ok {
			retries = options[0]["retries"].(int)
		}
		if _, ok := options[0]["timeout"]; ok {
			timeout = options[0]["timeout"].(time.Duration)
			timeoutSupplied = true
		}
	}

	// setup the response channel
	rc := make(chan *Response, retries)
	c.responses.add(req, rc)
	defer c.responses.removeByRequest(req)

	instPrefix := fmt.Sprintf("client.%s.%s", req.service, req.endpoint)
	tAllRetries := time.Now()

	for i := 1; i <= retries+1; i++ {
		t := time.Now()

		c.RLock()
		con := c.listening
		c.RUnlock()
		if !con {
			log.Debug("[Client] not yet listening, establishing now...")
			ch := make(chan bool)
			go c.listen(ch)
			if online := <-ch; !online {
				log.Error("[Client] Listener failed")
				inst.Timing(1.0, fmt.Sprintf("%s.error", instPrefix), time.Since(t))
				inst.Counter(1.0, "client.error.com.hailocab.kernel.platform.client.listenfail", 1)
				return nil, errors.InternalServerError("com.hailocab.kernel.platform.client.listenfail", "Listener failed")
			}

			log.Info("[Client] Listener online")
		}

		// figure out what timeout to use
		if !timeoutSupplied {
			timeout = c.timeout.Get(req.service, req.endpoint, i)
		}
		log.Tracef("[Client] Sync request attempt %d for %s using timeout %v", i, req.MessageID(), timeout)

		// only bother sending the request if we are listening, otherwise allow to timeout
		if err := raven.SendRequest(req, c.instanceID); err != nil {
			log.Errorf("[Client] Failed to send request: %v", err)
		}

		select {
		case payload := <-rc:
			if payload.IsError() {
				inst.Timing(1.0, fmt.Sprintf("%s.error", instPrefix), time.Since(t))

				errorProto := &pe.PlatformError{}
				if err := payload.Unmarshal(errorProto); err != nil {
					inst.Counter(1.0, "client.error.com.hailocab.kernel.platform.badresponse", 1)
					return nil, errors.BadResponse("com.hailocab.kernel.platform.badresponse", err.Error())
				}

				err := errors.FromProtobuf(errorProto)
				inst.Counter(1.0, fmt.Sprintf("client.error.%s", err.Code()), 1)

				circuitbreaker.Result(req.service, req.endpoint, err)

				return nil, err
			}

			inst.Timing(1.0, fmt.Sprintf("%s.success", instPrefix), time.Since(t))
			circuitbreaker.Result(req.service, req.endpoint, nil)
			return payload, nil
		case <-time.After(timeout):
			// timeout
			log.Errorf("[Client] Timeout talking to %s.%s after %v for %s", req.Service(), req.Endpoint(), timeout, req.MessageID())
			inst.Timing(1.0, fmt.Sprintf("%s.error", instPrefix), time.Since(t))
			c.traceAttemptTimeout(req, i, timeout)

			circuitbreaker.Result(req.service, req.endpoint, errors.Timeout("com.hailocab.kernel.platform.timeout",
				fmt.Sprintf("Request timed out talking to %s.%s from %s (most recent timeout %v)", req.Service(), req.Endpoint(), req.From(), timeout),
				req.Service(),
				req.Endpoint()))
		}
	}

	inst.Timing(1.0, fmt.Sprintf("%s.error.timedOut", instPrefix), time.Since(tAllRetries))
	inst.Counter(1.0, "client.error.com.hailocab.kernel.platform.timeout", 1)

	return nil, errors.Timeout(
		"com.hailocab.kernel.platform.timeout",
		fmt.Sprintf("Request timed out talking to %s.%s from %s (most recent timeout %v)", req.Service(), req.Endpoint(), req.From(), timeout),
		req.Service(),
		req.Endpoint(),
	)
}
Пример #13
0
// GlobalTimedLock attempts to achieve a global lock on `id`, waiting for `waitFor` time in
// case of contention (before giving up) and reserving the lock for a maximum `holdFor`
// in the event of failure
// NOTE: locks can and will be held for longer than `holdFor`, but in the case of failure
// (eg: binary crashes) then this is the maximum amount of time other programs will hang
// around contending for the now defunkt lock
func GlobalTimedLock(id []byte, waitFor, holdFor time.Duration) (Lock, error) {
	if int64(holdFor) < int64(minHoldFor) {
		return nil, ErrHoldFor
	}

	u, err := gossie.NewTimeUUID()
	if err != nil {
		log.Warnf("[Sync:GlobalLock] Failed to generate time UUID: %v", err)
		return nil, ErrGenUuid
	}
	l := &globalLock{
		id:     id,
		lockId: u,
		exit:   make(chan struct{}),
	}

	// make my node in C*
	pool, err := cassandra.ConnectionPool(keyspace)
	if err != nil {
		return nil, fmt.Errorf("Error locking due to C*: %v", err)
	}
	writer := pool.Writer()
	writer.ConsistencyLevel(gossie.CONSISTENCY_QUORUM).Insert(cfGlobalLock, &gossie.Row{
		Key: l.id,
		Columns: []*gossie.Column{
			{
				Name:  []byte(l.lockId[:]),
				Value: []byte{}, // @todo could inject some data about who has the lock here
				Ttl:   durationToSeconds(holdFor, 1.0),
			},
		},
	})
	startTime := time.Now()
	err = writer.Run()
	if err != nil {
		inst.Timing(1.0, "sync.globaltimedlock.acquire", time.Since(startTime))
		inst.Counter(1.0, "sync.globaltimedlock.acquire.failure")
		return nil, err
	}

	// read all back and ensure i'm the lowest
	reader := pool.Reader().ConsistencyLevel(gossie.CONSISTENCY_QUORUM).Cf(cfGlobalLock)
	attempts := 0
	errs := multierror.New()
	start := time.Now()
	for {
		// break out if we've waited too long
		if attempts > 0 {
			if time.Now().After(start.Add(waitFor)) {
				inst.Timing(1.0, "sync.globaltimedlock.acquire", time.Since(startTime))
				inst.Counter(1.0, "sync.globaltimedlock.acquire.failure")
				l.Unlock()
				return nil, ErrContended
			}
			// delay a bit to avoid hammering C*
			time.Sleep(addJitter(delayFor))
		}

		attempts++

		row, err := reader.Get(l.id)
		if err != nil {
			errs.Add(fmt.Errorf("C* read back error: %v", err))
			continue
		}
		if row == nil || len(row.Columns) == 0 {
			errs.Add(fmt.Errorf("C* read back error: no columns returned from query"))
			continue
		}

		col := row.Columns[0]
		if bytes.Equal(col.Name, []byte(l.lockId[:])) {
			// we have the lock
			break
		}
	}

	inst.Timing(1.0, "sync.globaltimedlock.acquire", time.Since(startTime))
	inst.Counter(1.0, "sync.globaltimedlock.acquire.success")

	// put in place the refresher loop @todo
	go func() {
		for {
			log.Debug("[Sync:GlobalLock] Doing refresher loop…")
			refresh := time.Duration(float64(holdFor) * 0.75)
			select {
			case <-l.exit:
				log.Debugf("[Sync:GlobalLock] Breaking out of refresher loop")
				return
			case <-time.After(refresh):
				log.Debugf("[Sync:GlobalLock] Refreshing %s [%s]", string(l.id), l.lockId.String())
				writer.ConsistencyLevel(gossie.CONSISTENCY_QUORUM).Insert(cfGlobalLock, &gossie.Row{
					Key: l.id,
					Columns: []*gossie.Column{{
						Name:  []byte(l.lockId[:]),
						Value: []byte{},                        // @todo could inject some data about who has the lock here
						Ttl:   durationToSeconds(holdFor, 1.5), // 1.5 is because we renew the lock earlier than the timeout, so we need to cover that extra bit
					}},
				})
				if err := writer.Run(); err != nil {
					// @todo we could inform clients of this, somehow, eg: via a channel
					log.Warnf("[Sync:GlobalLock] failed to refresh lock .. cannot guarantee exclusivity")
				}
			}
		}
	}()

	return l, nil
}
Пример #14
0
// HandleRequest and send back response
func HandleRequest(req *Request) {
	defer func() {
		if r := recover(); r != nil {
			log.Criticalf("[Server] Panic \"%v\" when handling request: (id: %s, endpoint: %s, content-type: %s,"+
				" content-length: %d)", r, req.MessageID(), req.Destination(), req.delivery.ContentType,
				len(req.delivery.Body))
			inst.Counter(1.0, "runtime.panic", 1)
			publishFailure(r)
			debug.PrintStack()
		}
	}()

	if len(req.Service()) > 0 && req.Service() != Name {
		log.Criticalf(`[Server] Message meant for "%s" not "%s"`, req.Service(), Name)
		return
	}

reqProcessor:
	switch {
	case req.isHeartbeat():
		if dsc.IsConnected() {
			log.Tracef("[Server] Inbound heartbeat from: %s", req.ReplyTo())
			dsc.hb.beat()
			raven.SendResponse(PongResponse(req), InstanceID)
		} else {
			log.Warnf("[Server] Not connected but heartbeat from: %s", req.ReplyTo())
		}

	case req.IsPublication():
		log.Tracef("[Server] Inbound publication on topic: %s", req.Topic())

		if endpoint, ok := reg.find(req.Topic()); ok { // Match + call handler
			if data, err := endpoint.unmarshalRequest(req); err != nil {
				log.Warnf("[Server] Failed to unmarshal published message: %s", err.Error())
				break reqProcessor
			} else {
				req.unmarshaledData = data
			}

			if _, err := endpoint.Handler(req); err != nil {
				// don't do anything on error apart from log - it's a pub sub call so no response required
				log.Warnf("[Server] Failed to process published message: %v", err)
			}
		}

	default:
		log.Tracef("[Server] Inbound message %s from %s", req.MessageID(), req.ReplyTo())

		// Match a handler
		endpoint, ok := reg.find(req.Endpoint())
		if !ok {
			if rsp, err := ErrorResponse(req, errors.InternalServerError("com.hailocab.kernel.handler.missing", fmt.Sprintf("No handler registered for %s", req.Destination()))); err != nil {
				log.Criticalf("[Server] Unable to build response: %v", err)
			} else {
				raven.SendResponse(rsp, InstanceID)
			}
			return
		}

		// Unmarshal the request data
		var (
			reqData, rspData proto.Message
			err              errors.Error
		)
		if reqData, err = endpoint.unmarshalRequest(req); err == nil {
			req.unmarshaledData = reqData
		}

		// Call handler if no errors so far
		if err == nil {
			rspData, err = endpoint.Handler(req)
		}

		// Check response type matches what's registered
		if err == nil && rspData != nil {
			_, rspProtoT := endpoint.ProtoTypes()
			rspDataT := reflect.TypeOf(rspData)
			if rspProtoT != nil && rspProtoT != rspDataT {
				err = errors.InternalServerError("com.hailocab.kernel.server.mismatchedprotocol",
					fmt.Sprintf("Mismatched response protocol. %s != %s", rspDataT.String(), rspProtoT.String()))
			}
		}

		if err != nil {
			switch err.Type() {
			case errors.ErrorBadRequest, errors.ErrorForbidden, errors.ErrorNotFound:
				log.Debugf("[Server] Handler error %s calling %v.%v from %v: %v", err.Type(), req.Service(),
					req.Endpoint(), req.From(), err)
			case errors.ErrorInternalServer:
				go publishError(req, err)
				fallthrough
			default:
				log.Errorf("[Server] Handler error %s calling %v.%v from %v: %v", err.Type(), req.Service(),
					req.Endpoint(), req.From(), err)
			}

			if rsp, err := ErrorResponse(req, err); err != nil {
				log.Criticalf("[Server] Unable to build response: %v", err)
			} else {
				raven.SendResponse(rsp, InstanceID)
			}

			return
		}

		if rsp, err := ReplyResponse(req, rspData); err != nil {
			if rsp, err2 := ErrorResponse(req, errors.InternalServerError("com.hailocab.kernel.marshal.error", fmt.Sprintf("Could not marshal response %v", err))); err2 != nil {
				log.Criticalf("[Server] Unable to build error response: %v", err2)
			} else { // Send the error response
				raven.SendResponse(rsp, InstanceID)
			}
		} else { // Send the succesful response
			raven.SendResponse(rsp, InstanceID)
		}
	}
}
Пример #15
0
// Init is a local init call that handles setup
func Init() {
	// Parse flags and handle them. No other code should be calling flag.Parse()
	handleFlags()

	if len(Name) == 0 {
		log.Critical("No service name found")
		cleanupLogs()
		os.Exit(1)
	}

	// GO!
	log.Infof("Starting up %v (%v)\n%v\n", Name, Version, banner.PrintS(Name))

	rand.Seed(time.Now().UnixNano())
	tokens = make(map[string]chan bool, 100)

	servicesConfigFile := configDir + "/" + Name + "-seelog.xml"
	if _, err := os.Stat(servicesConfigFile); err == nil {
		loadLogConfig(servicesConfigFile)
	}

	// Try and open the access log file
	if accessLogDir := os.Getenv(accessLogEnv); accessLogDir != "" {
		var err error
		accessLogFilename := filepath.Join(accessLogDir, Name+"-access.log")
		commonLogger, err = os.OpenFile(accessLogFilename, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666)
		if err != nil {
			log.Errorf("[Server] Error opening access log file: %v", err)
			commonLogger = nil
		}
	} else {
		log.Errorf("[Server] No access log env (%v) set", accessLogEnv)
	}

	log.Infof("[Server] Running with %d cores", runtime.GOMAXPROCS(0))
	uuid, _ := uuid.NewV4()
	InstanceID = fmt.Sprintf("server-%s-%s", Name, uuid)

	// Configure key service layer components, loading config from the config service automatically
	config.LoadFromService(Name)
	inst.SetNamespace(Name)
	ssync.SetRegionLockNamespace(Name)

	// Register region leader cleanup function
	RegisterCleanupHandler(ssync.CleanupRegionLeaders)

	// Ping graphite that we have started
	inst.Counter(1.0, "runtime.started", 1)

	// Connect the raven and keep checking its status
	ch := raven.Connect()
	if online := <-ch; !online {
		log.Warn("[Server] Failed to connect the raven on first attempt")
	}
	go monitorRaven(ch)

	// Create a new registry for the endpoints
	reg = newRegistry()

	// Add default middleware
	registerMiddleware(authMiddleware)
	registerMiddleware(tracingMiddleware)
	registerMiddleware(instrumentedMiddleware)
	registerMiddleware(tokenConstrainedMiddleware)
	registerMiddleware(waitGroupMiddleware)
	registerMiddleware(commonLoggerMiddleware(commonLogger))

	// Add default endpoints
	registerEndpoint(&Endpoint{
		Name:             "health",
		Mean:             100,
		Upper95:          200,
		Handler:          healthHandler,
		RequestProtocol:  new(healthproto.Request),
		ResponseProtocol: new(healthproto.Response),
	})
	registerEndpoint(&Endpoint{
		Name:             "stats",
		Mean:             100,
		Upper95:          200,
		Handler:          statsHandler,
		RequestProtocol:  new(statsproto.Request),
		ResponseProtocol: new(statsproto.PlatformStats),
	})
	registerEndpoint(&Endpoint{
		Name:             "loadedconfig",
		Mean:             100,
		Upper95:          200,
		Handler:          loadedConfigHandler,
		RequestProtocol:  new(loadedconfigproto.Request),
		ResponseProtocol: new(loadedconfigproto.Response),
	})
	registerEndpoint(&Endpoint{
		Name:             "jsonschema",
		Mean:             100,
		Upper95:          200,
		Handler:          jsonschemaHandler,
		RequestProtocol:  new(jsonschemaproto.Request),
		ResponseProtocol: new(jsonschemaproto.Response),
		Authoriser:       OpenToTheWorldAuthoriser(),
	})

	// Initialise platform healthchecks
	initHealthChecks()
	initialised = true
}