func (manager *StateManager) Start() error { corelog.LogInfoMessage("starting manager") manager.Lock() defer manager.Unlock() var err error manager.currentReplicaSetState, err = manager.generateReplicaSetState() if err != nil { return err return errors.New(fmt.Sprintf("error starting statemanager, replicaset in flux: %v", err)) } healthyAddrs := manager.currentReplicaSetState.Addrs() // Ensure we have at least one health address. if len(healthyAddrs) == 0 { return stackerr.Newf("no healthy primaries or secondaries: %s", manager.replicaSet.Addrs) } manager.addProxies(healthyAddrs...) for _, proxy := range manager.proxies { go manager.startProxy(proxy) } manager.refreshTime = time.Now() return nil }
func (manager *StateManager) removeProxy(proxy *Proxy) { if _, ok := manager.proxyToReal[proxy.ProxyAddr]; !ok { corelog.LogErrorMessage(fmt.Sprintf("proxy %s does not exist in ReplicaSet", proxy.ProxyAddr)) } if _, ok := manager.realToProxy[proxy.MongoAddr]; !ok { corelog.LogErrorMessage(fmt.Sprintf("mongo %s does not exist in ReplicaSet", proxy.ProxyAddr)) } corelog.LogInfoMessage(fmt.Sprintf("removed %s", proxy)) delete(manager.proxyToReal, proxy.ProxyAddr) delete(manager.realToProxy, proxy.MongoAddr) delete(manager.proxies, proxy.ProxyAddr) }
func (manager *StateManager) addProxy(proxy *Proxy) (*Proxy, error) { if _, ok := manager.proxyToReal[proxy.ProxyAddr]; ok { return nil, fmt.Errorf("proxy %s already used in ReplicaSet", proxy.ProxyAddr) } if _, ok := manager.realToProxy[proxy.MongoAddr]; ok { return nil, fmt.Errorf("mongo %s already exists in ReplicaSet", proxy.MongoAddr) } corelog.LogInfoMessage(fmt.Sprintf("added %s", proxy)) manager.proxyToReal[proxy.ProxyAddr] = proxy.MongoAddr manager.realToProxy[proxy.MongoAddr] = proxy.ProxyAddr manager.proxies[proxy.ProxyAddr] = proxy return proxy, nil }
// proxyMessage proxies a message, possibly its response, and possibly a
// follow up call. Non-OpQuery messages are forwarded as raw bytes; OpQuery
// is delegated to ProxyQuery, which may rewrite it.
func (p *Proxy) proxyMessage(
	h *messageHeader,
	client net.Conn,
	server net.Conn,
	lastError *LastError,
) error {
	// Bound the entire exchange (request and any response) by the configured
	// message timeout. SetDeadline errors are deliberately ignored here.
	deadline := time.Now().Add(p.ReplicaSet.MessageTimeout)
	server.SetDeadline(deadline)
	client.SetDeadline(deadline)

	// OpQuery may need to be transformed and need special handling in order to
	// make the proxy transparent.
	if h.OpCode == OpQuery {
		stats.BumpSum(p.stats, "message.with.response", 1)
		return p.ReplicaSet.ProxyQuery.Proxy(h, client, server, lastError)
	}

	// Anything besides a getlasterror call (which requires an OpQuery) resets
	// the lastError.
	if lastError.Exists() {
		corelog.LogInfoMessage("reset getLastError cache")
		lastError.Reset()
	}

	// For other Ops we proxy the header & raw body over.
	if err := h.WriteTo(server); err != nil {
		corelog.LogError("error", err)
		return err
	}

	// The body is whatever remains of the message after the fixed-size header.
	if _, err := io.CopyN(server, client, int64(h.MessageLength-headerLen)); err != nil {
		corelog.LogError("error", err)
		return err
	}

	// For Ops with responses we proxy the raw response message over.
	if h.OpCode.HasResponse() {
		stats.BumpSum(p.stats, "message.with.response", 1)
		if err := copyMessage(client, server); err != nil {
			corelog.LogError("error", err)
			return err
		}
	}

	return nil
}
// Proxy proxies an OpQuery and a corresponding response. func (p *ProxyQuery) Proxy( h *messageHeader, client io.ReadWriter, server io.ReadWriter, lastError *LastError, ) error { // https://github.com/mongodb/mongo/search?q=lastError.disableForCommand // Shows the logic we need to be in sync with. Unfortunately it isn't a // simple check to determine this, and may change underneath us at the mongo // layer. resetLastError := true parts := [][]byte{h.ToWire()} var flags [4]byte if _, err := io.ReadFull(client, flags[:]); err != nil { corelog.LogError("error", err) return err } parts = append(parts, flags[:]) fullCollectionName, err := readCString(client) if err != nil { corelog.LogError("error", err) return err } parts = append(parts, fullCollectionName) var rewriter responseRewriter if *proxyAllQueries || bytes.HasSuffix(fullCollectionName, cmdCollectionSuffix) { var twoInt32 [8]byte if _, err := io.ReadFull(client, twoInt32[:]); err != nil { corelog.LogError("error", err) return err } parts = append(parts, twoInt32[:]) queryDoc, err := readDocument(client) if err != nil { corelog.LogError("error", err) return err } parts = append(parts, queryDoc) var q bson.D if err := bson.Unmarshal(queryDoc, &q); err != nil { corelog.LogError("error", err) return err } if hasKey(q, "getLastError") { return p.GetLastErrorRewriter.Rewrite( h, parts, client, server, lastError, ) } if hasKey(q, "isMaster") { rewriter = p.IsMasterResponseRewriter } if bytes.Equal(adminCollectionName, fullCollectionName) && hasKey(q, "replSetGetStatus") { rewriter = p.ReplSetGetStatusResponseRewriter } if rewriter != nil { // If forShell is specified, we don't want to reset the last error. See // comment above around resetLastError for details. 
resetLastError = hasKey(q, "forShell") } } if resetLastError && lastError.Exists() { corelog.LogInfoMessage("reset getLastError cache") lastError.Reset() } var written int for _, b := range parts { n, err := server.Write(b) if err != nil { corelog.LogError("error", err) return err } written += n } pending := int64(h.MessageLength) - int64(written) if _, err := io.CopyN(server, client, pending); err != nil { corelog.LogError("error", err) return err } if rewriter != nil { if err := rewriter.Rewrite(client, server); err != nil { return err } return nil } if err := copyMessage(client, server); err != nil { corelog.LogError("error", err) return err } return nil }
// Rewrite handles getLastError requests. func (r *GetLastErrorRewriter) Rewrite( h *messageHeader, parts [][]byte, client io.ReadWriter, server io.ReadWriter, lastError *LastError, ) error { if !lastError.Exists() { // We're going to be performing a real getLastError query and caching the // response. var written int for _, b := range parts { n, err := server.Write(b) if err != nil { corelog.LogError("error", err) return err } written += n } pending := int64(h.MessageLength) - int64(written) if _, err := io.CopyN(server, client, pending); err != nil { corelog.LogError("error", err) return err } var err error if lastError.header, err = readHeader(server); err != nil { corelog.LogError("error", err) return err } pending = int64(lastError.header.MessageLength - headerLen) if _, err = io.CopyN(&lastError.rest, server, pending); err != nil { corelog.LogError("error", err) return err } corelog.LogInfoMessage(fmt.Sprintf("caching new getLastError response: %s", lastError.rest.Bytes())) } else { // We need to discard the pending bytes from the client from the query // before we send it our cached response. var written int for _, b := range parts { written += len(b) } pending := int64(h.MessageLength) - int64(written) if _, err := io.CopyN(ioutil.Discard, client, pending); err != nil { corelog.LogError("error", err) return err } // Modify and send the cached response for this request. lastError.header.ResponseTo = h.RequestID corelog.LogInfoMessage("using cached getLastError response: %s", lastError.rest.Bytes()) } if err := lastError.header.WriteTo(client); err != nil { corelog.LogError("error", err) return err } if _, err := client.Write(lastError.rest.Bytes()); err != nil { corelog.LogError("error", err) return err } return nil }
// Debugf implements a printf-style debug logger by formatting the message
// and forwarding it to corelog at info level.
func (l *Logger) Debugf(f string, args ...interface{}) {
	msg := fmt.Sprintf(f, args...)
	corelog.LogInfoMessage(msg)
}
// Main wires up command-line flags, logging, metrics, the dvara state
// manager and health checker, then blocks until SIGTERM or SIGINT.
func Main() error {
	// Command-line flags; all have usable defaults.
	addrs := flag.String("addrs", "localhost:27017", "comma separated list of mongo addresses")
	clientIdleTimeout := flag.Duration("client_idle_timeout", 60*time.Minute, "idle timeout for client connections")
	getLastErrorTimeout := flag.Duration("get_last_error_timeout", time.Minute, "timeout for getLastError pinning")
	listenAddr := flag.String("listen", "127.0.0.1", "address for listening, for example, 127.0.0.1 for reachable only from the same machine, or 0.0.0.0 for reachable from other machines")
	maxConnections := flag.Uint("max_connections", 100, "maximum number of connections per mongo")
	maxPerClientConnections := flag.Uint("max_per_client_connections", 100, "maximum number of connections from a single client")
	messageTimeout := flag.Duration("message_timeout", 2*time.Minute, "timeout for one message to be proxied")
	password := flag.String("password", "", "mongodb password")
	portEnd := flag.Int("port_end", 6010, "end of port range")
	portStart := flag.Int("port_start", 6000, "start of port range")
	serverClosePoolSize := flag.Uint("server_close_pool_size", 1, "number of goroutines that will handle closing server connections.")
	serverIdleTimeout := flag.Duration("server_idle_timeout", 60*time.Minute, "duration after which a server connection will be considered idle")
	username := flag.String("username", "", "mongo db username")
	metricsAddress := flag.String("metrics", "127.0.0.1:8125", "UDP address to send metrics to datadog, default is 127.0.0.1:8125")
	replicaName := flag.String("replica_name", "", "Replica name, used in metrics and logging, default is empty")
	replicaSetName := flag.String("replica_set_name", "", "Replica set name, used to filter hosts runnning other replica sets")
	healthCheckInterval := flag.Duration("healthcheckinterval", 5*time.Second, "How often to run the health check")
	failedHealthCheckThreshold := flag.Uint("failedhealthcheckthreshold", 3, "How many failed checks before a restart")
	flag.Parse()

	// Metrics sink, tagged with the replica name.
	statsClient := NewDataDogStatsDClient(*metricsAddress, "replica:"+*replicaName)

	replicaSet := dvara.ReplicaSet{
		Addrs:                   *addrs,
		ClientIdleTimeout:       *clientIdleTimeout,
		GetLastErrorTimeout:     *getLastErrorTimeout,
		ListenAddr:              *listenAddr,
		MaxConnections:          *maxConnections,
		MaxPerClientConnections: *maxPerClientConnections,
		MessageTimeout:          *messageTimeout,
		Password:                *password,
		PortEnd:                 *portEnd,
		PortStart:               *portStart,
		ServerClosePoolSize:     *serverClosePoolSize,
		ServerIdleTimeout:       *serverIdleTimeout,
		Username:                *username,
		Name:                    *replicaSetName,
	}
	stateManager := dvara.NewStateManager(&replicaSet)

	// Actual logger
	corelog.SetupLogFmtLoggerTo(os.Stderr)
	corelog.SetStandardFields("replicaset", *replicaName)
	corelog.UseTimestamp(true)

	// Log command line args (the password flag is deliberately excluded).
	startupOptions := []interface{}{}
	flag.CommandLine.VisitAll(func(flag *flag.Flag) {
		if flag.Name != "password" {
			startupOptions = append(startupOptions, flag.Name, flag.Value.String())
		}
	})
	corelog.LogInfoMessage("starting with command line arguments", startupOptions...)

	// Wrapper for inject
	log := Logger{}
	var graph inject.Graph
	err := graph.Provide(
		&inject.Object{Value: &replicaSet},
		&inject.Object{Value: &statsClient},
		&inject.Object{Value: stateManager},
	)
	if err != nil {
		return err
	}
	if err := graph.Populate(); err != nil {
		return err
	}
	objects := graph.Objects()

	hc := &dvara.HealthChecker{
		HealthCheckInterval:        *healthCheckInterval,
		FailedHealthCheckThreshold: *failedHealthCheckThreshold,
	}

	// Start all injected objects; stop them on the way out.
	if err := startstop.Start(objects, &log); err != nil {
		return err
	}
	defer startstop.Stop(objects, &log)

	// Keep the state manager synchronized and run periodic health checks.
	syncChan := make(chan struct{})
	go stateManager.KeepSynchronized(syncChan)
	go hc.HealthCheck(&replicaSet, syncChan)

	// Block until asked to terminate.
	ch := make(chan os.Signal, 2)
	signal.Notify(ch, syscall.SIGTERM, syscall.SIGINT)
	<-ch
	signal.Stop(ch)
	return nil
}