Example #1
func TestReportStats(t *testing.T) {
	md, nr := startWithMockReporter()
	defer md.Stop()
	var remoteAddr atomic.Value

	// start server with byte counting
	l, err := net.Listen("tcp", "127.0.0.1:0")
	if !assert.NoError(t, err, "Listen should not fail") {
		return
	}

	// interval large enough that stats are only reported in Close()
	ml := md.Listener(l, 10*time.Second)
	s := http.Server{
		Handler: http.NotFoundHandler(),
		ConnState: func(c net.Conn, s http.ConnState) {
			if s == http.StateClosed {
				remoteAddr.Store(c.RemoteAddr().String())
			}
		},
	}
	go func() { _ = s.Serve(ml) }()

	time.Sleep(100 * time.Millisecond)
	// start client with byte counting
	c := http.Client{
		Transport: &http.Transport{
			// interval carefully chosen so that one more report happens before Close()
			Dial: md.Dialer(net.Dial, 160*time.Millisecond),
		},
	}
	req, _ := http.NewRequest("GET", "http://"+l.Addr().String(), nil)
	resp, _ := c.Do(req)
	assert.Equal(t, 404, resp.StatusCode)

	// Close without reading from body, to force server to close connection
	_ = resp.Body.Close()
	time.Sleep(100 * time.Millisecond)
	nr.Lock()
	defer nr.Unlock()
	t.Logf("Traffic entries: %+v", nr.traffic)
	if assert.Equal(t, 2, len(nr.traffic)) {
		ct := nr.traffic[l.Addr().String()]
		st := nr.traffic[remoteAddr.Load().(string)]

		if assert.NotNil(t, ct) {
			assert.Equal(t, 0, int(ct.MinOut), "client stats should only report increased byte count")
			assert.Equal(t, 0, int(ct.MinIn), "client stats should only report increased byte count")
			assert.Equal(t, 96, int(ct.MaxOut), "client stats should only report increased byte count")
			assert.Equal(t, 176, int(ct.MaxIn), "client stats should only report increased byte count")
			assert.Equal(t, 96, int(ct.TotalOut), "client stats should only report increased byte count")
			assert.Equal(t, 176, int(ct.TotalIn), "client stats should only report increased byte count")
		}

		if assert.NotNil(t, st) {
			assert.Equal(t, ct.TotalOut, st.TotalIn, "should report server stats with bytes in")
			assert.Equal(t, ct.TotalIn, st.TotalOut, "should report server stats with bytes out")
		}
	}
}
Example #2
func LeaderReqFn(name, port string) ReqFn {
	events := make(chan *discoverd.Event)
	if _, err := discoverd.NewService(name).Watch(events); err != nil {
		log.Fatalf("error creating %s cache: %s", name, err)
	}
	var leader atomic.Value // addr string
	leader.Store("")
	go func() {
		for e := range events {
			if e.Kind != discoverd.EventKindLeader || e.Instance == nil {
				continue
			}
			leader.Store(e.Instance.Addr)
		}
	}()
	return func() (*http.Request, error) {
		addr := leader.Load().(string)
		if addr == "" {
			return nil, errors.New("no leader")
		}
		if port != "" {
			host, _, _ := net.SplitHostPort(addr)
			addr = net.JoinHostPort(host, port)
		}
		return http.NewRequest("GET", fmt.Sprintf("http://%s%s", addr, status.Path), nil)
	}
}
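
A minimal sketch of the seeding idiom used above, with nothing beyond the standard library: storing an empty string before any reader runs makes the unconditional Load().(string) assertion safe, and the "" sentinel stands in for "no leader yet". The addresses and timings are illustrative.

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var leader atomic.Value
	leader.Store("") // seed so Load always returns a string

	// Writer: pretend a watcher reports a new leader address.
	go func() {
		time.Sleep(10 * time.Millisecond)
		leader.Store("10.0.0.5:8080")
	}()

	// Reader: the type assertion cannot panic because of the seed value.
	for i := 0; i < 10; i++ {
		if addr := leader.Load().(string); addr != "" {
			fmt.Println("leader:", addr)
			return
		}
		time.Sleep(5 * time.Millisecond)
	}
	fmt.Println("no leader yet")
}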
Example #3
func (cmd *Start) TailStagingLogs(app models.Application, stopChan chan bool, startWait, doneWait *sync.WaitGroup) {
	var connectionStatus atomic.Value
	connectionStatus.Store(NoConnection)

	onConnect := func() {
		if connectionStatus.Load() != StoppedTrying {
			connectionStatus.Store(ConnectionWasEstablished)
			startWait.Done()
		}
	}

	timer := time.NewTimer(cmd.LogServerConnectionTimeout)

	c := make(chan logs.Loggable)
	e := make(chan error)

	defer doneWait.Done()

	go cmd.logRepo.TailLogsFor(app.GUID, onConnect, c, e)

	for {
		select {
		case <-timer.C:
			if connectionStatus.Load() == NoConnection {
				connectionStatus.Store(StoppedTrying)
				cmd.ui.Warn("timeout connecting to log server, no log will be shown")
				startWait.Done()
				return
			}
		case msg, ok := <-c:
			if !ok {
				return
			} else if msg.GetSourceName() == LogMessageTypeStaging {
				cmd.ui.Say(msg.ToSimpleLog())
			}

		case err, ok := <-e:
			if ok {
				if connectionStatus.Load() != ConnectionWasClosed {
					cmd.ui.Warn(T("Warning: error tailing logs"))
					cmd.ui.Say("%s", err)
					if connectionStatus.Load() == NoConnection {
						startWait.Done()
					}
					return
				}
			}

		case <-stopChan:
			if connectionStatus.Load() == ConnectionWasEstablished {
				connectionStatus.Store(ConnectionWasClosed)
				cmd.logRepo.Close()
			} else {
				return
			}
		}
	}
}
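
A minimal sketch of the connection-state machine above, using hypothetical local constants rather than the cli package's NoConnection/ConnectionWasEstablished values: every Store uses the same concrete type, so Load can be compared directly against the constants.

package main

import (
	"fmt"
	"sync/atomic"
)

type connState int

const (
	noConnection connState = iota
	established
	closed
	stoppedTrying
)

func main() {
	var status atomic.Value
	status.Store(noConnection)

	onConnect := func() {
		// Only record the connection if we have not already given up.
		if status.Load() != stoppedTrying {
			status.Store(established)
		}
	}

	onConnect()
	fmt.Println("connected:", status.Load() == established)

	status.Store(closed)
	fmt.Println("closed:", status.Load() == closed)
}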
Example #4
// TestStoreScanInconsistentResolvesIntents lays down 10 intents,
// commits the txn without resolving intents, then does repeated
// inconsistent reads until the data shows up, showing that the
// inconsistent reads are triggering intent resolution.
func TestStoreScanInconsistentResolvesIntents(t *testing.T) {
	defer leaktest.AfterTest(t)
	// This test relies on having a committed Txn record and open intents on
	// the same Range. This only works with auto-gc turned off; alternatively
	// the test could move to splitting its underlying Range.
	defer withoutTxnAutoGC()()
	var intercept atomic.Value
	intercept.Store(true)
	TestingCommandFilter = func(args proto.Request) error {
		if _, ok := args.(*proto.ResolveIntentRequest); ok && intercept.Load().(bool) {
			return util.Errorf("error on purpose")
		}
		return nil
	}
	store, _, stopper := createTestStore(t)
	defer func() { TestingCommandFilter = nil }()
	defer stopper.Stop()

	// Lay down 10 intents to scan over.
	txn := newTransaction("test", proto.Key("foo"), 1, proto.SERIALIZABLE, store.ctx.Clock)
	keys := []proto.Key{}
	for j := 0; j < 10; j++ {
		key := proto.Key(fmt.Sprintf("key%02d", j))
		keys = append(keys, key)
		args := putArgs(key, []byte(fmt.Sprintf("value%02d", j)), 1, store.StoreID())
		args.Txn = txn
		if _, err := store.ExecuteCmd(context.Background(), &args); err != nil {
			t.Fatal(err)
		}
	}

	// Now, commit txn without resolving intents. If we hadn't disabled auto-gc
	// of Txn entries in this test, the Txn entry would be removed and later
	// attempts to resolve the intents would fail.
	etArgs := endTxnArgs(txn, true, 1, store.StoreID())
	etArgs.Timestamp = txn.Timestamp
	if _, err := store.ExecuteCmd(context.Background(), &etArgs); err != nil {
		t.Fatal(err)
	}

	intercept.Store(false) // allow async intent resolution

	// Scan the range repeatedly until we've verified count.
	sArgs := scanArgs(keys[0], keys[9].Next(), 1, store.StoreID())
	sArgs.ReadConsistency = proto.INCONSISTENT
	util.SucceedsWithin(t, time.Second, func() error {
		if reply, err := store.ExecuteCmd(context.Background(), &sArgs); err != nil {
			return err
		} else if sReply := reply.(*proto.ScanResponse); len(sReply.Rows) != 10 {
			return util.Errorf("could not read rows as expected")
		}
		return nil
	})
}
Example #5
func (db *ETCDDB) DesiredLRPs(logger lager.Logger, filter models.DesiredLRPFilter) (*models.DesiredLRPs, *models.Error) {
	root, bbsErr := db.fetchRecursiveRaw(logger, DesiredLRPSchemaRoot)
	if bbsErr.Equal(models.ErrResourceNotFound) {
		return &models.DesiredLRPs{}, nil
	}
	if bbsErr != nil {
		return nil, bbsErr
	}
	if root.Nodes.Len() == 0 {
		return &models.DesiredLRPs{}, nil
	}

	desiredLRPs := models.DesiredLRPs{}

	lrpsLock := sync.Mutex{}
	var workErr atomic.Value
	works := []func(){}

	for _, node := range root.Nodes {
		node := node

		works = append(works, func() {
			var lrp models.DesiredLRP
			deserializeErr := models.FromJSON([]byte(node.Value), &lrp)
			if deserializeErr != nil {
				logger.Error("failed-parsing-desired-lrp", deserializeErr)
				workErr.Store(fmt.Errorf("cannot parse lrp JSON for key %s: %s", node.Key, deserializeErr.Error()))
				return
			}

			if filter.Domain == "" || lrp.GetDomain() == filter.Domain {
				lrpsLock.Lock()
				desiredLRPs.DesiredLrps = append(desiredLRPs.DesiredLrps, &lrp)
				lrpsLock.Unlock()
			}
		})
	}

	throttler, err := workpool.NewThrottler(maxDesiredLRPGetterWorkPoolSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": maxDesiredLRPGetterWorkPoolSize, "num-works": len(works)})
		return &models.DesiredLRPs{}, models.ErrUnknownError
	}

	logger.Debug("performing-deserialization-work")
	throttler.Work()
	if err, ok := workErr.Load().(error); ok {
		logger.Error("failed-performing-deserialization-work", err)
		return &models.DesiredLRPs{}, models.ErrUnknownError
	}
	logger.Debug("succeeded-performing-deserialization-work", lager.Data{"num-desired-lrps": len(desiredLRPs.GetDesiredLrps())})

	return &desiredLRPs, nil
}
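
A minimal, standard-library-only sketch of the workErr pattern above: workers Store an error into a shared atomic.Value, and the caller reads it back with a comma-ok type assertion so a Value that was never written (Load() returns nil) is not mistaken for a failure. If several workers fail concurrently, only one stored error survives, which matches the code above.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var workErr atomic.Value
	var wg sync.WaitGroup

	for i := 0; i < 5; i++ {
		wg.Add(1)
		go func(n int) {
			defer wg.Done()
			if n == 3 { // pretend this item fails to deserialize
				workErr.Store(fmt.Errorf("cannot parse item %d", n))
				return
			}
			// ... otherwise the item would be appended under a mutex ...
		}(i)
	}
	wg.Wait()

	// Comma-ok keeps an untouched Value (nil) from being treated as an error.
	if err, ok := workErr.Load().(error); ok {
		fmt.Println("work failed:", err)
		return
	}
	fmt.Println("all items processed")
}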
Example #6
// Test that leases held before a restart are not used after the restart.
// See replica.mu.minLeaseProposedTS for the reasons why this isn't allowed.
func TestLeaseNotUsedAfterRestart(t *testing.T) {
	defer leaktest.AfterTest(t)()
	sc := storage.TestStoreConfig(nil)
	var leaseAcquisitionTrap atomic.Value
	// Disable the split queue so that no ranges are split. This makes it easy
	// below to trap any lease request and infer that it refers to the range we're
	// interested in.
	sc.TestingKnobs.DisableSplitQueue = true
	sc.TestingKnobs.LeaseRequestEvent = func(ts hlc.Timestamp) {
		val := leaseAcquisitionTrap.Load()
		if val == nil {
			return
		}
		trapCallback := val.(func(ts hlc.Timestamp))
		if trapCallback != nil {
			trapCallback(ts)
		}
	}
	mtc := &multiTestContext{storeConfig: &sc}
	mtc.Start(t, 1)
	defer mtc.Stop()

	// Send a read, to acquire a lease.
	getArgs := getArgs([]byte("a"))
	if _, err := client.SendWrapped(context.Background(), rg1(mtc.stores[0]), &getArgs); err != nil {
		t.Fatal(err)
	}

	// Restart the mtc. Before we do that, we're installing a callback used to
	// assert that a new lease has been requested. The callback is installed
	// before the restart, as the lease might be requested at any time and for
	// many reasons by background processes, even before we send the read below.
	leaseAcquisitionCh := make(chan error)
	var once sync.Once
	leaseAcquisitionTrap.Store(func(_ hlc.Timestamp) {
		once.Do(func() {
			close(leaseAcquisitionCh)
		})
	})
	mtc.restart()

	// Send another read and check that the pre-existing lease has not been used.
	// Concretely, we check that a new lease is requested.
	if _, err := client.SendWrapped(context.Background(), rg1(mtc.stores[0]), &getArgs); err != nil {
		t.Fatal(err)
	}
	// Check that the Send above triggered a lease acquisition.
	select {
	case <-leaseAcquisitionCh:
	case <-time.After(time.Second):
		t.Fatalf("read did not acquire a new lease")
	}
}
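
A minimal sketch of storing a callback in an atomic.Value, as the test above does: readers check Load() for nil before type-asserting, and a sync.Once guards the channel close so the callback can safely fire more than once. The names here are illustrative, not part of any project.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var trap atomic.Value // holds a func(string) once installed
	fired := make(chan struct{})
	var once sync.Once

	// hook is what the instrumented code would call on each event.
	hook := func(event string) {
		v := trap.Load()
		if v == nil {
			return // no callback installed yet
		}
		if cb := v.(func(string)); cb != nil {
			cb(event)
		}
	}

	hook("ignored: nothing installed yet")

	// Install the callback; sync.Once makes the close idempotent.
	trap.Store(func(event string) {
		once.Do(func() {
			fmt.Println("trapped:", event)
			close(fired)
		})
	})

	hook("lease-request")
	hook("lease-request") // second call is a no-op thanks to once
	<-fired
}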
Example #7
func (db *ETCDDB) ActualLRPGroups(logger lager.Logger, filter models.ActualLRPFilter) ([]*models.ActualLRPGroup, error) {
	node, err := db.fetchRecursiveRaw(logger, ActualLRPSchemaRoot)
	bbsErr := models.ConvertError(err)
	if bbsErr != nil {
		if bbsErr.Type == models.Error_ResourceNotFound {
			return []*models.ActualLRPGroup{}, nil
		}
		return nil, err
	}
	if len(node.Nodes) == 0 {
		return []*models.ActualLRPGroup{}, nil
	}

	groups := []*models.ActualLRPGroup{}

	var workErr atomic.Value
	groupChan := make(chan []*models.ActualLRPGroup, len(node.Nodes))
	wg := sync.WaitGroup{}

	logger.Debug("performing-deserialization-work")
	for _, node := range node.Nodes {
		node := node

		wg.Add(1)
		go func() {
			defer wg.Done()
			g, err := db.parseActualLRPGroups(logger, node, filter)
			if err != nil {
				workErr.Store(err)
				return
			}
			groupChan <- g
		}()
	}

	go func() {
		wg.Wait()
		close(groupChan)
	}()

	for g := range groupChan {
		groups = append(groups, g...)
	}

	if err, ok := workErr.Load().(error); ok {
		logger.Error("failed-performing-deserialization-work", err)
		return []*models.ActualLRPGroup{}, models.ErrUnknownError
	}
	logger.Debug("succeeded-performing-deserialization-work", lager.Data{"num_actual_lrp_groups": len(groups)})

	return groups, nil
}
Example #8
func (db *ETCDDB) ActualLRPGroups(logger lager.Logger, filter models.ActualLRPFilter) (*models.ActualLRPGroups, *models.Error) {
	node, bbsErr := db.fetchRecursiveRaw(logger, ActualLRPSchemaRoot)
	if bbsErr.Equal(models.ErrResourceNotFound) {
		return &models.ActualLRPGroups{}, nil
	}
	if bbsErr != nil {
		return nil, bbsErr
	}
	if node.Nodes.Len() == 0 {
		return &models.ActualLRPGroups{}, nil
	}

	groups := &models.ActualLRPGroups{}

	groupsLock := sync.Mutex{}
	var workErr atomic.Value
	works := []func(){}

	for _, node := range node.Nodes {
		node := node

		works = append(works, func() {
			g, err := parseActualLRPGroups(logger, node, filter)
			if err != nil {
				workErr.Store(err)
				return
			}
			groupsLock.Lock()
			groups.ActualLrpGroups = append(groups.ActualLrpGroups, g.ActualLrpGroups...)
			groupsLock.Unlock()
		})
	}

	throttler, err := workpool.NewThrottler(maxActualGroupGetterWorkPoolSize, works)
	if err != nil {
		logger.Error("failed-constructing-throttler", err, lager.Data{"max-workers": maxActualGroupGetterWorkPoolSize, "num-works": len(works)})
		return &models.ActualLRPGroups{}, models.ErrUnknownError
	}

	logger.Debug("performing-deserialization-work")
	throttler.Work()
	if err, ok := workErr.Load().(error); ok {
		logger.Error("failed-performing-deserialization-work", err)
		return &models.ActualLRPGroups{}, models.ErrUnknownError
	}
	logger.Debug("succeeded-performing-deserialization-work", lager.Data{"num-actual-lrp-groups": len(groups.ActualLrpGroups)})

	return groups, nil
}
Example #9
func (tagger *Tagger) processDir(src, dst string) error {
	utils.Log(utils.INFO, "Start processing directory '%v'", src)

	allFiles := getAllFiles(src)
	tagger.counter.setTotal(len(allFiles))
	utils.Log(utils.INFO, "Found %v files", len(allFiles))

	var result atomic.Value
	var index int32 = -1
	var wg sync.WaitGroup

	for i := 0; i < numberOfThreads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for {
				if tagger.stop.Load().(bool) {
					utils.Log(utils.WARNING, "Processing directory '%v' interrupted by application stop", src)
					return
				}

				i := atomic.AddInt32(&index, 1)
				if i >= int32(len(allFiles)) {
					return
				}

				fmt.Printf("\rProcessing %v/%v", i, len(allFiles))
				destination, err := tagger.getDestinationPath(allFiles[i])
				if err != nil {
					result.Store(err)
					utils.Log(utils.ERROR, "Failed to get destination path: %v", err)
					continue
				}
				if err := tagger.processFile(allFiles[i], destination); err != nil {
					utils.Log(utils.ERROR, "Failed to process file '%v': %v", allFiles[i], err)
					result.Store(err)
				}
			}
		}()
	}
	wg.Wait()
	fmt.Printf("\r                        \r")

	if result.Load() != nil {
		return result.Load().(error)
	}
	return nil
}
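
A minimal sketch of the index-claiming loop above, using only the standard library: each worker atomically increments a shared counter to claim the next item, so every item is processed exactly once without a feeding channel. The file names and worker count are made up.

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	items := []string{"a.mp3", "b.mp3", "c.mp3", "d.mp3", "e.mp3"}
	var index int32 = -1 // the first AddInt32 yields index 0
	var wg sync.WaitGroup

	for w := 0; w < 3; w++ {
		wg.Add(1)
		go func(worker int) {
			defer wg.Done()
			for {
				i := atomic.AddInt32(&index, 1)
				if i >= int32(len(items)) {
					return // nothing left to claim
				}
				fmt.Printf("worker %d processing %s\n", worker, items[i])
			}
		}(w)
	}
	wg.Wait()
}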
Example #10
func clientHandler(c *Client) {
	defer c.stopWg.Done()

	var conn io.ReadWriteCloser
	var err error
	var stopping atomic.Value

	for {
		dialChan := make(chan struct{})
		go func() {
			if conn, err = c.Dial(c.Addr); err != nil {
				if stopping.Load() == nil {
					c.LogError("gorpc.Client: [%s]. Cannot establish rpc connection: [%s]", c.Addr, err)
				}
			}
			close(dialChan)
		}()

		select {
		case <-c.clientStopChan:
			stopping.Store(true)
			<-dialChan
			return
		case <-dialChan:
			c.Stats.incDialCalls()
		}

		if err != nil {
			c.Stats.incDialErrors()
			select {
			case <-c.clientStopChan:
				return
			case <-time.After(time.Second):
			}
			continue
		}

		clientHandleConnection(c, conn)

		select {
		case <-c.clientStopChan:
			return
		default:
		}
	}
}
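
A minimal sketch of the unseeded "stopping" flag above: Load() returning nil means "not stopping yet", so a single Store(true) before shutdown is enough to suppress the error log from an in-flight dial. The error and timings are illustrative.

package main

import (
	"errors"
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var stopping atomic.Value
	done := make(chan struct{})

	go func() {
		defer close(done)
		// Simulate a dial that fails after the caller has decided to stop.
		time.Sleep(20 * time.Millisecond)
		err := errors.New("connection refused")
		if stopping.Load() == nil {
			fmt.Println("dial error:", err) // only logged during normal operation
		}
	}()

	stopping.Store(true) // shutting down: the goroutine stays quiet
	<-done
}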
Example #11
func (f *LanternProFilter) intercept(key []byte, atomicClient atomic.Value, w http.ResponseWriter, req *http.Request) {
	var err error
	if req.Method == "CONNECT" {
		var clientConn net.Conn
		var connOut net.Conn

		utils.RespondOK(w, req)
		if clientConn, _, err = w.(http.Hijacker).Hijack(); err != nil {
			utils.RespondBadGateway(w, req, fmt.Sprintf("Unable to hijack connection: %s", err))
			return
		}
		connOut, err = net.Dial("tcp", req.Host)
		// Pipe data through CONNECT tunnel
		closeConns := func() {
			if clientConn != nil {
				if err := clientConn.Close(); err != nil {
					fmt.Printf("Error closing the out connection: %s", err)
				}
			}
			if connOut != nil {
				if err := connOut.Close(); err != nil {
					fmt.Printf("Error closing the client connection: %s", err)
				}
			}
		}
		var closeOnce sync.Once
		go func() {
			n, _ := io.Copy(connOut, clientConn)
			client := atomicClient.Load().(*Client)
			atomic.AddInt64(&client.TransferIn, n)
			closeOnce.Do(closeConns)

		}()
		n, _ := io.Copy(clientConn, connOut)

		client := atomicClient.Load().(*Client)
		atomic.AddInt64(&client.TransferOut, n)

		closeOnce.Do(closeConns)
		fmt.Println("== CONNECT DONE ==")
	} else {
		f.next.ServeHTTP(w, req)
		// TODO: byte counting in this case
	}
}
Example #12
func serverHandler(s *Server, workersCh chan struct{}) {
	defer s.stopWg.Done()

	var conn io.ReadWriteCloser
	var clientAddr string
	var err error
	var stopping atomic.Value

	for {
		acceptChan := make(chan struct{})
		go func() {
			if conn, clientAddr, err = s.Listener.Accept(); err != nil {
				if stopping.Load() == nil {
					s.LogError("gorpc.Server: [%s]. Cannot accept new connection: [%s]", s.Addr, err)
				}
			}
			close(acceptChan)
		}()

		select {
		case <-s.serverStopChan:
			stopping.Store(true)
			s.Listener.Close()
			<-acceptChan
			return
		case <-acceptChan:
			s.Stats.incAcceptCalls()
		}

		if err != nil {
			s.Stats.incAcceptErrors()
			select {
			case <-s.serverStopChan:
				return
			case <-time.After(time.Second):
			}
			continue
		}

		s.stopWg.Add(1)
		go serverHandleConnection(s, conn, clientAddr, workersCh)
	}
}
Example #13
func Start(cfg *config.Config, version string) func() {
	var addr atomic.Value
	go func() {
		ip := geolookup.GetIP(maxWaitForIP)
		if ip == "" {
			log.Errorf("No IP found within %v, not starting analytics session", maxWaitForIP)
			return
		}
		addr.Store(ip)
		log.Debugf("Starting analytics session with ip %v", ip)
		startSession(ip, version, client.Addr, cfg.Client.DeviceID)
	}()

	stop := func() {
		if addr.Load() != nil {
			ip := addr.Load().(string)
			log.Debugf("Ending analytics session with ip %v", ip)
			endSession(ip, version, client.Addr, cfg.Client.DeviceID)
		}
	}
	return stop
}
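
A short sketch of the nil-check idiom above: because the Value is never seeded, readers must test Load() against nil before type-asserting, in contrast to the seed-with-empty-string variant shown earlier. The IP address is a placeholder.

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var addr atomic.Value

	report := func() {
		if v := addr.Load(); v != nil {
			fmt.Println("session ip:", v.(string))
		} else {
			fmt.Println("no ip recorded yet")
		}
	}

	report()              // nothing stored yet
	addr.Store("1.2.3.4") // the lookup goroutine would do this
	report()
}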
Example #14
// doWithError runs worker for indexes 0 to n-1, using at most max concurrent goroutines.
// Similar to do but with error handling.
// An error returned by any worker stops the remaining work (best effort) and is returned to the caller.
func doWithError(n int, worker func(int) error, max int) error {
	var (
		errv atomic.Value // worker error
		wg   sync.WaitGroup
	)

	if n <= max {
		// spawn as many goroutines as number of workers
		wg.Add(n)
		for i := 0; i < n; i++ {
			go func(idx int) {
				if errv.Load() == nil {
					if err := worker(idx); err != nil {
						errv.Store(err)
					}
				}
				wg.Done()
			}(i)
		}
		wg.Wait()

		if err := errv.Load(); err != nil {
			return err.(error)
		}
		return nil
	}

	// spawn the maximum number of goroutines
	wg.Add(max)
	for i := 0; i < max; i++ {
		go func(idx int) {
			for ; idx < n && errv.Load() == nil; idx += max {
				if err := worker(idx); err != nil {
					errv.Store(err)
					break
				}
			}
			wg.Done()
		}(i)
	}
	wg.Wait()

	if err := errv.Load(); err != nil {
		return err.(error)
	}
	return nil
}
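
A usage sketch for doWithError, assuming the function above is defined in the same package: five items are processed with at most two concurrent goroutines, and the failing item's error aborts the remaining strides. The item names are made up.

package main

import "fmt"

func main() {
	items := []string{"a", "b", "c", "d", "e"}
	err := doWithError(len(items), func(i int) error {
		if items[i] == "d" {
			return fmt.Errorf("item %q is malformed", items[i])
		}
		fmt.Println("processed", items[i])
		return nil
	}, 2)
	if err != nil {
		fmt.Println("aborted:", err)
	}
}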
Example #15
func natsOptions(logger lager.Logger, c *config.Config, natsHost *atomic.Value, startMsg chan<- struct{}) nats.Options {
	natsServers := c.NatsServers()

	options := nats.DefaultOptions
	options.Servers = natsServers
	options.PingInterval = c.NatsClientPingInterval
	options.ClosedCB = func(conn *nats.Conn) {
		logger.Fatal("nats-connection-closed", errors.New("unexpected close"), lager.Data{"last_error": conn.LastError()})
	}

	options.DisconnectedCB = func(conn *nats.Conn) {
		hostStr := natsHost.Load().(string)
		logger.Info("nats-connection-disconnected", lager.Data{"nats-host": hostStr})
	}

	options.ReconnectedCB = func(conn *nats.Conn) {
		natsURL, err := url.Parse(conn.ConnectedUrl())
		natsHostStr := ""
		if err != nil {
			logger.Error("nats-url-parse-error", err)
		} else {
			natsHostStr = natsURL.Host
		}
		natsHost.Store(natsHostStr)

		data := lager.Data{"nats-host": natsHostStr}
		logger.Info("nats-connection-reconnected", data)
		startMsg <- struct{}{}
	}

	// in the case of suspending pruning, we need to ensure we retry reconnects indefinitely
	if c.SuspendPruningIfNatsUnavailable {
		options.MaxReconnect = -1
	}

	return options
}
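
A minimal sketch of sharing the Value by pointer, as the natsHost *atomic.Value parameter above does; registerHost is a hypothetical stand-in for natsOptions, and the host names are placeholders. Passing a pointer keeps every callback on the same Value, which matters because an atomic.Value must not be copied after first use.

package main

import (
	"fmt"
	"sync/atomic"
)

// registerHost is a hypothetical helper: it keeps the *atomic.Value so a
// later callback can update the shared host string.
func registerHost(host *atomic.Value, newAddr string) func() {
	return func() { host.Store(newAddr) }
}

func main() {
	var host atomic.Value
	host.Store("nats-1.local:4222")

	reconnected := registerHost(&host, "nats-2.local:4222")
	fmt.Println("before:", host.Load().(string))
	reconnected()
	fmt.Println("after: ", host.Load().(string))
}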
Example #16
func TestRangeTransferLease(t *testing.T) {
	defer leaktest.AfterTest(t)()
	ctx := storage.TestStoreContext()
	var filterMu syncutil.Mutex
	var filter func(filterArgs storagebase.FilterArgs) *roachpb.Error
	ctx.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			filterMu.Lock()
			filterCopy := filter
			filterMu.Unlock()
			if filterCopy != nil {
				return filterCopy(filterArgs)
			}
			return nil
		}
	var waitForTransferBlocked atomic.Value
	waitForTransferBlocked.Store(false)
	transferBlocked := make(chan struct{})
	ctx.TestingKnobs.LeaseTransferBlockedOnExtensionEvent = func(
		_ roachpb.ReplicaDescriptor) {
		if waitForTransferBlocked.Load().(bool) {
			transferBlocked <- struct{}{}
			waitForTransferBlocked.Store(false)
		}
	}
	mtc := &multiTestContext{}
	mtc.storeContext = &ctx
	mtc.Start(t, 2)
	defer mtc.Stop()

	// First, do a write; we'll use it to determine when the dust has settled.
	leftKey := roachpb.Key("a")
	incArgs := incrementArgs(leftKey, 1)
	if _, pErr := client.SendWrapped(mtc.distSenders[0], nil, &incArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Get the left range's ID.
	rangeID := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil).RangeID

	// Replicate the left range onto node 1.
	mtc.replicateRange(rangeID, 1)

	replica0 := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil)
	replica1 := mtc.stores[1].LookupReplica(roachpb.RKey("a"), nil)
	gArgs := getArgs(leftKey)
	replica0Desc, err := replica0.GetReplicaDescriptor()
	if err != nil {
		t.Fatal(err)
	}
	// Check that replica0 can serve reads OK.
	if _, pErr := client.SendWrappedWith(
		mtc.senders[0], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {
		t.Fatal(pErr)
	}

	{
		// Transferring the lease to ourself should be a no-op.
		origLeasePtr, _ := replica0.GetLease()
		origLease := *origLeasePtr
		if err := replica0.AdminTransferLease(replica0Desc.StoreID); err != nil {
			t.Fatal(err)
		}
		newLeasePtr, _ := replica0.GetLease()
		if origLeasePtr != newLeasePtr || origLease != *newLeasePtr {
			t.Fatalf("expected %+v, but found %+v", origLeasePtr, newLeasePtr)
		}
	}

	{
		// An invalid target should result in an error.
		const expected = "unable to find store .* in range"
		if err := replica0.AdminTransferLease(1000); !testutils.IsError(err, expected) {
			t.Fatalf("expected %s, but found %v", expected, err)
		}
	}

	// Move the lease to store 1.
	var newHolderDesc roachpb.ReplicaDescriptor
	util.SucceedsSoon(t, func() error {
		var err error
		newHolderDesc, err = replica1.GetReplicaDescriptor()
		return err
	})

	if err := replica0.AdminTransferLease(newHolderDesc.StoreID); err != nil {
		t.Fatal(err)
	}

	// Check that replica0 doesn't serve reads any more.
	replica0Desc, err = replica0.GetReplicaDescriptor()
	if err != nil {
		t.Fatal(err)
	}
	_, pErr := client.SendWrappedWith(
		mtc.senders[0], nil, roachpb.Header{Replica: replica0Desc}, &gArgs)
	nlhe, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
	if !ok {
		t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr)
	}
	if *(nlhe.LeaseHolder) != newHolderDesc {
		t.Fatalf("expected lease holder %+v, got %+v",
			newHolderDesc, nlhe.LeaseHolder)
	}

	// Check that replica1 now has the lease (or gets it soon).
	util.SucceedsSoon(t, func() error {
		if _, pErr := client.SendWrappedWith(
			mtc.senders[1], nil, roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {
			return pErr.GoError()
		}
		return nil
	})

	replica1Lease, _ := replica1.GetLease()

	// Verify the timestamp cache low water. Because we executed a transfer lease
	// request, the low water should be set to the new lease start time which is
	// less than the previous lease's expiration time.
	if lowWater := replica1.GetTimestampCacheLowWater(); lowWater != replica1Lease.Start {
		t.Fatalf("expected timestamp cache low water %s, but found %s",
			replica1Lease.Start, lowWater)
	}

	// Make replica1 extend its lease and transfer the lease immediately after
	// that. Test that the transfer still happens (it'll wait until the extension
	// is done).
	extensionSem := make(chan struct{})
	filterMu.Lock()
	filter = func(filterArgs storagebase.FilterArgs) *roachpb.Error {
		if filterArgs.Sid != mtc.stores[1].Ident.StoreID {
			return nil
		}
		llReq, ok := filterArgs.Req.(*roachpb.RequestLeaseRequest)
		if !ok {
			return nil
		}
		if llReq.Lease.Replica == newHolderDesc {
			// Notify the main thread that the extension is in progress and wait for
			// the signal to proceed.
			filterMu.Lock()
			filter = nil
			filterMu.Unlock()
			extensionSem <- struct{}{}
			<-extensionSem
		}
		return nil
	}
	filterMu.Unlock()
	// Initiate an extension.
	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		shouldRenewTS := replica1Lease.StartStasis.Add(-1, 0)
		mtc.manualClock.Set(shouldRenewTS.WallTime + 1)
		if _, pErr := client.SendWrappedWith(
			mtc.senders[1], nil,
			roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {
			panic(pErr)
		}
	}()

	<-extensionSem
	waitForTransferBlocked.Store(true)
	// Initiate a transfer.
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Transfer back from replica1 to replica0.
		if err := replica1.AdminTransferLease(replica0Desc.StoreID); err != nil {
			panic(err)
		}
	}()
	// Wait for the transfer to be blocked by the extension.
	<-transferBlocked
	// Now unblock the extension.
	extensionSem <- struct{}{}
	// Check that the transfer to replica1 eventually happens.
	util.SucceedsSoon(t, func() error {
		if _, pErr := client.SendWrappedWith(
			mtc.senders[0], nil,
			roachpb.Header{Replica: replica0Desc}, &gArgs); pErr != nil {
			return pErr.GoError()
		}
		return nil
	})
	filterMu.Lock()
	filter = nil
	filterMu.Unlock()
	wg.Wait()
}
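
A minimal sketch of the boolean gate used above (waitForTransferBlocked): seeding the Value with false keeps the hook's unconditional .(bool) assertion safe, and the caller flips the gate on only for the window in which it wants the hook to fire. The hook and channel here are illustrative.

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var gate atomic.Value
	gate.Store(false)
	events := make(chan struct{}, 1)

	// hook stands in for the testing knob installed above.
	hook := func() {
		if gate.Load().(bool) {
			events <- struct{}{}
			gate.Store(false) // one-shot: disarm after firing
		}
	}

	hook() // gate closed: nothing happens

	gate.Store(true)
	hook() // gate open: the event is delivered once
	<-events
	fmt.Println("hook fired while the gate was open")
}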
Example #17
// TestAmbiguousCommitDueToLeadershipChange verifies that an ambiguous
// commit error is returned from sql.Exec in situations where an
// EndTransaction is part of a batch and the disposition of the batch
// request is unknown after a network failure or timeout. The goal
// here is to prevent spurious transaction retries after the initial
// transaction actually succeeded. In cases where there's an
// auto-generated primary key, this can result in silent
// duplications. In cases where the primary key is specified in
// advance, it can result in violated uniqueness constraints, or
// duplicate key violations. See #6053, #7604, and #10023.
func TestAmbiguousCommitDueToLeadershipChange(t *testing.T) {
	defer leaktest.AfterTest(t)()
	t.Skip("#10341")

	// Create a command filter which prevents EndTransaction from
	// returning a response.
	params := base.TestServerArgs{}
	committed := make(chan struct{})
	wait := make(chan struct{})
	var tableStartKey atomic.Value
	var responseCount int32

	// Prevent the first conditional put on table 51 from returning to
	// waiting client in order to simulate a lost update or slow network
	// link.
	params.Knobs.Store = &storage.StoreTestingKnobs{
		TestingResponseFilter: func(ba roachpb.BatchRequest, br *roachpb.BatchResponse) *roachpb.Error {
			req, ok := ba.GetArg(roachpb.ConditionalPut)
			tsk := tableStartKey.Load()
			if tsk == nil {
				return nil
			}
			if !ok || !bytes.HasPrefix(req.Header().Key, tsk.([]byte)) {
				return nil
			}
			// If this is the first write to the table, wait to respond to the
			// client in order to simulate a retry.
			if atomic.AddInt32(&responseCount, 1) == 1 {
				close(committed)
				<-wait
			}
			return nil
		},
	}
	testClusterArgs := base.TestClusterArgs{
		ReplicationMode: base.ReplicationAuto,
		ServerArgs:      params,
	}
	const numReplicas = 3
	tc := testcluster.StartTestCluster(t, numReplicas, testClusterArgs)
	defer tc.Stopper().Stop()

	sqlDB := sqlutils.MakeSQLRunner(t, tc.Conns[0])

	sqlDB.Exec(`CREATE DATABASE test`)
	sqlDB.Exec(`CREATE TABLE test.t (k SERIAL PRIMARY KEY, v INT)`)

	tableID := sqlutils.QueryTableID(t, tc.Conns[0], "test", "t")
	tableStartKey.Store(keys.MakeTablePrefix(tableID))

	// Wait for new table to split.
	util.SucceedsSoon(t, func() error {
		startKey := tableStartKey.Load().([]byte)

		desc, err := tc.LookupRange(keys.MakeRowSentinelKey(startKey))
		if err != nil {
			t.Fatal(err)
		}
		if !desc.StartKey.Equal(startKey) {
			return errors.Errorf("expected range start key %s; got %s",
				startKey, desc.StartKey)
		}
		return nil
	})

	// Lookup the lease.
	tableRangeDesc, err := tc.LookupRange(keys.MakeRowSentinelKey(tableStartKey.Load().([]byte)))
	if err != nil {
		t.Fatal(err)
	}
	leaseHolder, err := tc.FindRangeLeaseHolder(
		&tableRangeDesc,
		&testcluster.ReplicationTarget{
			NodeID:  tc.Servers[0].GetNode().Descriptor.NodeID,
			StoreID: tc.Servers[0].GetFirstStoreID(),
		})
	if err != nil {
		t.Fatal(err)
	}

	// In a goroutine, send an insert which will commit but not return
	// from the leader (due to the command filter we installed on node 0).
	sqlErrCh := make(chan error, 1)
	go func() {
		// Use a connection other than through the node which is the current
		// leaseholder to ensure that we use GRPC instead of the local server.
		// If we use a local server, the hanging response we simulate takes
		// up the dist sender thread of execution because local requests are
		// executed synchronously.
		sqlConn := tc.Conns[leaseHolder.NodeID%numReplicas]
		_, err := sqlConn.Exec(`INSERT INTO test.t (v) VALUES (1)`)
		sqlErrCh <- err
		close(wait)
	}()
	// Wait until the insert has committed.
	<-committed

	// Find a node other than the current lease holder to transfer the lease to.
	for i, s := range tc.Servers {
		if leaseHolder.StoreID != s.GetFirstStoreID() {
			if err := tc.TransferRangeLease(&tableRangeDesc, tc.Target(i)); err != nil {
				t.Fatal(err)
			}
			break
		}
	}

	// Wait for the error from the pending SQL insert.
	if err := <-sqlErrCh; !testutils.IsError(err, "result is ambiguous") {
		t.Errorf("expected ambiguous commit error; got %v", err)
	}

	// Verify a single row exists in the table.
	var rowCount int
	sqlDB.QueryRow(`SELECT COUNT(*) FROM test.t`).Scan(&rowCount)
	if rowCount != 1 {
		t.Errorf("expected 1 row but found %d", rowCount)
	}
}
Example #18
// dialSsh is a helper that builds the transport layers and establishes the SSH connection.
// When additional dial configuration is used, DialStats are recorded and returned.
//
// The net.Conn return value is the value to be removed from pendingConns; additional
// layering (ThrottledConn, ActivityMonitoredConn) is applied, but this return value is the
// base dial conn. The *ActivityMonitoredConn return value is the layered conn passed into
// the ssh.Client.
func dialSsh(
	config *Config,
	pendingConns *common.Conns,
	serverEntry *protocol.ServerEntry,
	selectedProtocol,
	sessionId string) (*dialResult, error) {

	// The meek protocols tunnel obfuscated SSH. Obfuscated SSH is layered on top of SSH.
	// So depending on which protocol is used, multiple layers are initialized.

	useObfuscatedSsh := false
	var directTCPDialAddress string
	var meekConfig *MeekConfig
	var err error

	switch selectedProtocol {
	case protocol.TUNNEL_PROTOCOL_OBFUSCATED_SSH:
		useObfuscatedSsh = true
		directTCPDialAddress = fmt.Sprintf("%s:%d", serverEntry.IpAddress, serverEntry.SshObfuscatedPort)

	case protocol.TUNNEL_PROTOCOL_SSH:
		directTCPDialAddress = fmt.Sprintf("%s:%d", serverEntry.IpAddress, serverEntry.SshPort)

	default:
		useObfuscatedSsh = true
		meekConfig, err = initMeekConfig(config, serverEntry, selectedProtocol, sessionId)
		if err != nil {
			return nil, common.ContextError(err)
		}
	}

	NoticeConnectingServer(
		serverEntry.IpAddress,
		serverEntry.Region,
		selectedProtocol,
		directTCPDialAddress,
		meekConfig)

	// Use an asynchronous callback to record the resolved IP address when
	// dialing a domain name. Note that DialMeek doesn't immediately
	// establish any HTTPS connections, so the resolved IP address won't be
	// reported until during/after ssh session establishment (the ssh traffic
	// is meek payload). So don't Load() the IP address value until after that
	// has completed to ensure a result.
	var resolvedIPAddress atomic.Value
	resolvedIPAddress.Store("")
	setResolvedIPAddress := func(IPAddress string) {
		resolvedIPAddress.Store(IPAddress)
	}

	// Create the base transport: meek or direct connection
	dialConfig := &DialConfig{
		UpstreamProxyUrl:              config.UpstreamProxyUrl,
		UpstreamProxyCustomHeaders:    config.UpstreamProxyCustomHeaders,
		ConnectTimeout:                time.Duration(*config.TunnelConnectTimeoutSeconds) * time.Second,
		PendingConns:                  pendingConns,
		DeviceBinder:                  config.DeviceBinder,
		DnsServerGetter:               config.DnsServerGetter,
		UseIndistinguishableTLS:       config.UseIndistinguishableTLS,
		TrustedCACertificatesFilename: config.TrustedCACertificatesFilename,
		DeviceRegion:                  config.DeviceRegion,
		ResolvedIPCallback:            setResolvedIPAddress,
	}
	var dialConn net.Conn
	if meekConfig != nil {
		dialConn, err = DialMeek(meekConfig, dialConfig)
		if err != nil {
			return nil, common.ContextError(err)
		}
	} else {
		dialConn, err = DialTCP(directTCPDialAddress, dialConfig)
		if err != nil {
			return nil, common.ContextError(err)
		}
	}

	cleanupConn := dialConn
	defer func() {
		// Cleanup on error
		if cleanupConn != nil {
			cleanupConn.Close()
			pendingConns.Remove(cleanupConn)
		}
	}()

	// Activity monitoring is used to measure tunnel duration
	monitoredConn, err := common.NewActivityMonitoredConn(dialConn, 0, false, nil, nil)
	if err != nil {
		return nil, common.ContextError(err)
	}

	// Apply throttling (if configured)
	throttledConn := common.NewThrottledConn(monitoredConn, config.RateLimits)

	// Add obfuscated SSH layer
	var sshConn net.Conn = throttledConn
	if useObfuscatedSsh {
		sshConn, err = common.NewObfuscatedSshConn(
			common.OBFUSCATION_CONN_MODE_CLIENT, throttledConn, serverEntry.SshObfuscatedKey)
		if err != nil {
			return nil, common.ContextError(err)
		}
	}

	// Now establish the SSH session over the conn transport
	expectedPublicKey, err := base64.StdEncoding.DecodeString(serverEntry.SshHostKey)
	if err != nil {
		return nil, common.ContextError(err)
	}
	sshCertChecker := &ssh.CertChecker{
		HostKeyFallback: func(addr string, remote net.Addr, publicKey ssh.PublicKey) error {
			if !bytes.Equal(expectedPublicKey, publicKey.Marshal()) {
				return common.ContextError(errors.New("unexpected host public key"))
			}
			return nil
		},
	}

	sshPasswordPayload := &protocol.SSHPasswordPayload{
		SessionId:          sessionId,
		SshPassword:        serverEntry.SshPassword,
		ClientCapabilities: []string{protocol.CLIENT_CAPABILITY_SERVER_REQUESTS},
	}

	payload, err := json.Marshal(sshPasswordPayload)
	if err != nil {
		return nil, common.ContextError(err)
	}
	sshClientConfig := &ssh.ClientConfig{
		User: serverEntry.SshUsername,
		Auth: []ssh.AuthMethod{
			ssh.Password(string(payload)),
		},
		HostKeyCallback: sshCertChecker.CheckHostKey,
	}

	// The ssh session establishment (via ssh.NewClientConn) is wrapped
	// in a timeout to ensure it won't hang. We've encountered firewalls
	// that allow the TCP handshake to complete but then send a RST to the
	// server-side and nothing to the client-side, and if that happens
	// while ssh.NewClientConn is reading, it may wait forever. The timeout
	// closes the conn, which interrupts it.
	// Note: TCP handshake timeouts are provided by TCPConn, and session
	// timeouts *after* ssh establishment are provided by the ssh keep alive
	// in operate tunnel.
	// TODO: adjust the timeout to account for time-elapsed-from-start

	type sshNewClientResult struct {
		sshClient   *ssh.Client
		sshRequests <-chan *ssh.Request
		err         error
	}
	resultChannel := make(chan *sshNewClientResult, 2)
	if *config.TunnelConnectTimeoutSeconds > 0 {
		time.AfterFunc(time.Duration(*config.TunnelConnectTimeoutSeconds)*time.Second, func() {
			resultChannel <- &sshNewClientResult{nil, nil, errors.New("ssh dial timeout")}
		})
	}

	go func() {
		// The following is adapted from ssh.Dial(), here using a custom conn
		// The sshAddress is passed through to host key verification callbacks; we don't use it.
		sshAddress := ""
		sshClientConn, sshChannels, sshRequests, err := ssh.NewClientConn(
			sshConn, sshAddress, sshClientConfig)
		var sshClient *ssh.Client
		if err == nil {
			sshClient = ssh.NewClient(sshClientConn, sshChannels, nil)
		}
		resultChannel <- &sshNewClientResult{sshClient, sshRequests, err}
	}()

	result := <-resultChannel
	if result.err != nil {
		return nil, common.ContextError(result.err)
	}

	var dialStats *TunnelDialStats

	if dialConfig.UpstreamProxyUrl != "" || meekConfig != nil {
		dialStats = &TunnelDialStats{}

		if dialConfig.UpstreamProxyUrl != "" {

			// Note: UpstreamProxyUrl should have parsed correctly in the dial
			proxyURL, err := url.Parse(dialConfig.UpstreamProxyUrl)
			if err == nil {
				dialStats.UpstreamProxyType = proxyURL.Scheme
			}

			dialStats.UpstreamProxyCustomHeaderNames = make([]string, 0)
			for name := range dialConfig.UpstreamProxyCustomHeaders {
				dialStats.UpstreamProxyCustomHeaderNames = append(dialStats.UpstreamProxyCustomHeaderNames, name)
			}
		}

		if meekConfig != nil {
			dialStats.MeekDialAddress = meekConfig.DialAddress
			dialStats.MeekResolvedIPAddress = resolvedIPAddress.Load().(string)
			dialStats.MeekSNIServerName = meekConfig.SNIServerName
			dialStats.MeekHostHeader = meekConfig.HostHeader
			dialStats.MeekTransformedHostName = meekConfig.TransformedHostName
		}

		NoticeConnectedTunnelDialStats(serverEntry.IpAddress, dialStats)
	}

	cleanupConn = nil

	// Note: dialConn may be used to close the underlying network connection
	// but should not be used to perform I/O as that would interfere with SSH
	// (and also bypasses throttling).

	return &dialResult{
			dialConn:      dialConn,
			monitoredConn: monitoredConn,
			sshClient:     result.sshClient,
			sshRequests:   result.sshRequests,
			dialStats:     dialStats},
		nil
}
Example #19
func TestFailedReplicaChange(t *testing.T) {
	defer leaktest.AfterTest(t)
	defer func() {
		storage.TestingCommandFilter = nil
	}()
	mtc := multiTestContext{}
	mtc.Start(t, 2)
	defer mtc.Stop()

	var runFilter atomic.Value
	runFilter.Store(true)

	storage.TestingCommandFilter = func(args proto.Request, reply proto.Response) bool {
		if runFilter.Load().(bool) {
			if et, ok := args.(*proto.EndTransactionRequest); ok && et.Commit {
				reply.Header().SetGoError(util.Errorf("boom"))
				return true
			}
			return false
		}
		return false
	}

	rng, err := mtc.stores[0].GetRange(1)
	if err != nil {
		t.Fatal(err)
	}

	err = rng.ChangeReplicas(proto.ADD_REPLICA,
		proto.Replica{
			NodeID:  mtc.stores[1].Ident.NodeID,
			StoreID: mtc.stores[1].Ident.StoreID,
		})
	if err == nil || !strings.Contains(err.Error(), "boom") {
		t.Fatalf("did not get expected error: %s", err)
	}

	// After the aborted transaction, r.Desc was not updated.
	// TODO(bdarnell): expose and inspect raft's internal state.
	if len(rng.Desc().Replicas) != 1 {
		t.Fatalf("expected 1 replica, found %d", len(rng.Desc().Replicas))
	}

	// The pending config change flag was cleared, so a subsequent attempt
	// can succeed.
	runFilter.Store(false)

	err = rng.ChangeReplicas(proto.ADD_REPLICA,
		proto.Replica{
			NodeID:  mtc.stores[1].Ident.NodeID,
			StoreID: mtc.stores[1].Ident.StoreID,
		})
	if err != nil {
		t.Fatal(err)
	}

	// Wait for the range to sync to both replicas (mainly so leaktest doesn't
	// complain about goroutines involved in the process).
	if err := util.IsTrueWithin(func() bool {
		for _, store := range mtc.stores {
			rng, err := store.GetRange(1)
			if err != nil {
				return false
			}
			if len(rng.Desc().Replicas) == 1 {
				return false
			}
		}
		return true
	}, 1*time.Second); err != nil {
		t.Fatal(err)
	}
}
Example #20
// CreateSnapshot generates a snapshot on the host specified by replicaID and
// persists the snapshot. The snapshot metadata will be persisted when
// SaveMetadataForReplica() is called.
func (s *SnapshotManager) CreateSnapshot(replicaID string) (*SnapshotStats, error) {
	start := time.Now()
	stats := SnapshotStats{}
	defer func() { stats.Duration = time.Since(start) }()

	snapshotID, err := s.getNextSnapshotID(replicaID)
	if err != nil {
		return &stats, err
	}
	Log(fmt.Sprintf("Creating snapshot for %s with ID %s", replicaID, snapshotID))
	snapshot, err := s.replica.CreateSnapshot(replicaID, snapshotID)
	if err != nil {
		return &stats, err
	}

	stats.NumFiles = len(snapshot.Metadata.Files)
	for _, file := range snapshot.Metadata.Files {
		stats.SizeFiles += file.Size
	}

	filesToSave, err := s.getFilesDelta(*snapshot.Metadata)
	if err != nil {
		return &stats, err
	}

	stats.NumIncrementalFiles = len(filesToSave)
	for _, file := range filesToSave {
		stats.SizeIncrementalFiles += file.Size
	}

	// copy to persistent storage
	nfiles := uint64(len(filesToSave))
	filesToSaveChan := make(chan File)
	var copyWG sync.WaitGroup
	go func() {
		for _, file := range filesToSave {
			filesToSaveChan <- file
		}
		close(filesToSaveChan)
	}()
	var nfinished uint64
	var errValue atomic.Value
	for t := 0; t < s.replica.MaxBackgroundCopies(); t++ {
		copyWG.Add(1)
		go func() {
			defer copyWG.Done()
			for file := range filesToSaveChan {
				path := getFilePath(replicaID, file)
				err := Try(func() error {
					reader, err := snapshot.GetReader(file.Name)
					if err != nil {
						return fmt.Errorf("getting reader for %s: %s", file.Name, err)
					}
					defer reader.Close()
					// Wrap reader so that checksum is computed as reader is Put
					checksummingReader := NewChecksummingReader(reader, nil)
					defer checksummingReader.Close()
					if err = s.storage.PutReader(path, checksummingReader); err != nil {
						return fmt.Errorf("putting reader at %s: %s", path, err)
					}
					return s.storeChecksum(checksummingReader.Sum(), replicaID, file)
				}, "Starting to save "+file.Name)
				if err == nil {
					Log(fmt.Sprintf("Finished saving %s", file.Name))
					Log(fmt.Sprintf("%d out of %d files saved", atomic.AddUint64(&nfinished, 1), nfiles))
				} else {
					Log(fmt.Sprintf("%s", err))
					Log(fmt.Sprintf("Unable to save %s", file.Name))
					errValue.Store(err)
				}
			}
		}()
	}
	copyWG.Wait()

	if errIface := errValue.Load(); errIface != nil {
		err, ok := errIface.(error)
		if !ok {
			return &stats, errors.New("errValue does not store an error")
		}
		return &stats, err
	}

	lazySM := NewLazySMFromM(snapshot.Metadata)
	lazySM.SaveMark = true
	err = s.metadata.Add(lazySM)
	if err != nil {
		return &stats, err
	}

	// Persist the stats
	stats.Duration = time.Since(start)
	err = Try(func() error {
		statsBytes, err := json.Marshal(stats)
		if err != nil {
			return err
		}
		if err = s.storage.Put(getStatsPath(snapshot.Metadata), statsBytes); err != nil {
			return err
		}
		return nil
	}, "Saving backup stats")
	if err == nil {
		Log("Saved backup stats")
	} else {
		Log("Non-fatal error: Could not save backup stats")
	}

	return &stats, nil
}
Example #21
func PCAP2SFlowReplay(addr string, port int, file string, pps uint32, ppflow uint32) error {
	var nbPackets, packetsBytes, sflowSampleSeq, sflowSeq, droppedPackets uint32

	if pps < minPPS {
		return fmt.Errorf("Minimal packet per seconds is %d", minPPS)
	}

	conn, err := newUDPConnection(addr, port)
	if err != nil {
		return fmt.Errorf("UDP connection error: %s", err.Error())
	}
	conn.SetWriteBuffer(256 * 1024)

	f, err := os.Open(file)
	if err != nil {
		return fmt.Errorf("PCAP OpenOffline error(\"%s\"): %s", file, err.Error())
	}
	defer f.Close()

	handleRead, err := pcapgo.NewReader(f)
	if err != nil {
		return fmt.Errorf("PCAP OpenOffline error(handle to read packet): %s", err.Error())
	}

	var running atomic.Value
	running.Store(true)

	var wg sync.WaitGroup
	wg.Add(1)

	go func() {
		defer wg.Done()

		ticker := time.NewTicker(1 * time.Second)
		defer ticker.Stop()

		oldNbPackets := atomic.LoadUint32(&nbPackets)
		for running.Load() == true {
			<-ticker.C

			nb := atomic.LoadUint32(&nbPackets)
			dpkts := nb - oldNbPackets
			oldNbPackets = nb
			dropped := atomic.LoadUint32(&droppedPackets)
			logging.GetLogger().Debugf("%d packets replayed, pps %d, nbSFlowMsgDropped %d", nb, dpkts, dropped)
		}
	}()

	throt := throttle{maxHitPerSecond: pps}

	var packets [][]byte
	for {
		data, _, err := handleRead.ReadPacketData()
		if err != nil && err != io.EOF {
			logging.GetLogger().Debug("Capture file has been cut in the middle of a packet", err.Error())
			break
		} else if err == io.EOF {
			logging.GetLogger().Debug("End of capture file")
			break
		} else {
			atomic.AddUint32(&nbPackets, 1)
			atomic.AddUint32(&packetsBytes, uint32(len(data)))

			dataCopy := make([]byte, len(data))
			copy(dataCopy, data)
			packets = append(packets, dataCopy)

			if (atomic.LoadUint32(&nbPackets) % ppflow) != 0 {
				continue
			}

			throt.startHook(ppflow)
			sendPackets(conn, &packets, &sflowSampleSeq, &sflowSeq, &droppedPackets)
			throt.endHook()
		}
	}
	if len(packets) > 0 {
		sendPackets(conn, &packets, &sflowSampleSeq, &sflowSeq, &droppedPackets)
	}

	running.Store(false)
	wg.Wait()

	logging.GetLogger().Info("PCAP Trace replay finished")

	return nil
}
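
A small sketch of the loop-flag comparison above: comparing the interface value returned by Load directly against true avoids a type assertion and is also safe before the first Store, since a nil interface simply compares unequal to true. The timings are arbitrary.

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

func main() {
	var running atomic.Value
	fmt.Println("before any Store:", running.Load() == true) // false, no panic

	running.Store(true)
	go func() {
		time.Sleep(20 * time.Millisecond)
		running.Store(false)
	}()

	for running.Load() == true {
		time.Sleep(5 * time.Millisecond)
	}
	fmt.Println("loop exited after the flag was cleared")
}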
Example #22
// RestoreSnapshot retrieves a Snapshot specified by the given metadata and triggers a restore.
// The Replica implementation determines the physical machine on which to write the restored files.
// The Replica implementation may or may not consider |replicaID| when making this determination.
// The restored files go to the database path specified by |targetPath|.
func (s *SnapshotManager) RestoreSnapshot(replicaID string, targetPath string, metadata SnapshotMetadata) (*SnapshotStats, error) {
	start := time.Now()
	stats := SnapshotStats{}
	defer func() { stats.Duration = time.Since(start) }()

	snapshot, err := s.GetSnapshot(metadata)
	if err != nil {
		return &stats, err
	}

	dbpath, err := s.replica.PrepareToRestoreSnapshot(replicaID, targetPath, snapshot)
	if err != nil {
		return &stats, err
	}

	// Determine which files we need to restore and keep them in |files|.
	// Remove unwanted files from database.
	Log("Determining files to restore")
	stats.NumFiles = len(snapshot.Metadata.Files)
	files := make(map[string]File, stats.NumFiles)
	for _, file := range snapshot.Metadata.Files {
		files[file.Name] = file
		stats.SizeFiles += file.Size
	}
	// TODO(agf): If we really want to support restores to non-local replicas,
	// then List() would need to take replicaID as an argument
	dbcontents, err := s.replica.List(dbpath, -1)
	if err != nil {
		return &stats, err
	}
	for _, dbfilename := range dbcontents {
		dbfilepath := dbpath + "/" + dbfilename
		if !strings.HasSuffix(dbfilename, ".sst") {
			s.replica.Delete(dbfilepath)
			continue
		}
		file, neededForRestore := files[dbfilename]
		if !neededForRestore {
			s.replica.Delete(dbfilepath)
			continue
		}
		same, err := s.replica.Same(dbfilepath, file)
		if !same || err != nil {
			s.replica.Delete(dbfilepath)
			continue
		}
		delete(files, dbfilename)
	}
	Log(fmt.Sprintf("Keeping %d files in current database", len(snapshot.Metadata.Files)-len(files)))

	// Write the files from snapshot
	stats.NumIncrementalFiles = len(files)
	filesToCopy := make(chan File)
	var copyWG sync.WaitGroup
	go func() {
		for _, file := range files {
			filesToCopy <- file
			stats.SizeIncrementalFiles += file.Size
		}
		close(filesToCopy)
	}()
	var nfinished uint64
	var errValue atomic.Value
	for t := 0; t < s.replica.MaxBackgroundCopies(); t++ {
		copyWG.Add(1)
		go func() {
			defer copyWG.Done()
			for file := range filesToCopy {
				err := Try(func() error {
					reader, err := snapshot.GetReader(file.Name)
					if err != nil {
						return fmt.Errorf("getting reader to restore %s: %s", file.Name, err)
					}
					defer reader.Close()
					// Wrap reader so that checksum is computed as reader is Put
					checksummingReader := NewChecksummingReader(reader, nil)
					defer checksummingReader.Close()
					if err := s.replica.PutReader(dbpath+"/"+file.Name, checksummingReader); err != nil {
						return fmt.Errorf("Putting reader to %s: %s", dbpath+"/"+file.Name, err)
					}
					return s.verifyChecksum(checksummingReader.Sum(), metadata.ReplicaID, file)
				}, "Starting to restore "+file.Name)
				if err == nil {
					Log(fmt.Sprintf("Finished restoring %s", file.Name))
					Log(fmt.Sprintf("%d out of %d files restored",
						atomic.AddUint64(&nfinished, 1), stats.NumIncrementalFiles))
				} else {
					Log(fmt.Sprintf("%s", err))
					Log(fmt.Sprintf("Unable to restore %s", file.Name))
					errValue.Store(err)
				}
			}
		}()
	}
	copyWG.Wait()

	if errIface := errValue.Load(); errIface != nil {
		err, ok := errIface.(error)
		if !ok {
			return &stats, errors.New("errValue does not store an error")
		}
		return &stats, err
	}
	return &stats, nil
}
Example #23
		}
		fakeMetricSender = fake_metrics_sender.NewFakeMetricSender()
		metrics.Initialize(fakeMetricSender, nil)

		nextErr = atomic.Value{}
		nextErr := nextErr
		nextEvent.Store(nilEventHolder)

		eventSource.CloseStub = func() error {
			nextErr.Store(errors.New("closed"))
			return nil
		}

		eventSource.NextStub = func() (models.Event, error) {
			time.Sleep(10 * time.Millisecond)
			if eventHolder := nextEvent.Load(); eventHolder != nil || eventHolder != nilEventHolder {
				nextEvent.Store(nilEventHolder)

				eh := eventHolder.(EventHolder)
				if eh.event != nil {
					return eh.event, nil
				}
			}

			if err := nextErr.Load(); err != nil {
				return nil, err.(error)
			}

			return nil, nil
		}
	})
Example #24
func TestRangeTransferLease(t *testing.T) {
	defer leaktest.AfterTest(t)()
	cfg := storage.TestStoreConfig(nil)
	var filterMu syncutil.Mutex
	var filter func(filterArgs storagebase.FilterArgs) *roachpb.Error
	cfg.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			filterMu.Lock()
			filterCopy := filter
			filterMu.Unlock()
			if filterCopy != nil {
				return filterCopy(filterArgs)
			}
			return nil
		}
	var waitForTransferBlocked atomic.Value
	waitForTransferBlocked.Store(false)
	transferBlocked := make(chan struct{})
	cfg.TestingKnobs.LeaseTransferBlockedOnExtensionEvent = func(
		_ roachpb.ReplicaDescriptor) {
		if waitForTransferBlocked.Load().(bool) {
			transferBlocked <- struct{}{}
			waitForTransferBlocked.Store(false)
		}
	}
	mtc := &multiTestContext{}
	mtc.storeConfig = &cfg
	defer mtc.Stop()
	mtc.Start(t, 2)

	// First, do a write; we'll use it to determine when the dust has settled.
	leftKey := roachpb.Key("a")
	incArgs := incrementArgs(leftKey, 1)
	if _, pErr := client.SendWrapped(context.Background(), mtc.distSenders[0], incArgs); pErr != nil {
		t.Fatal(pErr)
	}

	// Get the left range's ID.
	rangeID := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil).RangeID

	// Replicate the left range onto node 1.
	mtc.replicateRange(rangeID, 1)

	replica0 := mtc.stores[0].LookupReplica(roachpb.RKey("a"), nil)
	replica1 := mtc.stores[1].LookupReplica(roachpb.RKey("a"), nil)
	gArgs := getArgs(leftKey)
	replica0Desc, err := replica0.GetReplicaDescriptor()
	if err != nil {
		t.Fatal(err)
	}
	// Check that replica0 can serve reads OK.
	if _, pErr := client.SendWrappedWith(
		context.Background(),
		mtc.senders[0],
		roachpb.Header{Replica: replica0Desc},
		gArgs,
	); pErr != nil {
		t.Fatal(pErr)
	}

	origLease, _ := replica0.GetLease()
	{
		// Transferring the lease to ourself should be a no-op.
		if err := replica0.AdminTransferLease(context.Background(), replica0Desc.StoreID); err != nil {
			t.Fatal(err)
		}
		newLease, _ := replica0.GetLease()
		if err := origLease.Equivalent(*newLease); err != nil {
			t.Fatal(err)
		}
	}

	{
		// An invalid target should result in an error.
		const expected = "unable to find store .* in range"
		if err := replica0.AdminTransferLease(context.Background(), 1000); !testutils.IsError(err, expected) {
			t.Fatalf("expected %s, but found %v", expected, err)
		}
	}

	// Move the lease to replica 1.
	var newHolderDesc roachpb.ReplicaDescriptor
	testutils.SucceedsSoon(t, func() error {
		var err error
		newHolderDesc, err = replica1.GetReplicaDescriptor()
		return err
	})

	if err := replica0.AdminTransferLease(context.Background(), newHolderDesc.StoreID); err != nil {
		t.Fatal(err)
	}

	// Check that replica0 doesn't serve reads any more.
	replica0Desc, err = replica0.GetReplicaDescriptor()
	if err != nil {
		t.Fatal(err)
	}
	_, pErr := client.SendWrappedWith(
		context.Background(),
		mtc.senders[0],
		roachpb.Header{Replica: replica0Desc},
		gArgs,
	)
	nlhe, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
	if !ok {
		t.Fatalf("expected %T, got %s", &roachpb.NotLeaseHolderError{}, pErr)
	}
	if *(nlhe.LeaseHolder) != newHolderDesc {
		t.Fatalf("expected lease holder %+v, got %+v",
			newHolderDesc, nlhe.LeaseHolder)
	}

	// Check that replica1 now has the lease (or gets it soon).
	testutils.SucceedsSoon(t, func() error {
		if _, pErr := client.SendWrappedWith(
			context.Background(),
			mtc.senders[1],
			roachpb.Header{Replica: replica0Desc},
			gArgs,
		); pErr != nil {
			return pErr.GoError()
		}
		return nil
	})

	replica1Lease, _ := replica1.GetLease()

	// Verify the timestamp cache low water. Because we executed a transfer lease
	// request, the low water should be set to the new lease start time which is
	// less than the previous lease's expiration time.
	if lowWater := replica1.GetTimestampCacheLowWater(); lowWater != replica1Lease.Start {
		t.Fatalf("expected timestamp cache low water %s, but found %s",
			replica1Lease.Start, lowWater)
	}

	// Make replica1 extend its lease and transfer the lease immediately after
	// that. Test that the transfer still happens (it'll wait until the extension
	// is done).
	extensionSem := make(chan struct{})
	filterMu.Lock()
	filter = func(filterArgs storagebase.FilterArgs) *roachpb.Error {
		if filterArgs.Sid != mtc.stores[1].Ident.StoreID {
			return nil
		}
		llReq, ok := filterArgs.Req.(*roachpb.RequestLeaseRequest)
		if !ok {
			return nil
		}
		if llReq.Lease.Replica == newHolderDesc {
			// Notify the main thread that the extension is in progress and wait for
			// the signal to proceed.
			filterMu.Lock()
			filter = nil
			filterMu.Unlock()
			extensionSem <- struct{}{}
			<-extensionSem
		}
		return nil
	}
	filterMu.Unlock()
	// Initiate an extension.
	renewalErrCh := make(chan error)
	go func() {
		shouldRenewTS := replica1Lease.Expiration.Add(-1, 0)
		mtc.manualClock.Set(shouldRenewTS.WallTime + 1)
		_, pErr := client.SendWrappedWith(
			context.Background(), mtc.senders[1], roachpb.Header{Replica: replica0Desc}, gArgs,
		)
		renewalErrCh <- pErr.GoError()
	}()

	<-extensionSem
	waitForTransferBlocked.Store(true)
	// Initiate a transfer.
	transferErrCh := make(chan error)
	go func() {
		// Transfer back from replica1 to replica0.
		transferErrCh <- replica1.AdminTransferLease(context.Background(), replica0Desc.StoreID)
	}()
	// Wait for the transfer to be blocked by the extension.
	<-transferBlocked
	// Now unblock the extension.
	extensionSem <- struct{}{}
	// Check that the transfer to replica1 eventually happens.
	testutils.SucceedsSoon(t, func() error {
		if _, pErr := client.SendWrappedWith(
			context.Background(),
			mtc.senders[0],
			roachpb.Header{Replica: replica0Desc},
			gArgs,
		); pErr != nil {
			return pErr.GoError()
		}
		return nil
	})
	filterMu.Lock()
	filter = nil
	filterMu.Unlock()

	// We can sometimes receive an error from our renewal attempt because the
	// lease transfer ends up causing the renewal to re-propose, and the second
	// attempt fails because the lease has already been renewed. This used to
	// work before we compared the origin lease with the actual lease, because
	// the renewed lease still encompassed the previous request.
	if err := <-renewalErrCh; err != nil {
		if _, ok := err.(*roachpb.NotLeaseHolderError); !ok {
			t.Errorf("expected not lease holder error due to re-proposal; got %s", err)
		}
	}
	if err := <-transferErrCh; err != nil {
		t.Errorf("unexpected error from lease transfer: %s", err)
	}
}
Example #25
			{instance1, instance2},
			{instance1, instance2},
			{instance3, instance4},
		}

		logRepo = new(logsfakes.FakeRepository)
		logMessages.Store([]logs.Loggable{})

		closeWait := sync.WaitGroup{}
		closeWait.Add(1)

		logRepo.TailLogsForStub = func(appGUID string, onConnect func(), logChan chan<- logs.Loggable, errChan chan<- error) {
			onConnect()

			go func() {
				for _, log := range logMessages.Load().([]logs.Loggable) {
					logChan <- log
				}

				closeWait.Wait()
				close(logChan)
			}()
		}

		logRepo.CloseStub = func() {
			closeWait.Done()
		}
	})

	callStart := func(args []string) bool {
		updateCommandDependency(logRepo)
Example #26
0
func TestFailedReplicaChange(t *testing.T) {
	defer leaktest.AfterTest(t)
	defer func() { storage.TestingCommandFilter = nil }()

	var runFilter atomic.Value
	runFilter.Store(true)

	storage.TestingCommandFilter = func(_ roachpb.StoreID, args roachpb.Request, _ roachpb.Header) error {
		if runFilter.Load().(bool) {
			if et, ok := args.(*roachpb.EndTransactionRequest); ok && et.Commit {
				return util.Errorf("boom")
			}
			return nil
		}
		return nil
	}

	mtc := startMultiTestContext(t, 2)
	defer mtc.Stop()

	rng, err := mtc.stores[0].GetReplica(1)
	if err != nil {
		t.Fatal(err)
	}

	err = rng.ChangeReplicas(roachpb.ADD_REPLICA,
		roachpb.ReplicaDescriptor{
			NodeID:  mtc.stores[1].Ident.NodeID,
			StoreID: mtc.stores[1].Ident.StoreID,
		}, rng.Desc())
	if err == nil || !strings.Contains(err.Error(), "boom") {
		t.Fatalf("did not get expected error: %s", err)
	}

	// After the aborted transaction, r.Desc was not updated.
	// TODO(bdarnell): expose and inspect raft's internal state.
	if len(rng.Desc().Replicas) != 1 {
		t.Fatalf("expected 1 replica, found %d", len(rng.Desc().Replicas))
	}

	// The pending config change flag was cleared, so a subsequent attempt
	// can succeed.
	runFilter.Store(false)

	// The first failed replica change has laid down intents. Make sure those
	// are pushable by making the transaction abandoned.
	mtc.manualClock.Increment(10 * storage.DefaultHeartbeatInterval.Nanoseconds())

	err = rng.ChangeReplicas(roachpb.ADD_REPLICA,
		roachpb.ReplicaDescriptor{
			NodeID:  mtc.stores[1].Ident.NodeID,
			StoreID: mtc.stores[1].Ident.StoreID,
		}, rng.Desc())
	if err != nil {
		t.Fatal(err)
	}

	// Wait for the range to sync to both replicas (mainly so leaktest doesn't
	// complain about goroutines involved in the process).
	if err := util.IsTrueWithin(func() bool {
		for _, store := range mtc.stores {
			rang, err := store.GetReplica(1)
			if err != nil {
				return false
			}
			if len(rang.Desc().Replicas) == 1 {
				return false
			}
		}
		return true
	}, 1*time.Second); err != nil {
		t.Fatal(err)
	}
}
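TestFailedReplicaChange keeps a plain bool in an atomic.Value (runFilter) so the command filter, which runs on store goroutines, can be switched off by the test without extra locking; the same concrete type (bool) is stored every time, as atomic.Value requires. A stripped-down sketch of that toggle, using hypothetical names (failInjected, op), could look like this:

package main

import (
	"errors"
	"fmt"
	"sync/atomic"
)

func main() {
	var failInjected atomic.Value
	failInjected.Store(true) // always store the same concrete type (bool)

	op := func() error {
		if failInjected.Load().(bool) {
			return errors.New("boom") // injected failure
		}
		return nil
	}

	fmt.Println("first attempt:", op())  // fails with "boom"
	failInjected.Store(false)            // clear the injected failure
	fmt.Println("second attempt:", op()) // succeeds (<nil>)
}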
Example #27
0
// TestLeaseMetricsOnSplitAndTransfer verifies that lease-related metrics
// are updated after splitting a range and then initiating one successful
// and one failing lease transfer.
func TestLeaseMetricsOnSplitAndTransfer(t *testing.T) {
	defer leaktest.AfterTest(t)()
	var injectLeaseTransferError atomic.Value
	sc := storage.TestStoreConfig(nil)
	sc.TestingKnobs.DisableSplitQueue = true
	sc.TestingKnobs.TestingCommandFilter =
		func(filterArgs storagebase.FilterArgs) *roachpb.Error {
			if args, ok := filterArgs.Req.(*roachpb.TransferLeaseRequest); ok {
				if val := injectLeaseTransferError.Load(); val != nil && val.(bool) {
					// Note that we can't just return an error here as we only
					// end up counting failures in the metrics if the command
					// makes it through to being executed. So use a fake store ID.
					args.Lease.Replica.StoreID = roachpb.StoreID(1000)
				}
			}
			return nil
		}
	mtc := &multiTestContext{storeConfig: &sc}
	defer mtc.Stop()
	mtc.Start(t, 2)

	// Up-replicate to two replicas.
	keyMinReplica0 := mtc.stores[0].LookupReplica(roachpb.RKeyMin, nil)
	mtc.replicateRange(keyMinReplica0.RangeID, 1)

	// Split the key space at key "a".
	splitKey := roachpb.RKey("a")
	splitArgs := adminSplitArgs(splitKey.AsRawKey(), splitKey.AsRawKey())
	if _, pErr := client.SendWrapped(
		context.Background(), rg1(mtc.stores[0]), splitArgs,
	); pErr != nil {
		t.Fatal(pErr)
	}

	// Now, a successful transfer from LHS replica 0 to replica 1.
	injectLeaseTransferError.Store(false)
	if err := mtc.dbs[0].AdminTransferLease(
		context.TODO(), keyMinReplica0.Desc().StartKey.AsRawKey(), mtc.stores[1].StoreID(),
	); err != nil {
		t.Fatalf("unable to transfer lease to replica 1: %s", err)
	}
	// Wait for all replicas to process.
	testutils.SucceedsSoon(t, func() error {
		for i := 0; i < 2; i++ {
			r := mtc.stores[i].LookupReplica(roachpb.RKeyMin, nil)
			if l, _ := r.GetLease(); l.Replica.StoreID != mtc.stores[1].StoreID() {
				return errors.Errorf("expected lease to transfer to replica 2: got %s", l)
			}
		}
		return nil
	})

	// Next a failed transfer from RHS replica 0 to replica 1.
	injectLeaseTransferError.Store(true)
	keyAReplica0 := mtc.stores[0].LookupReplica(splitKey, nil)
	if err := mtc.dbs[0].AdminTransferLease(
		context.TODO(), keyAReplica0.Desc().StartKey.AsRawKey(), mtc.stores[1].StoreID(),
	); err == nil {
		t.Fatal("expected an error transferring to an unknown store ID")
	}

	metrics := mtc.stores[0].Metrics()
	if a, e := metrics.LeaseTransferSuccessCount.Count(), int64(1); a != e {
		t.Errorf("expected %d lease transfer successes; got %d", e, a)
	}
	if a, e := metrics.LeaseTransferErrorCount.Count(), int64(1); a != e {
		t.Errorf("expected %d lease transfer errors; got %d", e, a)
	}

	// Expire current leases and put a key to RHS of split to request
	// an epoch-based lease.
	testutils.SucceedsSoon(t, func() error {
		mtc.expireLeases(context.TODO())
		if err := mtc.stores[0].DB().Put(context.TODO(), "a", "foo"); err != nil {
			return err
		}

		// Update replication gauges on store 1 and verify we have 1 each of
		// expiration and epoch leases. These values are counted from store 1
		// because it will have the higher replica IDs. Expire leases to make
		// sure that epoch-based leases are used for the split range.
		if err := mtc.stores[1].ComputeMetrics(context.Background(), 0); err != nil {
			return err
		}
		metrics = mtc.stores[1].Metrics()
		if a, e := metrics.LeaseExpirationCount.Value(), int64(1); a != e {
			return errors.Errorf("expected %d expiration lease count; got %d", e, a)
		}
		if a, e := metrics.LeaseEpochCount.Value(), int64(1); a != e {
			return errors.Errorf("expected %d epoch lease count; got %d", e, a)
		}
		return nil
	})
}
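Here the filter is installed before injectLeaseTransferError is ever Stored, so it guards the type assertion with a nil check (val != nil && val.(bool)): Load returns nil until the first Store, and asserting .(bool) on nil would panic. A minimal sketch of that nil-guard, with hypothetical names, might be:

package main

import (
	"fmt"
	"sync/atomic"
)

func main() {
	var injectError atomic.Value

	shouldInject := func() bool {
		val := injectError.Load()
		return val != nil && val.(bool) // safe even before any Store
	}

	fmt.Println(shouldInject()) // false: nothing stored yet
	injectError.Store(true)
	fmt.Println(shouldInject()) // true
	injectError.Store(false)
	fmt.Println(shouldInject()) // false
}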
Example #28
0
// WaitForState watches an object and waits for it to achieve the state
// specified in the configuration using the specified Refresh() func,
// waiting the number of seconds specified in the timeout configuration.
//
// If the Refresh function returns an error, exit immediately with that error.
//
// If the Refresh function returns a state other than the Target state or one
// listed in Pending, return immediately with an error.
//
// If the Timeout is exceeded before reaching the Target state, return an
// error.
//
// Otherwise, return the result of the first call to the Refresh function to
// reach the target state.
func (conf *StateChangeConf) WaitForState() (interface{}, error) {
	log.Printf("[DEBUG] Waiting for state to become: %s", conf.Target)

	notfoundTick := 0
	targetOccurence := 0

	// Set a default for times to check for not found
	if conf.NotFoundChecks == 0 {
		conf.NotFoundChecks = 20
	}

	if conf.ContinuousTargetOccurence == 0 {
		conf.ContinuousTargetOccurence = 1
	}

	// We can't safely read the result values if we timeout, so store them in
	// an atomic.Value
	type Result struct {
		Result interface{}
		State  string
		Error  error
	}
	var lastResult atomic.Value
	lastResult.Store(Result{})

	doneCh := make(chan struct{})
	go func() {
		defer close(doneCh)

		// Wait for the delay
		time.Sleep(conf.Delay)

		wait := 100 * time.Millisecond

		for {
			res, currentState, err := conf.Refresh()
			result := Result{
				Result: res,
				State:  currentState,
				Error:  err,
			}
			lastResult.Store(result)

			if err != nil {
				return
			}

			// If we're waiting for the absence of a thing, then return
			if res == nil && len(conf.Target) == 0 {
				targetOccurence += 1
				if conf.ContinuousTargetOccurence == targetOccurence {
					return
				} else {
					continue
				}
			}

			if res == nil {
				// If we didn't find the resource, check whether we have been
				// failing to find it for a while, and if so, report an error.
				notfoundTick += 1
				if notfoundTick > conf.NotFoundChecks {
					result.Error = &NotFoundError{
						LastError: err,
					}
					lastResult.Store(result)
					return
				}
			} else {
				// Reset the counter for when a resource isn't found
				notfoundTick = 0
				found := false

				for _, allowed := range conf.Target {
					if currentState == allowed {
						found = true
						targetOccurence += 1
						if conf.ContinuousTargetOccurence == targetOccurence {
							return
						} else {
							continue
						}
					}
				}

				for _, allowed := range conf.Pending {
					if currentState == allowed {
						found = true
						targetOccurence = 0
						break
					}
				}

				if !found {
					result.Error = &UnexpectedStateError{
						LastError:     err,
						State:         result.State,
						ExpectedState: conf.Target,
					}
					lastResult.Store(result)
					return
				}
			}

			// If a poll interval has been specified, choose that interval.
			// Otherwise bound the default value.
			if conf.PollInterval > 0 && conf.PollInterval < 180*time.Second {
				wait = conf.PollInterval
			} else {
				if wait < conf.MinTimeout {
					wait = conf.MinTimeout
				} else if wait > 10*time.Second {
					wait = 10 * time.Second
				}
			}

			log.Printf("[TRACE] Waiting %s before next try", wait)
			time.Sleep(wait)

			// Wait between refreshes using exponential backoff, except when
			// waiting for the target state to reoccur.
			if targetOccurence == 0 {
				wait *= 2
			}
		}
	}()

	select {
	case <-doneCh:
		r := lastResult.Load().(Result)
		return r.Result, r.Error
	case <-time.After(conf.Timeout):
		r := lastResult.Load().(Result)
		return nil, &TimeoutError{
			LastError:     r.Error,
			LastState:     r.State,
			ExpectedState: conf.Target,
		}
	}
}
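As the comment in WaitForState notes, the caller can't safely read results once the timeout fires, so the polling goroutine publishes every intermediate Result through an atomic.Value and the timeout branch Loads the last snapshot it finds. A reduced sketch of that last-result handoff, using hypothetical names (result, last), might look like this:

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

type result struct {
	State string
	Err   error
}

func main() {
	var last atomic.Value
	last.Store(result{}) // seed with the zero value so Load never returns nil

	done := make(chan struct{})
	go func() {
		defer close(done)
		for _, s := range []string{"pending", "pending", "ready"} {
			last.Store(result{State: s}) // publish the latest observation
			time.Sleep(10 * time.Millisecond)
			if s == "ready" {
				return
			}
		}
	}()

	select {
	case <-done:
		fmt.Println("finished in state:", last.Load().(result).State)
	case <-time.After(time.Second):
		// The goroutine may still be running; we only read the snapshot.
		fmt.Println("timed out; last observed state:", last.Load().(result).State)
	}
}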
Example #29
0
// dialSsh is a helper that builds the transport layers and establishes the SSH connection.
// When a meek protocol is selected, additional MeekStats are recorded and returned.
func dialSsh(
	config *Config,
	pendingConns *Conns,
	serverEntry *ServerEntry,
	selectedProtocol,
	sessionId string) (
	conn net.Conn, sshClient *ssh.Client, meekStats *MeekStats, err error) {

	// The meek protocols tunnel obfuscated SSH. Obfuscated SSH is layered on top of SSH.
	// So depending on which protocol is used, multiple layers are initialized.

	useObfuscatedSsh := false
	var directTCPDialAddress string
	var meekConfig *MeekConfig

	switch selectedProtocol {
	case TUNNEL_PROTOCOL_OBFUSCATED_SSH:
		useObfuscatedSsh = true
		directTCPDialAddress = fmt.Sprintf("%s:%d", serverEntry.IpAddress, serverEntry.SshObfuscatedPort)

	case TUNNEL_PROTOCOL_SSH:
		directTCPDialAddress = fmt.Sprintf("%s:%d", serverEntry.IpAddress, serverEntry.SshPort)

	default:
		useObfuscatedSsh = true
		meekConfig, err = initMeekConfig(config, serverEntry, selectedProtocol, sessionId)
		if err != nil {
			return nil, nil, nil, ContextError(err)
		}
	}

	NoticeConnectingServer(
		serverEntry.IpAddress,
		serverEntry.Region,
		selectedProtocol,
		directTCPDialAddress,
		meekConfig)

	// Use an asynchronous callback to record the resolved IP address when
	// dialing a domain name. Note that DialMeek doesn't immediately
	// establish any HTTPS connections, so the resolved IP address won't be
	// reported until during/after ssh session establishment (the ssh traffic
	// is meek payload). So don't Load() the IP address value until after that
	// has completed to ensure a result.
	var resolvedIPAddress atomic.Value
	resolvedIPAddress.Store("")
	setResolvedIPAddress := func(IPAddress string) {
		resolvedIPAddress.Store(IPAddress)
	}

	// Create the base transport: meek or direct connection
	dialConfig := &DialConfig{
		UpstreamProxyUrl:              config.UpstreamProxyUrl,
		ConnectTimeout:                time.Duration(*config.TunnelConnectTimeoutSeconds) * time.Second,
		PendingConns:                  pendingConns,
		DeviceBinder:                  config.DeviceBinder,
		DnsServerGetter:               config.DnsServerGetter,
		UseIndistinguishableTLS:       config.UseIndistinguishableTLS,
		TrustedCACertificatesFilename: config.TrustedCACertificatesFilename,
		DeviceRegion:                  config.DeviceRegion,
		ResolvedIPCallback:            setResolvedIPAddress,
	}
	if meekConfig != nil {
		conn, err = DialMeek(meekConfig, dialConfig)
		if err != nil {
			return nil, nil, nil, ContextError(err)
		}
	} else {
		conn, err = DialTCP(directTCPDialAddress, dialConfig)
		if err != nil {
			return nil, nil, nil, ContextError(err)
		}
	}

	cleanupConn := conn
	defer func() {
		// Cleanup on error
		if err != nil {
			cleanupConn.Close()
		}
	}()

	// Add obfuscated SSH layer
	var sshConn net.Conn
	sshConn = conn
	if useObfuscatedSsh {
		sshConn, err = NewObfuscatedSshConn(
			OBFUSCATION_CONN_MODE_CLIENT, conn, serverEntry.SshObfuscatedKey)
		if err != nil {
			return nil, nil, nil, ContextError(err)
		}
	}

	// Now establish the SSH session over the sshConn transport
	expectedPublicKey, err := base64.StdEncoding.DecodeString(serverEntry.SshHostKey)
	if err != nil {
		return nil, nil, nil, ContextError(err)
	}
	sshCertChecker := &ssh.CertChecker{
		HostKeyFallback: func(addr string, remote net.Addr, publicKey ssh.PublicKey) error {
			if !bytes.Equal(expectedPublicKey, publicKey.Marshal()) {
				return ContextError(errors.New("unexpected host public key"))
			}
			return nil
		},
	}
	sshPasswordPayload, err := json.Marshal(
		struct {
			SessionId   string `json:"SessionId"`
			SshPassword string `json:"SshPassword"`
		}{sessionId, serverEntry.SshPassword})
	if err != nil {
		return nil, nil, nil, ContextError(err)
	}
	sshClientConfig := &ssh.ClientConfig{
		User: serverEntry.SshUsername,
		Auth: []ssh.AuthMethod{
			ssh.Password(string(sshPasswordPayload)),
		},
		HostKeyCallback: sshCertChecker.CheckHostKey,
	}

	// The ssh session establishment (via ssh.NewClientConn) is wrapped
	// in a timeout to ensure it won't hang. We've encountered firewalls
	// that allow the TCP handshake to complete but then send a RST to the
	// server-side and nothing to the client-side, and if that happens
	// while ssh.NewClientConn is reading, it may wait forever. The timeout
	// closes the conn, which interrupts it.
	// Note: TCP handshake timeouts are provided by TCPConn, and session
	// timeouts *after* ssh establishment are provided by the ssh keep alive
	// in operate tunnel.
	// TODO: adjust the timeout to account for time-elapsed-from-start

	type sshNewClientResult struct {
		sshClient *ssh.Client
		err       error
	}
	resultChannel := make(chan *sshNewClientResult, 2)
	if *config.TunnelConnectTimeoutSeconds > 0 {
		time.AfterFunc(time.Duration(*config.TunnelConnectTimeoutSeconds)*time.Second, func() {
			resultChannel <- &sshNewClientResult{nil, errors.New("ssh dial timeout")}
		})
	}

	go func() {
		// The following is adapted from ssh.Dial(), here using a custom conn
		// The sshAddress is passed through to host key verification callbacks; we don't use it.
		sshAddress := ""
		sshClientConn, sshChans, sshReqs, err := ssh.NewClientConn(sshConn, sshAddress, sshClientConfig)
		var sshClient *ssh.Client
		if err == nil {
			sshClient = ssh.NewClient(sshClientConn, sshChans, sshReqs)
		}
		resultChannel <- &sshNewClientResult{sshClient, err}
	}()

	result := <-resultChannel
	if result.err != nil {
		return nil, nil, nil, ContextError(result.err)
	}

	if meekConfig != nil {
		meekStats = &MeekStats{
			DialAddress:         meekConfig.DialAddress,
			ResolvedIPAddress:   resolvedIPAddress.Load().(string),
			SNIServerName:       meekConfig.SNIServerName,
			HostHeader:          meekConfig.HostHeader,
			TransformedHostName: meekConfig.TransformedHostName,
		}

		NoticeConnectedMeekStats(serverEntry.IpAddress, meekStats)
	}

	return conn, result.sshClient, meekStats, nil
}
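resolvedIPAddress is seeded with an empty string so the final Load().(string) assertion can't panic even if the resolver callback never runs; the callback itself just Stores the address from whatever goroutine the dialer invokes it on, and the value is only Loaded after session establishment has finished. A minimal sketch of that callback-writes, caller-reads pattern, with hypothetical names (resolvedAddr, record), could be:

package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

func main() {
	var resolvedAddr atomic.Value
	resolvedAddr.Store("") // seed so Load().(string) never panics

	// The callback handed to the dialer; it may run on another goroutine.
	record := func(addr string) { resolvedAddr.Store(addr) }

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		record("198.51.100.7") // e.g. invoked once the hostname is resolved
	}()
	wg.Wait() // only read after the work that triggers the callback is done

	fmt.Println("resolved:", resolvedAddr.Load().(string))
}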
Example #30
0
func serverHandleConnection(s *Server, conn io.ReadWriteCloser, clientAddr string, workersCh chan struct{}) {
	defer s.stopWg.Done()

	if s.OnConnect != nil {
		newConn, err := s.OnConnect(clientAddr, conn)
		if err != nil {
			s.LogError("gorpc.Server: [%s]->[%s]. OnConnect error: [%s]", clientAddr, s.Addr, err)
			conn.Close()
			return
		}
		conn = newConn
	}

	var enabledCompression bool
	var err error
	var stopping atomic.Value

	zChan := make(chan bool, 1)
	go func() {
		var buf [1]byte
		if _, err = conn.Read(buf[:]); err != nil {
			if stopping.Load() == nil {
				s.LogError("gorpc.Server: [%s]->[%s]. Error when reading handshake from client: [%s]", clientAddr, s.Addr, err)
			}
		}
		zChan <- (buf[0] != 0)
	}()
	select {
	case enabledCompression = <-zChan:
		if err != nil {
			conn.Close()
			return
		}
	case <-s.serverStopChan:
		stopping.Store(true)
		conn.Close()
		return
	case <-time.After(10 * time.Second):
		s.LogError("gorpc.Server: [%s]->[%s]. Cannot obtain handshake from client during 10s", clientAddr, s.Addr)
		conn.Close()
		return
	}

	responsesChan := make(chan *serverMessage, s.PendingResponses)
	stopChan := make(chan struct{})

	readerDone := make(chan struct{})
	go serverReader(s, conn, clientAddr, responsesChan, stopChan, readerDone, enabledCompression, workersCh)

	writerDone := make(chan struct{})
	go serverWriter(s, conn, clientAddr, responsesChan, stopChan, writerDone, enabledCompression)

	select {
	case <-readerDone:
		close(stopChan)
		conn.Close()
		<-writerDone
	case <-writerDone:
		close(stopChan)
		conn.Close()
		<-readerDone
	case <-s.serverStopChan:
		close(stopChan)
		conn.Close()
		<-readerDone
		<-writerDone
	}
}
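The stopping value is written only on the shutdown path, just before the connection is closed, and the handshake-reading goroutine checks Load() == nil to decide whether a read error deserves a log line; errors caused by the shutdown itself stay quiet. A small self-contained sketch of that pattern, with hypothetical names, might be:

package main

import (
	"fmt"
	"net"
	"sync/atomic"
	"time"
)

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		panic(err)
	}
	defer ln.Close()

	conn, err := net.Dial("tcp", ln.Addr().String())
	if err != nil {
		panic(err)
	}

	var stopping atomic.Value
	done := make(chan struct{})
	go func() {
		defer close(done)
		buf := make([]byte, 1)
		if _, err := conn.Read(buf); err != nil {
			if stopping.Load() == nil {
				fmt.Println("unexpected read error:", err)
			} else {
				fmt.Println("read interrupted by shutdown; not logged as an error")
			}
		}
	}()

	time.Sleep(50 * time.Millisecond)
	stopping.Store(true) // signal shutdown before closing so the reader stays quiet
	conn.Close()
	<-done
}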