// operateTunnel monitors the health of the tunnel and performs
// periodic work.
//
// BytesTransferred and TotalBytesTransferred notices are emitted
// for live reporting and diagnostics reporting, respectively.
//
// Status requests are sent to the Psiphon API to report bytes
// transferred.
//
// Periodic SSH keep alive packets are sent to ensure the underlying
// TCP connection isn't terminated by NAT, or other network
// interference -- or test if it has been terminated while the device
// has been asleep. When a keep alive times out, the tunnel is
// considered failed.
//
// An immediate SSH keep alive "probe" is sent to test the tunnel and
// server responsiveness when a port forward failure is detected: a
// failed dial or failed read/write. This keep alive has a shorter
// timeout.
//
// Note that port forward failures may be due to non-failure conditions.
// For example, when the user inputs an invalid domain name and
// resolution is done by the ssh server, or when connecting to a
// non-whitelisted port, the resulting error message is not
// distinguishable from a true server error (a common error message,
// "ssh: rejected: administratively prohibited (open failed)", may be
// returned for these cases, but also, for example, when the server has
// run out of ephemeral ports).
//
// SSH keep alives are not sent when the tunnel has been recently
// active (not only does tunnel activity obviate the need for a keep
// alive; testing has shown that keep alives may time out for "busy"
// tunnels, especially over the meek protocol and in other high latency
// conditions).
//
// "Recently active" is defined as having received payload bytes. Sent
// bytes are not considered as testing has shown bytes may appear to
// send when certain NAT devices have interfered with the tunnel, while
// no bytes are received. In a pathological case, with DNS implemented
// as tunneled UDP, a browser may wait excessively for a domain name to
// resolve, while no new port forward is attempted which would otherwise
// result in a tunnel failure detection.
//
// TODO: change "recently active" to include having received any
// SSH protocol messages from the server, not just user payload?
//
func (tunnel *Tunnel) operateTunnel(tunnelOwner TunnelOwner) {
	defer tunnel.operateWaitGroup.Done()

	lastBytesReceivedTime := monotime.Now()

	lastTotalBytesTransferredTime := monotime.Now()
	totalSent := int64(0)
	totalReceived := int64(0)

	noticeBytesTransferredTicker := time.NewTicker(1 * time.Second)
	defer noticeBytesTransferredTicker.Stop()

	// The next status request and ssh keep alive times are picked at random,
	// from a range, to make the resulting traffic less fingerprintable.
	// Note: not using Tickers since these are not fixed time periods.
	nextStatusRequestPeriod := func() time.Duration {
		return makeRandomPeriod(
			PSIPHON_API_STATUS_REQUEST_PERIOD_MIN,
			PSIPHON_API_STATUS_REQUEST_PERIOD_MAX)
	}

	statsTimer := time.NewTimer(nextStatusRequestPeriod())
	defer statsTimer.Stop()

	// Schedule an immediate status request to deliver any unreported
	// persistent stats.
	// Note: this may not be effective when an outstanding
	// asynchronous untunneled final status request is holding the
	// persistent stats records. It may also conflict with other
	// tunnel candidates which attempt to send an immediate request
	// before being discarded. For now, we mitigate this with a short,
	// random delay.
	unreported := CountUnreportedPersistentStats()
	if unreported > 0 {
		NoticeInfo("Unreported persistent stats: %d", unreported)
		statsTimer.Reset(makeRandomPeriod(
			PSIPHON_API_STATUS_REQUEST_SHORT_PERIOD_MIN,
			PSIPHON_API_STATUS_REQUEST_SHORT_PERIOD_MAX))
	}

	nextSshKeepAlivePeriod := func() time.Duration {
		return makeRandomPeriod(
			TUNNEL_SSH_KEEP_ALIVE_PERIOD_MIN,
			TUNNEL_SSH_KEEP_ALIVE_PERIOD_MAX)
	}

	// TODO: don't initialize timer when config.DisablePeriodicSshKeepAlive is set
	sshKeepAliveTimer := time.NewTimer(nextSshKeepAlivePeriod())
	if tunnel.config.DisablePeriodicSshKeepAlive {
		sshKeepAliveTimer.Stop()
	} else {
		defer sshKeepAliveTimer.Stop()
	}

	// Perform network requests in separate goroutines so as not to block
	// other operations.
	requestsWaitGroup := new(sync.WaitGroup)

	requestsWaitGroup.Add(1)
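	// Each signal on signalStatusRequest triggers one tunneled status
	// request. Senders use non-blocking sends, so a signal arriving while
	// a request is already in flight is dropped rather than queued;
	// closing the channel terminates the worker.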
	signalStatusRequest := make(chan struct{})
	go func() {
		defer requestsWaitGroup.Done()
		for range signalStatusRequest {
			sendStats(tunnel)
		}
	}()

	requestsWaitGroup.Add(1)
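	// signalSshKeepAlive is unbuffered and written with non-blocking
	// sends, so a signal arriving while a keep alive is already in flight
	// is dropped rather than queued. sshKeepAliveError has capacity 1 and
	// is also written with a non-blocking send: only the first failure is
	// retained, which is sufficient, as one failure fails the tunnel.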
	signalSshKeepAlive := make(chan time.Duration)
	sshKeepAliveError := make(chan error, 1)
	go func() {
		defer requestsWaitGroup.Done()
		for timeout := range signalSshKeepAlive {
			err := sendSshKeepAlive(tunnel.sshClient, tunnel.conn, timeout)
			if err != nil {
				select {
				case sshKeepAliveError <- err:
				default:
				}
			}
		}
	}()

	requestsWaitGroup.Add(1)
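	// The client verification worker sends each new verification payload
	// to the server as it arrives, retrying failed sends after a delay up
	// to a retry limit.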
	signalStopClientVerificationRequests := make(chan struct{})
	go func() {
		defer requestsWaitGroup.Done()

		clientVerificationRequestSuccess := true
		clientVerificationPayload := ""
		failCount := 0
		for {
			// TODO: use reflect.SelectCase?
			if clientVerificationRequestSuccess {
				failCount = 0
				select {
				case clientVerificationPayload = <-tunnel.newClientVerificationPayload:
				case <-signalStopClientVerificationRequests:
					return
				}
			} else {
				// If sendClientVerification failed to send the payload,
				// retry after a delay, using a new payload instead if one
				// arrives in the meantime. After more than
				// PSIPHON_API_CLIENT_VERIFICATION_REQUEST_MAX_RETRIES
				// failures, stop retrying for this tunnel.
				failCount++
				if failCount > PSIPHON_API_CLIENT_VERIFICATION_REQUEST_MAX_RETRIES {
					return
				}
				timeout := time.After(PSIPHON_API_CLIENT_VERIFICATION_REQUEST_RETRY_PERIOD)
				select {
				case <-timeout:
				case clientVerificationPayload = <-tunnel.newClientVerificationPayload:
				case <-signalStopClientVerificationRequests:
					return
				}
			}

			clientVerificationRequestSuccess = sendClientVerification(tunnel, clientVerificationPayload)
		}
	}()

	shutdown := false
	var err error
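	// The main event loop runs until an orderly shutdown is signaled or a
	// fatal error (a failed SSH keep alive) is set in err.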
	for !shutdown && err == nil {
		select {
		case <-noticeBytesTransferredTicker.C:
			sent, received := transferstats.ReportRecentBytesTransferredForServer(
				tunnel.serverEntry.IpAddress)

			if received > 0 {
				lastBytesReceivedTime = monotime.Now()
			}

			totalSent += sent
			totalReceived += received

			if lastTotalBytesTransferredTime.Add(TOTAL_BYTES_TRANSFERRED_NOTICE_PERIOD).Before(monotime.Now()) {
				NoticeTotalBytesTransferred(tunnel.serverEntry.IpAddress, totalSent, totalReceived)
				lastTotalBytesTransferredTime = monotime.Now()
			}

			// Only emit the frequent BytesTransferred notice when tunnel is not idle.
			if tunnel.config.EmitBytesTransferred && (sent > 0 || received > 0) {
				NoticeBytesTransferred(tunnel.serverEntry.IpAddress, sent, received)
			}

		case <-statsTimer.C:
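			// Non-blocking send: if a status request is already in
			// flight, skip this period rather than queue another request.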
			select {
			case signalStatusRequest <- struct{}{}:
			default:
			}
			statsTimer.Reset(nextStatusRequestPeriod())

		case <-sshKeepAliveTimer.C:
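			// Skip the periodic keep alive when the tunnel has recently
			// received bytes; see the "recently active" note in the doc
			// comment above.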
			if lastBytesReceivedTime.Add(TUNNEL_SSH_KEEP_ALIVE_PERIODIC_INACTIVE_PERIOD).Before(monotime.Now()) {
				select {
				case signalSshKeepAlive <- time.Duration(*tunnel.config.TunnelSshKeepAlivePeriodicTimeoutSeconds) * time.Second:
				default:
				}
			}
			sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())

		case <-tunnel.signalPortForwardFailure:
			// Note: no mutex on totalPortForwardFailures; it's only referenced here
			tunnel.totalPortForwardFailures++
			NoticeInfo("port forward failures for %s: %d",
				tunnel.serverEntry.IpAddress, tunnel.totalPortForwardFailures)

			if lastBytesReceivedTime.Add(TUNNEL_SSH_KEEP_ALIVE_PROBE_INACTIVE_PERIOD).Before(monotime.Now()) {
				select {
				case signalSshKeepAlive <- time.Duration(*tunnel.config.TunnelSshKeepAliveProbeTimeoutSeconds) * time.Second:
				default:
				}
			}
			if !tunnel.config.DisablePeriodicSshKeepAlive {
				sshKeepAliveTimer.Reset(nextSshKeepAlivePeriod())
			}

		case err = <-sshKeepAliveError:
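			// A failed keep alive is fatal: assigning err terminates the
			// loop, and SignalTunnelFailure is called below.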

		case serverRequest := <-tunnel.sshServerRequests:
			if serverRequest != nil {
				err := HandleServerRequest(tunnelOwner, tunnel, serverRequest.Type, serverRequest.Payload)
				if err == nil {
					serverRequest.Reply(true, nil)
				} else {
					NoticeAlert("HandleServerRequest for %s failed: %s", serverRequest.Type, err)
					serverRequest.Reply(false, nil)
				}
			}

		case <-tunnel.shutdownOperateBroadcast:
			shutdown = true
		}
	}

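	// Closing the signal channels terminates the request worker
	// goroutines; Wait ensures the workers have exited before the final
	// stats are reported.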
	close(signalSshKeepAlive)
	close(signalStatusRequest)
	close(signalStopClientVerificationRequests)
	requestsWaitGroup.Wait()

	// Capture bytes transferred since the last noticeBytesTransferredTicker tick
	sent, received := transferstats.ReportRecentBytesTransferredForServer(tunnel.serverEntry.IpAddress)
	totalSent += sent
	totalReceived += received

	// Always emit a final NoticeTotalBytesTransferred
	NoticeTotalBytesTransferred(tunnel.serverEntry.IpAddress, totalSent, totalReceived)

	// Tunnel does not have a serverContext when DisableApi is set.
	if tunnel.serverContext != nil && !tunnel.IsDiscarded() {

		// The stats for this tunnel will be reported via the next successful
		// status request.

		// Since client clocks are unreliable, we report the server's timestamp from
		// the handshake response as the absolute tunnel start time. This time
		// will be slightly earlier than the actual tunnel activation time, as the
		// client has to receive and parse the response and activate the tunnel.

		tunnelStartTime := tunnel.serverContext.serverHandshakeTimestamp

		// For the tunnel duration calculation, we use the local clock. The start time
		// is tunnel.establishedTime as recorded when the tunnel was established. For the
		// end time, we do not use the current time, as we may now be long past the
		// actual termination time of the tunnel. For example, the host or device may
		// have resumed after a long sleep (it's not clear whether the monotonic clock
		// used to measure elapsed time stops during device sleep). Instead, we use
		// the last data received time as the estimated tunnel end time.
		//
		// One potential issue with using the last received time is receiving data
		// after an extended sleep because the device sleep occurred with data still in
		// the OS socket read buffer. This is not expected to happen on Android, as the
		// OS will wake a process when it has TCP data available to read. (For this reason,
		// the actual long sleep issue is only with an idle tunnel; in this case the client
		// is responsible for sending SSH keep alives but a device sleep will delay the
		// golang SSH keep alive timer.)
		//
		// Idle tunnels will only read data when an SSH keep alive is sent. As a result,
		// the last-received-time scheme can undercount tunnel durations by up to
		// TUNNEL_SSH_KEEP_ALIVE_PERIOD_MAX for idle tunnels.

		tunnelDuration := tunnel.conn.GetLastActivityMonotime().Sub(tunnel.establishedTime)

		err := RecordTunnelStat(
			tunnel.serverContext.sessionId,
			tunnel.serverContext.tunnelNumber,
			tunnel.serverEntry.IpAddress,
			fmt.Sprintf("%d", tunnel.establishDuration),
			tunnelStartTime,
			fmt.Sprintf("%d", tunnelDuration),
			totalSent,
			totalReceived)
		if err != nil {
			NoticeAlert("RecordTunnelStat failed: %s", common.ContextError(err))
		}
	}

	// Final status request notes:
	//
	// It's highly desirable to send a final status request in order to report
	// domain bytes transferred stats as well as to report tunnel stats as
	// soon as possible. For this reason, we attempt untunneled requests when
	// the tunneled request isn't possible or has failed.
	//
	// In an orderly shutdown (err == nil), the Controller is stopping and
	// everything must be wrapped up quickly. Also, we still have a working
	// tunnel. So we first attempt a tunneled status request (with a short
	// timeout) and then attempt, synchronously -- otherwise the Controller's
	// runWaitGroup.Wait() will return while a request is still in progress
	// -- untunneled requests (also with short timeouts). Note that in this
	// case the untunneled request will opt out of untunneledPendingConns so
	// that it's not inadvertently canceled by the Controller shutdown
	// sequence (see doUntunneledStatusRequest).
	//
	// If the tunnel has failed, the Controller may continue working. We want
	// to re-establish as soon as possible (so don't want to block on status
	// requests, even for a second). We may have a long time to attempt
	// untunneled requests in the background. And there is no tunnel through
	// which to attempt tunneled requests. So we spawn a goroutine to run the
	// untunneled requests, which are allowed a longer timeout. These requests
	// will be interrupted by the Controller's untunneledPendingConns.CloseAll()
	// in the case of a shutdown.

	if err == nil {
		NoticeInfo("shutdown operate tunnel")
		if !sendStats(tunnel) {
			sendUntunneledStats(tunnel, true)
		}
	} else {
		NoticeAlert("operate tunnel error for %s: %s", tunnel.serverEntry.IpAddress, err)
		go sendUntunneledStats(tunnel, false)
		tunnelOwner.SignalTunnelFailure(tunnel)
	}
}