// StartBinlogDumpFromTimestamp requests a replication binlog dump from // the master mysqld at the given timestamp and then sends binlog // events to the provided channel. // // The startup phase will list all the binary logs, and find the one // that has events starting strictly before the provided timestamp. It // will then start from there, and skip all events that are before the // provided timestamp. // // The stream will continue in the background, waiting for new events if // necessary, until the connection is closed, either by the master or // by canceling the context. // // Note the context is valid and used until eventChan is closed. func (sc *SlaveConnection) StartBinlogDumpFromTimestamp(ctx context.Context, timestamp int64) (<-chan replication.BinlogEvent, error) { ctx, sc.cancel = context.WithCancel(ctx) flavor, err := sc.mysqld.flavor() if err != nil { return nil, fmt.Errorf("StartBinlogDump needs flavor: %v", err) } // List the binlogs. binlogs, err := sc.Conn.ExecuteFetch("SHOW BINARY LOGS", 1000, false) if err != nil { return nil, fmt.Errorf("failed to SHOW BINARY LOGS: %v", err) } // Start with the most recent binlog file until we find the right event. var binlogIndex int var firstEvent replication.BinlogEvent for binlogIndex = len(binlogs.Rows) - 1; binlogIndex >= 0; binlogIndex-- { // Exit the loop early if context is canceled. select { case <-ctx.Done(): return nil, ctx.Err() default: } // Start dumping the logs. The position is '4' to skip the // Binlog File Header. See this page for more info: // https://dev.mysql.com/doc/internals/en/binlog-file.html binlog := binlogs.Rows[binlogIndex][0].String() cmd := makeBinlogDumpCommand(4, 0, sc.slaveID, binlog) if err := sc.Conn.SendCommand(ComBinlogDump, cmd); err != nil { return nil, fmt.Errorf("failed to send the ComBinlogDump command: %v", err) } // Get the first event to get its timestamp. We skip ROTATE // events, as they don't have timestamps. for { buf, err := sc.Conn.ReadPacket() if err != nil { return nil, fmt.Errorf("couldn't start binlog dump of binlog %v: %v", binlog, err) } // Why would the master tell us to stop here? if buf[0] == 254 { return nil, fmt.Errorf("received EOF packet for first packet of binlog %v", binlog) } // Parse the full event. firstEvent = flavor.MakeBinlogEvent(buf[1:]) if !firstEvent.IsValid() { return nil, fmt.Errorf("first event from binlog %v is not valid", binlog) } if !firstEvent.IsRotate() { break } } if int64(firstEvent.Timestamp()) < timestamp { // The first event in this binlog has a smaller // timestamp than what we need, we found a good // starting point. break } // The timestamp is higher, we need to try the older files. // Close and re-open our connection. sc.Conn.Close() conn, err := sc.mysqld.connectForReplication() if err != nil { return nil, err } sc.Conn = conn } if binlogIndex == -1 { // We haven't found a suitable binlog log.Errorf("couldn't find an old enough binlog to match timestamp >= %v (looked at %v files)", timestamp, len(binlogs.Rows)) return nil, ErrBinlogUnavailable } // Now skip all events that have a smaller timestamp var event replication.BinlogEvent for { buf, err := sc.Conn.ReadPacket() if err != nil { return nil, fmt.Errorf("error reading packet while skipping binlog events: %v", err) } if buf[0] == 254 { // The master is telling us to stop. return nil, fmt.Errorf("received EOF packet in binlog dump while skipping packets: %#v", buf) } event = flavor.MakeBinlogEvent(buf[1:]) if !event.IsValid() { return nil, fmt.Errorf("event from binlog is not valid (while skipping)") } if int64(event.Timestamp()) >= timestamp { // we found the first event to send break } // Exit the loop early if context is canceled. select { case <-ctx.Done(): return nil, ctx.Err() default: } } // Now just loop sending and reading events. // FIXME(alainjobart) I think we can use a buffered channel for better performance. eventChan := make(chan replication.BinlogEvent) // Start reading events. sc.wg.Add(1) go func() { defer func() { close(eventChan) sc.wg.Done() }() // Send the first binlog event, it has the format description. select { case eventChan <- firstEvent: case <-ctx.Done(): return } // Then send the rest. for { select { case eventChan <- event: case <-ctx.Done(): return } buf, err := sc.Conn.ReadPacket() if err != nil { if sqlErr, ok := err.(*sqldb.SQLError); ok && sqlErr.Number() == mysql.ErrServerLost { // ErrServerLost = Lost connection to MySQL server during query // This is not necessarily an error. It could just be that we closed // the connection from outside. log.Infof("connection closed during binlog stream (possibly intentional): %v", err) return } log.Errorf("read error while streaming binlog events: %v", err) return } if buf[0] == 254 { // The master is telling us to stop. log.Infof("received EOF packet in binlog dump: %#v", buf) return } // Skip the first byte because it's only used // for signaling EOF. event = flavor.MakeBinlogEvent(buf[1:]) } }() return eventChan, nil }
// parseEvents processes the raw binlog dump stream from the server, one event // at a time, and groups them into transactions. It is called from within the // service function launched by Stream(). // // If the sendTransaction func returns io.EOF, parseEvents returns ErrClientEOF. // If the events channel is closed, parseEvents returns ErrServerEOF. // If the context is done, returns ctx.Err(). func (bls *Streamer) parseEvents(ctx context.Context, events <-chan replication.BinlogEvent) (replication.Position, error) { var statements []*binlogdatapb.BinlogTransaction_Statement var format replication.BinlogFormat var gtid replication.GTID var pos = bls.startPos var autocommit = true var err error // A begin can be triggered either by a BEGIN query, or by a GTID_EVENT. begin := func() { if statements != nil { // If this happened, it would be a legitimate error. log.Errorf("BEGIN in binlog stream while still in another transaction; dropping %d statements: %v", len(statements), statements) binlogStreamerErrors.Add("ParseEvents", 1) } statements = make([]*binlogdatapb.BinlogTransaction_Statement, 0, 10) autocommit = false } // A commit can be triggered either by a COMMIT query, or by an XID_EVENT. // Statements that aren't wrapped in BEGIN/COMMIT are committed immediately. commit := func(timestamp uint32) error { if int64(timestamp) >= bls.timestamp { trans := &binlogdatapb.BinlogTransaction{ Statements: statements, EventToken: &querypb.EventToken{ Timestamp: int64(timestamp), Position: replication.EncodePosition(pos), }, } if err = bls.sendTransaction(trans); err != nil { if err == io.EOF { return ErrClientEOF } return fmt.Errorf("send reply error: %v", err) } } statements = nil autocommit = true return nil } // Parse events. for { var ev replication.BinlogEvent var ok bool select { case ev, ok = <-events: if !ok { // events channel has been closed, which means the connection died. log.Infof("reached end of binlog event stream") return pos, ErrServerEOF } case <-ctx.Done(): log.Infof("stopping early due to binlog Streamer service shutdown or client disconnect") return pos, ctx.Err() } // Validate the buffer before reading fields from it. if !ev.IsValid() { return pos, fmt.Errorf("can't parse binlog event, invalid data: %#v", ev) } // We need to keep checking for FORMAT_DESCRIPTION_EVENT even after we've // seen one, because another one might come along (e.g. on log rotate due to // binlog settings change) that changes the format. if ev.IsFormatDescription() { format, err = ev.Format() if err != nil { return pos, fmt.Errorf("can't parse FORMAT_DESCRIPTION_EVENT: %v, event data: %#v", err, ev) } continue } // We can't parse anything until we get a FORMAT_DESCRIPTION_EVENT that // tells us the size of the event header. if format.IsZero() { // The only thing that should come before the FORMAT_DESCRIPTION_EVENT // is a fake ROTATE_EVENT, which the master sends to tell us the name // of the current log file. if ev.IsRotate() { continue } return pos, fmt.Errorf("got a real event before FORMAT_DESCRIPTION_EVENT: %#v", ev) } // Strip the checksum, if any. We don't actually verify the checksum, so discard it. ev, _, err = ev.StripChecksum(format) if err != nil { return pos, fmt.Errorf("can't strip checksum from binlog event: %v, event data: %#v", err, ev) } // Update the GTID if the event has one. The actual event type could be // something special like GTID_EVENT (MariaDB, MySQL 5.6), or it could be // an arbitrary event with a GTID in the header (Google MySQL). if ev.HasGTID(format) { gtid, err = ev.GTID(format) if err != nil { return pos, fmt.Errorf("can't get GTID from binlog event: %v, event data: %#v", err, ev) } pos = replication.AppendGTID(pos, gtid) } switch { case ev.IsGTID(): // GTID_EVENT if ev.IsBeginGTID(format) { begin() } case ev.IsXID(): // XID_EVENT (equivalent to COMMIT) if err = commit(ev.Timestamp()); err != nil { return pos, err } case ev.IsIntVar(): // INTVAR_EVENT name, value, err := ev.IntVar(format) if err != nil { return pos, fmt.Errorf("can't parse INTVAR_EVENT: %v, event data: %#v", err, ev) } statements = append(statements, &binlogdatapb.BinlogTransaction_Statement{ Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, Sql: []byte(fmt.Sprintf("SET %s=%d", name, value)), }) case ev.IsRand(): // RAND_EVENT seed1, seed2, err := ev.Rand(format) if err != nil { return pos, fmt.Errorf("can't parse RAND_EVENT: %v, event data: %#v", err, ev) } statements = append(statements, &binlogdatapb.BinlogTransaction_Statement{ Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, Sql: []byte(fmt.Sprintf("SET @@RAND_SEED1=%d, @@RAND_SEED2=%d", seed1, seed2)), }) case ev.IsQuery(): // QUERY_EVENT // Extract the query string and group into transactions. q, err := ev.Query(format) if err != nil { return pos, fmt.Errorf("can't get query from binlog event: %v, event data: %#v", err, ev) } switch cat := getStatementCategory(q.SQL); cat { case binlogdatapb.BinlogTransaction_Statement_BL_BEGIN: begin() case binlogdatapb.BinlogTransaction_Statement_BL_ROLLBACK: // Rollbacks are possible under some circumstances. Since the stream // client keeps track of its replication position by updating the set // of GTIDs it's seen, we must commit an empty transaction so the client // can update its position. statements = nil fallthrough case binlogdatapb.BinlogTransaction_Statement_BL_COMMIT: if err = commit(ev.Timestamp()); err != nil { return pos, err } default: // BL_DDL, BL_DML, BL_SET, BL_UNRECOGNIZED if q.Database != "" && q.Database != bls.dbname { // Skip cross-db statements. continue } setTimestamp := &binlogdatapb.BinlogTransaction_Statement{ Category: binlogdatapb.BinlogTransaction_Statement_BL_SET, Sql: []byte(fmt.Sprintf("SET TIMESTAMP=%d", ev.Timestamp())), } statement := &binlogdatapb.BinlogTransaction_Statement{ Category: cat, Sql: []byte(q.SQL), } // If the statement has a charset and it's different than our client's // default charset, send it along with the statement. // If our client hasn't told us its charset, always send it. if bls.clientCharset == nil || (q.Charset != nil && *q.Charset != *bls.clientCharset) { setTimestamp.Charset = q.Charset statement.Charset = q.Charset } statements = append(statements, setTimestamp, statement) if autocommit { if err = commit(ev.Timestamp()); err != nil { return pos, err } } } case ev.IsPreviousGTIDs(): // PREVIOUS_GTIDS_EVENT // MySQL 5.6 only: The Binlogs contain an // event that gives us all the previously // applied commits. It is *not* an // authoritative value, unless we started from // the beginning of a binlog file. if !bls.usePreviousGTIDs { continue } newPos, err := ev.PreviousGTIDs(format) if err != nil { return pos, err } pos = newPos if err = commit(ev.Timestamp()); err != nil { return pos, err } } } }