// Convert converts database replication row events to Elasticsearch bulk actions.
func Convert(rules *config.Runtime, e *canal.RowsEvent) ([]elastic.BulkableRequest, error) {
    rule := rules.GetRule(e.Table.Schema, e.Table.Name)
    if rule == nil {
        return nil, errors.Errorf("no rule found for %s.%s", e.Table.Schema, e.Table.Name)
    }
    log.Debugf("Converting %v", rule)

    var reqs []elastic.BulkableRequest
    var err error
    switch e.Action {
    case canal.InsertAction:
        reqs, err = convertInsert(rule, e.Rows)
    case canal.DeleteAction:
        reqs, err = convertDelete(rule, e.Rows)
    case canal.UpdateAction:
        log.Debugf("Converting update: %+v", e.Rows)
        reqs, err = convertUpdate(rule, e.Rows)
        log.Debugf("Converted update: %+v", reqs)
    default:
        return nil, errors.Errorf("unrecognized action %s", e.Action)
    }
    if err != nil {
        return nil, errors.Errorf("error adding %s to bulk request: %v", e.Action, err)
    }
    return reqs, nil
}
// NewQuotedScanner creates a scanner that splits on words or quoted strings.
func NewQuotedScanner(r io.Reader) *bufio.Scanner {
    scanner := bufio.NewScanner(r)
    split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
        // Skip leading spaces.
        start := 0
        for width := 0; start < len(data); start += width {
            var r rune
            r, width = utf8.DecodeRune(data[start:])
            if !unicode.IsSpace(r) {
                break
            }
        }

        // Does the word start with a quote?
        quote, width := utf8.DecodeRune(data[start:])
        i := start
        if IsQuote(quote) {
            log.Debugf("Quote detected '%c'", quote)
            i += width
        } else {
            quote = 0
        }

        // Scan until space, marking the end of the word.
        for width := 0; i < len(data); i += width {
            var r rune
            r, width = utf8.DecodeRune(data[i:])
            if quote == 0 {
                if unicode.IsSpace(r) {
                    return i + width, data[start:i], nil
                }
            } else {
                // Look for the ending quote.
                // BUG: need to implement escape handling
                if r == quote {
                    log.Debugf("Found end quote %d bytes after start", i-start)
                    quote = 0
                }
            }
        }

        // If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
        if atEOF && len(data) > start {
            return len(data), data[start:], nil
        }

        // Request more data.
        return start, nil, nil
    }
    scanner.Split(split)
    return scanner
}
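// A hypothetical usage sketch, not part of the original source: tokenizing a
// statement with NewQuotedScanner. Whether '"' counts as a quote depends on
// IsQuote, which is defined elsewhere in this package, so the quoted token
// below is an assumption.
func ExampleNewQuotedScanner() {
    s := NewQuotedScanner(strings.NewReader(`ALTER TABLE "my table" ADD COLUMN c INT`))
    for s.Scan() {
        fmt.Println(s.Text()) // each word, or quoted run, as one token
    }
}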
// ParseQuery parses an ALTER TABLE statement into an AlterTableQuery.
// Statements that do not begin with ALTER return ErrIgnored; an ALTER that is
// not ALTER TABLE is reported as not valid.
func ParseQuery(query string) (*AlterTableQuery, error) {
    scanner := NewQuotedScanner(strings.NewReader(query))
    scanner.Scan()
    switch strings.ToUpper(scanner.Text()) {
    case "ALTER":
        if scanner.Scan(); strings.ToUpper(scanner.Text()) == "TABLE" {
            log.Debugf("Scanned TABLE")
            return parseAlterTable(scanner)
        }
    default:
        log.Debugf("Ignoring query starting with: %v", scanner.Text())
        return nil, ErrIgnored
    }
    return nil, errors.NotValidf("Unrecognized query '%v'", query)
}
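// A hedged sketch of calling ParseQuery; the AlterTableQuery field names are
// taken from handleQueryEvent below, but whether parseAlterTable accepts this
// exact ADD COLUMN form is an assumption.
func ExampleParseQuery() {
    q, err := ParseQuery("ALTER TABLE t ADD COLUMN c INT")
    if err != nil {
        log.Errorf("parse failed: %v", err)
        return
    }
    fmt.Println(q.Table, q.Column, q.Type)
}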
func (s *syncer) ignoreEvent(e *canal.RowsEvent) bool {
    ignore := s.rules.GetRule(e.Table.Schema, e.Table.Name) == nil
    if ignore {
        log.Debugf("Ignoring event for table not configured for replication: %s.%s",
            e.Table.Schema, e.Table.Name)
    }
    return ignore
}
// Submit submits the current batch of actions in bulk and resets the action
// count to zero.
func (b *Bulker) Submit() error {
    size := b.bulker.NumberOfActions()
    if size == 0 {
        return nil
    }
    b.LastResponse, b.LastError = b.bulker.Do()
    if b.LastError != nil {
        log.Errorf("Bulk update %d/%d failed due to %v: %+v",
            size, b.MaxActions, b.LastError, b.LastResponse)
        return b.LastError
    }
    if b.LastResponse.Errors {
        var buffer bytes.Buffer
        failed := b.LastResponse.Failed()
        count := len(failed)
        buffer.WriteString(fmt.Sprintf("%v actions failed in bulk update:\n", count))
        // Log at most the first three failures.
        for i, er := range failed {
            buffer.WriteString(fmt.Sprintf("\t%v:%v\n", er, er.Error))
            if i == 2 {
                if count > 3 {
                    buffer.WriteString("\t...\n")
                }
                break
            }
        }
        log.Errorf(buffer.String())
        // Show bulk errors but continue.
        //b.LastError = errors.Errorf("%v actions failed during bulk update", count)
    } else {
        log.Debugf("Bulk update %d/%d succeeded", size, b.MaxActions)
    }
    return b.LastError
}
func (b *BinlogSyncer) onStream(s *BinlogStreamer) {
    defer func() {
        if e := recover(); e != nil {
            s.closeWithError(fmt.Errorf("Err: %v\n Stack: %s", e, Pstack()))
        }
        b.wg.Done()
    }()

    for {
        log.Debugf("Reading next packet")
        data, err := b.c.ReadPacket()
        if err != nil {
            log.Debugf("Reading next packet failed; shutting down")
            s.closeWithError(err)
            return
        }
        log.Debugf("Packet read")

        switch data[0] {
        case OK_HEADER:
            log.Debugf("found OK_HEADER")
            if err = b.parseEvent(s, data); err != nil {
                s.closeWithError(err)
                return
            }
        case ERR_HEADER:
            log.Debugf("found ERR_HEADER")
            err = b.c.HandleErrorPacket(data)
            s.closeWithError(err)
            return
        default:
            s.closeWithError(fmt.Errorf("invalid stream header %c", data[0]))
            return
        }
    }
}
func (p *BinlogParser) parse(data []byte) (*BinlogEvent, error) {
    rawData := data

    h, err := p.parseHeader(data)
    if err != nil {
        return nil, err
    }
    log.Debugf("parser.parse: header: %+v", h)

    data = data[EventHeaderSize:]
    eventLen := int(h.EventSize) - EventHeaderSize
    if len(data) != eventLen {
        return nil, fmt.Errorf("invalid data size %d in event %s, expected event length %d",
            len(data), h.EventType, eventLen)
    }

    e, err := p.parseEvent(h, data)
    if err != nil {
        return nil, err
    }
    log.Debugf("parser.parse: event: %+v", e)

    return &BinlogEvent{rawData, h, e}, nil
}
func (h *dumpParseHandler) Data(db string, table string, values []string) error {
    if h.c.isClosed() {
        return errCanalClosed
    }

    tableInfo, err := h.c.GetTable(db, table)
    if err != nil {
        log.Errorf("get %s.%s information err: %v", db, table, err)
        return errors.Trace(err)
    }

    log.Debugf("Handling Data: %v", values)
    vs := make([]interface{}, len(values))
    for i, v := range values {
        if v == "NULL" {
            vs[i] = nil
        } else if firstChar := v[0]; firstChar == '\'' || firstChar == '"' {
            // Quoted string: strip the surrounding quotes.
            vs[i] = v[1 : len(v)-1]
        } else if tableInfo.Columns[i].Type == schema.TYPE_NUMBER {
            n, err := strconv.ParseInt(v, 10, 64)
            if err != nil {
                log.Errorf("parse row %v at %d error %v, skip", values, i, err)
                return dump.ErrSkip
            }
            vs[i] = n
        } else if tableInfo.Columns[i].Type == schema.TYPE_FLOAT {
            f, err := strconv.ParseFloat(v, 64)
            if err != nil {
                log.Errorf("parse row %v at %d error %v, skip", values, i, err)
                return dump.ErrSkip
            }
            vs[i] = f
        } else {
            log.Errorf("parse row %v at %d err: invalid type %v for value %v, skip",
                values, i, tableInfo.Columns[i].Type, v)
            return dump.ErrSkip
        }
    }

    events := newRowsEvent(tableInfo, InsertAction, [][]interface{}{vs})
    return h.c.travelRowsEventHandler(events)
}
// Add adds actions to be submitted in the next request. If adding these
// actions pushes the batch past MaxBytes or MaxActions, the current batch is
// auto-submitted by calling Submit.
func (b *Bulker) Add(actions []elastic.BulkableRequest) error {
    for _, req := range actions {
        switch req.(type) {
        case *elastic.BulkDeleteRequest:
            b.Stats.DeleteCount++
        case *elastic.BulkIndexRequest:
            b.Stats.InsertCount++
        case *elastic.BulkUpdateRequest:
            b.Stats.UpdateCount++
        }
        b.Stats.Total++
        log.Debugf("Adding %s\n", req.String())
        b.bulker.Add(req)
    }
    if b.bulker.EstimatedSizeInBytes() >= b.MaxBytes || b.bulker.NumberOfActions() >= b.MaxActions {
        b.Submit()
    }
    return b.LastError
}
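// A sketch of how Convert and the Bulker fit together, assembled here for
// illustration only; it assumes both live in one package and that rules,
// event, and b are constructed elsewhere.
func syncEvent(b *Bulker, rules *config.Runtime, event *canal.RowsEvent) error {
    reqs, err := Convert(rules, event)
    if err != nil {
        return err
    }
    // Add may auto-submit once MaxBytes or MaxActions is reached.
    if err := b.Add(reqs); err != nil {
        return err
    }
    // Flush whatever remains below the thresholds.
    return b.Submit()
}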
func (c *Canal) handleQueryEvent(e *replication.BinlogEvent) error {
    ev := e.Event.(*replication.QueryEvent)
    query, err := replication.ParseQuery(string(ev.Query))
    log.Debugf("query parsed: %v, %v", query, err)
    if err == replication.ErrIgnored {
        return nil
    } else if err != nil {
        log.Infof("failed to parse: %v, %v", string(ev.Query), err)
        return nil
    }

    schema := string(ev.Schema)
    if query.Schema != "" {
        // Schema overridden in the query itself.
        schema = query.Schema
    }
    table, err := c.GetTable(schema, query.Table)
    if err == errTableIgnored {
        return nil
    } else if err != nil {
        return errors.Trace(err)
    }

    switch query.Operation {
    case replication.ADD:
        // Flush everything before changing the schema.
        c.flushEventHandlers()
        table.AddColumn(query.Column, query.Type, query.Extra)
        log.Infof("Adding new column %v %v to %v.%v", query.Column, query.Type, schema, query.Table)
    case replication.MODIFY, replication.DELETE:
        // Currently a no-op.
    }
    return nil
}
func (c *Canal) startSyncBinlog() error {
    pos := mysql.Position{c.master.Name, c.master.Position}
    log.Infof("Start syncing binlog from %v", pos)

    s, err := c.syncer.StartSync(pos)
    if err != nil {
        return errors.Errorf("failed to start sync at %v: %v", pos, err)
    }

    originalTimeout := time.Second
    timeout := originalTimeout
    forceSavePos := false
    for {
        ev, err := s.GetEventTimeout(timeout)
        if err != nil && err != replication.ErrGetEventTimeout {
            return errors.Trace(err)
        } else if err == replication.ErrGetEventTimeout {
            if timeout == 2*originalTimeout {
                log.Debugf("Flushing event handlers since sync has gone idle")
                if err := c.flushEventHandlers(); err != nil {
                    log.Warnf("Error occurred during flush: %v", err)
                }
            }
            // Back off while idle.
            timeout = 2 * timeout
            continue
        }
        timeout = originalTimeout

        // Next binlog position.
        pos.Pos = ev.Header.LogPos
        forceSavePos = false

        log.Debugf("Syncing %v", ev)
        switch e := ev.Event.(type) {
        case *replication.RotateEvent:
            c.flushEventHandlers()
            pos.Name = string(e.NextLogName)
            pos.Pos = uint32(e.Position)
            forceSavePos = true
            log.Infof("Rotate binlog to %v", pos)
        case *replication.RowsEvent:
            // We only care about row-based events.
            if err = c.handleRowsEvent(ev); err != nil {
                log.Errorf("Error handling rows event: %v", err)
                return errors.Trace(err)
            }
        case *replication.QueryEvent:
            if err = c.handleQueryEvent(ev); err != nil {
                log.Errorf("Error handling query event: %v", err)
                return errors.Trace(err)
            }
        default:
            log.Debugf("Ignored event: %+v", e)
        }
        c.master.Update(pos.Name, pos.Pos)
        c.master.Save(forceSavePos)
    }
}
// Parse parses the dump data that Dumper generates.
// It cannot parse every data format that mysqldump outputs.
func Parse(r io.Reader, h ParseHandler) error {
    rb := bufio.NewReaderSize(r, 1024*16)

    binlogExp := regexp.MustCompile("^CHANGE MASTER TO MASTER_LOG_FILE='(.+)', MASTER_LOG_POS=(\\d+);")
    useExp := regexp.MustCompile("^USE `(.+)`;")
    insertWithValuesExp := regexp.MustCompile("^INSERT INTO `(.+)` VALUES \\((.+)\\);")
    insertExp := regexp.MustCompile("INSERT INTO `(.+)` VALUES")
    valuesExp := regexp.MustCompile("^\\((.+)\\)[;,]")

    var db string
    var binlogParsed bool
    var currentInsertTable string

    for {
        line, err := rb.ReadString('\n')
        if err != nil && err != io.EOF {
            return errors.Trace(err)
        } else if err == io.EOF {
            break
        }

        if firstChar := line[0]; firstChar == '"' || firstChar == '\'' {
            // Line begins with a quote; trim the trailing character.
            line = line[0 : len(line)-1]
        }

        if !binlogParsed {
            if m := binlogExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
                log.Debugf("Parse binlog: %s", line)
                name := m[0][1]
                pos, err := strconv.ParseUint(m[0][2], 10, 64)
                if err != nil {
                    return errors.Errorf("parse binlog %v err, invalid number", line)
                }
                if err = h.BinLog(name, pos); err != nil && err != ErrSkip {
                    return errors.Trace(err)
                }
                binlogParsed = true
            }
        }

        if m := useExp.FindStringSubmatch(line); len(m) == 2 {
            db = m[1]
        } else if m = insertWithValuesExp.FindStringSubmatch(line); len(m) == 3 {
            log.Debugf("Parse insert: %s", line)
            table := m[1]
            values, err := parseValues(m[2])
            if err != nil {
                return errors.Errorf("parse values %v err", line)
            }
            if err = h.Data(db, table, values); err != nil && err != ErrSkip {
                return errors.Trace(err)
            }
        } else if m = insertExp.FindStringSubmatch(line); len(m) == 2 {
            log.Debugf("Parse insert start: %s", line)
            currentInsertTable = m[1]
        } else if m = valuesExp.FindStringSubmatch(line); len(m) == 2 {
            log.Debugf("Parse insert value: %s", line)
            values, err := parseValues(m[1])
            if err != nil {
                return errors.Errorf("parse values %v err", line)
            }
            if err = h.Data(db, currentInsertTable, values); err != nil && err != ErrSkip {
                return errors.Trace(err)
            }
        }
    }

    return h.Complete()
}
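// A minimal ParseHandler sketch, not from the original source; the method set
// (BinLog, Data, Complete) is inferred from the calls Parse makes above.
type countingHandler struct {
    rows int
}

func (h *countingHandler) BinLog(name string, pos uint64) error { return nil }

func (h *countingHandler) Data(db string, table string, values []string) error {
    h.rows++
    return nil
}

func (h *countingHandler) Complete() error { return nil }

// Feeding Parse a two-line dump should invoke Data once, assuming the
// unexported parseValues helper accepts the value list "1,'a'".
func ExampleParse() {
    dump := "USE `test`;\nINSERT INTO `t` VALUES (1,'a');\n"
    h := &countingHandler{}
    if err := Parse(strings.NewReader(dump), h); err != nil {
        log.Errorf("parse failed: %v", err)
        return
    }
    fmt.Println(h.rows) // expected: 1
}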
func (h *testRowsEventHandler) Do(e *RowsEvent) error {
    log.Debugf("%s %v\n", e.Action, e.Rows)
    return nil
}
// decodeValue decodes one column value from a row image.
// See MySQL sql/log_event.cc log_event_print_value.
func (e *RowsEvent) decodeValue(data []byte, tp byte, meta uint16) (v interface{}, n int, err error) {
    length := 0

    log.Debugf("Decode: t:%d, m:%d, l:%d", tp, meta, len(data))
    if tp == MYSQL_TYPE_STRING {
        if meta >= 256 {
            b0 := uint8(meta >> 8)
            b1 := uint8(meta & 0xFF)

            if b0&0x30 != 0x30 {
                length = int(uint16(b1) | (uint16((b0&0x30)^0x30) << 4))
                tp = byte(b0 | 0x30)
            } else {
                length = int(meta & 0xFF)
                tp = b0
            }
        } else {
            length = int(meta)
        }
    }

    switch tp {
    case MYSQL_TYPE_NULL:
        return nil, 0, nil
    case MYSQL_TYPE_LONG:
        n = 4
        v = ParseBinaryInt32(data)
    case MYSQL_TYPE_TINY:
        n = 1
        v = ParseBinaryInt8(data)
    case MYSQL_TYPE_SHORT:
        n = 2
        v = ParseBinaryInt16(data)
    case MYSQL_TYPE_INT24:
        n = 3
        v = ParseBinaryInt24(data)
    case MYSQL_TYPE_LONGLONG:
        n = 8
        v = ParseBinaryInt64(data)
    case MYSQL_TYPE_NEWDECIMAL:
        prec := uint8(meta >> 8)
        scale := uint8(meta & 0xFF)
        v, n, err = decodeDecimal(data, int(prec), int(scale))
    case MYSQL_TYPE_FLOAT:
        n = 4
        v = ParseBinaryFloat32(data)
    case MYSQL_TYPE_DOUBLE:
        n = 8
        v = ParseBinaryFloat64(data)
    case MYSQL_TYPE_BIT:
        nbits := ((meta >> 8) * 8) + (meta & 0xFF)
        n = int(nbits+7) / 8
        // Use int64 for BIT values.
        v, err = decodeBit(data, int(nbits), n)
    case MYSQL_TYPE_TIMESTAMP:
        n = 4
        t := binary.LittleEndian.Uint32(data)
        v = time.Unix(int64(t), 0)
    case MYSQL_TYPE_TIMESTAMP2:
        v, n, err = decodeTimestamp2(data, meta)
    case MYSQL_TYPE_DATETIME:
        n = 8
        i64 := binary.LittleEndian.Uint64(data)
        d := i64 / 1000000
        t := i64 % 1000000
        v = time.Date(int(d/10000),
            time.Month((d%10000)/100),
            int(d%100),
            int(t/10000),
            int((t%10000)/100),
            int(t%100),
            0,
            time.UTC).Format(TimeFormat)
    case MYSQL_TYPE_DATETIME2:
        v, n, err = decodeDatetime2(data, meta)
    case MYSQL_TYPE_TIME:
        n = 3
        i32 := uint32(FixedLengthInt(data[0:3]))
        if i32 == 0 {
            v = "00:00:00"
        } else {
            sign := ""
            // NOTE: i32 is unsigned, so this branch can never fire;
            // negative TIME values are not handled here.
            if i32 < 0 {
                sign = "-"
            }
            v = fmt.Sprintf("%s%02d:%02d:%02d", sign, i32/10000, (i32%10000)/100, i32%100)
        }
    case MYSQL_TYPE_TIME2:
        v, n, err = decodeTime2(data, meta)
    case MYSQL_TYPE_DATE:
        n = 3
        i32 := uint32(FixedLengthInt(data[0:3]))
        if i32 == 0 {
            v = "0000-00-00"
        } else {
            v = fmt.Sprintf("%04d-%02d-%02d", i32/(16*32), i32/32%16, i32%32)
        }
    case MYSQL_TYPE_YEAR:
        n = 1
        v = int(data[0]) + 1900
    case MYSQL_TYPE_ENUM:
        l := meta & 0xFF
        switch l {
        case 1:
            v = int64(data[0])
            n = 1
        case 2:
            v = int64(binary.BigEndian.Uint16(data))
            n = 2
        default:
            err = fmt.Errorf("unknown ENUM packlen=%d", l)
        }
    case MYSQL_TYPE_SET:
        nbits := meta & 0xFF
        n = int(nbits+7) / 8
        v, err = decodeBit(data, int(nbits), n)
    case MYSQL_TYPE_BLOB, MYSQL_TYPE_GEOMETRY:
        // Warning: GEOMETRY seems to be stored using the same format as BLOB.
        // This was determined by reverse engineering rather than inspecting
        // the MySQL source. It is known to work for MySQL >= 5.6, where
        // meta == 4, but hasn't been tested on other versions.
        switch meta {
        case 1:
            length = int(data[0])
            v = data[1 : 1+length]
            n = length + 1
        case 2:
            length = int(binary.LittleEndian.Uint16(data))
            v = data[2 : 2+length]
            n = length + 2
        case 3:
            length = int(FixedLengthInt(data[0:3]))
            v = data[3 : 3+length]
            n = length + 3
        case 4:
            length = int(binary.LittleEndian.Uint32(data))
            v = data[4 : 4+length]
            n = length + 4
        default:
            err = fmt.Errorf("invalid blob packlen = %d", meta)
        }
    case MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VAR_STRING:
        length = int(meta)
        v, n = decodeString(data, length)
    case MYSQL_TYPE_STRING:
        v, n = decodeString(data, length)
    default:
        err = fmt.Errorf("unsupported type %d in binlog and don't know how to handle", tp)
    }
    return
}
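// Standalone illustration, not from the original source, of the meta == 2
// branch in the BLOB case above: a two-byte little-endian length prefix
// followed by the payload.
func decodeBlob2(data []byte) (payload []byte, n int) {
    length := int(binary.LittleEndian.Uint16(data))
    return data[2 : 2+length], length + 2
}

// For example, decodeBlob2([]byte{0x03, 0x00, 'a', 'b', 'c'}) yields
// ("abc", 5): a payload length of 3 from the prefix, plus the 2 prefix bytes.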
func (p *BinlogParser) parseEvent(h *EventHeader, data []byte) (Event, error) {
    var e Event

    log.Debugf("Parsing %v", h.EventType)
    if h.EventType == FORMAT_DESCRIPTION_EVENT {
        p.format = &FormatDescriptionEvent{}
        e = p.format
    } else {
        if p.format != nil && p.format.ChecksumAlgorithm == BINLOG_CHECKSUM_ALG_CRC32 {
            // Strip the trailing CRC32 checksum.
            data = data[0 : len(data)-4]
        }

        if h.EventType == ROTATE_EVENT {
            e = &RotateEvent{}
        } else if !p.rawMode {
            switch h.EventType {
            case QUERY_EVENT:
                e = &QueryEvent{}
            case XID_EVENT:
                e = &XIDEvent{}
            case TABLE_MAP_EVENT:
                te := &TableMapEvent{}
                if p.format.EventTypeHeaderLengths[TABLE_MAP_EVENT-1] == 6 {
                    te.tableIDSize = 4
                } else {
                    te.tableIDSize = 6
                }
                e = te
            case WRITE_ROWS_EVENTv0,
                UPDATE_ROWS_EVENTv0,
                DELETE_ROWS_EVENTv0,
                WRITE_ROWS_EVENTv1,
                DELETE_ROWS_EVENTv1,
                UPDATE_ROWS_EVENTv1,
                WRITE_ROWS_EVENTv2,
                UPDATE_ROWS_EVENTv2,
                DELETE_ROWS_EVENTv2:
                e = p.newRowsEvent(h)
            case ROWS_QUERY_EVENT:
                e = &RowsQueryEvent{}
            case GTID_EVENT:
                e = &GTIDEvent{}
            case BEGIN_LOAD_QUERY_EVENT:
                e = &BeginLoadQueryEvent{}
            case EXECUTE_LOAD_QUERY_EVENT:
                e = &ExecuteLoadQueryEvent{}
            case MARIADB_ANNOTATE_ROWS_EVENT:
                e = &MariadbAnnotaeRowsEvent{}
            case MARIADB_BINLOG_CHECKPOINT_EVENT:
                e = &MariadbBinlogCheckPointEvent{}
            case MARIADB_GTID_LIST_EVENT:
                e = &MariadbGTIDListEvent{}
            case MARIADB_GTID_EVENT:
                ee := &MariadbGTIDEvent{}
                ee.GTID.ServerID = h.ServerID
                e = ee
            default:
                e = &GenericEvent{}
            }
        } else {
            e = &GenericEvent{}
        }
    }

    if err := e.Decode(data); err != nil {
        log.Errorf("Parser decoding %v failed %v", h.EventType, err)
        return nil, &EventError{h, err.Error(), data}
    }
    //e.Dump(os.Stdout)

    if te, ok := e.(*TableMapEvent); ok {
        p.tables[te.TableID] = te
    }

    // If MySQL restarts, it may reuse the same table id for different tables.
    // We must clear the table map before parsing new events.
    // We have no better way to know whether an event is from before or after a
    // restart, so we clear the table map on every rotate event.
    if _, ok := e.(*RotateEvent); ok {
        p.tables = make(map[uint64]*TableMapEvent)
    }

    return e, nil
}