Пример #1
0
// Converts database replication row events to elasticsearch bulk actions
func Convert(rules *config.Runtime, e *canal.RowsEvent) ([]elastic.BulkableRequest, error) {
	rule := rules.GetRule(e.Table.Schema, e.Table.Name)
	if rule == nil {
		return nil, errors.Errorf("no rule found for %s.%s", e.Table.Schema, e.Table.Name)
	}

	log.Debugf("Converting %v", rule)
	var reqs []elastic.BulkableRequest
	var err error

	switch e.Action {
	case canal.InsertAction:
		reqs, err = convertInsert(rule, e.Rows)
	case canal.DeleteAction:
		reqs, err = convertDelete(rule, e.Rows)
	case canal.UpdateAction:
		log.Debugf("Converting update: %+v", e.Rows)
		reqs, err = convertUpdate(rule, e.Rows)
		log.Debugf("Converted update: %+v", reqs)
	default:
		return nil, errors.Errorf("Unrecognized action action %s", e.Action)
	}

	if err != nil {
		return nil, errors.Errorf("Error adding %s to bulk request: %v", e.Action, err)
	}

	return reqs, nil
}
Пример #2
0
// Creates a scanner that splits on words or quoted strings
func NewQuotedScanner(r io.Reader) *bufio.Scanner {
	scanner := bufio.NewScanner(r)
	split := func(data []byte, atEOF bool) (advance int, token []byte, err error) {
		// Skip leading spaces.
		start := 0
		for width := 0; start < len(data); start += width {
			var r rune
			r, width = utf8.DecodeRune(data[start:])
			if !unicode.IsSpace(r) {
				break
			}
		}

		// Does word start with a quote?
		quote, width := utf8.DecodeRune(data[start:])
		i := start
		if IsQuote(quote) {
			log.Debugf("Quote detected '%c'", quote)
			i = i + width
		} else {
			quote = 0

		}

		// Scan until space, marking end of word.
		for width := 0; i < len(data); i += width {
			var r rune
			r, width = utf8.DecodeRune(data[i:])
			if quote == 0 {
				if unicode.IsSpace(r) {
					return i + width, data[start:i], nil
				}
			} else {
				// Look for ending quote
				// BUG: need to implement escape handling
				if r == quote {
					log.Debugf("Found end quote %d chars after start", i)
					quote = 0
				}
			}
		}
		// If we're at EOF, we have a final, non-empty, non-terminated word. Return it.
		if atEOF && len(data) > start {
			return len(data), data[start:], nil
		}
		// Request more data.
		return start, nil, nil
	}
	scanner.Split(split)
	return scanner
}
Пример #3
0
// ParseQuery tokenizes query and, for statements beginning with
// "ALTER TABLE", delegates to parseAlterTable. Queries that do not start
// with ALTER are reported as ErrIgnored; an ALTER not followed by TABLE is
// reported as not valid.
func ParseQuery(query string) (*AlterTableQuery, error) {
	scanner := NewQuotedScanner(strings.NewReader(query))
	scanner.Scan()
	if strings.ToUpper(scanner.Text()) != "ALTER" {
		log.Debugf("Ignoring query starting with: %v", scanner.Text())
		return nil, ErrIgnored
	}
	if scanner.Scan(); strings.ToUpper(scanner.Text()) == "TABLE" {
		log.Debugf("Scanned TABLE")
		return parseAlterTable(scanner)
	}
	return nil, errors.NotValidf("Unrecognized query '%v'", query)
}
Пример #4
0
// ignoreEvent reports whether the row event targets a table that has no
// replication rule configured; such events are skipped with a debug log.
func (s *syncer) ignoreEvent(e *canal.RowsEvent) bool {
	if s.rules.GetRule(e.Table.Schema, e.Table.Name) != nil {
		return false
	}
	log.Debugf("Ignoring event for table not configured for replication: %s.%s", e.Table.Schema, e.Table.Name)
	return true
}
Пример #5
0
// Submit submits the current batch of actions in bulk and resets Count to 0.
func (b *Bulker) Submit() error {
	size := b.bulker.NumberOfActions()
	if size == 0 {
		return nil
	}
	b.LastResponse, b.LastError = b.bulker.Do()
	if b.LastError != nil {
		log.Errorf("Bulk update %d/%d failed due to %v: %+v", size, b.MaxActions, b.LastError, b.LastResponse)
		return b.LastError
	}
	if b.LastResponse.Errors {
		var buffer bytes.Buffer
		failed := b.LastResponse.Failed()
		count := len(failed)
		buffer.WriteString(fmt.Sprintf("%v actions failed in bulk update:\n", count))
		for i, er := range failed {
			buffer.WriteString(fmt.Sprintf("\t%v:%v\n", er, er.Error))
			if i == 2 {
				if count > 3 {
					buffer.WriteString(fmt.Sprintf("\t...\n"))
				}
				break
			}
		}
		log.Errorf(buffer.String())
		// show bulk errors but continue
		//b.LastError = errors.Errorf("%v actions failed during bulk update", count)
	} else {
		log.Debugf("Bulk update %d/%d succeeded", size, b.MaxActions)
	}
	return b.LastError
}
Пример #6
0
// onStream is the read loop for one binlog stream: it reads packets from the
// server connection and dispatches them to the streamer until an error
// occurs. Any panic is converted into a stream error, and b.wg is released
// when the loop exits.
func (b *BinlogSyncer) onStream(s *BinlogStreamer) {
	defer func() {
		// Surface panics to stream consumers instead of crashing the process.
		if e := recover(); e != nil {
			s.closeWithError(fmt.Errorf("Err: %v\n Stack: %s", e, Pstack()))
		}
		b.wg.Done()
	}()

	for {
		log.Debugf("Reading next packet")
		data, err := b.c.ReadPacket()
		if err != nil {
			log.Debugf("Reading next packet failed; shutting down")
			s.closeWithError(err)
			return
		}
		log.Debugf("Packet read")

		// The first byte distinguishes event payloads from server errors.
		switch data[0] {
		case OK_HEADER:
			log.Debugf("found OK_HEADER")
			if err = b.parseEvent(s, data); err != nil {
				s.closeWithError(err)
				return
			}
		case ERR_HEADER:
			log.Debugf("found ERR_HEADER")
			err = b.c.HandleErrorPacket(data)
			s.closeWithError(err)
			return
		default:
			s.closeWithError(fmt.Errorf("invalid stream header %c", data[0]))
			return
		}
	}
}
Пример #7
0
// parse decodes one raw binlog packet into a BinlogEvent: the fixed-size
// event header followed by the type-specific event body. The returned event
// keeps a reference to the raw packet bytes.
func (p *BinlogParser) parse(data []byte) (*BinlogEvent, error) {
	raw := data
	header, err := p.parseHeader(data)
	log.Debugf("parser.parse: header: %+v", header)

	if err != nil {
		return nil, err
	}

	body := data[EventHeaderSize:]
	bodyLen := int(header.EventSize) - EventHeaderSize

	// The body must be exactly the size announced in the header.
	if len(body) != bodyLen {
		return nil, fmt.Errorf("invalid data size %d in event %s, less event length %d", len(body), header.EventType, bodyLen)
	}

	ev, err := p.parseEvent(header, body)
	log.Debugf("parser.parse: event: %+v", ev)
	if err != nil {
		return nil, err
	}

	return &BinlogEvent{raw, header, ev}, nil
}
Пример #8
0
// Data handles one row of mysqldump output for db.table: it converts the
// textual column values into typed values (nil, string, int64, or float64
// according to the column type) and replays them through the registered
// row-event handlers as an insert. Rows that cannot be parsed are skipped
// via dump.ErrSkip.
func (h *dumpParseHandler) Data(db string, table string, values []string) error {
	if h.c.isClosed() {
		return errCanalClosed
	}

	tableInfo, err := h.c.GetTable(db, table)
	if err != nil {
		log.Errorf("get %s.%s information err: %v", db, table, err)
		return errors.Trace(err)
	}

	vs := make([]interface{}, len(values))
	log.Debugf("Handling Data: %v", values)
	for i, v := range values {
		if v == "NULL" {
			vs[i] = nil
		} else if len(v) >= 2 && (v[0] == '\'' || v[0] == '"') {
			// Quoted string: strip the surrounding quotes.
			// Fixed: previously v[0] was read without a length check, which
			// panicked on an empty value, and v[1:len(v)-1] panicked on a
			// lone quote character.
			vs[i] = v[1 : len(v)-1]
		} else {
			switch tableInfo.Columns[i].Type {
			case schema.TYPE_NUMBER:
				n, err := strconv.ParseInt(v, 10, 64)
				if err != nil {
					log.Errorf("parse row %v at %d error %v, skip", values, i, err)
					return dump.ErrSkip
				}
				vs[i] = n
			case schema.TYPE_FLOAT:
				f, err := strconv.ParseFloat(v, 64)
				if err != nil {
					log.Errorf("parse row %v at %d error %v, skip", values, i, err)
					return dump.ErrSkip
				}
				vs[i] = f
			default:
				log.Errorf("parse row %v at %d err: invalid type %v for value %v, skip", values, i, tableInfo.Columns[i].Type, v)
				return dump.ErrSkip
			}
		}
	}

	events := newRowsEvent(tableInfo, InsertAction, [][]interface{}{vs})
	return h.c.travelRowsEventHandler(events)
}
Пример #9
0
// Adds actions to be submitted in the next request. If adding these actions causes
// count to exceed MaxActions, auto-submits the current batch by calling Submit.
func (b *Bulker) Add(actions []elastic.BulkableRequest) error {

	for _, req := range actions {
		switch req.(type) {
		case *elastic.BulkDeleteRequest:
			b.Stats.DeleteCount++
		case *elastic.BulkIndexRequest:
			b.Stats.InsertCount++
		case *elastic.BulkUpdateRequest:
			b.Stats.UpdateCount++
		}
		b.Stats.Total++
		log.Debugf("Adding %s\n", req.String())
		b.bulker.Add(req)
	}

	if b.bulker.EstimatedSizeInBytes() >= b.MaxBytes {
		b.Submit()
	} else if b.bulker.NumberOfActions() >= b.MaxActions {
		b.Submit()
	}
	return b.LastError
}
Пример #10
0
// handleQueryEvent reacts to DDL statements seen in the binlog. Only
// "ALTER TABLE ... ADD COLUMN" is applied to the cached table schema;
// unparseable queries and other operations are ignored so replication
// keeps running. Only lookup failures other than errTableIgnored are
// returned as errors.
func (c *Canal) handleQueryEvent(e *replication.BinlogEvent) error {
	ev := e.Event.(*replication.QueryEvent)
	query, err := replication.ParseQuery(string(ev.Query))
	log.Debugf("query parsed: %v, %v", query, err)
	if err == replication.ErrIgnored {
		return nil
	}
	if err != nil {
		log.Infof("failed to parse: %v, %v", string(ev.Query), err)
		return nil
	}

	schema := string(ev.Schema)
	if query.Schema != "" {
		// Schema overridden in query
		schema = query.Schema
	}
	table, err := c.GetTable(schema, query.Table)
	if err == errTableIgnored {
		// ignore
		return nil
	} else if err != nil {
		return errors.Trace(err)
	}

	switch query.Operation {
	case replication.ADD:
		// Flush everything before changing schema
		c.flushEventHandlers()
		table.AddColumn(query.Column, query.Type, query.Extra)
		log.Infof("Adding new column %v %v to %v.%v", query.Column, query.Type, schema, query.Table)
	case replication.MODIFY, replication.DELETE:
		// MODIFY/DELETE column operations are intentionally not applied here.
	}
	return nil
}
Пример #11
0
// startSyncBinlog streams binlog events starting at the saved master
// position, dispatches them to the event handlers, and persists the position
// after every event. It only returns on an unrecoverable error.
func (c *Canal) startSyncBinlog() error {
	pos := mysql.Position{Name: c.master.Name, Pos: c.master.Position}
	log.Infof("Start sync'ing binlog from %v", pos)
	s, err := c.syncer.StartSync(pos)
	if err != nil {
		return errors.Errorf("Failed starting sync at %v: %v", pos, err)
	}

	originalTimeout := time.Second
	timeout := originalTimeout
	forceSavePos := false
	for {
		ev, err := s.GetEventTimeout(timeout)
		if err != nil && err != replication.ErrGetEventTimeout {
			return errors.Trace(err)
		} else if err == replication.ErrGetEventTimeout {
			// Idle: flush handlers once (on the first doubling), then back
			// off exponentially so a quiet master is not polled busily.
			if timeout == 2*originalTimeout {
				log.Debugf("Flushing event handlers since sync has gone idle")
				if err := c.flushEventHandlers(); err != nil {
					log.Warnf("Error occurred during flush: %v", err)
				}
			}
			timeout = 2 * timeout
			continue
		}

		timeout = time.Second

		//next binlog pos
		pos.Pos = ev.Header.LogPos

		forceSavePos = false

		log.Debugf("Syncing %v", ev)
		switch e := ev.Event.(type) {
		case *replication.RotateEvent:
			c.flushEventHandlers()
			pos.Name = string(e.NextLogName)
			pos.Pos = uint32(e.Position)
			// r.ev <- pos
			// The binlog file changed; force the position to be persisted.
			forceSavePos = true
			log.Infof("Rotate binlog to %v", pos)
		case *replication.RowsEvent:
			// we only focus row based event
			if err = c.handleRowsEvent(ev); err != nil {
				log.Errorf("Error handling rows event: %v", err)
				return errors.Trace(err)
			}
		case *replication.QueryEvent:
			if err = c.handleQueryEvent(ev); err != nil {
				// Fixed: this previously logged "rows event" (copy-paste).
				log.Errorf("Error handling query event: %v", err)
				return errors.Trace(err)
			}
		default:
			log.Debugf("Ignored event: %+v", e)
		}
		c.master.Update(pos.Name, pos.Pos)
		c.master.Save(forceSavePos)
	}
	// Note: the loop above never breaks; the unreachable trailing
	// "return nil" was removed (go vet: unreachable).
}
Пример #12
0
// Parse the dump data with Dumper generate.
// It can not parse all the data formats with mysqldump outputs
func Parse(r io.Reader, h ParseHandler) error {
	rb := bufio.NewReaderSize(r, 1024*16)

	binlogExp := regexp.MustCompile("^CHANGE MASTER TO MASTER_LOG_FILE='(.+)', MASTER_LOG_POS=(\\d+);")
	useExp := regexp.MustCompile("^USE `(.+)`;")
	insertWithValuesExp := regexp.MustCompile("^INSERT INTO `(.+)` VALUES \\((.+)\\);")
	insertExp := regexp.MustCompile("INSERT INTO `(.+)` VALUES")
	valuesExp := regexp.MustCompile("^\\((.+)\\)[;,]")

	var db string
	var binlogParsed bool
	var currentInsertTable string

	for {
		line, err := rb.ReadString('\n')
		if err != nil && err != io.EOF {
			return errors.Trace(err)
		} else if err == io.EOF {
			break
		}

		if firstChar := line[0]; firstChar == '"' || firstChar == '\'' {
			// remove quotes
			line = line[0 : len(line)-1]
		}

		if !binlogParsed {
			if m := binlogExp.FindAllStringSubmatch(line, -1); len(m) == 1 {
				log.Debugf("Parse binlog: %s", line)
				name := m[0][1]
				pos, err := strconv.ParseUint(m[0][2], 10, 64)
				if err != nil {
					return errors.Errorf("parse binlog %v err, invalid number", line)
				}

				if err = h.BinLog(name, pos); err != nil && err != ErrSkip {
					return errors.Trace(err)
				}

				binlogParsed = true
			}
		}

		if m := useExp.FindStringSubmatch(line); len(m) == 2 {
			db = m[1]
		} else if m = insertWithValuesExp.FindStringSubmatch(line); len(m) == 3 {
			log.Debugf("Parse insert: %s", line)
			table := m[1]
			values, err := parseValues(m[2])
			if err != nil {
				return errors.Errorf("parse values %v err", line)
			}

			if err = h.Data(db, table, values); err != nil && err != ErrSkip {
				return errors.Trace(err)
			}
		} else if m = insertExp.FindStringSubmatch(line); len(m) == 2 {
			log.Debugf("Parse insert start: %s", line)
			currentInsertTable = m[1]
		} else if m = valuesExp.FindStringSubmatch(line); len(m) == 2 {
			log.Debugf("Parse insert value: %s", line)
			values, err := parseValues(m[1])
			if err != nil {
				return errors.Errorf("parse values %v err", line)
			}

			if err = h.Data(db, currentInsertTable, values); err != nil && err != ErrSkip {
				return errors.Trace(err)
			}
		}
	}
	return h.Complete()
}
Пример #13
0
// Do logs the event's action and rows; a test handler that always succeeds.
func (h *testRowsEventHandler) Do(e *RowsEvent) error {
	log.Debugf("%s %v\n", e.Action, e.Rows)
	return nil
}
Пример #14
0
// see mysql sql/log_event.cc log_event_print_value
func (e *RowsEvent) decodeValue(data []byte, tp byte, meta uint16) (v interface{}, n int, err error) {
	var length int = 0
	log.Debugf("Decode: t:%d, m:%d, l:%d", tp, meta, len(data))
	if tp == MYSQL_TYPE_STRING {
		if meta >= 256 {
			b0 := uint8(meta >> 8)
			b1 := uint8(meta & 0xFF)

			if b0&0x30 != 0x30 {
				length = int(uint16(b1) | (uint16((b0&0x30)^0x30) << 4))
				tp = byte(b0 | 0x30)
			} else {
				length = int(meta & 0xFF)
				tp = b0
			}
		} else {
			length = int(meta)
		}
	}

	switch tp {
	case MYSQL_TYPE_NULL:
		return nil, 0, nil
	case MYSQL_TYPE_LONG:
		n = 4
		v = ParseBinaryInt32(data)
	case MYSQL_TYPE_TINY:
		n = 1
		v = ParseBinaryInt8(data)
	case MYSQL_TYPE_SHORT:
		n = 2
		v = ParseBinaryInt16(data)
	case MYSQL_TYPE_INT24:
		n = 3
		v = ParseBinaryInt24(data)
	case MYSQL_TYPE_LONGLONG:
		n = 8
		v = ParseBinaryInt64(data)
	case MYSQL_TYPE_NEWDECIMAL:
		prec := uint8(meta >> 8)
		scale := uint8(meta & 0xFF)
		v, n, err = decodeDecimal(data, int(prec), int(scale))
	case MYSQL_TYPE_FLOAT:
		n = 4
		v = ParseBinaryFloat32(data)
	case MYSQL_TYPE_DOUBLE:
		n = 8
		v = ParseBinaryFloat64(data)
	case MYSQL_TYPE_BIT:
		nbits := ((meta >> 8) * 8) + (meta & 0xFF)
		n = int(nbits+7) / 8

		//use int64 for bit
		v, err = decodeBit(data, int(nbits), int(n))
	case MYSQL_TYPE_TIMESTAMP:
		n = 4
		t := binary.LittleEndian.Uint32(data)
		v = time.Unix(int64(t), 0)
	case MYSQL_TYPE_TIMESTAMP2:
		v, n, err = decodeTimestamp2(data, meta)
	case MYSQL_TYPE_DATETIME:
		n = 8
		i64 := binary.LittleEndian.Uint64(data)
		d := i64 / 1000000
		t := i64 % 1000000
		v = time.Date(int(d/10000),
			time.Month((d%10000)/100),
			int(d%100),
			int(t/10000),
			int((t%10000)/100),
			int(t%100),
			0,
			time.UTC).Format(TimeFormat)
	case MYSQL_TYPE_DATETIME2:
		v, n, err = decodeDatetime2(data, meta)
	case MYSQL_TYPE_TIME:
		n = 3
		i32 := uint32(FixedLengthInt(data[0:3]))
		if i32 == 0 {
			v = "00:00:00"
		} else {
			sign := ""
			if i32 < 0 {
				sign = "-"
			}
			v = fmt.Sprintf("%s%02d:%02d:%02d", sign, i32/10000, (i32%10000)/100, i32%100)
		}
	case MYSQL_TYPE_TIME2:
		v, n, err = decodeTime2(data, meta)
	case MYSQL_TYPE_DATE:
		n = 3
		i32 := uint32(FixedLengthInt(data[0:3]))
		if i32 == 0 {
			v = "0000-00-00"
		} else {
			v = fmt.Sprintf("%04d-%02d-%02d", i32/(16*32), i32/32%16, i32%32)
		}

	case MYSQL_TYPE_YEAR:
		n = 1
		v = int(data[0]) + 1900
	case MYSQL_TYPE_ENUM:
		l := meta & 0xFF
		switch l {
		case 1:
			v = int64(data[0])
			n = 1
		case 2:
			v = int64(binary.BigEndian.Uint16(data))
			n = 2
		default:
			err = fmt.Errorf("Unknown ENUM packlen=%d", l)
		}
	case MYSQL_TYPE_SET:
		nbits := meta & 0xFF
		n = int(nbits+7) / 8

		v, err = decodeBit(data, int(nbits), n)
	case MYSQL_TYPE_BLOB, MYSQL_TYPE_GEOMETRY:
		// Warning: GEOMETRY seems to be stored using that same format as blob.
		// This was determined by reverse engineering rather than inpecting
		// the MySQL source. This is known to work for MYSQL >= 5.6
		// where meta == 4, but hasn't been tested on other versions.
		switch meta {
		case 1:
			length = int(data[0])
			v = data[1 : 1+length]
			n = length + 1
		case 2:
			length = int(binary.LittleEndian.Uint16(data))
			v = data[2 : 2+length]
			n = length + 2
		case 3:
			length = int(FixedLengthInt(data[0:3]))
			v = data[3 : 3+length]
			n = length + 3
		case 4:
			length = int(binary.LittleEndian.Uint32(data))
			v = data[4 : 4+length]
			n = length + 4
		default:
			err = fmt.Errorf("invalid blob packlen = %d", meta)
		}
	case MYSQL_TYPE_VARCHAR, MYSQL_TYPE_VAR_STRING:
		length = int(meta)
		v, n = decodeString(data, length)
	case MYSQL_TYPE_STRING:
		v, n = decodeString(data, length)
	default:
		err = fmt.Errorf("unsupport type %d in binlog and don't know how to handle", tp)
	}
	return
}
Пример #15
0
// parseEvent instantiates the concrete Event type for the header's event
// type and decodes the body bytes into it. In raw mode everything except
// rotate and format-description events is kept as a GenericEvent. Table-map
// events are cached by table id for later row events.
func (p *BinlogParser) parseEvent(h *EventHeader, data []byte) (Event, error) {
	var e Event
	log.Debugf("Parsing %v", h.EventType)
	if h.EventType == FORMAT_DESCRIPTION_EVENT {
		// Remember the stream format; later events depend on it (checksum
		// algorithm, per-type header lengths).
		p.format = &FormatDescriptionEvent{}
		e = p.format
	} else {
		// Strip the trailing 4-byte CRC32 checksum when the master declared one.
		if p.format != nil && p.format.ChecksumAlgorithm == BINLOG_CHECKSUM_ALG_CRC32 {
			data = data[0 : len(data)-4]
		}

		if h.EventType == ROTATE_EVENT {
			e = &RotateEvent{}
		} else if !p.rawMode {
			switch h.EventType {
			case QUERY_EVENT:
				e = &QueryEvent{}
			case XID_EVENT:
				e = &XIDEvent{}
			case TABLE_MAP_EVENT:
				te := &TableMapEvent{}
				// A declared header length of 6 means 4-byte table ids
				// (older servers); otherwise table ids are 6 bytes.
				if p.format.EventTypeHeaderLengths[TABLE_MAP_EVENT-1] == 6 {
					te.tableIDSize = 4
				} else {
					te.tableIDSize = 6
				}
				e = te
			case WRITE_ROWS_EVENTv0,
				UPDATE_ROWS_EVENTv0,
				DELETE_ROWS_EVENTv0,
				WRITE_ROWS_EVENTv1,
				DELETE_ROWS_EVENTv1,
				UPDATE_ROWS_EVENTv1,
				WRITE_ROWS_EVENTv2,
				UPDATE_ROWS_EVENTv2,
				DELETE_ROWS_EVENTv2:
				e = p.newRowsEvent(h)
			case ROWS_QUERY_EVENT:
				e = &RowsQueryEvent{}
			case GTID_EVENT:
				e = &GTIDEvent{}
			case BEGIN_LOAD_QUERY_EVENT:
				e = &BeginLoadQueryEvent{}
			case EXECUTE_LOAD_QUERY_EVENT:
				e = &ExecuteLoadQueryEvent{}
			case MARIADB_ANNOTATE_ROWS_EVENT:
				e = &MariadbAnnotaeRowsEvent{}
			case MARIADB_BINLOG_CHECKPOINT_EVENT:
				e = &MariadbBinlogCheckPointEvent{}
			case MARIADB_GTID_LIST_EVENT:
				e = &MariadbGTIDListEvent{}
			case MARIADB_GTID_EVENT:
				ee := &MariadbGTIDEvent{}
				ee.GTID.ServerID = h.ServerID
				e = ee
			default:
				e = &GenericEvent{}
			}
		} else {
			e = &GenericEvent{}
		}
	}

	if err := e.Decode(data); err != nil {
		log.Errorf("Parser decoding %v failed %v", h.EventType, err)
		return nil, &EventError{h, err.Error(), data}
	}
	//e.Dump(os.Stdout)

	// Cache table definitions so subsequent row events can resolve columns.
	if te, ok := e.(*TableMapEvent); ok {
		p.tables[te.TableID] = te
	}

	// If MySQL restart, it may use the same table id for different tables.
	// We must clear the table map before parsing new events.
	// We have no better way to known whether the event is before or after restart,
	// So we have to clear the table map on every rotate event.
	if _, ok := e.(*RotateEvent); ok {
		p.tables = make(map[uint64]*TableMapEvent)
	}

	return e, nil
}
}