Пример #1
0
// Update inserts the row currently buffered in self.rowValues into
// self.tableName and then empties the buffer. Create() and AddRow() must have
// been called before this method.
//
// The generated statement has the shape
//
//	insert into table1(`field1`,`field2`)values(?,?);
//
// and the row values are handed to the driver as bound parameters rather than
// being spliced into the SQL text. The earlier hand-written escaping only
// covered the double quote, so a value containing a backslash (for example
// one ending in `\`) could terminate its string literal early and corrupt or
// hijack the statement.
//
// Nothing is executed when the table name, the column list or the buffered
// row is empty: no valid insert can be built from them, and self.sqlCode may
// still hold an unrelated statement left over from a previous call.
//
// Errors are reported through util.CheckErr. The number of buffered values is
// not compared with the number of columns here; a mismatch is left for the
// database to reject.
func (self *MyTable) Update(db *sql.DB) {
	// The buffered row is consumed by this call on every exit path, so a
	// failed insert cannot leak its values into the next row.
	defer func() { self.rowValues = []string{} }()

	if self.tableName == "" || len(self.columnNames) == 0 || len(self.rowValues) == 0 {
		return
	}

	quoted := make([]string, len(self.columnNames))
	for i, name := range self.columnNames {
		quoted[i] = "`" + name + "`"
	}
	// One "?" per buffered value, comma separated.
	placeholders := strings.TrimSuffix(strings.Repeat(`?,`, len(self.rowValues)), `,`)
	self.sqlCode = `insert into ` + self.tableName +
		`(` + strings.Join(quoted, `,`) + `)values(` + placeholders + `);`

	stmt, err := db.Prepare(self.sqlCode)
	util.CheckErr(err)
	if err != nil {
		// stmt is nil when Prepare fails; calling Exec on it would panic if
		// util.CheckErr merely reports the error and returns.
		return
	}
	defer stmt.Close()

	args := make([]interface{}, len(self.rowValues))
	for i, v := range self.rowValues {
		args[i] = v
	}
	_, err = stmt.Exec(args...)
	util.CheckErr(err)
}
Пример #2
0
// Create builds the "create table if not exists" statement for self.tableName,
// stores it in self.sqlCode and executes it on db. SetTableName() and
// AddColumn() must have been called before this method.
//
// The table always receives an auto-increment integer primary key named id;
// every entry of self.columnNames becomes a `varchar(255) not null` column.
//
// When no table name has been set nothing is executed, because self.sqlCode
// could otherwise still contain a statement from an earlier operation.
// Errors are reported through util.CheckErr.
func (self *MyTable) Create(db *sql.DB) {
	if self.tableName == "" {
		return
	}

	self.sqlCode = `create table if not exists ` + self.tableName + `(`
	self.sqlCode += ` id int(8) not null primary key auto_increment`
	// Ranging over a nil slice is a no-op, so no separate nil check is needed.
	for _, name := range self.columnNames {
		self.sqlCode += `,` + name + ` varchar(255) not null`
	}
	self.sqlCode += `);`

	stmt, err := db.Prepare(self.sqlCode)
	util.CheckErr(err)
	if err != nil {
		// stmt is nil when Prepare fails; calling Exec on it would panic if
		// util.CheckErr merely reports the error and returns.
		return
	}
	// Release the server-side prepared statement once the DDL has run.
	defer stmt.Close()

	_, err = stmt.Exec()
	util.CheckErr(err)
}
Пример #3
0
// Create builds the "create table if not exists" statement for self.tableName,
// stores it in self.sqlCode and executes it through self.DB. SetTableName()
// and AddColumn() must have been called before this method.
//
// Unless self.customPrimaryKey is set, an auto-increment integer primary key
// named id is added first. Each entry of self.columnNames contributes
// "entry[0] entry[1]" to the column list (presumably the column name followed
// by its type definition).
//
// The method does nothing when no columns have been added. Errors are
// reported through util.CheckErr. The receiver is returned so calls can be
// chained.
func (self *MyTable) Create() *MyTable {
	if len(self.columnNames) == 0 {
		return self
	}

	self.sqlCode = `create table if not exists ` + self.tableName + `(`
	if !self.customPrimaryKey {
		self.sqlCode += `id int(12) not null primary key auto_increment,`
	}
	for _, column := range self.columnNames {
		self.sqlCode += column[0] + ` ` + column[1] + `,`
	}
	// At least one column was appended above, so the statement ends with a
	// comma that has to be replaced by the closing parenthesis.
	self.sqlCode = self.sqlCode[:len(self.sqlCode)-1] + `);`

	stmt, err := self.DB.Prepare(self.sqlCode)
	util.CheckErr(err)
	if err != nil {
		// stmt is nil when Prepare fails; calling Exec on it would panic if
		// util.CheckErr merely reports the error and returns.
		return self
	}
	// Release the prepared statement once the DDL has run.
	defer stmt.Close()

	_, err = stmt.Exec()
	util.CheckErr(err)
	return self
}
Пример #4
0
// AutoInsert buffers one row per call and flushes the pending rows on its own
// when the buffered byte count would exceed max_allowed_packet.
//
// A row whose values alone are larger than max_allowed_packet is rejected
// with an error log and is not buffered. Otherwise the row size is added to
// self.size; if that pushes the total over the limit, FlushInsert is called
// and the row is submitted again (this relies on FlushInsert lowering
// self.size - TODO confirm against its implementation). The return value is
// the receiver in the rejection case and whatever addRow returns otherwise.
func (self *MyTable) AutoInsert(value []string) *MyTable {
	rowBytes := 0
	for i := range value {
		rowBytes += len(value[i])
	}

	if rowBytes > max_allowed_packet {
		logs.Log.Error("%v", "packet for query is too large. Try adjusting the 'maxallowedpacket'variable in the 'config.ini'")
		return self
	}

	self.size += rowBytes
	if self.size <= max_allowed_packet {
		return self.addRow(value)
	}

	// The buffer is over the limit: write out what is pending, then retry
	// this row from the top.
	util.CheckErr(self.FlushInsert())
	return self.AutoInsert(value)
}
Пример #5
0
// init registers the "mysql" output handler in Output.
//
// The handler takes one connection from mysql.MysqlPool (returned to the pool
// when the handler exits) and walks the data cells in
// self.DockerQueue.Dockers[dataIndex]. For every distinct sub-namespace it
// creates one table named "<namespace>" or "<namespace>__<subNamespace>" whose
// columns are the rule's ItemFields (MEDIUMTEXT) followed by Url, ParentUrl
// and DownloadTime. Each data cell is then written as one row via
// AddRow(...).Update().
func init() {
	Output["mysql"] = func(self *Collector, dataIndex int) {
		src, isSrc := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !isSrc || src == nil {
			logs.Log.Error("链接Mysql数据库超时,无法输出!")
			return
		}
		defer mysql.MysqlPool.Free(src)

		// Tables already prepared during this call, keyed by sub-namespace.
		tables := make(map[string]*mysql.MyTable)
		namespace := util.FileNameReplace(self.namespace())

		for _, cell := range self.DockerQueue.Dockers[dataIndex] {
			sub := util.FileNameReplace(self.subNamespace(cell))
			tableName := namespace
			if sub != "" {
				tableName += "__" + sub
			}

			table, known := tables[sub]
			if !known {
				table = mysql.New(src.DB)
				tables[sub] = table
				table.SetTableName(tableName)
				for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
					table.AddColumn(title + ` MEDIUMTEXT`)
				}
				table.
					AddColumn(`Url VARCHAR(255)`, `ParentUrl VARCHAR(255)`, `DownloadTime VARCHAR(50)`).
					Create()
			}

			for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
				fields := cell["Data"].(map[string]interface{})
				raw := fields[title]
				// Strings are stored verbatim and a missing value becomes the
				// empty string; anything else is serialised with
				// util.JsonString.
				if text, isText := raw.(string); isText || raw == nil {
					table.AddRow(text)
				} else {
					table.AddRow(util.JsonString(raw))
				}
			}

			util.CheckErr(
				table.
					AddRow(cell["Url"].(string), cell["ParentUrl"].(string), cell["DownloadTime"].(string)).
					Update(),
			)
		}
	}
}
Пример #6
0
// init registers the "mysql" handler in DataOutput.
//
// Tables that have been created are remembered in a map shared by all
// invocations of the handler and guarded by a sync.RWMutex; a lookup hands
// out a Clone() of the remembered table. Within a single invocation the
// tables in use are additionally kept in a local map keyed by table name.
//
// The handler returns an error only when mysql.DB() fails. A failed
// table.Create() is logged and the affected data cell is skipped; errors from
// FlushInsert are passed to util.CheckErr.
func init() {
	var (
		sharedTables     = map[string]*mysql.MyTable{}
		sharedTablesLock sync.RWMutex
	)

	// lookupTable returns a clone of the shared table registered under name.
	lookupTable := func(name string) (*mysql.MyTable, bool) {
		sharedTablesLock.RLock()
		defer sharedTablesLock.RUnlock()
		if tab, found := sharedTables[name]; found {
			return tab.Clone(), true
		}
		return nil, false
	}

	// storeTable registers tab under name in the shared map.
	storeTable := func(name string, tab *mysql.MyTable) {
		sharedTablesLock.Lock()
		defer sharedTablesLock.Unlock()
		sharedTables[name] = tab
	}

	DataOutput["mysql"] = func(self *Collector) error {
		if _, err := mysql.DB(); err != nil {
			return fmt.Errorf("Mysql数据库链接失败: %v", err)
		}

		localTables := make(map[string]*mysql.MyTable)
		namespace := util.FileNameReplace(self.namespace())

		for _, cell := range self.dataDocker {
			sub := util.FileNameReplace(self.subNamespace(cell))
			tName := joinNamespaces(namespace, sub)

			table, inUse := localTables[tName]
			if !inUse {
				if shared, found := lookupTable(tName); found {
					table = shared
				} else {
					// First time this table is seen: define and create it.
					table = mysql.New()
					table.SetTableName(tName)
					for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
						table.AddColumn(title + ` MEDIUMTEXT`)
					}
					if self.Spider.OutDefaultField() {
						table.AddColumn(`Url VARCHAR(255)`, `ParentUrl VARCHAR(255)`, `DownloadTime VARCHAR(50)`)
					}
					if err := table.Create(); err != nil {
						logs.Log.Error("%v", err)
						continue
					}
					storeTable(tName, table)
				}
				localTables[tName] = table
			}

			row := []string{}
			for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
				fields := cell["Data"].(map[string]interface{})
				raw := fields[title]
				// Strings are stored verbatim and a missing value becomes the
				// empty string; anything else is serialised with
				// util.JsonString.
				if text, isText := raw.(string); isText || raw == nil {
					row = append(row, text)
				} else {
					row = append(row, util.JsonString(raw))
				}
			}
			if self.Spider.OutDefaultField() {
				row = append(row, cell["Url"].(string), cell["ParentUrl"].(string), cell["DownloadTime"].(string))
			}
			table.AutoInsert(row)
		}

		// Write out whatever each table still has buffered.
		for _, table := range localTables {
			util.CheckErr(table.FlushInsert())
		}
		return nil
	}
}
Пример #7
0
/************************ Kafka output ***************************/

// init registers the "kafka" handler in DataOutput.
//
// Senders are remembered per topic in a map shared by all invocations of the
// handler and guarded by a sync.RWMutex; within a single invocation the
// senders in use are additionally kept in a local map. The topic name is
// joinNamespaces(namespace, subNamespace).
//
// Every data cell is pushed as a map holding the rule's ItemFields and, when
// self.Spider.OutDefaultField() is true, the keys "url", "parent_url" and
// "download_time". The handler returns an error only when kafka.GetProducer()
// fails; errors from Push are passed to util.CheckErr.
func init() {
	var (
		sharedSenders     = map[string]*kafka.KafkaSender{}
		sharedSendersLock sync.RWMutex
	)

	// lookupSender returns the shared sender registered under name.
	lookupSender := func(name string) (*kafka.KafkaSender, bool) {
		sharedSendersLock.RLock()
		defer sharedSendersLock.RUnlock()
		sender, found := sharedSenders[name]
		return sender, found
	}

	// storeSender registers sender under name in the shared map.
	storeSender := func(name string, sender *kafka.KafkaSender) {
		sharedSendersLock.Lock()
		defer sharedSendersLock.Unlock()
		sharedSenders[name] = sender
	}

	DataOutput["kafka"] = func(self *Collector) error {
		if _, err := kafka.GetProducer(); err != nil {
			return fmt.Errorf("kafka producer失败: %v", err)
		}

		localSenders := make(map[string]*kafka.KafkaSender)
		namespace := util.FileNameReplace(self.namespace())

		for _, cell := range self.dataDocker {
			sub := util.FileNameReplace(self.subNamespace(cell))
			topic := joinNamespaces(namespace, sub)

			sender, inUse := localSenders[topic]
			if !inUse {
				if shared, found := lookupSender(topic); found {
					sender = shared
				} else {
					// First time this topic is seen: create its sender.
					sender = kafka.New()
					sender.SetTopic(topic)
					storeSender(topic, sender)
				}
				localSenders[topic] = sender
			}

			payload := make(map[string]interface{})
			for _, title := range self.MustGetRule(cell["RuleName"].(string)).ItemFields {
				fields := cell["Data"].(map[string]interface{})
				raw := fields[title]
				// Strings are sent verbatim and a missing value becomes the
				// empty string; anything else is serialised with
				// util.JsonString.
				if text, isText := raw.(string); isText || raw == nil {
					payload[title] = text
				} else {
					payload[title] = util.JsonString(raw)
				}
			}
			if self.Spider.OutDefaultField() {
				payload["url"] = cell["Url"].(string)
				payload["parent_url"] = cell["ParentUrl"].(string)
				payload["download_time"] = cell["DownloadTime"].(string)
			}

			util.CheckErr(sender.Push(payload))
		}
		return nil
	}
}