Beispiel #1
0
// ReRead calls CleanRead and then loads deduplication samples into
// self.sampling from the backend named by provider (compared
// case-insensitively).
//
// For status.MGO every document returned by the "find" operation whose "_id"
// is a string is recorded as a sample. Any other provider, including
// status.FILE, decodes the JSON file at dir+fileName into self.sampling.
//
// Failures never panic: query, read and decode errors are logged, and a file
// that cannot be opened is silently treated as "nothing to restore".
func (self *Deduplication) ReRead(provider string) {
	self.CleanRead()

	switch strings.ToLower(provider) {
	case status.MGO:
		docs, err := mgo.Mgo("find", map[string]interface{}{
			"Database":   config.MGO_OUTPUT.DefaultDB,
			"Collection": collection,
		})
		if err != nil {
			logs.Log.Error("去重读取mgo: %v", err)
			return
		}
		// Checked assertions: an unexpected result shape yields an empty
		// list instead of a runtime panic.
		result, _ := docs.(map[string]interface{})
		list, _ := result["Docs"].([]interface{})
		for _, v := range list {
			doc, ok := v.(bson.M)
			if !ok {
				continue
			}
			if id, ok := doc["_id"].(string); ok {
				self.sampling[id] = true
			}
		}

	case status.FILE:
		fallthrough
	default:
		f, err := os.Open(dir + fileName)
		if err != nil {
			// Kept silent as before; presumably the file simply does not
			// exist yet on a first run.
			return
		}
		defer f.Close()

		b, err := ioutil.ReadAll(f)
		if err != nil {
			logs.Log.Error("去重读取文件: %v", err)
			return
		}
		if len(b) == 0 {
			// An empty file holds no samples and is not valid JSON.
			return
		}
		if err := json.Unmarshal(b, &self.sampling); err != nil {
			logs.Log.Error("去重读取文件: %v", err)
		}
	}
}
Beispiel #2
0
// Write persists every key of self.sampling to the backend named by provider
// (compared case-insensitively).
//
// For status.MGO one document {"_id": key} per sample is sent in a single
// "insert" operation. Any other provider, including status.FILE, writes the
// whole sampling map as JSON to dir+fileName, creating dir when needed.
//
// Errors are logged rather than returned. The samples are encoded before the
// target file is created, so an encoding failure cannot truncate an existing
// file.
func (self *Deduplication) Write(provider string) {
	switch strings.ToLower(provider) {
	case status.MGO:
		docs := make([]map[string]interface{}, 0, len(self.sampling))
		for key := range self.sampling {
			docs = append(docs, map[string]interface{}{"_id": key})
		}
		if _, err := mgo.Mgo("insert", map[string]interface{}{
			"Database":   config.MGO_OUTPUT.DefaultDB,
			"Collection": collection,
			"Docs":       docs,
		}); err != nil {
			logs.Log.Error("Error: %v\n", err)
		}

	case status.FILE:
		fallthrough
	default:
		// Make sure the target directory exists.
		if d, err := os.Stat(dir); err != nil || !d.IsDir() {
			if err := os.MkdirAll(dir, 0777); err != nil {
				logs.Log.Error("Error: %v\n", err)
				return
			}
		}

		b, err := json.Marshal(self.sampling)
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
			return
		}

		// Create (or truncate) the file and write the encoded samples.
		f, err := os.Create(dir + fileName)
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
			return
		}
		_, err = f.Write(b)
		// Close may be the first place a write error surfaces.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
		}
	}
}
Beispiel #3
0
// Write persists every key of self.sampling to the backend named by provider
// (compared case-insensitively).
//
// For status.MGO one document {"_id": key} per sample is sent in a single
// "insert" operation against config.DEDUPLICATION.DB / COLLECTION. Any other
// provider, including status.FILE, writes the whole sampling map as JSON to
// config.DEDUPLICATION.FULL_FILE_NAME, creating its directory when needed.
//
// Errors are logged rather than returned. The samples are encoded before the
// target file is created, so an encoding failure cannot truncate an existing
// file.
func (self *Deduplication) Write(provider string) {
	switch strings.ToLower(provider) {
	case status.MGO:
		docs := make([]map[string]interface{}, 0, len(self.sampling))
		for key := range self.sampling {
			docs = append(docs, map[string]interface{}{"_id": key})
		}
		if err := mgo.Mgo(nil, "insert", map[string]interface{}{
			"Database":   config.DEDUPLICATION.DB,
			"Collection": config.DEDUPLICATION.COLLECTION,
			"Docs":       docs,
		}); err != nil {
			logs.Log.Error("Error: %v\n", err)
		}

	case status.FILE:
		fallthrough
	default:
		fullName := config.DEDUPLICATION.FULL_FILE_NAME
		p, _ := path.Split(fullName)

		// Make sure the target directory exists.
		if d, err := os.Stat(p); err != nil || !d.IsDir() {
			if err := os.MkdirAll(p, 0777); err != nil {
				logs.Log.Error("Error: %v\n", err)
				return
			}
		}

		b, err := json.Marshal(self.sampling)
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
			return
		}

		// Create (or truncate) the file and write the encoded samples.
		f, err := os.Create(fullName)
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
			return
		}
		_, err = f.Write(b)
		// Close may be the first place a write error surfaces.
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
		}
	}
}
Beispiel #4
0
// ReRead calls CleanRead and then loads deduplication samples into
// self.sampling from the backend named by provider (compared
// case-insensitively).
//
// For status.MGO every document returned by the "find" operation on
// config.DEDUPLICATION.DB / COLLECTION whose "_id" is a string is recorded as
// a sample. Any other provider, including status.FILE, decodes the JSON file
// config.DEDUPLICATION.FULL_FILE_NAME into self.sampling.
//
// Failures never panic: query, read and decode errors are logged, and a file
// that cannot be opened is silently treated as "nothing to restore".
func (self *Deduplication) ReRead(provider string) {
	self.CleanRead()

	switch strings.ToLower(provider) {
	case status.MGO:
		docs := map[string]interface{}{}
		err := mgo.Mgo(&docs, "find", map[string]interface{}{
			"Database":   config.DEDUPLICATION.DB,
			"Collection": config.DEDUPLICATION.COLLECTION,
		})
		if err != nil {
			logs.Log.Error("去重读取mgo: %v", err)
			return
		}
		// Checked assertions: a missing or unexpectedly typed "Docs" entry
		// yields an empty list instead of a runtime panic.
		list, _ := docs["Docs"].([]interface{})
		for _, v := range list {
			doc, ok := v.(bson.M)
			if !ok {
				continue
			}
			if id, ok := doc["_id"].(string); ok {
				self.sampling[id] = true
			}
		}

	case status.FILE:
		fallthrough
	default:
		f, err := os.Open(config.DEDUPLICATION.FULL_FILE_NAME)
		if err != nil {
			// Kept silent as before; presumably the file simply does not
			// exist yet on a first run.
			return
		}
		defer f.Close()

		b, err := ioutil.ReadAll(f)
		if err != nil {
			logs.Log.Error("去重读取文件: %v", err)
			return
		}
		if len(b) == 0 {
			// An empty file holds no samples and is not valid JSON.
			return
		}
		if err := json.Unmarshal(b, &self.sampling); err != nil {
			logs.Log.Error("去重读取文件: %v", err)
		}
	}
}
Beispiel #5
0
// flush persists the pending success records in self.new to the backend named
// by provider, copies them into self.old and resets self.new. The whole
// operation runs under the write lock.
//
// It returns the number of records that were pending. It returns 0 when there
// was nothing to do, and also when the MySQL connection, the JSON encoding or
// the opening of the output file failed; in those failure cases the pending
// records are left in self.new untouched.
//
// Backends:
//   - "mgo":   one {"_id": key} document per record, inserted in one call.
//   - "mysql": one row per record in table SUCCESS_FILE.
//   - other:   appended to SUCCESS_FILE_FULL as a comma-prefixed list of JSON
//     object members (the encoded map with its leading '{' replaced
//     by ',' and its trailing '}' dropped).
func (self *Success) flush(provider string) (sLen int) {
	self.RWMutex.Lock()
	defer self.RWMutex.Unlock()

	sLen = len(self.new)
	if sLen == 0 {
		return sLen
	}

	switch provider {
	case "mgo":
		docs := make([]map[string]interface{}, 0, sLen)
		for key := range self.new {
			docs = append(docs, map[string]interface{}{"_id": key})
			self.old[key] = true
		}
		// The insert error is now reported; state handling is unchanged.
		if err := mgo.Mgo(nil, "insert", map[string]interface{}{
			"Database":   MGO_DB,
			"Collection": SUCCESS_FILE,
			"Docs":       docs,
		}); err != nil {
			logs.Log.Error("Error: %v\n", err)
		}

	case "mysql":
		db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || db == nil {
			logs.Log.Error("链接Mysql数据库超时,无法保存去重记录!")
			return 0
		}
		defer mysql.MysqlPool.Free(db)
		table := mysql.New(db.DB).
			SetTableName(SUCCESS_FILE).
			CustomPrimaryKey(`id VARCHAR(255) not null primary key`).
			Create()
		for key := range self.new {
			table.AddRow(key).Update()
			self.old[key] = true
		}

	default:
		once.Do(mkdir)

		b, err := json.Marshal(self.new)
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
			return 0
		}
		f, err := os.OpenFile(SUCCESS_FILE_FULL, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0660)
		if err != nil {
			// Nothing was written, so keep the records pending.
			logs.Log.Error("Error: %v\n", err)
			return 0
		}

		// sLen > 0 guarantees b is at least `{"k":true}`, so indexing is safe.
		b[0] = ','
		_, err = f.Write(b[:len(b)-1])
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
		}

		for key := range self.new {
			self.old[key] = true
		}
	}

	self.new = make(map[string]bool)
	return sLen
}
Beispiel #6
0
// ReadSuccess records provider on the History and, depending on inherit,
// resets or reloads the set of previously successful records.
//
//   - inherit == false: both record maps are replaced by empty maps and the
//     inheritable flag is cleared; nothing is read.
//   - inherit == true and the previous call also inherited: nothing changes.
//   - inherit == true after a non-inheriting state: both maps are reset and
//     self.Success.old is loaded from the backend named by provider ("mgo",
//     "mysql", or a file at self.Success.fileName for anything else).
//
// Load failures are logged or silently ignored; they never panic. Malformed
// entries (a non-string "_id", a row that fails to scan) are skipped.
//
// NOTE(review): only the assignment of self.provider is done under the
// RWMutex; the Success maps are modified without it — confirm callers do not
// run this concurrently with readers.
func (self *History) ReadSuccess(provider string, inherit bool) {
	self.RWMutex.Lock()
	self.provider = provider
	self.RWMutex.Unlock()

	if !inherit {
		// History is not inherited this time.
		self.Success.old = make(map[string]bool)
		self.Success.new = make(map[string]bool)
		self.Success.inheritable = false
		return
	}
	if self.Success.inheritable {
		// History was inherited last time as well; keep what is loaded.
		return
	}
	// History was not inherited last time but is now: reload from storage.
	self.Success.old = make(map[string]bool)
	self.Success.new = make(map[string]bool)
	self.Success.inheritable = true

	switch provider {
	case "mgo":
		docs := map[string]interface{}{}
		err := mgo.Mgo(&docs, "find", map[string]interface{}{
			"Database":   config.DB_NAME,
			"Collection": self.Success.tabName,
		})
		if err != nil {
			logs.Log.Error(" *     Fail  [读取成功记录][mgo]: %v\n", err)
			return
		}
		// Checked assertions: unexpected shapes are skipped, not panicked on.
		list, _ := docs["Docs"].([]interface{})
		for _, v := range list {
			doc, ok := v.(bson.M)
			if !ok {
				continue
			}
			if id, ok := doc["_id"].(string); ok {
				self.Success.old[id] = true
			}
		}

	case "mysql":
		if _, err := mysql.DB(); err != nil {
			logs.Log.Error(" *     Fail  [读取成功记录][mysql]: %v\n", err)
			return
		}
		table, ok := getReadMysqlTable(self.Success.tabName)
		if !ok {
			table = mysql.New().SetTableName(self.Success.tabName)
			setReadMysqlTable(self.Success.tabName, table)
		}
		rows, err := table.SelectAll()
		if err != nil {
			return
		}

		for rows.Next() {
			var id string
			if err := rows.Scan(&id); err != nil {
				// Do not record the zero-value "" for an unreadable row.
				continue
			}
			self.Success.old[id] = true
		}

	default:
		f, err := os.Open(self.Success.fileName)
		if err != nil {
			return
		}
		defer f.Close()

		b, err := ioutil.ReadAll(f)
		if err != nil {
			logs.Log.Error(" *     Fail  [读取成功记录][file]: %v\n", err)
			return
		}
		if len(b) == 0 {
			return
		}
		// The first byte is overwritten with '{' and a '}' is appended so the
		// file content parses as a single JSON object.
		b[0] = '{'
		if err := json.Unmarshal(append(b, '}'), &self.Success.old); err != nil {
			logs.Log.Error(" *     Fail  [读取成功记录][file]: %v\n", err)
		}
	}
	logs.Log.Informational(" *     [读取成功记录]: %v 条\n", len(self.Success.old))
}
Beispiel #7
0
// ReadSuccess records provider on the History and, depending on inherit,
// resets or reloads the set of previously successful records.
//
//   - inherit == false: both record maps are replaced by empty maps and the
//     inheritable flag is cleared; nothing is read.
//   - inherit == true and the previous call also inherited: nothing changes.
//   - inherit == true after a non-inheriting state: both maps are reset and
//     self.Success.old is loaded from the backend named by provider ("mgo",
//     "mysql", or the file SUCCESS_FILE_FULL for anything else).
//
// Load failures are logged or silently ignored; they never panic. In
// particular an empty file is treated as "no records" instead of indexing
// into an empty buffer. Malformed entries (a non-string "_id", a row that
// fails to scan) are skipped.
//
// NOTE(review): only the assignment of self.provider is done under the
// RWMutex; the Success maps are modified without it — confirm callers do not
// run this concurrently with readers.
func (self *History) ReadSuccess(provider string, inherit bool) {
	self.RWMutex.Lock()
	self.provider = provider
	self.RWMutex.Unlock()

	if !inherit {
		// History is not inherited this time.
		self.Success.old = make(map[string]bool)
		self.Success.new = make(map[string]bool)
		self.Success.inheritable = false
		return
	}
	if self.Success.inheritable {
		// History was inherited last time as well; keep what is loaded.
		return
	}
	// History was not inherited last time but is now: reload from storage.
	self.Success.old = make(map[string]bool)
	self.Success.new = make(map[string]bool)
	self.Success.inheritable = true

	switch provider {
	case "mgo":
		docs := map[string]interface{}{}
		err := mgo.Mgo(&docs, "find", map[string]interface{}{
			"Database":   MGO_DB,
			"Collection": SUCCESS_FILE,
		})
		if err != nil {
			logs.Log.Error("从mgo读取成功记录: %v", err)
			return
		}
		// Checked assertions: unexpected shapes are skipped, not panicked on.
		list, _ := docs["Docs"].([]interface{})
		for _, v := range list {
			doc, ok := v.(bson.M)
			if !ok {
				continue
			}
			if id, ok := doc["_id"].(string); ok {
				self.Success.old[id] = true
			}
		}

	case "mysql":
		db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || db == nil {
			// Deliberately silent, as in the original.
			return
		}
		defer mysql.MysqlPool.Free(db)
		rows, err := mysql.New(db.DB).
			SetTableName("`" + SUCCESS_FILE + "`").
			SelectAll()
		if err != nil {
			return
		}

		for rows.Next() {
			var id string
			if err := rows.Scan(&id); err != nil {
				// Do not record the zero-value "" for an unreadable row.
				continue
			}
			self.Success.old[id] = true
		}

	default:
		f, err := os.Open(SUCCESS_FILE_FULL)
		if err != nil {
			return
		}
		defer f.Close()

		b, err := ioutil.ReadAll(f)
		if err != nil {
			logs.Log.Error("从文件读取成功记录: %v", err)
			return
		}
		if len(b) == 0 {
			// Empty file: b[0] below would panic with index out of range.
			return
		}
		// The first byte is overwritten with '{' and a '}' is appended so the
		// file content parses as a single JSON object.
		b[0] = '{'
		if err := json.Unmarshal(append(b, '}'), &self.Success.old); err != nil {
			logs.Log.Error("从文件读取成功记录: %v", err)
		}
	}
	logs.Log.Informational(" *     读出 %v 条成功记录\n", len(self.Success.old))
}
Beispiel #8
0
// flush persists the pending success records in self.new to the backend named
// by provider, copies them into self.old and resets self.new. The whole
// operation runs under the write lock.
//
// sLen is always the number of records that were pending on entry. err is
// non-nil when persisting failed.
//
// Backends:
//   - "mgo":   one {"_id": key} document per record, inserted in one call. A
//     failed insert is reported through err; the records are still
//     moved to self.old and self.new is still reset.
//   - "mysql": one row per record in table self.tabName, created on first
//     use. On failure the function returns before self.new is reset.
//   - other:   appended to self.fileName as a comma-prefixed list of JSON
//     object members (the encoded map with its leading '{' replaced
//     by ',' and its trailing '}' dropped). If encoding or opening
//     the file fails, the function returns before any state changes.
func (self *Success) flush(provider string) (sLen int, err error) {
	self.RWMutex.Lock()
	defer self.RWMutex.Unlock()

	sLen = len(self.new)
	if sLen == 0 {
		return sLen, nil
	}

	switch provider {
	case "mgo":
		if mgo.Error() != nil {
			err = fmt.Errorf(" *     Fail  [添加成功记录][mgo]: %v 条 [ERROR]  %v\n", sLen, mgo.Error())
			return sLen, err
		}
		docs := make([]map[string]interface{}, 0, sLen)
		for key := range self.new {
			docs = append(docs, map[string]interface{}{"_id": key})
			self.old[key] = true
		}
		// Use a distinct local name: the original `err := ...` shadowed the
		// named result, so the wrapped error never reached the caller.
		insertErr := mgo.Mgo(nil, "insert", map[string]interface{}{
			"Database":   config.DB_NAME,
			"Collection": self.tabName,
			"Docs":       docs,
		})
		if insertErr != nil {
			err = fmt.Errorf(" *     Fail  [添加成功记录][mgo]: %v 条 [ERROR]  %v\n", sLen, insertErr)
		}

	case "mysql":
		if _, dbErr := mysql.DB(); dbErr != nil {
			return sLen, fmt.Errorf(" *     Fail  [添加成功记录][mysql]: %v 条 [ERROR]  %v\n", sLen, dbErr)
		}
		table, ok := getWriteMysqlTable(self.tabName)
		if !ok {
			table = mysql.New()
			table.SetTableName(self.tabName).CustomPrimaryKey(`id VARCHAR(255) not null primary key`)
			if createErr := table.Create(); createErr != nil {
				return sLen, fmt.Errorf(" *     Fail  [添加成功记录][mysql]: %v 条 [ERROR]  %v\n", sLen, createErr)
			}
			setWriteMysqlTable(self.tabName, table)
		}
		for key := range self.new {
			table.AutoInsert([]string{key})
			self.old[key] = true
		}
		if flushErr := table.FlushInsert(); flushErr != nil {
			return sLen, fmt.Errorf(" *     Fail  [添加成功记录][mysql]: %v 条 [ERROR]  %v\n", sLen, flushErr)
		}

	default:
		b, marshalErr := json.Marshal(self.new)
		if marshalErr != nil {
			return sLen, fmt.Errorf(" *     Fail  [添加成功记录][file]: %v 条 [ERROR]  %v\n", sLen, marshalErr)
		}
		f, openErr := os.OpenFile(self.fileName, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0777)
		if openErr != nil {
			// Nothing was written, so keep the records pending.
			return sLen, fmt.Errorf(" *     Fail  [添加成功记录][file]: %v 条 [ERROR]  %v\n", sLen, openErr)
		}

		// sLen > 0 guarantees b is at least `{"k":true}`, so indexing is safe.
		b[0] = ','
		_, writeErr := f.Write(b[:len(b)-1])
		if closeErr := f.Close(); writeErr == nil {
			writeErr = closeErr
		}
		if writeErr != nil {
			err = fmt.Errorf(" *     Fail  [添加成功记录][file]: %v 条 [ERROR]  %v\n", sLen, writeErr)
		}

		for key := range self.new {
			self.old[key] = true
		}
	}

	self.new = make(map[string]bool)
	return sLen, err
}
Beispiel #9
0
// Submit records provider on the receiver and persists the pending samples in
// self.sampling.new to that backend, copies them into self.sampling.old and
// resets self.sampling.new. It runs under the mutex and does nothing when no
// samples are pending.
//
// Backends:
//   - "mgo":   one {"_id": key} document per sample, inserted in one call.
//   - "mysql": one row per sample in table config.DEDUPLICATION.FILE_NAME.
//   - other:   appended to config.COMM_PATH.CACHE + "/" +
//     config.DEDUPLICATION.FILE_NAME as a comma-prefixed list of
//     JSON object members (the encoded map with its leading '{'
//     replaced by ',' and its trailing '}' dropped).
//
// Errors are logged, not returned. When the MySQL connection, the JSON
// encoding or the opening of the output file fails, the function returns
// early and the pending samples stay in self.sampling.new.
func (self *Deduplication) Submit(provider string) {
	self.Mutex.Lock()
	defer self.Unlock()

	self.provider = provider

	pending := len(self.sampling.new)
	if pending == 0 {
		return
	}

	switch self.provider {
	case "mgo":
		docs := make([]map[string]interface{}, 0, pending)
		for key := range self.sampling.new {
			docs = append(docs, map[string]interface{}{"_id": key})
			self.sampling.old[key] = true
		}
		// The insert error is now reported; state handling is unchanged.
		if err := mgo.Mgo(nil, "insert", map[string]interface{}{
			"Database":   config.MGO.DB,
			"Collection": config.DEDUPLICATION.FILE_NAME,
			"Docs":       docs,
		}); err != nil {
			logs.Log.Error("Error: %v\n", err)
		}

	case "mysql":
		db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || db == nil {
			logs.Log.Error("链接Mysql数据库超时,无法保存去重记录!")
			return
		}
		defer mysql.MysqlPool.Free(db)
		table := mysql.New(db.DB).
			SetTableName(config.DEDUPLICATION.FILE_NAME).
			CustomPrimaryKey(`id VARCHAR(255) not null primary key`).
			Create()
		for key := range self.sampling.new {
			table.AddRow(key).Update()
			self.sampling.old[key] = true
		}

	default:
		fullName := config.COMM_PATH.CACHE + "/" + config.DEDUPLICATION.FILE_NAME
		p, _ := path.Split(fullName)

		// Make sure the target directory exists.
		if d, err := os.Stat(p); err != nil || !d.IsDir() {
			if err := os.MkdirAll(p, 0777); err != nil {
				logs.Log.Error("Error: %v\n", err)
			}
		}

		b, err := json.Marshal(self.sampling.new)
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
			return
		}
		f, err := os.OpenFile(fullName, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0660)
		if err != nil {
			// Nothing was written, so keep the samples pending.
			logs.Log.Error("Error: %v\n", err)
			return
		}

		// pending > 0 guarantees b is at least `{"k":true}`, so indexing is safe.
		b[0] = ','
		_, err = f.Write(b[:len(b)-1])
		if cerr := f.Close(); err == nil {
			err = cerr
		}
		if err != nil {
			logs.Log.Error("Error: %v\n", err)
		}

		for key := range self.sampling.new {
			self.sampling.old[key] = true
		}
	}
	logs.Log.Informational(" *     新增 %v 条去重样本\n", len(self.sampling.new))
	self.sampling.new = make(map[string]bool)
}
Beispiel #10
0
// Update records provider on the receiver and, depending on inherit, resets
// or reloads the deduplication samples. It runs under the mutex.
//
//   - inherit == false: both sample maps are replaced by empty maps and
//     lastIsInherit is cleared; nothing is read.
//   - inherit == true and the previous call also inherited: nothing changes.
//   - inherit == true after a non-inheriting state: both maps are reset and
//     self.sampling.old is loaded from the backend ("mgo", "mysql", or the
//     file config.COMM_PATH.CACHE + "/" + config.DEDUPLICATION.FILE_NAME for
//     anything else).
//
// Load failures are logged or silently ignored; they never panic. In
// particular an empty file is treated as "no samples" instead of indexing
// into an empty buffer. Malformed entries (a non-string "_id", a row that
// fails to scan) are skipped.
func (self *Deduplication) Update(provider string, inherit bool) {
	self.Mutex.Lock()
	defer self.Unlock()

	self.provider = provider

	if !inherit {
		// History is not inherited this time.
		self.sampling.old = make(map[string]bool)
		self.sampling.new = make(map[string]bool)
		self.lastIsInherit = false
		return
	}
	if self.lastIsInherit {
		// History was inherited last time as well; keep what is loaded.
		return
	}
	// History was not inherited last time but is now: reload from storage.
	self.sampling.old = make(map[string]bool)
	self.sampling.new = make(map[string]bool)
	self.lastIsInherit = true

	switch self.provider {
	case "mgo":
		docs := map[string]interface{}{}
		err := mgo.Mgo(&docs, "find", map[string]interface{}{
			"Database":   config.MGO.DB,
			"Collection": config.DEDUPLICATION.FILE_NAME,
		})
		if err != nil {
			logs.Log.Error("去重读取mgo: %v", err)
			return
		}
		// Checked assertions: unexpected shapes are skipped, not panicked on.
		list, _ := docs["Docs"].([]interface{})
		for _, v := range list {
			doc, ok := v.(bson.M)
			if !ok {
				continue
			}
			if id, ok := doc["_id"].(string); ok {
				self.sampling.old[id] = true
			}
		}

	case "mysql":
		db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc)
		if !ok || db == nil {
			logs.Log.Error("链接Mysql数据库超时,无法读取去重记录!")
			return
		}
		defer mysql.MysqlPool.Free(db)
		rows, err := mysql.New(db.DB).
			SetTableName("`" + config.DEDUPLICATION.FILE_NAME + "`").
			SelectAll()
		if err != nil {
			// Deliberately silent, as in the original.
			return
		}

		for rows.Next() {
			var id string
			if err := rows.Scan(&id); err != nil {
				// Do not record the zero-value "" for an unreadable row.
				continue
			}
			self.sampling.old[id] = true
		}

	default:
		f, err := os.Open(config.COMM_PATH.CACHE + "/" + config.DEDUPLICATION.FILE_NAME)
		if err != nil {
			return
		}
		defer f.Close()

		b, err := ioutil.ReadAll(f)
		if err != nil {
			logs.Log.Error("去重读取文件: %v", err)
			return
		}
		if len(b) == 0 {
			// Empty file: b[0] below would panic with index out of range.
			return
		}
		// The first byte is overwritten with '{' and a '}' is appended so the
		// file content parses as a single JSON object.
		b[0] = '{'
		if err := json.Unmarshal(append(b, '}'), &self.sampling.old); err != nil {
			logs.Log.Error("去重读取文件: %v", err)
		}
	}
	logs.Log.Informational(" *     读出 %v 条去重样本\n", len(self.sampling.old))
}