func (self *Deduplication) ReRead(provider string) { self.CleanRead() switch strings.ToLower(provider) { case status.MGO: docs, err := mgo.Mgo("find", map[string]interface{}{ "Database": config.MGO_OUTPUT.DefaultDB, "Collection": collection, }) if err != nil { logs.Log.Error("去重读取mgo: %v", err) return } for _, v := range docs.(map[string]interface{})["Docs"].([]interface{}) { self.sampling[v.(bson.M)["_id"].(string)] = true } case status.FILE: fallthrough default: f, err := os.Open(dir + fileName) if err != nil { return } defer f.Close() b, _ := ioutil.ReadAll(f) json.Unmarshal(b, &self.sampling) } // fmt.Printf("%#v", self.sampling) }
func (self *Deduplication) Write(provider string) { switch strings.ToLower(provider) { case status.MGO: var docs = make([]map[string]interface{}, len(self.sampling)) var i int for key := range self.sampling { docs[i] = map[string]interface{}{"_id": key} i++ } mgo.Mgo("insert", map[string]interface{}{ "Database": config.MGO_OUTPUT.DefaultDB, "Collection": collection, "Docs": docs, }) case status.FILE: fallthrough default: // 创建/打开目录 d, err := os.Stat(dir) if err != nil || !d.IsDir() { if err := os.MkdirAll(dir, 0777); err != nil { logs.Log.Error("Error: %v\n", err) } } // 创建并写入文件 f, _ := os.Create(dir + fileName) b, _ := json.Marshal(self.sampling) f.Write(b) f.Close() } }
func (self *Deduplication) Write(provider string) { switch strings.ToLower(provider) { case status.MGO: var docs = make([]map[string]interface{}, len(self.sampling)) var i int for key := range self.sampling { docs[i] = map[string]interface{}{"_id": key} i++ } mgo.Mgo(nil, "insert", map[string]interface{}{ "Database": config.DEDUPLICATION.DB, "Collection": config.DEDUPLICATION.COLLECTION, "Docs": docs, }) case status.FILE: fallthrough default: p, _ := path.Split(config.DEDUPLICATION.FULL_FILE_NAME) // 创建/打开目录 d, err := os.Stat(p) if err != nil || !d.IsDir() { if err := os.MkdirAll(p, 0777); err != nil { logs.Log.Error("Error: %v\n", err) } } // 创建并写入文件 f, _ := os.Create(config.DEDUPLICATION.FULL_FILE_NAME) b, _ := json.Marshal(self.sampling) f.Write(b) f.Close() } }
func (self *Deduplication) ReRead(provider string) { self.CleanRead() switch strings.ToLower(provider) { case status.MGO: var docs = map[string]interface{}{} err := mgo.Mgo(&docs, "find", map[string]interface{}{ "Database": config.DEDUPLICATION.DB, "Collection": config.DEDUPLICATION.COLLECTION, }) if err != nil { logs.Log.Error("去重读取mgo: %v", err) return } for _, v := range docs["Docs"].([]interface{}) { self.sampling[v.(bson.M)["_id"].(string)] = true } case status.FILE: fallthrough default: f, err := os.Open(config.DEDUPLICATION.FULL_FILE_NAME) if err != nil { return } defer f.Close() b, _ := ioutil.ReadAll(f) json.Unmarshal(b, &self.sampling) } // fmt.Printf("%#v", self.sampling) }
func (self *Success) flush(provider string) (sLen int) { self.RWMutex.Lock() defer self.RWMutex.Unlock() sLen = len(self.new) if sLen == 0 { return } switch provider { case "mgo": var docs = make([]map[string]interface{}, sLen) var i int for key := range self.new { docs[i] = map[string]interface{}{"_id": key} self.old[key] = true i++ } mgo.Mgo(nil, "insert", map[string]interface{}{ "Database": MGO_DB, "Collection": SUCCESS_FILE, "Docs": docs, }) case "mysql": db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc) if !ok || db == nil { logs.Log.Error("链接Mysql数据库超时,无法保存去重记录!") return 0 } defer mysql.MysqlPool.Free(db) table := mysql.New(db.DB). SetTableName(SUCCESS_FILE). CustomPrimaryKey(`id VARCHAR(255) not null primary key`). Create() for key := range self.new { table.AddRow(key).Update() self.old[key] = true } default: once.Do(mkdir) f, _ := os.OpenFile(SUCCESS_FILE_FULL, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0660) b, _ := json.Marshal(self.new) b[0] = ',' f.Write(b[:len(b)-1]) f.Close() for key := range self.new { self.old[key] = true } } self.new = make(map[string]bool) return }
// 读取成功记录 func (self *History) ReadSuccess(provider string, inherit bool) { self.RWMutex.Lock() self.provider = provider self.RWMutex.Unlock() if !inherit { // 不继承历史记录时 self.Success.old = make(map[string]bool) self.Success.new = make(map[string]bool) self.Success.inheritable = false return } else if self.Success.inheritable { // 本次与上次均继承历史记录时 return } else { // 上次没有继承历史记录,但本次继承时 self.Success.old = make(map[string]bool) self.Success.new = make(map[string]bool) self.Success.inheritable = true } switch provider { case "mgo": var docs = map[string]interface{}{} err := mgo.Mgo(&docs, "find", map[string]interface{}{ "Database": config.DB_NAME, "Collection": self.Success.tabName, }) if err != nil { logs.Log.Error(" * Fail [读取成功记录][mgo]: %v\n", err) return } for _, v := range docs["Docs"].([]interface{}) { self.Success.old[v.(bson.M)["_id"].(string)] = true } case "mysql": _, err := mysql.DB() if err != nil { logs.Log.Error(" * Fail [读取成功记录][mysql]: %v\n", err) return } table, ok := getReadMysqlTable(self.Success.tabName) if !ok { table = mysql.New().SetTableName(self.Success.tabName) setReadMysqlTable(self.Success.tabName, table) } rows, err := table.SelectAll() if err != nil { return } for rows.Next() { var id string err = rows.Scan(&id) self.Success.old[id] = true } default: f, err := os.Open(self.Success.fileName) if err != nil { return } defer f.Close() b, _ := ioutil.ReadAll(f) if len(b) == 0 { return } b[0] = '{' json.Unmarshal(append(b, '}'), &self.Success.old) } logs.Log.Informational(" * [读取成功记录]: %v 条\n", len(self.Success.old)) }
// 读取成功记录 func (self *History) ReadSuccess(provider string, inherit bool) { self.RWMutex.Lock() self.provider = provider self.RWMutex.Unlock() if !inherit { // 不继承历史记录时 self.Success.old = make(map[string]bool) self.Success.new = make(map[string]bool) self.Success.inheritable = false return } else if self.Success.inheritable { // 本次与上次均继承历史记录时 return } else { // 上次没有继承历史记录,但本次继承时 self.Success.old = make(map[string]bool) self.Success.new = make(map[string]bool) self.Success.inheritable = true } switch provider { case "mgo": var docs = map[string]interface{}{} err := mgo.Mgo(&docs, "find", map[string]interface{}{ "Database": MGO_DB, "Collection": SUCCESS_FILE, }) if err != nil { logs.Log.Error("从mgo读取成功记录: %v", err) return } for _, v := range docs["Docs"].([]interface{}) { self.Success.old[v.(bson.M)["_id"].(string)] = true } case "mysql": db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc) if !ok || db == nil { // logs.Log.Error("链接Mysql数据库超时,无法读取成功记录!") return } defer mysql.MysqlPool.Free(db) rows, err := mysql.New(db.DB). SetTableName("`" + SUCCESS_FILE + "`"). SelectAll() if err != nil { return } for rows.Next() { var id string err = rows.Scan(&id) self.Success.old[id] = true } default: f, err := os.Open(SUCCESS_FILE_FULL) if err != nil { return } defer f.Close() b, _ := ioutil.ReadAll(f) b[0] = '{' json.Unmarshal(append(b, '}'), &self.Success.old) } logs.Log.Informational(" * 读出 %v 条成功记录\n", len(self.Success.old)) }
func (self *Success) flush(provider string) (sLen int, err error) { self.RWMutex.Lock() defer self.RWMutex.Unlock() sLen = len(self.new) if sLen == 0 { return } switch provider { case "mgo": if mgo.Error() != nil { err = fmt.Errorf(" * Fail [添加成功记录][mgo]: %v 条 [ERROR] %v\n", sLen, mgo.Error()) return } var docs = make([]map[string]interface{}, sLen) var i int for key := range self.new { docs[i] = map[string]interface{}{"_id": key} self.old[key] = true i++ } err := mgo.Mgo(nil, "insert", map[string]interface{}{ "Database": config.DB_NAME, "Collection": self.tabName, "Docs": docs, }) if err != nil { err = fmt.Errorf(" * Fail [添加成功记录][mgo]: %v 条 [ERROR] %v\n", sLen, err) } case "mysql": _, err := mysql.DB() if err != nil { return sLen, fmt.Errorf(" * Fail [添加成功记录][mysql]: %v 条 [ERROR] %v\n", sLen, err) } table, ok := getWriteMysqlTable(self.tabName) if !ok { table = mysql.New() table.SetTableName(self.tabName).CustomPrimaryKey(`id VARCHAR(255) not null primary key`) err = table.Create() if err != nil { return sLen, fmt.Errorf(" * Fail [添加成功记录][mysql]: %v 条 [ERROR] %v\n", sLen, err) } setWriteMysqlTable(self.tabName, table) } for key := range self.new { table.AutoInsert([]string{key}) self.old[key] = true } err = table.FlushInsert() if err != nil { return sLen, fmt.Errorf(" * Fail [添加成功记录][mysql]: %v 条 [ERROR] %v\n", sLen, err) } default: f, _ := os.OpenFile(self.fileName, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0777) b, _ := json.Marshal(self.new) b[0] = ',' f.Write(b[:len(b)-1]) f.Close() for key := range self.new { self.old[key] = true } } self.new = make(map[string]bool) return }
func (self *Deduplication) Submit(provider string) { self.Mutex.Lock() defer self.Unlock() self.provider = provider if len(self.sampling.new) == 0 { return } switch self.provider { case "mgo": var docs = make([]map[string]interface{}, len(self.sampling.new)) var i int for key := range self.sampling.new { docs[i] = map[string]interface{}{"_id": key} self.sampling.old[key] = true i++ } mgo.Mgo(nil, "insert", map[string]interface{}{ "Database": config.MGO.DB, "Collection": config.DEDUPLICATION.FILE_NAME, "Docs": docs, }) case "mysql": db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc) if !ok || db == nil { logs.Log.Error("链接Mysql数据库超时,无法保存去重记录!") return } defer mysql.MysqlPool.Free(db) table := mysql.New(db.DB). SetTableName(config.DEDUPLICATION.FILE_NAME). CustomPrimaryKey(`id VARCHAR(255) not null primary key`). Create() for key := range self.sampling.new { table.AddRow(key).Update() self.sampling.old[key] = true } default: p, _ := path.Split(config.COMM_PATH.CACHE + "/" + config.DEDUPLICATION.FILE_NAME) // 创建/打开目录 d, err := os.Stat(p) if err != nil || !d.IsDir() { if err := os.MkdirAll(p, 0777); err != nil { logs.Log.Error("Error: %v\n", err) } } f, _ := os.OpenFile(config.COMM_PATH.CACHE+"/"+config.DEDUPLICATION.FILE_NAME, os.O_CREATE|os.O_APPEND|os.O_RDWR, 0660) b, _ := json.Marshal(self.sampling.new) b[0] = ',' f.Write(b[:len(b)-1]) f.Close() for key := range self.sampling.new { self.sampling.old[key] = true } } logs.Log.Informational(" * 新增 %v 条去重样本\n", len(self.sampling.new)) self.sampling.new = make(map[string]bool) }
func (self *Deduplication) Update(provider string, inherit bool) { self.Mutex.Lock() defer self.Unlock() self.provider = provider if !inherit { // 不继承历史记录时 self.sampling.old = make(map[string]bool) self.sampling.new = make(map[string]bool) self.lastIsInherit = false return } else if self.lastIsInherit { // 本次与上次均继承历史记录时 return } else { // 上次没有继承历史记录,但本次继承时 self.sampling.old = make(map[string]bool) self.sampling.new = make(map[string]bool) self.lastIsInherit = true } switch self.provider { case "mgo": var docs = map[string]interface{}{} err := mgo.Mgo(&docs, "find", map[string]interface{}{ "Database": config.MGO.DB, "Collection": config.DEDUPLICATION.FILE_NAME, }) if err != nil { logs.Log.Error("去重读取mgo: %v", err) return } for _, v := range docs["Docs"].([]interface{}) { self.sampling.old[v.(bson.M)["_id"].(string)] = true } case "mysql": db, ok := mysql.MysqlPool.GetOne().(*mysql.MysqlSrc) if !ok || db == nil { logs.Log.Error("链接Mysql数据库超时,无法读取去重记录!") return } defer mysql.MysqlPool.Free(db) rows, err := mysql.New(db.DB). SetTableName("`" + config.DEDUPLICATION.FILE_NAME + "`"). SelectAll() if err != nil { // logs.Log.Error("读取Mysql数据库中去重记录失败:%v", err) return } for rows.Next() { var id string err = rows.Scan(&id) self.sampling.old[id] = true } default: f, err := os.Open(config.COMM_PATH.CACHE + "/" + config.DEDUPLICATION.FILE_NAME) if err != nil { return } defer f.Close() b, _ := ioutil.ReadAll(f) b[0] = '{' json.Unmarshal( append(b, '}'), &self.sampling.old, ) } logs.Log.Informational(" * 读出 %v 条去重样本\n", len(self.sampling.old)) }