func (idx *WOFIndex) IndexMetaFile(csv_file string) error { reader, reader_err := csv.NewDictReaderFromPath(csv_file) if reader_err != nil { idx.Logger.Error("failed to create CSV reader , because %s", reader_err) return reader_err } // It is tempting to think that we could fan this out and process each row/file // concurrently but that will make the RTree sad... (20151020/thisisaaronland) for { row, err := reader.Read() if err == io.EOF { break } if err != nil { idx.Logger.Error("failed to parse CSV row , because %s", err) return err } rel_path, ok := row["path"] if ok != true { idx.Logger.Warning("CSV row is missing a 'path' column") continue } abs_path := path.Join(idx.Source, rel_path) _, err = os.Stat(abs_path) if os.IsNotExist(err) { idx.Logger.Error("'%s' does not exist", abs_path) continue } index_err := idx.IndexGeoJSONFile(abs_path) if index_err != nil { idx.Logger.Error("failed to index '%s', because %s", abs_path, index_err) return index_err } } return nil }
func (c *WOFClone) CloneMetaFile(file string, skip_existing bool, force_updates bool) error { abs_path, _ := filepath.Abs(file) reader, read_err := csv.NewDictReaderFromPath(abs_path) if read_err != nil { c.Logger.Error("Failed to read %s, because %v", abs_path, read_err) return read_err } wg := new(sync.WaitGroup) c.timer = time.Now() for { row, err := reader.Read() if err == io.EOF { break } if err != nil { return err } rel_path, ok := row["path"] if !ok { continue } ensure_changes := true has_changes := true carry_on := false remote := c.Source + rel_path local := path.Join(c.Dest, rel_path) _, err = os.Stat(local) if !os.IsNotExist(err) { if force_updates { c.Logger.Debug("%s already but we are forcing updates", local) } else if skip_existing { c.Logger.Debug("%s already exists and we are skipping things that exist", local) carry_on = true } else { file_hash, ok := row["file_hash"] t1 := time.Now() if ok { c.Logger.Debug("comparing hardcoded hash (%s) for %s", file_hash, local) has_changes, _ = c.HasHashChanged(file_hash, remote) } else { has_changes, _ = c.HasChanged(local, remote) } if !has_changes { c.Logger.Info("no changes to %s", local) carry_on = true } t2 := time.Since(t1) c.Logger.Debug("time to determine whether %s has changed (%t), %v", local, has_changes, t2) } if carry_on { atomic.AddInt64(&c.Scheduled, 1) atomic.AddInt64(&c.Completed, 1) atomic.AddInt64(&c.Skipped, 1) continue } ensure_changes = false } wg.Add(1) atomic.AddInt64(&c.Scheduled, 1) go func(c *WOFClone, rel_path string, ensure_changes bool) { defer wg.Done() _, err = c.workpool.SendWork(func() { t1 := time.Now() cl_err := c.ClonePath(rel_path, ensure_changes) t2 := time.Since(t1) c.Logger.Debug("time to process %s : %v", rel_path, t2) if cl_err != nil { atomic.AddInt64(&c.Error, 1) c.retries.Push(&pool.PoolString{String: rel_path}) } else { atomic.AddInt64(&c.Success, 1) } atomic.AddInt64(&c.Completed, 1) }) }(c, rel_path, ensure_changes) } wg.Wait() c.writesync.Wait() ok := c.ProcessRetries() if !ok { c.Logger.Warning("failed to process retries") return errors.New("One of file failed to be cloned") } c.writesync.Wait() return nil }