Exemplo n.º 1
0
// IndexMetaFile reads the CSV file at csv_file and indexes the GeoJSON file
// referenced by each of its rows.
//
// For every row the value of the "path" column is joined onto idx.Source and
// the resulting path is passed to idx.IndexGeoJSONFile. Rows that have no
// "path" column, and rows whose file does not exist, are logged and skipped.
// A failure to open the CSV file, to parse a row, or to index a file is
// logged and returned, which stops processing at that row. Returns nil once
// the end of the CSV file is reached.
func (idx *WOFIndex) IndexMetaFile(csv_file string) error {

	rows, open_err := csv.NewDictReaderFromPath(csv_file)

	if open_err != nil {
		idx.Logger.Error("failed to create CSV reader , because %s", open_err)
		return open_err
	}

	// Rows are deliberately handled one at a time. The original author notes
	// that fanning this out to process each row/file concurrently "will make
	// the RTree sad" (20151020/thisisaaronland)

	for {
		record, read_err := rows.Read()

		switch {
		case read_err == io.EOF:
			// End of the CSV file, nothing left to index
			return nil
		case read_err != nil:
			idx.Logger.Error("failed to parse CSV row , because %s", read_err)
			return read_err
		}

		relative, has_path := record["path"]

		if !has_path {
			idx.Logger.Warning("CSV row is missing a 'path' column")
			continue
		}

		target := path.Join(idx.Source, relative)

		// Only a "does not exist" result causes the row to be skipped; any
		// other outcome of the stat call proceeds to indexing
		if _, stat_err := os.Stat(target); os.IsNotExist(stat_err) {
			idx.Logger.Error("'%s' does not exist", target)
			continue
		}

		if index_err := idx.IndexGeoJSONFile(target); index_err != nil {
			idx.Logger.Error("failed to index '%s', because %s", target, index_err)
			return index_err
		}
	}
}
Exemplo n.º 2
0
// CloneMetaFile reads the CSV file at file and schedules a clone for the
// "path" column of each of its rows.
//
// For every row the remote location is c.Source + path and the local location
// is path joined onto c.Dest. When the local file already exists:
//
//   - if force_updates is true the file is cloned again;
//   - otherwise, if skip_existing is true the row is skipped;
//   - otherwise the row is skipped only when no change is detected, using
//     c.HasHashChanged when the row has a "file_hash" column and
//     c.HasChanged when it does not.
//
// Skipped rows are added to the Scheduled, Completed and Skipped counters.
// All other rows are submitted to c.workpool, which runs c.ClonePath; the
// outcome is recorded in the Success or Error counter and failed paths are
// pushed onto c.retries. Rows without a "path" column are ignored.
//
// Once every row has been read the method waits for all submitted work and
// for c.writesync, and then calls c.ProcessRetries.
//
// An error is returned when file can not be resolved or opened, when a row
// can not be parsed (after waiting for the work that was already scheduled),
// or when c.ProcessRetries reports failure.
func (c *WOFClone) CloneMetaFile(file string, skip_existing bool, force_updates bool) error {

	abs_path, abs_err := filepath.Abs(file)

	if abs_err != nil {
		c.Logger.Error("Failed to resolve %s, because %v", file, abs_err)
		return abs_err
	}

	reader, read_err := csv.NewDictReaderFromPath(abs_path)

	if read_err != nil {
		c.Logger.Error("Failed to read %s, because %v", abs_path, read_err)
		return read_err
	}

	wg := new(sync.WaitGroup)

	c.timer = time.Now()

	for {

		row, err := reader.Read()

		if err == io.EOF {
			break
		}

		if err != nil {

			c.Logger.Error("Failed to parse %s, because %v", abs_path, err)

			// Clones scheduled for earlier rows are still in flight, so wait
			// for them (as the normal exit path does) rather than returning
			// while they are still running

			wg.Wait()
			c.writesync.Wait()

			return err
		}

		rel_path, ok := row["path"]

		if !ok {
			continue
		}

		ensure_changes := true
		has_changes := true
		carry_on := false

		remote := c.Source + rel_path
		local := path.Join(c.Dest, rel_path)

		_, err = os.Stat(local)

		// Anything other than "does not exist" is treated as the local file
		// being present

		if !os.IsNotExist(err) {

			if force_updates {

				c.Logger.Debug("%s already exists but we are forcing updates", local)
			} else if skip_existing {

				c.Logger.Debug("%s already exists and we are skipping things that exist", local)
				carry_on = true

			} else {

				file_hash, ok := row["file_hash"]

				t1 := time.Now()

				// NOTE(review): assumes the second value returned by
				// HasHashChanged and HasChanged is an error - confirm
				var cmp_err error

				if ok {
					c.Logger.Debug("comparing hardcoded hash (%s) for %s", file_hash, local)
					has_changes, cmp_err = c.HasHashChanged(file_hash, remote)
				} else {
					has_changes, cmp_err = c.HasChanged(local, remote)
				}

				// The returned flag is still used as-is; the error is only
				// reported so that a failed comparison is not silent

				if cmp_err != nil {
					c.Logger.Warning("failed to determine whether %s has changed, because %v", local, cmp_err)
				}

				if !has_changes {
					c.Logger.Info("no changes to %s", local)
					carry_on = true
				}

				t2 := time.Since(t1)

				c.Logger.Debug("time to determine whether %s has changed (%t), %v", local, has_changes, t2)
			}

			if carry_on {

				atomic.AddInt64(&c.Scheduled, 1)
				atomic.AddInt64(&c.Completed, 1)
				atomic.AddInt64(&c.Skipped, 1)
				continue
			}

			// Whether the file needs cloning has already been decided above

			ensure_changes = false
		}

		wg.Add(1)
		atomic.AddInt64(&c.Scheduled, 1)

		go func(c *WOFClone, rel_path string, ensure_changes bool) {

			defer wg.Done()

			// Keep the error local to this goroutine instead of writing to
			// the loop's err variable from a closure

			_, send_err := c.workpool.SendWork(func() {

				t1 := time.Now()
				cl_err := c.ClonePath(rel_path, ensure_changes)
				t2 := time.Since(t1)

				c.Logger.Debug("time to process %s : %v", rel_path, t2)

				if cl_err != nil {
					atomic.AddInt64(&c.Error, 1)
					c.retries.Push(&pool.PoolString{String: rel_path})
				} else {
					atomic.AddInt64(&c.Success, 1)
				}

				atomic.AddInt64(&c.Completed, 1)
			})

			if send_err != nil {

				// NOTE(review): presumably the job did not run when SendWork
				// reports an error - confirm against the workpool
				// implementation. It is accounted for like a failed clone so
				// that it is retried and the counters stay balanced

				c.Logger.Error("failed to schedule %s, because %v", rel_path, send_err)

				atomic.AddInt64(&c.Error, 1)
				atomic.AddInt64(&c.Completed, 1)
				c.retries.Push(&pool.PoolString{String: rel_path})
			}

		}(c, rel_path, ensure_changes)
	}

	wg.Wait()

	c.writesync.Wait()

	ok := c.ProcessRetries()

	if !ok {
		c.Logger.Warning("failed to process retries")
		return errors.New("One of file failed to be cloned")
	}

	c.writesync.Wait()

	return nil
}