Esempio n. 1
0
// LoadLines3 enables transactional processing of files.
//
// It starts `threads` worker goroutines that consume the objects List sends
// on an internal channel. For every object the worker asks proc.ForFile for a
// per-line callback; a nil callback means the object is skipped. Otherwise
// the object is fetched through ss3, transparently gunzipped when its key
// ends in ".gz", split into lines, and each line is run through ParseLine and
// handed to the callback.
//
// When the fetch or the line scan fails, proc.Failure is consulted: a non-nil
// callback triggers a fresh attempt (the object is fetched again and read
// from its first line), a nil callback abandons the object. proc.Success is
// called once an attempt scans the whole object without error.
//
// A gzip header error is passed to check, as before.
// NOTE(review): the workers only stop when the channel is closed; presumably
// List closes it once listing is done — confirm, otherwise wg.Wait blocks.
func LoadLines3(ss3 s3.Interface, output *StepLocation, threads int, proc FileProcessor) {
	var wg sync.WaitGroup
	ch := make(chan s3.ListedObject)
	for i := 0; i < threads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for o := range ch {
				fn := o.Object().Url()
				p := proc.ForFile(fn, o.Size)

				// attempt performs one full fetch-and-scan pass using the
				// current callback p. Running it as a function lets defer
				// release the streams on every exit path.
				attempt := func() error {
					body, err := ss3.Get(s3.GetRequest{Object: o.Object()})
					if err != nil {
						return err
					}
					// gzip.Reader.Close does not close the stream it wraps,
					// so the fetched body must always be closed separately.
					defer body.Close()

					r := body
					if strings.HasSuffix(o.Key, ".gz") {
						r, err = gzip.NewReader(body)
						check(err)
						defer r.Close()
					}

					scanner := bufio.NewScanner(r)
					for scanner.Scan() {
						kv := ParseLine(scanner.Text())
						p(&kv)
					}
					return scanner.Err()
				}

				// Retry for as long as the processor keeps supplying a callback.
				for p != nil {
					if err := attempt(); err != nil {
						p = proc.Failure(fn, o.Size, err)
						continue
					}
					proc.Success(fn)
					p = nil
				}
			}
		}()
	}
	List(ss3, output, ch)
	wg.Wait()
}
Esempio n. 2
0
// LoadLines2 reads every selected object line by line and feeds the parsed
// lines to f.
//
// It starts `threads` worker goroutines that consume the objects List sends
// on an internal channel. Objects whose URL is rejected by decider are
// skipped. Accepted objects are fetched through ss3, transparently gunzipped
// when their key ends in ".gz", split into lines and parsed with ParseLine.
// Parsed items are forwarded over a second channel to a single consumer
// goroutine, so f is never invoked concurrently with itself.
//
// Fetch and gzip header errors are passed to check; a scan error panics.
// LoadLines2 returns after all workers have finished and f has seen every
// forwarded item.
// NOTE(review): the workers only stop when the channel is closed; presumably
// List closes it once listing is done — confirm, otherwise wg.Wait blocks.
func LoadLines2(ss3 s3.Interface, output *StepLocation, threads int, decider UrlDeciderFunc, f func(string, *KeyValue)) {
	var wg, wg2 sync.WaitGroup
	ch2 := make(chan *FileKeyValue)
	ch := make(chan s3.ListedObject)

	// Single consumer: serialises all calls to f.
	wg2.Add(1)
	go func() {
		defer wg2.Done()
		for fkv := range ch2 {
			f(fkv.Filename, fkv.Item)
		}
	}()

	for i := 0; i < threads; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for o := range ch {
				fn := o.Object().Url()
				if !decider(fn) {
					continue
				}
				// Process each object inside its own function so the deferred
				// Close calls run as soon as the object is done, instead of
				// accumulating until the worker goroutine exits.
				func() {
					body, err := ss3.Get(s3.GetRequest{Object: o.Object()})
					check(err)
					defer body.Close()

					r := body
					if strings.HasSuffix(o.Key, ".gz") {
						r, err = gzip.NewReader(body)
						check(err)
						// Closing the gzip reader does not close body; the
						// deferred body.Close above handles that.
						defer r.Close()
					}

					scanner := bufio.NewScanner(r)
					for scanner.Scan() {
						kv := ParseLine(scanner.Text())
						ch2 <- &FileKeyValue{
							Filename: fn,
							Item:     &kv,
						}
					}
					if err := scanner.Err(); err != nil {
						panic(err)
					}
				}()
			}
		}()
	}
	List(ss3, output, ch)
	wg.Wait()
	// All producers are done; closing ch2 lets the consumer drain and exit.
	close(ch2)
	wg2.Wait()
}