Example #1
0
// executeItemQuery runs q and collects every result into a slice of Item.
//
// If cursorStr decodes to a cursor, the query is resumed from it; a string
// that fails to decode is ignored and the query runs from its own start.
// limit is only used as the initial capacity of the result slice (it is not
// applied to the query here); values below zero are treated as zero.
//
// On success it returns the items and the string form of the iterator's
// cursor after the last item read. On any error it returns nil, "" and the
// error.
func executeItemQuery(con *Context, q *datastore.Query, limit int, cursorStr string) ([]Item, string, error) {
	if cursor, err := datastore.DecodeCursor(cursorStr); err == nil {
		q = q.Start(cursor)
	}

	// make panics on a negative capacity, so clamp before allocating.
	if limit < 0 {
		limit = 0
	}
	is := make([]Item, 0, limit)

	t := q.Run(con.C)
	for {
		var i Item
		_, err := t.Next(&i)
		if err == datastore.Done {
			break
		}
		if err != nil {
			con.Log.Errorf("Error fetching next item: %v", err)
			return nil, "", err
		}

		// only keep items that were fetched without error
		is = append(is, i)
	}

	cursor, err := t.Cursor()
	if err != nil {
		return nil, "", err
	}
	return is, cursor.String(), nil
}
Example #2
0
// createQuery returns a key-range query over the kind of start, ordered by
// __key__.
//
// When start equals end the query matches that single key. Otherwise it
// matches keys >= start and, when end is non-nil, keys < end. A cursor whose
// string form is non-empty is applied as the query's start position. This
// function does not itself set a limit on the query.
func createQuery(start, end *datastore.Key, cur datastore.Cursor) *datastore.Query {
	query := datastore.NewQuery(start.Kind())

	switch {
	case start.Equal(end):
		// [start,start]: exactly one key
		query = query.Filter("__key__ =", start)
	case end != nil:
		// [start,end[: bounded on both sides
		query = query.Filter("__key__ >=", start).Filter("__key__ <", end)
	default:
		// [start,nil]: open-ended
		query = query.Filter("__key__ >=", start)
	}

	if encoded := cur.String(); encoded != "" {
		query = query.Start(cur)
	}

	return query.Order("__key__")
}
// process runs the processor's query in repeated passes, handing every key
// the iterator returns to processor.Process, then calls processor.Complete.
//
// start is an encoded datastore cursor to resume from; an empty string runs
// the query from its beginning. A pass that yields no results ends the work.
// The 5 minute timeout is only checked between passes; when it fires while a
// cursor is pending, the remaining work is handed to processFunc with that
// cursor.
//
// It returns an error when the start cursor cannot be decoded, when the
// iterator fails, or when the next cursor cannot be obtained.
func process(c context.Context, processor Processor, start string) error {
	// use the full 10 minutes allowed (assuming front-end instance type)
	// and release the context's timer as soon as we return
	c, cancel := context.WithTimeout(c, 10*time.Minute)
	defer cancel()

	// get the query to iterate and the entity slot to load (could be nil for keys_only)
	q, e := processor.Start(c)

	var cursor *datastore.Cursor
	if start != "" {
		newCursor, err := datastore.DecodeCursor(start)
		if err != nil {
			log.Errorf(c, "get start cursor error %s", err.Error())
			return err
		}
		cursor = &newCursor
	}

	// signal a timeout after 5 minutes; the channel is buffered so the timer
	// callback never blocks if nobody is receiving any more
	timeout := make(chan bool, 1)
	timer := time.AfterFunc(5*time.Minute, func() {
		timeout <- true
	})
	defer timer.Stop()

	// TODO: error handling to retry
Loop:
	for {
		// check if we've timed out or whether to keep going
		select {
		case <-timeout:
			break Loop
		default:
		}

		processed := 0

		if cursor != nil {
			q = q.Start(*cursor)
		}
		it := q.Run(c)
		for {
			key, err := it.Next(e)
			if err == datastore.Done {
				break
			}
			if err != nil {
				log.Errorf(c, "get key error %s", err.Error())
				return err
			}

			processor.Process(c, key)
			processed++
		}

		// nothing processed in this pass means we're finished
		if processed == 0 {
			cursor = nil
			break
		}

		// otherwise remember where this pass ended so the next one resumes there
		newCursor, err := it.Cursor()
		if err != nil {
			log.Errorf(c, "get next cursor error %s", err.Error())
			return err
		}
		cursor = &newCursor
	}

	// let the processor write any aggregation entries / tasks etc...
	processor.Complete(c)

	// if we didn't complete everything then continue from the cursor
	if cursor != nil {
		processFunc.Call(c, processor, cursor.String())
	}

	return nil
}
// iterate processes namespaces for the mapper and reports whether the
// iteration is complete.
//
// When the query carries an explicit list of namespaces, each one is
// processed directly. Otherwise the query built by createQuery is run from
// it.Cursor (if set), and the string ID of each returned key is processed as
// a namespace.
//
// The query is re-run from the current cursor whenever CursorTimeout elapses,
// and the function stops once TaskTimeout elapses, storing the position
// reached in it.Cursor.
//
// It returns (true, nil) when all results have been consumed, (false, nil)
// when it stopped because of the task timeout, and (false, err) when decoding
// the cursor, iterating, processing or obtaining the next cursor fails.
func (it *iterator) iterate(c context.Context, mapper *mapper) (bool, error) {
	taskTimeout := time.After(mapper.config.TaskTimeout)
	taskRunning := true

	// if the query defines the specific namespaces to process
	// then we can just process that list directly
	if it.Query.selection == selected {
		for _, namespace := range it.Query.namespaces {
			// fail the same way the cursor-driven path below does
			if err := it.process(c, mapper, namespace); err != nil {
				return false, err
			}
		}
		return true, nil
	}

	q := it.createQuery(c)

	var cursor *datastore.Cursor
	if it.Cursor != "" {
		newCursor, err := datastore.DecodeCursor(it.Cursor)
		if err != nil {
			log.Errorf(c, "get start cursor error %s", err.Error())
			return false, err
		}
		cursor = &newCursor
	}

	// main task loop to repeat datastore query with cursor
	for taskRunning {

		// if cursor is set, start the query at that point
		if cursor != nil {
			q = q.Start(*cursor)
		}

		// limit how long the cursor can run before we requery
		cursorTimeout := time.After(mapper.config.CursorTimeout)
		// datastore cursor context needs to run for the max allowed; cancel is
		// called explicitly on every exit path because a defer inside this
		// loop would not run until the whole function returns
		cc, cancel := context.WithTimeout(c, 60*time.Second)
		t := q.Run(cc)

		// item loop to iterate cursor
	cursorLoop:
		for {
			key, err := t.Next(nil)
			if err == datastore.Done {
				// we reached the end
				cancel()
				return true, nil
			}

			if err != nil {
				cancel()
				log.Errorf(c, "error %s", err.Error())
				return false, err
			}

			namespace := key.StringID()
			if err := it.process(c, mapper, namespace); err != nil {
				cancel()
				return false, err
			}

			// the task timeout takes priority over the cursor timeout
			select {
			case <-taskTimeout:
				// clearing the flag breaks us out of the task loop but also lets us update the
				// cursor first when we break from the inner cursorLoop
				taskRunning = false
				break cursorLoop
			default:
			}

			select {
			case <-cursorTimeout:
				// this forces a new cursor and query so we don't suffer from datastore timeouts
				break cursorLoop
			default:
				// no timeout so carry on with the current cursor
			}
		}

		// we need to get the cursor for where we are upto whether we are requerying
		// within this task or scheduling a new continuation slice; the query context
		// is only released afterwards in case the iterator still needs it here
		newCursor, err := t.Cursor()
		cancel()
		if err != nil {
			log.Errorf(c, "get next cursor error %s", err.Error())
			return false, err
		}
		cursor = &newCursor
		it.Cursor = cursor.String()
	}

	return false, nil
}
// processPhotos iterates the "photo" kind for the range the processor asks
// for and passes each loaded photo to processor.Process, then calls
// processor.Complete.
//
// The range, batch size, timeout and optional start cursor all come from the
// value returned by processor.Start. Photos are queried keys-only in batches
// of r.Size, ordered by "taken", and each entity is loaded through nds.Get.
// A batch smaller than r.Size ends the work. The timeout is only checked
// between batches; when it fires while a cursor is pending, the cursor is
// stored in r.Start and the work is handed to processPhotosFunc.
//
// It returns an error when the start cursor cannot be decoded, when the
// iterator or entity load fails, or when the next cursor cannot be obtained.
func processPhotos(c context.Context, processor PhotoProcessor) error {
	// use the full 10 minutes allowed (assuming front-end instance type)
	// and release the context's timer as soon as we return
	c, cancel := context.WithTimeout(c, 10*time.Minute)
	defer cancel()

	r := processor.Start(c)
	log.Debugf(c, "processPhotos from %s to %s cursor %s", r.From.Format(dateFormat), r.To.Format(dateFormat), r.Start)

	// TODO: describe pros & cons of different querying + continuation strategies

	q := datastore.NewQuery("photo")
	q = q.Filter("taken >=", r.From)
	q = q.Filter("taken <", r.To)
	q = q.Order("taken")

	// I use keys only because it saves on cost - entities come from memcache if possible
	q = q.KeysOnly()

	// the batch size never changes, so set it once rather than on every pass
	q = q.Limit(r.Size)

	var cursor *datastore.Cursor
	if r.Start != "" {
		newCursor, err := datastore.DecodeCursor(r.Start)
		if err != nil {
			log.Errorf(c, "get start cursor error %s", err.Error())
			return err
		}
		cursor = &newCursor
	}

	// only one entity is loaded at a time
	// NOTE(review): p is reused for every photo; confirm nds.Get fully
	// overwrites it so values from a previous entity cannot carry over
	p := new(Photo)

	// the channel is buffered so the timer callback never blocks if nobody
	// is receiving any more
	timeout := make(chan bool, 1)
	timer := time.AfterFunc(r.Timeout, func() {
		timeout <- true
	})
	defer timer.Stop()

Loop:
	for {
		// check if we've timed out or whether to keep going
		select {
		case <-timeout:
			break Loop
		default:
		}

		processed := 0

		if cursor != nil {
			q = q.Start(*cursor)
		}
		it := q.Run(c)
		for {
			// if not using keys only then we would load the actual entity here using
			// key, err := it.Next(p)
			key, err := it.Next(nil)
			if err == datastore.Done {
				break
			}
			if err != nil {
				log.Errorf(c, "get key error %s", err.Error())
				return err
			}

			// loads the actual entity from memcache / datastore
			err = nds.Get(c, key, p)
			if err != nil {
				log.Errorf(c, "get photo error %s", err.Error())
				return err
			}

			// call the processor with the entity
			p.ID = key.IntID()
			processor.Process(c, p)

			processed++
		}

		// a short batch means there is nothing left, so we're finished
		if processed != r.Size {
			cursor = nil
			break
		}

		// a full batch means there may be more; resume after the last key read
		newCursor, err := it.Cursor()
		if err != nil {
			log.Errorf(c, "get next cursor error %s", err.Error())
			return err
		}
		cursor = &newCursor
	}

	// let the processor write any aggregation entries / tasks etc...
	processor.Complete(c)

	// if we didn't complete everything then continue from the cursor
	if cursor != nil {
		r.Start = cursor.String()
		processPhotosFunc.Call(c, processor)
	}

	return nil
}
Example #6
0
// iterate runs the shard's query inside the shard's namespace and passes
// each result key to the job's Next method, reporting whether the shard is
// complete.
//
// If the job implements JobOutput and a bucket is configured, an output file
// is created and handed to the job before iterating. If the job implements
// JobEntity, full entities are loaded into the value it makes; otherwise the
// query is keys-only.
//
// The query is re-run from the current cursor whenever CursorTimeout elapses,
// and the function stops once TaskTimeout elapses, storing the position
// reached in s.Cursor.
//
// It returns (true, nil) when all results have been consumed, (false, nil)
// when it stopped because of the task timeout, and (false, err) when the
// namespace cannot be set, the output file cannot be created, the cursor
// cannot be decoded or obtained, the iterator itself fails, or the job
// returns an error.
func (s *shard) iterate(c context.Context, mapper *mapper) (bool, error) {
	// switch namespace; keep the original context until we know this worked
	// so the failure can still be logged and c is never replaced on error
	nsCtx, err := appengine.Namespace(c, s.Namespace)
	if err != nil {
		log.Errorf(c, "set namespace %q error %s", s.Namespace, err.Error())
		return false, err
	}
	c = nsCtx

	taskTimeout := time.After(mapper.config.TaskTimeout)
	taskRunning := true

	jobOutput, useJobOutput := s.job.JobSpec.(JobOutput)
	if useJobOutput && s.job.Bucket != "" {
		w, err := s.createOutputFile(c)
		if err != nil {
			return false, err
		}
		defer w.Close()
		jobOutput.Output(w)
	}

	q := datastore.NewQuery(s.Query.kind)
	for _, f := range s.Query.filter {
		q = q.Filter(f.FieldName+" "+f.Op.String(), f.Value)
	}

	var cursor *datastore.Cursor
	if s.Cursor != "" {
		newCursor, err := datastore.DecodeCursor(s.Cursor)
		if err != nil {
			log.Errorf(c, "get start cursor error %s", err.Error())
			return false, err
		}
		cursor = &newCursor
	}

	// what we'll load into if doing full entity loads (i.e. not doing KeysOnly)
	var entity interface{}

	// is full loading implemented?
	jobEntity, useJobEntity := s.job.JobSpec.(JobEntity)
	if useJobEntity {
		entity = jobEntity.Make()
	} else {
		q = q.KeysOnly()
	}

	// main task loop to repeat datastore query with cursor
	for taskRunning {

		// if cursor is set, start the query at that point
		if cursor != nil {
			q = q.Start(*cursor)
		}

		// limit how long the cursor can run before we requery
		cursorTimeout := time.After(mapper.config.CursorTimeout)
		// datastore cursor context needs to run for the max allowed; cancel is
		// called explicitly on every exit path because a defer inside this
		// loop would not run until the whole function returns
		cc, cancel := context.WithTimeout(c, 60*time.Second)
		it := q.Run(cc)

		// item loop to iterate cursor
	cursorLoop:
		for {
			key, err := it.Next(entity)
			if err == datastore.Done {
				// we reached the end
				cancel()
				return true, nil
			}

			// TODO: option to fail or continue on individual errors
			// or add error handling logic to job to give it a chance (?)
			if err != nil {
				log.Errorf(c, "key %v error %v", key, err)
				if key == nil {
					// No key came back, so this looks like a failure of the
					// iterator itself (query error, expired query context)
					// rather than one entity failing to load. Such an error is
					// presumably returned again by every further Next call, so
					// carrying on would spin here forever; fail the slice.
					cancel()
					return false, err
				}
				// a single entity failed to load: skip it and carry on
				continue cursorLoop
			}

			if err := s.job.JobSpec.Next(c, s.Counters, key); err != nil {
				// TODO: instead of failing the entire slice, try to figure
				// out if it's possible to continue from this point or maybe
				// the last cursor position to avoid re-processing entities.
				// NOTE: this would need to truncate any output file being
				// written so entries weren't doubled up but maybe possible.
				cancel()
				return false, err
			}
			s.Count++

			// the task timeout takes priority over the cursor timeout
			select {
			case <-taskTimeout:
				// clearing the flag breaks us out of the task loop but also lets us update the
				// cursor first when we break from the inner cursorLoop
				taskRunning = false
				break cursorLoop
			default:
			}

			select {
			case <-cursorTimeout:
				// this forces a new cursor and query so we don't suffer from datastore timeouts
				break cursorLoop
			default:
				// no timeout so carry on with the current cursor
			}
		}

		// we need to get the cursor for where we are upto whether we are requerying
		// within this task or scheduling a new continuation slice; the query context
		// is only released afterwards in case the iterator still needs it here
		newCursor, err := it.Cursor()
		cancel()
		if err != nil {
			log.Errorf(c, "get next cursor error %s", err.Error())
			return false, err
		}
		cursor = &newCursor
		s.Cursor = cursor.String()
	}

	return false, nil
}
Example #7
0
File: model.go Project: favclip/qbg
// Start appends the ":!s=" marker followed by the cursor's string form to
// the plugin's buffer.
func (p *MemcacheQueryPlugin) Start(cur datastore.Cursor) {
	part := fmt.Sprintf(":!s=%s", cur.String())
	p.buf.WriteString(part)
}