// Merges all of the individual chunks into a single object and stores it on s3.
// The log chunks are stored in the database, while the object is uploaded to s3.
func MergeLogChunks(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error {
	switch artifact.State {
	case model.APPEND_COMPLETE:
		// TODO: Reimplement using GorpDatabase
		// If the file is empty, don't bother creating an object on S3.
		if artifact.Size == 0 {
			artifact.State = model.CLOSED_WITHOUT_DATA
			artifact.S3URL = ""

			// Conversion between *DatabaseEror and error is tricky. If we don't do this, a nil
			// *DatabaseError can become a non-nil error.
			return db.UpdateArtifact(artifact).GetError()
		}

		// XXX Do we need to commit here or is this handled transparently?
		artifact.State = model.UPLOADING
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		fileName := artifact.DefaultS3URL()

		r := newLogChunkReaderWithReadahead(artifact, db)

		if err := uploadArtifactToS3(s3bucket, fileName, artifact.Size, r); err != nil {
			return err
		}

		// XXX This is a long operation and should probably be asynchronous from the
		// actual HTTP request, and the client should poll to check when its uploaded.
		artifact.State = model.UPLOADED
		artifact.S3URL = fileName
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		// From this point onwards, we will not send back any errors back to the user. If we are
		// unable to delete logchunks, we log it to Sentry instead.
		if _, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil {
			sentry.ReportError(ctx, err)
			return nil
		}

		return nil

	case model.WAITING_FOR_UPLOAD:
		fallthrough
	case model.ERROR:
		fallthrough
	case model.APPENDING:
		fallthrough
	case model.UPLOADED:
		fallthrough
	case model.UPLOADING:
		return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State)
	default:
		return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State)
	}
}
func GetArtifactContent(ctx context.Context, r render.Render, req *http.Request, res http.ResponseWriter, db database.Database, s3bucket *s3.Bucket, artifact *model.Artifact) {
	if artifact == nil {
		LogAndRespondWithErrorf(ctx, r, http.StatusBadRequest, "No artifact specified")
		return
	}

	switch artifact.State {
	case model.UPLOADED:
		// Fetch from S3
		url := s3bucket.SignedURL(artifact.S3URL, time.Now().Add(30*time.Minute))
		rq, err := http.NewRequest("GET", url, nil)
		if byteRanges := req.Header.Get("Range"); byteRanges != "" {
			// If request contains Range: headers, pass them right through to S3.
			// TODO(anupc): Validation? We're sending user input through to the data store.
			rq.Header.Add("Range", byteRanges)
		}
		resp, err := http.DefaultClient.Do(rq)
		if err != nil {
			LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err)
			return
		}
		if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
			LogAndRespondWithErrorf(ctx, r, http.StatusInternalServerError, fmt.Sprintf("Bad status code %d recieved from S3", resp.StatusCode))
			return
		}
		contentdisposition.SetFilename(res, filepath.Base(artifact.RelativePath))
		res.Header().Add("Content-Length", strconv.Itoa(int(artifact.Size)))
		if n, err := io.CopyN(res, resp.Body, artifact.Size); err != nil {
			sentry.ReportError(ctx, fmt.Errorf("Error transferring artifact (for artifact %s/%s, bytes (%d/%d) read): %s", artifact.BucketId, artifact.Name, n, artifact.Size, err))
			return
		}
		return
	case model.UPLOADING:
		// Not done uploading to S3 yet. Error.
		LogAndRespondWithErrorf(ctx, r, http.StatusNotFound, "Waiting for content to complete uploading")
		return
	case model.APPENDING:
		fallthrough
	case model.APPEND_COMPLETE:
		// Pick from log chunks
		contentdisposition.SetFilename(res, filepath.Base(artifact.RelativePath))
		// All written bytes are immutable. So, unless size changes, all previously read contents can be cached.
		res.Header().Add("ETag", strconv.Itoa(int(artifact.Size)))
		http.ServeContent(res, req, filepath.Base(artifact.RelativePath), time.Time{}, newLogChunkReaderWithReadahead(artifact, db))
		return
	case model.WAITING_FOR_UPLOAD:
		// Not started yet. Error
		LogAndRespondWithErrorf(ctx, r, http.StatusNotFound, "Waiting for content to get uploaded")
		return
	}
}
Example #3
0
// LogAndRespondWithError posts a JSON-serialized error and statuscode on the HTTP response object
// (using Martini render).
// Log the error message to Sentry.
func LogAndRespondWithError(ctx context.Context, render render.Render, code int, err error) {
	sentry.ReportError(ctx, err)
	render.JSON(code, map[string]string{"error": err.Error()})
}
Example #4
0
// LogAndRespondWithErrorf posts a JSON-serialized error message and statuscode on the HTTP response
// object (using Martini render).
// Log the error message to Sentry.
func LogAndRespondWithErrorf(ctx context.Context, render render.Render, code int, errStr string, params ...interface{}) {
	msg := fmt.Sprintf(errStr, params...)
	sentry.ReportError(ctx, errors.New(msg))
	render.JSON(code, map[string]string{"error": msg})
}
// PutArtifact writes a streamed artifact to S3. The entire file contents are streamed directly
// through to S3. If S3 is not accessible, we don't make any attempt to buffer on disk and fail
// immediately.
func PutArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error {
	if artifact.State != model.WAITING_FOR_UPLOAD {
		return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State)
	}

	// New file being inserted into DB.
	// Mark status change to UPLOADING and start uploading to S3.
	//
	// First, verify that the size of the content being uploaded matches our expected size.
	var fileSize int64
	var err error

	if req.ContentLength != "" {
		fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bits
		// This should never happen if a sane HTTP client is used. Nonetheless ...
		if err != nil {
			return fmt.Errorf("Invalid Content-Length specified")
		}
	} else {
		// This too should never happen if a sane HTTP client is used. Nonetheless ...
		return fmt.Errorf("Content-Length not specified")
	}

	if fileSize != artifact.Size {
		return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size)
	}

	artifact.State = model.UPLOADING
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	cleanupAndReturn := func(err error) error {
		// TODO: Is there a better way to detect and handle errors?
		// Use a channel to signify upload completion. In defer, check if the channel is empty. If
		// yes, mark error. Else ignore.
		if err != nil {
			// TODO: s/ERROR/WAITING_FOR_UPLOAD/ ?
			sentry.ReportError(ctx, err)
			artifact.State = model.ERROR
			err2 := db.UpdateArtifact(artifact)
			if err2 != nil {
				log.Printf("Error while handling error: %s", err2.Error())
			}
			return err
		}

		return nil
	}

	b := new(bytes.Buffer)
	// Note: Storing entire contents of uploaded artifact in memory can cause OOMS.
	if n, err := io.CopyN(b, req.Body, artifact.Size); err != nil {
		return cleanupAndReturn(fmt.Errorf("Error reading from request body (for artifact %s/%s, bytes (%d/%d) read): %s", artifact.BucketId, artifact.Name, n, artifact.Size, err))
	}
	fileName := artifact.DefaultS3URL()

	if err := uploadArtifactToS3(bucket, fileName, artifact.Size, bytes.NewReader(b.Bytes())); err != nil {
		return cleanupAndReturn(err)
	}

	artifact.State = model.UPLOADED
	artifact.S3URL = fileName
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}
	return nil
}