Example #1
// CloseArtifact closes an artifact to further writes and, for artifacts uploaded in chunks,
// begins the process of merging and uploading the artifact. Closing is a no-op for artifacts
// that are already closed or complete; a streamed artifact that never received data is marked
// CLOSED_WITHOUT_DATA. Any other state is an error.
func CloseArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket, failIfAlreadyClosed bool) error {
	switch artifact.State {
	case model.UPLOADED, model.APPEND_COMPLETE:
		// Already closed, or already marked complete and queued to be shipped to S3.
		// No change required.
		return nil

	case model.APPENDING:
		artifact.State = model.APPEND_COMPLETE
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		return MergeLogChunks(ctx, artifact, db, s3bucket)

	case model.WAITING_FOR_UPLOAD:
		// Streaming artifact was not uploaded
		artifact.State = model.CLOSED_WITHOUT_DATA
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		return nil

	default:
		return fmt.Errorf("Unexpected artifact state: %s", artifact.State)
	}
}
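A minimal sketch of how an HTTP handler might invoke CloseArtifact. The server struct, its fields, and the lookupArtifact helper are hypothetical; only CloseArtifact's signature comes from the example above:

// Hypothetical handler sketch (not from the project): closing an artifact in
// response to a client request. s.db, s.s3bucket and lookupArtifact are assumed.
func (s *server) handleCloseArtifact(w http.ResponseWriter, r *http.Request) {
	artifact := s.lookupArtifact(r) // assumed helper: resolve the artifact from the URL
	if artifact == nil {
		http.Error(w, "artifact not found", http.StatusNotFound)
		return
	}
	if err := CloseArtifact(r.Context(), artifact, s.db, s.s3bucket, false); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.WriteHeader(http.StatusOK)
}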
Example #2
// MergeLogChunks merges all of an artifact's individual log chunks, which are stored in the
// database, into a single object and uploads it to S3.
func MergeLogChunks(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error {
	switch artifact.State {
	case model.APPEND_COMPLETE:
		// TODO: Reimplement using GorpDatabase
		// If the file is empty, don't bother creating an object on S3.
		if artifact.Size == 0 {
			artifact.State = model.CLOSED_WITHOUT_DATA
			artifact.S3URL = ""

			// Conversion between *DatabaseError and error is tricky. If we don't do this, a nil
			// *DatabaseError can become a non-nil error.
			return db.UpdateArtifact(artifact).GetError()
		}

		// XXX Do we need to commit here or is this handled transparently?
		artifact.State = model.UPLOADING
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		fileName := artifact.DefaultS3URL()

		r := newLogChunkReaderWithReadahead(artifact, db)

		if err := uploadArtifactToS3(s3bucket, fileName, artifact.Size, r); err != nil {
			return err
		}

		// XXX This is a long operation and should probably be asynchronous from the
		// actual HTTP request, and the client should poll to check when it's uploaded.
		artifact.State = model.UPLOADED
		artifact.S3URL = fileName
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		// From this point onwards, we will not send any errors back to the user. If we are
		// unable to delete logchunks, we report the failure to Sentry instead.
		if _, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil {
			sentry.ReportError(ctx, err)
			return nil
		}

		return nil

	case model.WAITING_FOR_UPLOAD, model.ERROR, model.APPENDING, model.UPLOADED, model.UPLOADING:
		return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State)
	default:
		return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State)
	}
}
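uploadArtifactToS3 is not shown on this page. Judging by the direct bucket.PutReader calls in the later examples, it is plausibly a thin wrapper along these lines (a sketch, not the project's actual helper):

// Plausible sketch of the uploadArtifactToS3 helper called above. The
// PutReader call mirrors the direct uploads in the later examples; any retry
// or validation logic in the real helper is unknown.
func uploadArtifactToS3(bucket *s3.Bucket, fileName string, size int64, r io.Reader) error {
	if err := bucket.PutReader(fileName, r, size, "binary/octet-stream", s3.PublicRead); err != nil {
		return fmt.Errorf("Error uploading to S3: %s", err)
	}
	return nil
}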
func AppendLogChunk(db database.Database, artifact *model.Artifact, logChunk *model.LogChunk) *HttpError {
	if artifact.State != model.APPENDING {
		return NewHttpError(http.StatusBadRequest, fmt.Sprintf("Unexpected artifact state: %s", artifact.State))
	}

	if logChunk.Size <= 0 {
		return NewHttpError(http.StatusBadRequest, "Invalid chunk size %d", logChunk.Size)
	}

	if logChunk.Content == "" {
		return NewHttpError(http.StatusBadRequest, "Empty content string")
	}

	if int64(len(logChunk.Content)) != logChunk.Size {
		return NewHttpError(http.StatusBadRequest, "Content length does not match indicated size")
	}

	// Find previous chunk in DB - append only
	if nextByteOffset, err := db.GetLastByteSeenForArtifact(artifact.Id); err != nil {
		return NewHttpError(http.StatusInternalServerError, "Error while checking for previous byte range: %s", err)
	} else if nextByteOffset != logChunk.ByteOffset {
		return NewHttpError(http.StatusBadRequest, "Overlapping ranges detected, expected offset: %d, actual offset: %d", nextByteOffset, logChunk.ByteOffset)
	}

	logChunk.ArtifactId = artifact.Id

	// Expand the artifact size to cover the new chunk.
	if artifact.Size < logChunk.ByteOffset+logChunk.Size {
		artifact.Size = logChunk.ByteOffset + logChunk.Size
		if err := db.UpdateArtifact(artifact); err != nil {
			return NewHttpError(http.StatusInternalServerError, err.Error())
		}
	}

	if err := db.InsertLogChunk(logChunk); err != nil {
		return NewHttpError(http.StatusBadRequest, "Error updating log chunk: %s", err)
	}
	return nil
}
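The append-only check above assumes db.GetLastByteSeenForArtifact returns the offset one past the last byte stored. A self-contained, in-memory sketch of that contract (the real implementation is a database query not shown here):

// In-memory sketch of the contract assumed above: the next writable offset is
// ByteOffset+Size of the chunk that ends last, or 0 for an empty artifact.
func lastByteSeen(chunks []model.LogChunk) int64 {
	var next int64
	for _, c := range chunks {
		if end := c.ByteOffset + c.Size; end > next {
			next = end
		}
	}
	return next
}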
Example #4
// CreateArtifact creates a new artifact in an open bucket.
//
// If an artifact with the same name already exists in the same bucket, we attempt to rename the
// artifact by adding a suffix.
// If the request specifies a chunked artifact, the size field is ignored and always set to zero.
// If the request is for a streamed artifact, size is mandatory.
// A relative path field may be specified to preserve the original file name and path. If no path is
// specified, the original artifact name is used by default.
func CreateArtifact(req createArtifactReq, bucket *model.Bucket, db database.Database) (*model.Artifact, *HttpError) {
	if len(req.Name) == 0 {
		return nil, NewHttpError(http.StatusBadRequest, "Artifact name not provided")
	}

	if bucket.State != model.OPEN {
		return nil, NewHttpError(http.StatusBadRequest, "Bucket is already closed")
	}

	artifact := new(model.Artifact)

	artifact.Name = req.Name
	artifact.BucketId = bucket.Id
	artifact.DateCreated = time.Now()

	if req.DeadlineMins == 0 {
		artifact.DeadlineMins = DEFAULT_DEADLINE
	} else {
		artifact.DeadlineMins = req.DeadlineMins
	}

	if req.Chunked {
		artifact.State = model.APPENDING
	} else {
		if req.Size == 0 {
			return nil, NewHttpError(http.StatusBadRequest, "Cannot create a new upload artifact without size.")
		} else if req.Size > MaxArtifactSizeBytes {
			return nil, NewHttpError(http.StatusRequestEntityTooLarge, fmt.Sprintf("Entity '%s' (size %d) is too large (limit %d)", req.Name, req.Size, MaxArtifactSizeBytes))
		}
		artifact.Size = req.Size
		artifact.State = model.WAITING_FOR_UPLOAD
	}

	if req.RelativePath == "" {
		// Use artifact name provided as default relativePath
		artifact.RelativePath = req.Name
	} else {
		artifact.RelativePath = req.RelativePath
	}

	// Attempt to insert artifact and retry with a different name if it fails.
	if err := db.InsertArtifact(artifact); err != nil {
		for attempt := 1; attempt <= MaxDuplicateFileNameResolutionAttempts; attempt++ {
			// The insert failed. If an artifact with this name already exists, the failure
			// was a name collision and we can retry below with a modified name.
			if _, err := db.GetArtifactByName(bucket.Id, artifact.Name); err != nil {
				// This could be a transient DB error (down/unreachable), in which case we expect the client
				// to retry. There is no value in attempting alternate artifact names.
				//
				// We have no means of verifying there was a name collision - bail with an internal error.
				return nil, NewHttpError(http.StatusInternalServerError, err.Error())
			}

			// File name collision - attempt to resolve
			artifact.Name = fmt.Sprintf(DuplicateArtifactNameFormat, req.Name, randString(5))
			if err := db.InsertArtifact(artifact); err == nil {
				return artifact, nil
			}
		}

		return nil, NewHttpError(http.StatusInternalServerError, "Exceeded retry limit avoiding duplicates")
	}

	return artifact, nil
}
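randString and DuplicateArtifactNameFormat are referenced above but not shown. A minimal sketch of a randString that would fit this use; the project's actual implementation may differ, and the modulo below slightly biases the letter distribution, which is harmless for name de-duplication:

// Minimal sketch of a randString helper suitable for the collision-avoidance
// rename above: n random lowercase letters drawn from crypto/rand.
func randString(n int) string {
	const letters = "abcdefghijklmnopqrstuvwxyz"
	b := make([]byte, n)
	if _, err := rand.Read(b); err != nil {
		panic(err) // crypto/rand should not fail; a sketch-level simplification
	}
	for i := range b {
		b[i] = letters[int(b[i])%len(letters)]
	}
	return string(b)
}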
Example #5
// PutArtifact writes a streamed artifact to S3. The entire file contents are first read into
// memory and then uploaded to S3. If S3 is not accessible, we don't make any attempt to buffer
// on disk and fail immediately.
func PutArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error {
	if artifact.State != model.WAITING_FOR_UPLOAD {
		return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State)
	}

	// New file being inserted into DB.
	// Mark status change to UPLOADING and start uploading to S3.
	//
	// First, verify that the size of the content being uploaded matches our expected size.
	var fileSize int64
	var err error

	if req.ContentLength != "" {
		fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bits
		// This should never happen if a sane HTTP client is used. Nonetheless ...
		if err != nil {
			return fmt.Errorf("Invalid Content-Length specified")
		}
	} else {
		// This too should never happen if a sane HTTP client is used. Nonetheless ...
		return fmt.Errorf("Content-Length not specified")
	}

	if fileSize != artifact.Size {
		return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size)
	}

	artifact.State = model.UPLOADING
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	cleanupAndReturn := func(err error) error {
		// TODO: Is there a better way to detect and handle errors?
		// Use a channel to signify upload completion. In defer, check if the channel is empty. If
		// yes, mark error. Else ignore.
		if err != nil {
			// TODO: s/ERROR/WAITING_FOR_UPLOAD/ ?
			sentry.ReportError(ctx, err)
			artifact.State = model.ERROR
			err2 := db.UpdateArtifact(artifact)
			if err2 != nil {
				log.Printf("Error while handling error: %s", err2.Error())
			}
			return err
		}

		return nil
	}

	b := new(bytes.Buffer)
	// Note: Storing the entire contents of an uploaded artifact in memory can cause OOMs.
	if n, err := io.CopyN(b, req.Body, artifact.Size); err != nil {
		return cleanupAndReturn(fmt.Errorf("Error reading from request body (for artifact %s/%s, bytes (%d/%d) read): %s", artifact.BucketId, artifact.Name, n, artifact.Size, err))
	}
	fileName := artifact.DefaultS3URL()

	if err := uploadArtifactToS3(bucket, fileName, artifact.Size, bytes.NewReader(b.Bytes())); err != nil {
		return cleanupAndReturn(err)
	}

	artifact.State = model.UPLOADED
	artifact.S3URL = fileName
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}
	return nil
}
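PutArtifactReq is only used here through its ContentLength and Body fields; its definition is not shown on this page. A hypothetical adapter building one from an incoming *http.Request (everything beyond those two field names is an assumption):

// Hypothetical adapter sketch: PutArtifact above reads req.ContentLength as a
// string and req.Body as a reader, so an HTTP handler might construct the
// request value like this.
func putArtifactReqFromHTTP(r *http.Request) PutArtifactReq {
	return PutArtifactReq{
		ContentLength: r.Header.Get("Content-Length"),
		Body:          r.Body,
	}
}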
Example #6
// AppendLogChunk appends a logchunk to an artifact.
// If the logchunk position does not match the current end of the artifact, an error is returned.
// An exception is made when the last seen logchunk is repeated exactly, which is silently
// ignored without an error.
func AppendLogChunk(ctx context.Context, db database.Database, artifact *model.Artifact, logChunkReq *createLogChunkReq) *HttpError {
	if artifact.State != model.APPENDING {
		return NewHttpError(http.StatusBadRequest, fmt.Sprintf("Unexpected artifact state: %s", artifact.State))
	}

	if logChunkReq.Size <= 0 {
		return NewHttpError(http.StatusBadRequest, "Invalid chunk size %d", logChunkReq.Size)
	}

	var contentBytes []byte
	if len(logChunkReq.Bytes) != 0 {
		// If request sent Bytes, use Bytes.
		if int64(len(logChunkReq.Bytes)) != logChunkReq.Size {
			return NewHttpError(http.StatusBadRequest, "Content length %d does not match indicated size %d", len(logChunkReq.Bytes), logChunkReq.Size)
		}
		contentBytes = logChunkReq.Bytes
	} else {
		// Otherwise, allow Content, for now.
		if len(logChunkReq.Content) == 0 {
			return NewHttpError(http.StatusBadRequest, "Empty content string")
		}

		if int64(len(logChunkReq.Content)) != logChunkReq.Size {
			return NewHttpError(http.StatusBadRequest, "Content length %d does not match indicated size %d", len(logChunkReq.Content), logChunkReq.Size)
		}
		contentBytes = []byte(logChunkReq.Content)
	}

	// Append only: the next chunk must start exactly at the current end of the artifact.
	nextByteOffset := artifact.Size
	if nextByteOffset != logChunkReq.ByteOffset {
		// The previous logchunk may be a retry: a server/proxy timeout can prevent the client
		// from receiving the ACK for a logchunk that was in fact stored successfully, in which
		// case the client sends the same chunk again.
		//
		// This is a best-effort check - if we encounter DB errors or any mismatch in the chunk
		// contents, we skip this test and report a range mismatch.
		if nextByteOffset != 0 && nextByteOffset == logChunkReq.ByteOffset+logChunkReq.Size {
			if prevLogChunk, err := db.GetLastLogChunkSeenForArtifact(artifact.Id); err == nil {
				if prevLogChunk != nil && prevLogChunk.ByteOffset == logChunkReq.ByteOffset && prevLogChunk.Size == logChunkReq.Size && bytes.Equal(prevLogChunk.ContentBytes, contentBytes) {
					sentry.ReportMessage(ctx, fmt.Sprintf("Received duplicate chunk for artifact %v of size %d at byte %d", artifact.Id, logChunkReq.Size, logChunkReq.ByteOffset))
					return nil
				}
			}
		}

		return NewHttpError(http.StatusBadRequest, "Overlapping ranges detected, expected offset: %d, actual offset: %d", nextByteOffset, logChunkReq.ByteOffset)
	}

	// Expand the artifact size to cover the new chunk. (The guard is now redundant: the offset
	// check above guarantees the chunk starts exactly at the current end.)
	if artifact.Size < logChunkReq.ByteOffset+logChunkReq.Size {
		artifact.Size = logChunkReq.ByteOffset + logChunkReq.Size
		if err := db.UpdateArtifact(artifact); err != nil {
			return NewHttpError(http.StatusInternalServerError, err.Error())
		}
	}

	logChunk := &model.LogChunk{
		ArtifactId:   artifact.Id,
		ByteOffset:   logChunkReq.ByteOffset,
		ContentBytes: contentBytes,
		Size:         logChunkReq.Size,
	}

	if err := db.InsertLogChunk(logChunk); err != nil {
		return NewHttpError(http.StatusBadRequest, "Error updating log chunk: %s", err)
	}
	return nil
}
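The duplicate-chunk tolerance above exists for exactly one failure mode: the chunk was stored but the ACK was lost, so the client retries the same bytes. A hypothetical client-side loop that relies on it, where post stands in for whatever transport the client uses:

// Hypothetical client retry sketch: if the ACK for a stored chunk is lost to a
// timeout, the retry re-sends identical bytes at the same offset, and the
// server above treats it as a harmless duplicate rather than a range error.
func sendChunkWithRetry(post func(offset int64, data []byte) error, offset int64, data []byte) error {
	var err error
	for attempt := 0; attempt < 3; attempt++ {
		if err = post(offset, data); err == nil {
			return nil
		}
	}
	return err
}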
func PutArtifact(artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error {
	if artifact.State != model.WAITING_FOR_UPLOAD {
		return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State)
	}

	// New file being inserted into DB.
	// Mark status change to UPLOADING and start uploading to S3.
	//
	// First, verify that the size of the content being uploaded matches our expected size.
	var fileSize int64
	var err error

	if req.ContentLength != "" {
		fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bits
		// This should never happen if a sane HTTP client is used. Nonetheless ...
		if err != nil {
			return fmt.Errorf("Invalid Content-Length specified")
		}
	} else {
		// This too should never happen if a sane HTTP client is used. Nonetheless ...
		return fmt.Errorf("Content-Length not specified")
	}

	if fileSize != artifact.Size {
		return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size)
	}

	// XXX Do we need to commit here or is this handled transparently?
	artifact.State = model.UPLOADING
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	cleanupAndReturn := func(err error) error {
		// TODO: Is there a better way to detect and handle errors?
		// Use a channel to signify upload completion. In defer, check if the channel is empty. If
		// yes, mark error. Else ignore.
		if err != nil {
			// TODO: s/ERROR/WAITING_FOR_UPLOAD/ ?
			log.Printf("Error uploading to S3: %s\n", err)
			artifact.State = model.ERROR
			err2 := db.UpdateArtifact(artifact)
			if err2 != nil {
				log.Printf("Error while handling error: %s", err2.Error())
			}
			return err
		}

		return nil
	}

	fileName := artifact.DefaultS3URL()
	if err := bucket.PutReader(fileName, req.Body, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil {
		return cleanupAndReturn(fmt.Errorf("Error uploading to S3: %s", err))
	}

	artifact.State = model.UPLOADED
	artifact.S3URL = fileName
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}
	return nil
}
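One defensive refinement, not present above: PutReader is told to send exactly artifact.Size bytes, but nothing stops a misbehaving client from supplying a longer body. A sketch of capping the reader (an assumption, not project code):

// Defensive wrapper sketch: bound the body at the declared size so extra bytes
// past Content-Length can never reach S3. Uses the same PutReader call as above.
func putExactly(bucket *s3.Bucket, name string, body io.Reader, size int64) error {
	return bucket.PutReader(name, io.LimitReader(body, size), size, "binary/octet-stream", s3.PublicRead)
}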
// MergeLogChunks merges all of an artifact's individual log chunks, which are stored in the
// database, into a single object and uploads it to S3.
func MergeLogChunks(artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error {
	switch artifact.State {
	case model.APPEND_COMPLETE:
		// TODO: Reimplement using GorpDatabase
		// If the file is empty, don't bother creating an object on S3.
		if artifact.Size == 0 {
			artifact.State = model.CLOSED_WITHOUT_DATA
			artifact.S3URL = ""

			// Conversion between *DatabaseError and error is tricky. If we don't do this, a nil
			// *DatabaseError can become a non-nil error.
			return db.UpdateArtifact(artifact).GetError()
		}

		// XXX Do we need to commit here or is this handled transparently?
		artifact.State = model.UPLOADING
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		logChunks, err := db.ListLogChunksInArtifact(artifact.Id)
		if err != nil {
			return err
		}

		r, w := io.Pipe()
		errChan := make(chan error)
		uploadCompleteChan := make(chan bool)
		fileName := artifact.DefaultS3URL()

		// Asynchronously upload the object to S3 while reading from the pipe: anything
		// written to w is streamed through r to S3.
		go func() {
			defer close(errChan)
			defer close(uploadCompleteChan)
			defer r.Close()
			if err := s3bucket.PutReader(fileName, r, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil {
				// Close the read end first so the writer below stops blocking and its
				// remaining Writes fail fast; otherwise sending on the unbuffered errChan
				// could deadlock against a producer stuck in w.Write.
				r.CloseWithError(err)
				errChan <- fmt.Errorf("Error uploading to S3: %s", err)
				return
			}

			uploadCompleteChan <- true
		}()

		for _, logChunk := range logChunks {
			// Write errors are deliberately ignored here: if the upload goroutine fails,
			// it closes the pipe and the error is collected from errChan below.
			w.Write([]byte(logChunk.Content))
		}

		w.Close()

		// Wait either for S3 upload to complete or for it to fail with an error.
		// XXX This is a long operation and should probably be asynchronous from the
		// actual HTTP request, and the client should poll to check when it's uploaded.
		select {
		case <-uploadCompleteChan:
			artifact.State = model.UPLOADED
			artifact.S3URL = fileName
			if err := db.UpdateArtifact(artifact); err != nil {
				return err
			}

			// From this point onwards, we will not send any errors back to the user. If we
			// are unable to delete logchunks, we log the failure instead.
			if n, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil {
				// TODO: Send this error to Sentry
				log.Printf("Error deleting logchunks for artifact %d: %v\n", artifact.Id, err)
				return nil
			} else if n != int64(len(logChunks)) {
				// TODO: Send this error to Sentry
				log.Printf("Mismatch in number of logchunks while deleting logchunks for artifact %d:"+
					"Expected: %d Actual: %d\n", artifact.Id, len(logChunks), n)
			}

			return nil
		case err := <-errChan:
			return err
		}

	case model.WAITING_FOR_UPLOAD, model.ERROR, model.APPENDING, model.UPLOADED, model.UPLOADING:
		return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State)
	default:
		return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State)
	}
}
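The pipe-plus-goroutine shape above is worth isolating. A self-contained sketch of the same pattern with the S3 upload replaced by an arbitrary consumer; unlike the original, it uses a buffered error channel so the goroutine never blocks on send, and it stops producing as soon as a write fails:

// Self-contained sketch of the io.Pipe pattern used by MergeLogChunks: a
// goroutine drains the read end while the producer writes chunks to the write
// end. consume stands in for the S3 upload and may fail mid-stream.
func streamChunks(chunks [][]byte, consume func(io.Reader) error) error {
	r, w := io.Pipe()
	errChan := make(chan error, 1) // buffered: the goroutine never blocks on send
	go func() {
		defer r.Close()
		errChan <- consume(r) // nil on success
	}()
	for _, c := range chunks {
		if _, err := w.Write(c); err != nil {
			break // consumer failed and closed the pipe; its error is collected below
		}
	}
	w.Close()
	return <-errChan
}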