func GetArtifactContent(ctx context.Context, r render.Render, req *http.Request, res http.ResponseWriter, db database.Database, s3bucket *s3.Bucket, artifact *model.Artifact) {
	if artifact == nil {
		LogAndRespondWithErrorf(ctx, r, http.StatusBadRequest, "No artifact specified")
		return
	}

	switch artifact.State {
	case model.UPLOADED:
		// Fetch from S3
		url := s3bucket.SignedURL(artifact.S3URL, time.Now().Add(30*time.Minute))
		rq, err := http.NewRequest("GET", url, nil)
		if err != nil {
			LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err)
			return
		}
		if byteRanges := req.Header.Get("Range"); byteRanges != "" {
			// If request contains Range: headers, pass them right through to S3.
			// TODO(anupc): Validation? We're sending user input through to the data store.
			rq.Header.Add("Range", byteRanges)
		}

		resp, err := http.DefaultClient.Do(rq)
		if err != nil {
			LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err)
			return
		}
		defer resp.Body.Close()

		if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
			LogAndRespondWithErrorf(ctx, r, http.StatusInternalServerError, fmt.Sprintf("Bad status code %d received from S3", resp.StatusCode))
			return
		}

		contentdisposition.SetFilename(res, filepath.Base(artifact.RelativePath))
		res.Header().Add("Content-Length", strconv.Itoa(int(artifact.Size)))
		if n, err := io.CopyN(res, resp.Body, artifact.Size); err != nil {
			sentry.ReportError(ctx, fmt.Errorf("Error transferring artifact (for artifact %s/%s, bytes (%d/%d) read): %s", artifact.BucketId, artifact.Name, n, artifact.Size, err))
			return
		}
		return
	case model.UPLOADING:
		// Not done uploading to S3 yet. Error.
		LogAndRespondWithErrorf(ctx, r, http.StatusNotFound, "Waiting for content to complete uploading")
		return
	case model.APPENDING:
		fallthrough
	case model.APPEND_COMPLETE:
		// Pick from log chunks
		contentdisposition.SetFilename(res, filepath.Base(artifact.RelativePath))
		// All written bytes are immutable. So, unless size changes, all previously read contents can be cached.
		res.Header().Add("ETag", strconv.Itoa(int(artifact.Size)))
		http.ServeContent(res, req, filepath.Base(artifact.RelativePath), time.Time{}, newLogChunkReaderWithReadahead(artifact, db))
		return
	case model.WAITING_FOR_UPLOAD:
		// Not started yet. Error.
		LogAndRespondWithErrorf(ctx, r, http.StatusNotFound, "Waiting for content to get uploaded")
		return
	}
}
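// The APPEND_COMPLETE branch above relies on http.ServeContent, which needs an io.ReadSeeker so it
// can honor Range requests, and uses a size-based ETag because all written bytes are immutable. The
// sketch below is a minimal, self-contained illustration of that pattern, with an in-memory
// bytes.Reader standing in for newLogChunkReaderWithReadahead; the handler path, content, and port
// are hypothetical and not part of the artifact store.
func serveImmutableContentSketch() {
	content := []byte("log line 1\nlog line 2\n")
	http.HandleFunc("/sketch/content", func(w http.ResponseWriter, r *http.Request) {
		// Since written bytes are immutable, the length uniquely identifies the content version.
		w.Header().Add("ETag", strconv.Itoa(len(content)))
		// bytes.NewReader implements io.ReadSeeker, so ServeContent can satisfy Range requests.
		http.ServeContent(w, r, "console.log", time.Time{}, bytes.NewReader(content))
	})
	log.Fatal(http.ListenAndServe(":8080", nil))
}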
func GetArtifactContent(r render.Render, req *http.Request, res http.ResponseWriter, db database.Database, params martini.Params, s3bucket *s3.Bucket, artifact *model.Artifact) {
	if artifact == nil {
		JsonErrorf(r, http.StatusBadRequest, "Error: no artifact specified")
		return
	}

	switch artifact.State {
	case model.UPLOADED:
		// Fetch from S3
		reader, err := s3bucket.GetReader(artifact.S3URL)
		if err != nil {
			JsonErrorf(r, http.StatusInternalServerError, err.Error())
			return
		}
		// Close the S3 reader once we're done streaming it out.
		defer reader.Close()

		// Ideally, we'll use a Hijacker to take over the conn so that we can employ an io.Writer
		// instead of loading the entire file into memory before writing it back out. But, for now, we
		// will run the risk of OOM if large files need to be served.
		var buf bytes.Buffer
		if _, err = buf.ReadFrom(reader); err != nil {
			JsonErrorf(r, http.StatusInternalServerError, "Error reading upload buffer: %s", err.Error())
			return
		}
		res.Write(buf.Bytes())
		return
	case model.UPLOADING:
		// Not done uploading to S3 yet. Error.
		r.JSON(http.StatusNotFound, map[string]string{"error": "Waiting for content to complete uploading"})
		return
	case model.APPENDING:
		fallthrough
	case model.APPEND_COMPLETE:
		// Pick from log chunks
		logChunks, err := db.ListLogChunksInArtifact(artifact.Id)
		if err != nil {
			JsonErrorf(r, http.StatusInternalServerError, err.Error())
			return
		}

		var buf bytes.Buffer
		for _, logChunk := range logChunks {
			buf.WriteString(logChunk.Content)
		}
		res.Write(buf.Bytes())
		return
	case model.WAITING_FOR_UPLOAD:
		// Not started yet. Error.
		JsonErrorf(r, http.StatusNotFound, "Waiting for content to get uploaded")
		return
	}
}
func killBucket(b *s3.Bucket) {
	var err error
	for attempt := attempts.Start(); attempt.Next(); {
		err = b.DelBucket()
		if err == nil {
			return
		}
		if _, ok := err.(*net.DNSError); ok {
			return
		}
		e, ok := err.(*s3.Error)
		if ok && e.Code == "NoSuchBucket" {
			return
		}
		if ok && e.Code == "BucketNotEmpty" {
			// Errors are ignored here. Just retry.
			resp, err := b.List("", "", "", 1000)
			if err == nil {
				for _, key := range resp.Contents {
					_ = b.Del(key.Key)
				}
			}
			multis, _, _ := b.ListMulti("", "")
			for _, m := range multis {
				_ = m.Abort()
			}
		}
	}
	message := "cannot delete test bucket"
	if err != nil {
		message += ": " + err.Error()
	}
	panic(message)
}
func uploadArtifactToS3(bucket *s3.Bucket, artifactName string, artifactSize int64, contentReader io.ReadSeeker) error {
	attempts := 0
	for {
		attempts++
		// Rewind the Seeker to the beginning, required if we had already read a few bytes from it before.
		if _, err := contentReader.Seek(0, os.SEEK_SET); err != nil {
			return err
		}

		if err := bucket.PutReader(artifactName, contentReader, artifactSize, "binary/octet-stream", s3.PublicRead); err != nil {
			if attempts < MaxUploadAttempts {
				log.Printf("[Attempt %d/%d] Error uploading to S3: %s", attempts, MaxUploadAttempts, err)
				continue
			}
			return fmt.Errorf("Error uploading to S3: %s", err)
		}

		bytesUploadedCounter.Add(artifactSize)
		return nil
	}
}
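// A hedged usage sketch for uploadArtifactToS3. Because the function retries failed uploads and
// rewinds the content before each attempt, callers must hand it an io.ReadSeeker (such as a
// *bytes.Reader or an *os.File), not a plain io.Reader. The artifact name below is a hypothetical
// placeholder.
func uploadArtifactSketch(bucket *s3.Bucket) error {
	payload := []byte("artifact contents")
	// bytes.NewReader satisfies io.ReadSeeker, so the retry loop can Seek(0, os.SEEK_SET) between attempts.
	return uploadArtifactToS3(bucket, "sketch/artifact.txt", int64(len(payload)), bytes.NewReader(payload))
}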
// GetArtifactContentChunks lists artifact contents in a chunked form. Useful to poll for updates to
// chunked artifacts. All artifact types are supported and chunks can be requested from arbitrary
// locations within artifacts.
//
// This is primarily meant for the Changes UI for log following. If you need to fetch byte ranges
// from the store, it should be available directly at /content.
//
// The URL query parameters offset and limit can be used to control the range of bytes to be fetched:
//   offset -> byte offset of the start of the range to be fetched (defaults to beginning of artifact)
//   limit  -> number of bytes to be fetched (defaults to 100KB)
//
// Negative values for any query parameter will cause it to be set to 0 (default).
func GetArtifactContentChunks(ctx context.Context, r render.Render, req *http.Request, res http.ResponseWriter, db database.Database, s3bucket *s3.Bucket, artifact *model.Artifact) {
	if artifact == nil {
		LogAndRespondWithErrorf(ctx, r, http.StatusBadRequest, "No artifact specified")
		return
	}

	type Chunk struct {
		ID     int64  `json:"id"`
		Offset int64  `json:"offset"`
		Size   int64  `json:"size"`
		Text   string `json:"text"`
	}
	type Result struct {
		Chunks     []Chunk `json:"chunks"`
		EOF        bool    `json:"eof"`
		NextOffset int64   `json:"nextOffset"`
	}

	byteRangeBegin, byteRangeEnd, err := getByteRangeFromRequest(req, artifact)
	if err != nil {
		// If the given range is not valid, steer the client to a valid range.
		r.JSON(http.StatusOK, &Result{Chunks: []Chunk{}, EOF: err == errReadBeyondEOF && artifact.State == model.UPLOADED, NextOffset: byteRangeEnd})
		return
	}

	switch artifact.State {
	case model.UPLOADING:
		// No data to report right now. Wait till upload to S3 completes.
		fallthrough
	case model.WAITING_FOR_UPLOAD:
		// Upload hasn't started. No data to report. Try again later.
		r.JSON(http.StatusOK, &Result{Chunks: []Chunk{}, NextOffset: byteRangeBegin})
		return
	case model.UPLOADED:
		// Fetch from S3
		url := s3bucket.SignedURL(artifact.S3URL, time.Now().Add(30*time.Minute))
		rq, err := http.NewRequest("GET", url, nil)
		if err != nil {
			LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err)
			return
		}
		rq.Header.Add("Range", fmt.Sprintf("bytes=%d-%d", byteRangeBegin, byteRangeEnd))

		resp, err := http.DefaultClient.Do(rq)
		if err != nil {
			LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err)
			return
		}
		defer resp.Body.Close()

		if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK {
			LogAndRespondWithErrorf(ctx, r, http.StatusBadRequest, fmt.Sprintf("Bad status code %d", resp.StatusCode))
			return
		}

		var buf bytes.Buffer
		n, err := buf.ReadFrom(resp.Body)
		if err != nil {
			LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err)
			return
		}

		nextOffset := byteRangeBegin + n
		r.JSON(http.StatusOK, &Result{
			Chunks:     []Chunk{Chunk{Offset: byteRangeBegin, Size: n, Text: buf.String()}},
			EOF:        nextOffset == artifact.Size,
			NextOffset: nextOffset,
		})
		return
	case model.APPENDING:
		fallthrough
	case model.APPEND_COMPLETE:
		// Pick from log chunks
		rd := newLogChunkReader(artifact, db)
		rd.Seek(byteRangeBegin, os.SEEK_SET)

		bts := make([]byte, byteRangeEnd-byteRangeBegin+1)
		n, err := runeLimitedRead(rd, bts)
		if err != nil && err != io.EOF {
			LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err)
			return
		}

		if n > 0 {
			r.JSON(http.StatusOK, &Result{
				Chunks:     []Chunk{Chunk{Offset: byteRangeBegin, Size: int64(n), Text: string(bts[:n])}},
				NextOffset: byteRangeBegin + int64(n),
			})
		} else {
			r.JSON(http.StatusOK, &Result{Chunks: []Chunk{}, NextOffset: byteRangeBegin})
		}
		return
	}
}
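// A hedged sketch of how a client might follow a log using GetArtifactContentChunks. It assumes a
// hypothetical chunked-content URL of the form <base>/content/chunked?offset=N&limit=M and decodes
// the JSON shape produced above (chunks, eof, nextOffset). The base URL, route, and poll interval
// are placeholders, not part of the server code shown here.
func followLogSketch(baseURL string) error {
	type chunk struct {
		Offset int64  `json:"offset"`
		Size   int64  `json:"size"`
		Text   string `json:"text"`
	}
	type result struct {
		Chunks     []chunk `json:"chunks"`
		EOF        bool    `json:"eof"`
		NextOffset int64   `json:"nextOffset"`
	}

	offset := int64(0)
	for {
		resp, err := http.Get(fmt.Sprintf("%s/content/chunked?offset=%d&limit=%d", baseURL, offset, 100*1024))
		if err != nil {
			return err
		}
		var res result
		err = json.NewDecoder(resp.Body).Decode(&res)
		resp.Body.Close()
		if err != nil {
			return err
		}

		for _, c := range res.Chunks {
			fmt.Print(c.Text)
		}
		if res.EOF {
			return nil
		}

		// Resume from where the server told us to; back off briefly before polling again.
		offset = res.NextOffset
		time.Sleep(2 * time.Second)
	}
}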
func PutArtifact(artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error {
	if artifact.State != model.WAITING_FOR_UPLOAD {
		return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State)
	}

	// New file being inserted into DB.
	// Mark status change to UPLOADING and start uploading to S3.
	//
	// First, verify that the size of the content being uploaded matches our expected size.
	var fileSize int64
	var err error

	if req.ContentLength != "" {
		fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bit size
		// This should never happen if a sane HTTP client is used. Nonetheless ...
		if err != nil {
			return fmt.Errorf("Invalid Content-Length specified")
		}
	} else {
		// This too should never happen if a sane HTTP client is used. Nonetheless ...
		return fmt.Errorf("Content-Length not specified")
	}

	if fileSize != artifact.Size {
		return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size)
	}

	// XXX Do we need to commit here or is this handled transparently?
	artifact.State = model.UPLOADING
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	cleanupAndReturn := func(err error) error {
		// TODO: Is there a better way to detect and handle errors?
		// Use a channel to signify upload completion. In defer, check if the channel is empty. If
		// yes, mark error. Else ignore.
		if err != nil {
			// TODO: s/ERROR/WAITING_FOR_UPLOAD/ ?
			log.Printf("Error uploading to S3: %s\n", err)
			artifact.State = model.ERROR
			err2 := db.UpdateArtifact(artifact)
			if err2 != nil {
				log.Printf("Error while handling error: %s", err2.Error())
			}
			return err
		}
		return nil
	}

	fileName := artifact.DefaultS3URL()
	if err := bucket.PutReader(fileName, req.Body, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil {
		return cleanupAndReturn(fmt.Errorf("Error uploading to S3: %s", err))
	}

	artifact.State = model.UPLOADED
	artifact.S3URL = fileName
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	return nil
}
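// A hedged sketch of how an HTTP handler might feed a request into PutArtifact. It assumes
// PutArtifactReq carries the raw Content-Length header value as a string and the request body as an
// io.Reader, which is how the two fields are used above; the handler name and the way artifact, db,
// and bucket are resolved (e.g. by routing middleware) are assumptions.
func putArtifactHandlerSketch(res http.ResponseWriter, req *http.Request, artifact *model.Artifact, db database.Database, bucket *s3.Bucket) {
	putReq := PutArtifactReq{
		ContentLength: req.Header.Get("Content-Length"),
		Body:          req.Body,
	}
	if err := PutArtifact(artifact, db, bucket, putReq); err != nil {
		http.Error(res, err.Error(), http.StatusBadRequest)
		return
	}
	res.WriteHeader(http.StatusOK)
}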
// MergeLogChunks merges all of the individual chunks into a single object and stores it on S3.
// The log chunks are stored in the database, while the merged object is uploaded to S3.
func MergeLogChunks(artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error {
	switch artifact.State {
	case model.APPEND_COMPLETE:
		// TODO: Reimplement using GorpDatabase
		// If the file is empty, don't bother creating an object on S3.
		if artifact.Size == 0 {
			artifact.State = model.CLOSED_WITHOUT_DATA
			artifact.S3URL = ""

			// Conversion between *DatabaseError and error is tricky. If we don't do this, a nil
			// *DatabaseError can become a non-nil error.
			return db.UpdateArtifact(artifact).GetError()
		}

		// XXX Do we need to commit here or is this handled transparently?
		artifact.State = model.UPLOADING
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		logChunks, err := db.ListLogChunksInArtifact(artifact.Id)
		if err != nil {
			return err
		}

		r, w := io.Pipe()
		errChan := make(chan error)
		uploadCompleteChan := make(chan bool)
		fileName := artifact.DefaultS3URL()

		// Asynchronously upload the object to S3 while reading from the r, w
		// pipe. Thus anything written to "w" will be sent to S3.
		go func() {
			defer close(errChan)
			defer close(uploadCompleteChan)
			defer r.Close()
			if err := s3bucket.PutReader(fileName, r, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil {
				errChan <- fmt.Errorf("Error uploading to S3: %s", err)
				return
			}

			uploadCompleteChan <- true
		}()

		for _, logChunk := range logChunks {
			w.Write([]byte(logChunk.Content))
		}
		w.Close()

		// Wait either for the S3 upload to complete or for it to fail with an error.
		// XXX This is a long operation and should probably be asynchronous from the
		// actual HTTP request, and the client should poll to check when it's uploaded.
		select {
		case <-uploadCompleteChan:
			artifact.State = model.UPLOADED
			artifact.S3URL = fileName
			if err := db.UpdateArtifact(artifact); err != nil {
				return err
			}

			// From this point onwards, we will not send any errors back to the user. If we are
			// unable to delete logchunks, we log it to Sentry instead.
			if n, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil {
				// TODO: Send this error to Sentry
				log.Printf("Error deleting logchunks for artifact %d: %v\n", artifact.Id, err)
				return nil
			} else if n != int64(len(logChunks)) {
				// TODO: Send this error to Sentry
				log.Printf("Mismatch in number of logchunks while deleting logchunks for artifact %d: "+
					"Expected: %d Actual: %d\n", artifact.Id, len(logChunks), n)
			}

			return nil
		case err := <-errChan:
			return err
		}

	case model.WAITING_FOR_UPLOAD:
		fallthrough
	case model.ERROR:
		fallthrough
	case model.APPENDING:
		fallthrough
	case model.UPLOADED:
		fallthrough
	case model.UPLOADING:
		return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State)
	default:
		return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State)
	}
}
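// MergeLogChunks streams the concatenated chunks to S3 through an io.Pipe so the whole artifact is
// never buffered in memory. The sketch below isolates that pattern with a plain io.Copy consumer
// standing in for s3bucket.PutReader; the function itself is hypothetical. The key detail is that
// the writer side must be closed so the reader sees EOF and the consumer can finish.
func pipeStreamingSketch(chunks []string) (int64, error) {
	r, w := io.Pipe()
	done := make(chan struct{})
	var copied int64
	var copyErr error

	// Consumer: reads from the pipe until EOF, just as PutReader does with the merged artifact.
	go func() {
		defer close(done)
		copied, copyErr = io.Copy(ioutil.Discard, r)
	}()

	// Producer: writes each chunk into the pipe, then closes it to signal EOF.
	for _, c := range chunks {
		if _, err := w.Write([]byte(c)); err != nil {
			w.CloseWithError(err)
			<-done
			return copied, err
		}
	}
	w.Close()

	<-done
	return copied, copyErr
}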