// TODO(aa): // * Parallelize: http://golang.org/doc/effective_go.html#concurrency // * Do more than one "page" worth of results // * Report progress and errors back through host interface // * All the rest of the metadata (see photoMeta) // * Conflicts: For all metadata changes, prefer any non-imported claims // * Test! func (r *run) importPhoto(parent *importer.Object, photo *photosSearchItem) error { filename := fmt.Sprintf("%s.%s", photo.Id, photo.OriginalFormat) photoNode, err := parent.ChildPathObject(filename) if err != nil { return err } // https://www.flickr.com/services/api/misc.dates.html dateTaken, err := time.ParseInLocation("2006-01-02 15:04:05", photo.DateTaken, schema.UnknownLocation) if err != nil { // default to the published date otherwise log.Printf("Flickr importer: problem with date taken of photo %v, defaulting to published date instead.", photo.Id) seconds, err := strconv.ParseInt(photo.DateUpload, 10, 64) if err != nil { return fmt.Errorf("could not parse date upload time %q for image %v: %v", photo.DateUpload, photo.Id, err) } dateTaken = time.Unix(seconds, 0) } attrs := []string{ attrFlickrId, photo.Id, nodeattr.DateCreated, schema.RFC3339FromTime(dateTaken), nodeattr.Description, photo.Description.Content, } if schema.IsInterestingTitle(photo.Title) { attrs = append(attrs, nodeattr.Title, photo.Title) } // Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run. // And this way if we add more things to import, they will get picked up. if err := photoNode.SetAttrs(attrs...); err != nil { return err } // Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed. // lastupdate is a Unix timestamp according to https://www.flickr.com/services/api/flickr.photos.getInfo.html seconds, err := strconv.ParseInt(photo.LastUpdate, 10, 64) if err != nil { return fmt.Errorf("could not parse lastupdate time for image %v: %v", photo.Id, err) } lastUpdate := time.Unix(seconds, 0) if lastUpdateString := photoNode.Attr(nodeattr.DateModified); lastUpdateString != "" { oldLastUpdate, err := time.Parse(time.RFC3339, lastUpdateString) if err != nil { return fmt.Errorf("could not parse last stored update time for image %v: %v", photo.Id, err) } if lastUpdate.Equal(oldLastUpdate) { if err := r.updatePrimaryPhoto(photoNode); err != nil { return err } return nil } } form := url.Values{} form.Set("user_id", r.userID) res, err := r.fetch(photo.URL, form) if err != nil { log.Printf("Flickr importer: Could not fetch %s: %s", photo.URL, err) return err } defer res.Body.Close() fileRef, err := schema.WriteFileFromReader(r.Host.Target(), filename, res.Body) if err != nil { return err } if err := photoNode.SetAttr(nodeattr.CamliContent, fileRef.String()); err != nil { return err } if err := r.updatePrimaryPhoto(photoNode); err != nil { return err } // Write lastupdate last, so that if any of the preceding fails, we will try again next time. if err := photoNode.SetAttr(nodeattr.DateModified, schema.RFC3339FromTime(lastUpdate)); err != nil { return err } return nil }
func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) { if photo.ID == "" { return errors.New("photo has no ID") } getMediaBytes := func() (io.ReadCloser, error) { log.Printf("Importing media from %v", photo.URL) resp, err := ctxutil.Client(ctx).Get(photo.URL) if err != nil { return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err) } if resp.StatusCode != http.StatusOK { resp.Body.Close() return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode) } return resp.Body, nil } var fileRefStr string idFilename := photo.ID + "-" + photo.Filename photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) { h := blob.NewHash() rc, err := getMediaBytes() if err != nil { return nil, err } fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, io.TeeReader(rc, h)) if err != nil { return nil, err } fileRefStr = fileRef.String() wholeRef := blob.RefFromHash(h) if pn, err := findExistingPermanode(r.Host.Searcher(), wholeRef); err == nil { return r.Host.ObjectFromRef(pn) } return r.Host.NewObject() }) if err != nil { return err } const attrMediaURL = "picasaMediaURL" if fileRefStr == "" { fileRefStr = photoNode.Attr(nodeattr.CamliContent) // Only re-download the source photo if its URL has changed. // Empirically this seems to work: cropping a photo in the // photos.google.com UI causes its URL to change. And it makes // sense, looking at the ugliness of the URLs with all their // encoded/signed state. if !mediaURLsEqual(photoNode.Attr(attrMediaURL), photo.URL) { rc, err := getMediaBytes() if err != nil { return err } fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, rc) rc.Close() if err != nil { return err } fileRefStr = fileRef.String() } } title := strings.TrimSpace(photo.Description) if strings.Contains(title, "\n") { title = title[:strings.Index(title, "\n")] } if title == "" && schema.IsInterestingTitle(photo.Filename) { title = photo.Filename } // TODO(tgulacsi): add more attrs (comments ?) // for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork attrs := []string{ nodeattr.CamliContent, fileRefStr, attrPicasaId, photo.ID, nodeattr.Title, title, nodeattr.Description, photo.Description, nodeattr.LocationText, photo.Location, nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated), nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published), nodeattr.URL, photo.PageURL, } if photo.Latitude != 0 || photo.Longitude != 0 { attrs = append(attrs, nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude), nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude), ) } if err := photoNode.SetAttrs(attrs...); err != nil { return err } if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil { return err } if photo.Position > 0 { if err := albumNode.SetAttr( nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1), photoNode.PermanodeRef().String()); err != nil { return err } } // Do this last, after we're sure the "camliContent" attribute // has been saved successfully, because this is the one that // causes us to do it again in the future or not. if err := photoNode.SetAttrs(attrMediaURL, photo.URL); err != nil { return err } return nil }