Ejemplo n.º 1
0
// TODO(aa):
// * Parallelize: http://golang.org/doc/effective_go.html#concurrency
// * Do more than one "page" worth of results
// * Report progress and errors back through host interface
// * All the rest of the metadata (see photoMeta)
// * Conflicts: For all metadata changes, prefer any non-imported claims
// * Test!
func (r *run) importPhoto(parent *importer.Object, photo *photosSearchItem) error {
	filename := fmt.Sprintf("%s.%s", photo.Id, photo.OriginalFormat)
	photoNode, err := parent.ChildPathObject(filename)
	if err != nil {
		return err
	}

	// https://www.flickr.com/services/api/misc.dates.html
	dateTaken, err := time.ParseInLocation("2006-01-02 15:04:05", photo.DateTaken, schema.UnknownLocation)
	if err != nil {
		// default to the published date otherwise
		log.Printf("Flickr importer: problem with date taken of photo %v, defaulting to published date instead.", photo.Id)
		seconds, err := strconv.ParseInt(photo.DateUpload, 10, 64)
		if err != nil {
			return fmt.Errorf("could not parse date upload time %q for image %v: %v", photo.DateUpload, photo.Id, err)
		}
		dateTaken = time.Unix(seconds, 0)
	}

	attrs := []string{
		attrFlickrId, photo.Id,
		nodeattr.DateCreated, schema.RFC3339FromTime(dateTaken),
		nodeattr.Description, photo.Description.Content,
	}
	if schema.IsInterestingTitle(photo.Title) {
		attrs = append(attrs, nodeattr.Title, photo.Title)
	}
	// Import all the metadata. SetAttrs() is a no-op if the value hasn't changed, so there's no cost to doing these on every run.
	// And this way if we add more things to import, they will get picked up.
	if err := photoNode.SetAttrs(attrs...); err != nil {
		return err
	}

	// Import the photo itself. Since it is expensive to fetch the image, we store its lastupdate and only refetch if it might have changed.
	// lastupdate is a Unix timestamp according to https://www.flickr.com/services/api/flickr.photos.getInfo.html
	seconds, err := strconv.ParseInt(photo.LastUpdate, 10, 64)
	if err != nil {
		return fmt.Errorf("could not parse lastupdate time for image %v: %v", photo.Id, err)
	}
	lastUpdate := time.Unix(seconds, 0)
	if lastUpdateString := photoNode.Attr(nodeattr.DateModified); lastUpdateString != "" {
		oldLastUpdate, err := time.Parse(time.RFC3339, lastUpdateString)
		if err != nil {
			return fmt.Errorf("could not parse last stored update time for image %v: %v", photo.Id, err)
		}
		if lastUpdate.Equal(oldLastUpdate) {
			if err := r.updatePrimaryPhoto(photoNode); err != nil {
				return err
			}
			return nil
		}
	}
	form := url.Values{}
	form.Set("user_id", r.userID)
	res, err := r.fetch(photo.URL, form)
	if err != nil {
		log.Printf("Flickr importer: Could not fetch %s: %s", photo.URL, err)
		return err
	}
	defer res.Body.Close()

	fileRef, err := schema.WriteFileFromReader(r.Host.Target(), filename, res.Body)
	if err != nil {
		return err
	}
	if err := photoNode.SetAttr(nodeattr.CamliContent, fileRef.String()); err != nil {
		return err
	}
	if err := r.updatePrimaryPhoto(photoNode); err != nil {
		return err
	}
	// Write lastupdate last, so that if any of the preceding fails, we will try again next time.
	if err := photoNode.SetAttr(nodeattr.DateModified, schema.RFC3339FromTime(lastUpdate)); err != nil {
		return err
	}

	return nil
}
Ejemplo n.º 2
0
func (r *run) updatePhotoInAlbum(ctx context.Context, albumNode *importer.Object, photo picago.Photo) (ret error) {
	if photo.ID == "" {
		return errors.New("photo has no ID")
	}

	getMediaBytes := func() (io.ReadCloser, error) {
		log.Printf("Importing media from %v", photo.URL)
		resp, err := ctxutil.Client(ctx).Get(photo.URL)
		if err != nil {
			return nil, fmt.Errorf("importing photo %s: %v", photo.ID, err)
		}
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("importing photo %s: status code = %d", photo.ID, resp.StatusCode)
		}
		return resp.Body, nil
	}

	var fileRefStr string
	idFilename := photo.ID + "-" + photo.Filename
	photoNode, err := albumNode.ChildPathObjectOrFunc(idFilename, func() (*importer.Object, error) {
		h := blob.NewHash()
		rc, err := getMediaBytes()
		if err != nil {
			return nil, err
		}
		fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, io.TeeReader(rc, h))
		if err != nil {
			return nil, err
		}
		fileRefStr = fileRef.String()
		wholeRef := blob.RefFromHash(h)
		if pn, err := findExistingPermanode(r.Host.Searcher(), wholeRef); err == nil {
			return r.Host.ObjectFromRef(pn)
		}
		return r.Host.NewObject()
	})
	if err != nil {
		return err
	}

	const attrMediaURL = "picasaMediaURL"
	if fileRefStr == "" {
		fileRefStr = photoNode.Attr(nodeattr.CamliContent)
		// Only re-download the source photo if its URL has changed.
		// Empirically this seems to work: cropping a photo in the
		// photos.google.com UI causes its URL to change. And it makes
		// sense, looking at the ugliness of the URLs with all their
		// encoded/signed state.
		if !mediaURLsEqual(photoNode.Attr(attrMediaURL), photo.URL) {
			rc, err := getMediaBytes()
			if err != nil {
				return err
			}
			fileRef, err := schema.WriteFileFromReader(r.Host.Target(), photo.Filename, rc)
			rc.Close()
			if err != nil {
				return err
			}
			fileRefStr = fileRef.String()
		}
	}

	title := strings.TrimSpace(photo.Description)
	if strings.Contains(title, "\n") {
		title = title[:strings.Index(title, "\n")]
	}
	if title == "" && schema.IsInterestingTitle(photo.Filename) {
		title = photo.Filename
	}

	// TODO(tgulacsi): add more attrs (comments ?)
	// for names, see http://schema.org/ImageObject and http://schema.org/CreativeWork
	attrs := []string{
		nodeattr.CamliContent, fileRefStr,
		attrPicasaId, photo.ID,
		nodeattr.Title, title,
		nodeattr.Description, photo.Description,
		nodeattr.LocationText, photo.Location,
		nodeattr.DateModified, schema.RFC3339FromTime(photo.Updated),
		nodeattr.DatePublished, schema.RFC3339FromTime(photo.Published),
		nodeattr.URL, photo.PageURL,
	}
	if photo.Latitude != 0 || photo.Longitude != 0 {
		attrs = append(attrs,
			nodeattr.Latitude, fmt.Sprintf("%f", photo.Latitude),
			nodeattr.Longitude, fmt.Sprintf("%f", photo.Longitude),
		)
	}
	if err := photoNode.SetAttrs(attrs...); err != nil {
		return err
	}
	if err := photoNode.SetAttrValues("tag", photo.Keywords); err != nil {
		return err
	}
	if photo.Position > 0 {
		if err := albumNode.SetAttr(
			nodeattr.CamliPathOrderColon+strconv.Itoa(photo.Position-1),
			photoNode.PermanodeRef().String()); err != nil {
			return err
		}
	}

	// Do this last, after we're sure the "camliContent" attribute
	// has been saved successfully, because this is the one that
	// causes us to do it again in the future or not.
	if err := photoNode.SetAttrs(attrMediaURL, photo.URL); err != nil {
		return err
	}
	return nil
}