// fetchSchemaSuperset returns the parsed schema blob for br, consulting
// an in-memory cache before hitting the blob fetcher.
func (fs *CamliFileSystem) fetchSchemaSuperset(br *blobref.BlobRef) (*schema.Superset, os.Error) {
	blobStr := br.String()
	if ss, ok := fs.blobToSchema.Get(blobStr); ok {
		return ss.(*schema.Superset), nil
	}
	log.Printf("schema cache MISS on %q", blobStr)

	rsc, _, err := fs.fetcher.Fetch(br)
	if err != nil {
		return nil, err
	}
	defer rsc.Close()
	jd := json.NewDecoder(rsc)
	ss := new(schema.Superset)
	err = jd.Decode(ss)
	if err != nil {
		log.Printf("Error parsing %s as schema blob: %v", br, err)
		return nil, os.EINVAL
	}
	if ss.Type == "" {
		log.Printf("blob %s is JSON but lacks camliType", br)
		return nil, os.EINVAL
	}
	ss.BlobRef = br
	fs.blobToSchema.Add(blobStr, ss)
	return ss, nil
}
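The miss path above is a standard memoize shape: check the cache, fetch and decode on miss, validate, then insert. A minimal sketch of the same pattern follows, in modern (post-Go 1) syntax since the original predates Go 1; cachingFetcher, fetchFunc, and schemaBlob are hypothetical stand-ins for fs, fs.fetcher.Fetch, and schema.Superset, and a plain locked map stands in for the LRU.

package main

import (
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strings"
	"sync"
)

// schemaBlob stands in for schema.Superset; CamliType mirrors its Type field.
type schemaBlob struct {
	CamliType string `json:"camliType"`
}

// fetchFunc stands in for fs.fetcher.Fetch (hypothetical hook).
type fetchFunc func(ref string) (io.ReadCloser, error)

// cachingFetcher memoizes decoded schema blobs by blobref string,
// mirroring fs.blobToSchema above (a map instead of an LRU).
type cachingFetcher struct {
	mu    sync.Mutex
	cache map[string]*schemaBlob
	fetch fetchFunc
}

func (c *cachingFetcher) schema(ref string) (*schemaBlob, error) {
	c.mu.Lock()
	ss, ok := c.cache[ref]
	c.mu.Unlock()
	if ok {
		return ss, nil // cache hit: no fetch, no decode
	}
	rc, err := c.fetch(ref)
	if err != nil {
		return nil, err
	}
	defer rc.Close()
	ss = new(schemaBlob)
	if err := json.NewDecoder(rc).Decode(ss); err != nil {
		return nil, err
	}
	if ss.CamliType == "" {
		// Valid JSON but not a schema blob; reject, as above.
		return nil, errors.New("blob lacks camliType")
	}
	c.mu.Lock()
	c.cache[ref] = ss
	c.mu.Unlock()
	return ss, nil
}

func main() {
	c := &cachingFetcher{
		cache: map[string]*schemaBlob{},
		fetch: func(ref string) (io.ReadCloser, error) {
			return io.NopCloser(strings.NewReader(`{"camliType":"file"}`)), nil
		},
	}
	ss, err := c.schema("sha1-deadbeef")
	fmt.Println(ss.CamliType, err) // file <nil>
}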
// populateFile indexes a "file" schema blob: it streams the file's bytes,
// hashing and counting them, sniffs the MIME type, and records which
// optional Unix attributes were set.
func (mi *Indexer) populateFile(client *mysql.Client, blobRef *blobref.BlobRef, ss *schema.Superset) (err os.Error) {
	if ss.Fragment {
		return nil
	}
	seekFetcher, err := blobref.SeekerFromStreamingFetcher(mi.BlobSource)
	if err != nil {
		return err
	}

	sha1 := sha1.New()
	fr := ss.NewFileReader(seekFetcher)
	mime, reader := magic.MimeTypeFromReader(fr)
	n, err := io.Copy(sha1, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("mysqlindex: error indexing file %s: %v", blobRef, err)
		return nil
	}

	attrs := []string{}
	if ss.UnixPermission != "" {
		attrs = append(attrs, "perm")
	}
	if ss.UnixOwnerId != 0 || ss.UnixOwner != "" || ss.UnixGroupId != 0 || ss.UnixGroup != "" {
		attrs = append(attrs, "owner")
	}
	if ss.UnixMtime != "" || ss.UnixCtime != "" || ss.UnixAtime != "" {
		attrs = append(attrs, "time")
	}

	log.Printf("file %s blobref is %s, size %d", blobRef, blobref.FromHash("sha1", sha1), n)
	err = execSQL(client,
		"INSERT IGNORE INTO files (fileschemaref, bytesref, size, filename, mime, setattrs) VALUES (?, ?, ?, ?, ?, ?)",
		blobRef.String(),
		blobref.FromHash("sha1", sha1).String(),
		n,
		ss.FileNameString(),
		mime,
		strings.Join(attrs, ","))
	return
}
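The attrs slice above records which optional Unix fields were present on the schema blob, stored comma-joined in the setattrs column. A standalone sketch of that derivation; unixMeta and setAttrs are hypothetical names mirroring only the Superset fields consulted.

package main

import (
	"fmt"
	"strings"
)

// unixMeta mirrors the Superset fields consulted above (hypothetical subset).
type unixMeta struct {
	UnixPermission                  string
	UnixOwnerId, UnixGroupId        int
	UnixOwner, UnixGroup            string
	UnixMtime, UnixCtime, UnixAtime string
}

// setAttrs reproduces the perm/owner/time flag derivation from populateFile.
func setAttrs(m unixMeta) string {
	attrs := []string{}
	if m.UnixPermission != "" {
		attrs = append(attrs, "perm")
	}
	if m.UnixOwnerId != 0 || m.UnixOwner != "" || m.UnixGroupId != 0 || m.UnixGroup != "" {
		attrs = append(attrs, "owner")
	}
	if m.UnixMtime != "" || m.UnixCtime != "" || m.UnixAtime != "" {
		attrs = append(attrs, "time")
	}
	return strings.Join(attrs, ",")
}

func main() {
	fmt.Println(setAttrs(unixMeta{UnixPermission: "0644", UnixMtime: "2011-01-01"})) // perm,time
}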
// populateFile indexes a "file" schema blob into the bytesfiles table,
// recording the schema blobref, whole-file digest, size, name, and MIME type.
func (mi *Indexer) populateFile(blobRef *blobref.BlobRef, ss *schema.Superset) (err os.Error) {
	seekFetcher, err := blobref.SeekerFromStreamingFetcher(mi.BlobSource)
	if err != nil {
		return err
	}

	sha1 := sha1.New()
	fr, err := ss.NewFileReader(seekFetcher)
	if err != nil {
		log.Printf("mysqlindex: error indexing file %s: %v", blobRef, err)
		return nil
	}
	mime, reader := magic.MimeTypeFromReader(fr)
	n, err := io.Copy(sha1, reader)
	if err != nil {
		// TODO: job scheduling system to retry this spaced
		// out max n times. Right now our options are
		// ignoring this error (forever) or returning the
		// error and making the indexing try again (likely
		// forever failing). Both options suck. For now just
		// log and act like all's okay.
		log.Printf("mysqlindex: error indexing file %s: %v", blobRef, err)
		return nil
	}

	log.Printf("file %s blobref is %s, size %d", blobRef, blobref.FromHash("sha1", sha1), n)
	err = mi.db.Execute(
		"INSERT IGNORE INTO bytesfiles (schemaref, camlitype, wholedigest, size, filename, mime) VALUES (?, ?, ?, ?, ?, ?)",
		blobRef.String(),
		"file",
		blobref.FromHash("sha1", sha1).String(),
		n,
		ss.FileNameString(),
		mime,
	)
	return
}
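magic.MimeTypeFromReader must sniff the leading bytes without losing them, since the same stream is then hashed in full by io.Copy. A minimal sketch of that buffer-and-replay idea, in modern Go: http.DetectContentType stands in for the magic package, and mimeTypeFromReader is a hypothetical reconstruction, not Camlistore's implementation.

package main

import (
	"bytes"
	"crypto/sha1"
	"fmt"
	"io"
	"net/http"
	"strings"
)

// mimeTypeFromReader sniffs the first bytes of r for a MIME type and
// returns a reader that replays them, so the caller still sees the
// whole stream.
func mimeTypeFromReader(r io.Reader) (mime string, replay io.Reader) {
	var buf bytes.Buffer
	_, _ = io.CopyN(&buf, r, 512) // DetectContentType considers at most 512 bytes; EOF here is fine
	mime = http.DetectContentType(buf.Bytes())
	return mime, io.MultiReader(&buf, r)
}

func main() {
	src := strings.NewReader("%PDF-1.4 ...file body...")
	mime, r := mimeTypeFromReader(src)

	// Hash the full stream while counting its bytes, as populateFile does.
	h := sha1.New()
	n, err := io.Copy(h, r)
	fmt.Printf("mime=%s size=%d sha1=%x err=%v\n", mime, n, h.Sum(nil), err)
}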