func listActors(db *imdb.DB, ractor, ractress io.ReadCloser) (err error) { defer csql.Safe(&err) logf("Reading actors list...") // PostgreSQL wants different transactions for each inserter. // SQLite can't handle them. The wrapper type here ensures that // PostgreSQL gets multiple transactions while SQLite only gets one. tx, err := db.Begin() csql.Panic(err) txactor := wrapTx(db, tx) txcredit := txactor.another() txname := txactor.another() txatom := txactor.another() // Drop data from the actor and credit tables. They will be rebuilt below. // The key here is to leave the atom and name tables alone. Invariably, // they will contain stale data. But the only side effect, I think, is // taking up space. // (Stale data can be removed with 'goim clean'.) csql.Truncate(txactor, db.Driver, "actor") csql.Truncate(txcredit.Tx, db.Driver, "credit") actIns, err := csql.NewInserter(txactor.Tx, db.Driver, "actor", "atom_id", "sequence") csql.Panic(err) credIns, err := csql.NewInserter(txcredit.Tx, db.Driver, "credit", "actor_atom_id", "media_atom_id", "character", "position", "attrs") csql.Panic(err) nameIns, err := csql.NewInserter(txname.Tx, db.Driver, "name", "atom_id", "name") csql.Panic(err) atoms, err := newAtomizer(db, txatom.Tx) csql.Panic(err) // Unfortunately, it looks like credits for an actor can appear in // multiple locations. (Or there are different actors that erroneously // have the same name.) added := make(map[imdb.Atom]struct{}, 3000000) n1, nc1 := listActs(db, ractress, atoms, added, actIns, credIns, nameIns) n2, nc2 := listActs(db, ractor, atoms, added, actIns, credIns, nameIns) csql.Panic(actIns.Exec()) csql.Panic(credIns.Exec()) csql.Panic(nameIns.Exec()) csql.Panic(atoms.Close()) csql.Panic(txactor.Commit()) csql.Panic(txcredit.Commit()) csql.Panic(txname.Commit()) csql.Panic(txatom.Commit()) logf("Done. Added %d actors/actresses and %d credits.", n1+n2, nc1+nc2) return }
func startSimpleLoad(db *imdb.DB, table string, columns ...string) *simpleLoad { logf("Reading list to populate table %s...", table) tx, err := db.Begin() csql.Panic(err) csql.Truncate(tx, db.Driver, table) ins, err := csql.NewInserter(tx, db.Driver, table, columns...) csql.Panic(err) atoms, err := newAtomizer(db, nil) // read only csql.Panic(err) return &simpleLoad{db, tx, table, 0, ins, atoms} }
func listMovies(db *imdb.DB, movies io.ReadCloser) (err error) { defer csql.Safe(&err) logf("Reading movies list...") addedMovies, addedTvshows, addedEpisodes := 0, 0, 0 // PostgreSQL wants different transactions for each inserter. // SQLite can't handle them. The wrapper type here ensures that // PostgreSQL gets multiple transactions while SQLite only gets one. tx, err := db.Begin() csql.Panic(err) txmovie := wrapTx(db, tx) txtv := txmovie.another() txepisode := txmovie.another() txname := txmovie.another() txatom := txmovie.another() // Drop data from the movie, tvshow and episode tables. They will be // rebuilt below. // The key here is to leave the atom and name tables alone. Invariably, // they will contain stale data. But the only side effect, I think, is // taking up space. // (Stale data can be removed with 'goim clean'.) csql.Truncate(txmovie, db.Driver, "movie") csql.Truncate(txtv, db.Driver, "tvshow") csql.Truncate(txepisode, db.Driver, "episode") mvIns, err := csql.NewInserter(txmovie.Tx, db.Driver, "movie", "atom_id", "year", "sequence", "tv", "video") csql.Panic(err) tvIns, err := csql.NewInserter(txtv.Tx, db.Driver, "tvshow", "atom_id", "year", "sequence", "year_start", "year_end") csql.Panic(err) epIns, err := csql.NewInserter(txepisode.Tx, db.Driver, "episode", "atom_id", "tvshow_atom_id", "year", "season", "episode_num") csql.Panic(err) nameIns, err := csql.NewInserter(txname.Tx, db.Driver, "name", "atom_id", "name") csql.Panic(err) atoms, err := newAtomizer(db, txatom.Tx) csql.Panic(err) defer func() { csql.Panic(mvIns.Exec()) csql.Panic(tvIns.Exec()) csql.Panic(epIns.Exec()) csql.Panic(nameIns.Exec()) csql.Panic(atoms.Close()) csql.Panic(txmovie.Commit()) csql.Panic(txtv.Commit()) csql.Panic(txepisode.Commit()) csql.Panic(txname.Commit()) csql.Panic(txatom.Commit()) logf("Done. Added %d movies, %d tv shows and %d episodes.", addedMovies, addedTvshows, addedEpisodes) }() listLines(movies, func(line []byte) { line = bytes.TrimSpace(line) fields := splitListLine(line) if len(fields) <= 1 { return } item, value := fields[0], fields[1] switch ent := mediaType(item); ent { case imdb.EntityMovie: m := imdb.Movie{} if !parseMovie(item, &m) { return } if existed, err := parseId(atoms, item, &m.Id); err != nil { csql.Panic(err) } else if !existed { // We only add a name when we add an atom. if err = nameIns.Exec(m.Id, m.Title); err != nil { logf("Full movie info (that failed to add): %#v", m) csql.Panic(ef("Could not add name '%s': %s", m, err)) } } err := mvIns.Exec(m.Id, m.Year, m.Sequence, m.Tv, m.Video) if err != nil { logf("Full movie info (that failed to add): %#v", m) csql.Panic(ef("Could not add movie '%s': %s", m, err)) } addedMovies++ case imdb.EntityTvshow: tv := imdb.Tvshow{} if !parseTvshow(item, &tv) { return } if !parseTvshowRange(value, &tv) { return } if existed, err := parseId(atoms, item, &tv.Id); err != nil { csql.Panic(err) } else if !existed { // We only add a name when we add an atom. if err = nameIns.Exec(tv.Id, tv.Title); err != nil { logf("Full tvshow info (that failed to add): %#v", tv) csql.Panic(ef("Could not add name '%s': %s", tv, err)) } } err := tvIns.Exec(tv.Id, tv.Year, tv.Sequence, tv.YearStart, tv.YearEnd) if err != nil { logf("Full tvshow info (that failed to add): %#v", tv) csql.Panic(ef("Could not add tvshow '%s': %s", tv, err)) } addedTvshows++ case imdb.EntityEpisode: ep := imdb.Episode{} if !parseEpisode(atoms, item, &ep) { return } if !parseEpisodeYear(value, &ep) { return } if existed, err := parseId(atoms, item, &ep.Id); err != nil { csql.Panic(err) } else if !existed { // We only add a name when we add an atom. if err = nameIns.Exec(ep.Id, ep.Title); err != nil { logf("Full episode info (that failed to add): %#v", ep) csql.Panic(ef("Could not add name '%s': %s", ep, err)) } } err := epIns.Exec(ep.Id, ep.TvshowId, ep.Year, ep.Season, ep.EpisodeNum) if err != nil { logf("Full episode info (that failed to add): %#v", ep) csql.Panic(ef("Could not add episode '%s': %s", ep, err)) } addedEpisodes++ default: csql.Panic(ef("Unrecognized entity %s", ent)) } }) return }