Ejemplo n.º 1
0
// listActors reloads the actor and credit tables from the two cast lists
// supplied as ractor and ractress. The actress list is processed first, then
// the actor list; both share the same inserters, atomizer and "already added"
// set.
//
// csql.Safe is deferred with a pointer to err, presumably so that anything
// raised through csql.Panic is reported via the named result.
func listActors(db *imdb.DB, ractor, ractress io.ReadCloser) (err error) {
	defer csql.Safe(&err)

	logf("Reading actors list...")

	// Every inserter is given its own transaction handle. PostgreSQL needs
	// them to be distinct, whereas SQLite cannot cope with more than one, so
	// the wrapper produced by wrapTx decides what another() hands back.
	rootTx, err := db.Begin()
	csql.Panic(err)

	actorTx := wrapTx(db, rootTx)
	creditTx := actorTx.another()
	nameTx := actorTx.another()
	atomTx := actorTx.another()

	// Only the actor and credit tables are emptied before the reload. The
	// atom and name tables are deliberately kept, which means they will
	// accumulate stale rows over time; those only cost space and can be
	// purged with 'goim clean'.
	csql.Truncate(actorTx, db.Driver, "actor")
	csql.Truncate(creditTx.Tx, db.Driver, "credit")

	actorIns, err := csql.NewInserter(
		actorTx.Tx, db.Driver, "actor",
		"atom_id", "sequence",
	)
	csql.Panic(err)

	creditIns, err := csql.NewInserter(
		creditTx.Tx, db.Driver, "credit",
		"actor_atom_id", "media_atom_id", "character", "position", "attrs",
	)
	csql.Panic(err)

	nameIns, err := csql.NewInserter(
		nameTx.Tx, db.Driver, "name",
		"atom_id", "name",
	)
	csql.Panic(err)

	az, err := newAtomizer(db, atomTx.Tx)
	csql.Panic(err)

	// The same person can show up in more than one place in the lists (or
	// distinct people erroneously share a name), so remember which atoms
	// have already been handled across both files.
	seen := make(map[imdb.Atom]struct{}, 3000000)
	actressCount, actressCredits := listActs(
		db, ractress, az, seen, actorIns, creditIns, nameIns)
	actorCount, actorCredits := listActs(
		db, ractor, az, seen, actorIns, creditIns, nameIns)

	// Flush whatever the inserters still hold, then close the atomizer
	// before any transaction is committed.
	csql.Panic(actorIns.Exec())
	csql.Panic(creditIns.Exec())
	csql.Panic(nameIns.Exec())
	csql.Panic(az.Close())

	csql.Panic(actorTx.Commit())
	csql.Panic(creditTx.Commit())
	csql.Panic(nameTx.Commit())
	csql.Panic(atomTx.Commit())

	totalPeople := actressCount + actorCount
	totalCredits := actressCredits + actorCredits
	logf("Done. Added %d actors/actresses and %d credits.",
		totalPeople, totalCredits)
	return err
}
Ejemplo n.º 2
0
// startSimpleLoad prepares a bulk load into a single table. It opens a
// transaction, truncates table inside it, builds an inserter for the given
// columns and creates a read-only atomizer (newAtomizer is called with a nil
// transaction).
//
// Every error encountered is handed to csql.Panic; nothing is recovered
// here, so handling is left to the caller.
func startSimpleLoad(db *imdb.DB, table string, columns ...string) *simpleLoad {
	logf("Reading list to populate table %s...", table)

	loadTx, err := db.Begin()
	csql.Panic(err)

	// The table is emptied first; the load that follows repopulates it.
	csql.Truncate(loadTx, db.Driver, table)

	rowIns, err := csql.NewInserter(loadTx, db.Driver, table, columns...)
	csql.Panic(err)

	// Passing nil instead of a transaction requests a read-only atomizer.
	readOnlyAtoms, err := newAtomizer(db, nil)
	csql.Panic(err)

	load := simpleLoad{db, loadTx, table, 0, rowIns, readOnlyAtoms}
	return &load
}
Ejemplo n.º 3
0
// newAtomizer builds an atomizer for looking up atom identifiers and, when a
// transaction is supplied, for creating new ones.
//
// With a nil tx the atomizer is read-only: no inserter is attached and any
// attempt to write through it will panic. A read-only atomizer may be shared
// between goroutines; a read/write atomizer may NOT.
//
// When tx is non-nil, the caller owns that transaction and must close it,
// ideally right after calling atomizer.Close.
//
// Creating an atomizer is expensive: every row of the atom table is read
// from the database and kept in memory.
func newAtomizer(db *imdb.DB, tx *sql.Tx) (az *atomizer, err error) {
	defer csql.Safe(&err)

	const allAtoms = "SELECT id, hash FROM atom ORDER BY id ASC"

	az = &atomizer{
		db,
		make(atomMap, 1000000),
		0,
		nil,
	}

	// Only a read/write atomizer gets an inserter for the atom table.
	if tx != nil {
		az.ins, err = csql.NewInserter(tx, db.Driver, "atom", "id", "hash")
		csql.Panic(err)
	}

	// Feed the existing atoms to readRow, in ascending id order.
	csql.ForRow(csql.Query(db, allAtoms), az.readRow)

	// Presumably readRow leaves nextId at the largest id seen, so step past
	// it to get the first unused identifier — confirm against readRow.
	az.nextId += 1
	return az, err
}
Ejemplo n.º 4
0
// listMovies reloads the movie, tvshow and episode tables from the list read
// through movies. Each usable line is classified with mediaType and routed to
// the matching inserter; a row is added to the name table only when the
// line's atom did not already exist.
//
// csql.Safe is deferred with a pointer to err, presumably so that anything
// raised through csql.Panic is reported via the named result.
func listMovies(db *imdb.DB, movies io.ReadCloser) (err error) {
	defer csql.Safe(&err)

	logf("Reading movies list...")
	var nMovies, nTvshows, nEpisodes int

	// Every inserter is given its own transaction handle. PostgreSQL needs
	// them to be distinct, whereas SQLite cannot cope with more than one, so
	// the wrapper produced by wrapTx decides what another() hands back.
	rootTx, err := db.Begin()
	csql.Panic(err)

	movieTx := wrapTx(db, rootTx)
	tvTx := movieTx.another()
	episodeTx := movieTx.another()
	nameTx := movieTx.another()
	atomTx := movieTx.another()

	// Only the movie, tvshow and episode tables are emptied before the
	// reload. The atom and name tables are deliberately kept, which means
	// they will accumulate stale rows over time; those only cost space and
	// can be purged with 'goim clean'.
	csql.Truncate(movieTx, db.Driver, "movie")
	csql.Truncate(tvTx, db.Driver, "tvshow")
	csql.Truncate(episodeTx, db.Driver, "episode")

	movieIns, err := csql.NewInserter(
		movieTx.Tx, db.Driver, "movie",
		"atom_id", "year", "sequence", "tv", "video",
	)
	csql.Panic(err)

	tvshowIns, err := csql.NewInserter(
		tvTx.Tx, db.Driver, "tvshow",
		"atom_id", "year", "sequence", "year_start", "year_end",
	)
	csql.Panic(err)

	episodeIns, err := csql.NewInserter(
		episodeTx.Tx, db.Driver, "episode",
		"atom_id", "tvshow_atom_id", "year", "season", "episode_num",
	)
	csql.Panic(err)

	nameIns, err := csql.NewInserter(
		nameTx.Tx, db.Driver, "name",
		"atom_id", "name",
	)
	csql.Panic(err)

	az, err := newAtomizer(db, atomTx.Tx)
	csql.Panic(err)

	// Flush the inserters, close the atomizer and commit once the list has
	// been consumed.
	// NOTE(review): being deferred, this also runs while a panic from the
	// line callback is unwinding, so partially loaded data may still be
	// flushed and committed — confirm that this is intended.
	defer func() {
		csql.Panic(movieIns.Exec())
		csql.Panic(tvshowIns.Exec())
		csql.Panic(episodeIns.Exec())
		csql.Panic(nameIns.Exec())
		csql.Panic(az.Close())

		csql.Panic(movieTx.Commit())
		csql.Panic(tvTx.Commit())
		csql.Panic(episodeTx.Commit())
		csql.Panic(nameTx.Commit())
		csql.Panic(atomTx.Commit())

		logf("Done. Added %d movies, %d tv shows and %d episodes.",
			nMovies, nTvshows, nEpisodes)
	}()

	listLines(movies, func(line []byte) {
		// Lines that do not split into at least an item and a value are
		// skipped.
		fields := splitListLine(bytes.TrimSpace(line))
		if len(fields) < 2 {
			return
		}
		item, value := fields[0], fields[1]

		ent := mediaType(item)
		switch ent {
		case imdb.EntityMovie:
			var m imdb.Movie
			if !parseMovie(item, &m) {
				return
			}

			existed, err := parseId(az, item, &m.Id)
			switch {
			case err != nil:
				csql.Panic(err)
			case !existed:
				// A name row is written only alongside a brand new atom.
				if err = nameIns.Exec(m.Id, m.Title); err != nil {
					logf("Full movie info (that failed to add): %#v", m)
					csql.Panic(ef("Could not add name '%s': %s", m, err))
				}
			}

			err = movieIns.Exec(m.Id, m.Year, m.Sequence, m.Tv, m.Video)
			if err != nil {
				logf("Full movie info (that failed to add): %#v", m)
				csql.Panic(ef("Could not add movie '%s': %s", m, err))
			}
			nMovies++

		case imdb.EntityTvshow:
			var tv imdb.Tvshow
			if !parseTvshow(item, &tv) || !parseTvshowRange(value, &tv) {
				return
			}

			existed, err := parseId(az, item, &tv.Id)
			switch {
			case err != nil:
				csql.Panic(err)
			case !existed:
				// A name row is written only alongside a brand new atom.
				if err = nameIns.Exec(tv.Id, tv.Title); err != nil {
					logf("Full tvshow info (that failed to add): %#v", tv)
					csql.Panic(ef("Could not add name '%s': %s", tv, err))
				}
			}

			err = tvshowIns.Exec(tv.Id, tv.Year, tv.Sequence,
				tv.YearStart, tv.YearEnd)
			if err != nil {
				logf("Full tvshow info (that failed to add): %#v", tv)
				csql.Panic(ef("Could not add tvshow '%s': %s", tv, err))
			}
			nTvshows++

		case imdb.EntityEpisode:
			var ep imdb.Episode
			if !parseEpisode(az, item, &ep) || !parseEpisodeYear(value, &ep) {
				return
			}

			existed, err := parseId(az, item, &ep.Id)
			switch {
			case err != nil:
				csql.Panic(err)
			case !existed:
				// A name row is written only alongside a brand new atom.
				if err = nameIns.Exec(ep.Id, ep.Title); err != nil {
					logf("Full episode info (that failed to add): %#v", ep)
					csql.Panic(ef("Could not add name '%s': %s", ep, err))
				}
			}

			err = episodeIns.Exec(ep.Id, ep.TvshowId, ep.Year,
				ep.Season, ep.EpisodeNum)
			if err != nil {
				logf("Full episode info (that failed to add): %#v", ep)
				csql.Panic(ef("Could not add episode '%s': %s", ep, err))
			}
			nEpisodes++

		default:
			csql.Panic(ef("Unrecognized entity %s", ent))
		}
	})
	return err
}