Esempio n. 1
0
// Tables lists the names of all tables in the database, sorted
// alphabetically in ascending order. The table named "migration_version" is
// always left out of the result.
//
// Only the "postgres" and "sqlite3" drivers are recognized; any other driver
// results in an error.
func (db *DB) Tables() (tables []string, err error) {
	defer csql.Safe(&err)

	// Each driver exposes its table catalog through a different relation.
	var query string
	switch driver := db.Driver; driver {
	case "sqlite3":
		query = `
			SELECT tbl_name FROM sqlite_master
			WHERE type = 'table'
			ORDER BY tbl_name ASC
		`
	case "postgres":
		query = `
			SELECT tablename FROM pg_tables
			WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
			ORDER BY tablename ASC
		`
	default:
		return nil, ef("Unrecognized database driver: %s", driver)
	}

	csql.ForRow(csql.Query(db, query), func(scanner csql.RowScanner) {
		var name string
		csql.Scan(scanner, &name)
		if name == "migration_version" {
			return
		}
		tables = append(tables, name)
	})
	return
}
Esempio n. 2
0
// Set stores the given plain text password for the user uniquely identified
// by id. The password is hashed with bcrypt (at bcrypt.DefaultCost) before it
// is written. An error is returned if hashing fails or if the password cannot
// be stored.
//
// Set works for users that do not have a row yet: a row is inserted when none
// exists for id, and updated otherwise.
func (s *Store) Set(id, password string) (cerr error) {
	defer csql.Safe(&cerr)

	hash, err := bcrypt.GenerateFromPassword([]byte(password), bcrypt.DefaultCost)
	if err != nil {
		return err
	}

	// The count-then-write sequence below is not atomic, so it is guarded by
	// a per-id lock. An upsert would make the lock unnecessary (Postgres can
	// do it), but this approach is simpler.
	locker.Lock(id)
	defer locker.Unlock(id)

	switch csql.Count(s, `
		SELECT COUNT(*) FROM `+SqlTableName+` WHERE id = $1
		`, id) {
	case 0:
		csql.Exec(s, `
			INSERT INTO `+SqlTableName+` (id, hash) VALUES ($1, $2)
			`, id, hash)
	default:
		csql.Exec(s, `
			UPDATE `+SqlTableName+` SET id = $1, hash = $2 WHERE id = $1
			`, id, hash)
	}
	return nil
}
Esempio n. 3
0
// doIndices asks getSql for one SQL statement per applicable entry of the
// package-level indices list, joins the statements with "; " and executes
// them as a single batch.
//
// An index is applicable when its table is listed in tables (or when tables
// is empty, meaning every table). Fulltext indices are skipped whenever
// db.IsFuzzyEnabled reports false; on PostgreSQL a log message explains the
// skip. Nothing is executed when no index is applicable.
func doIndices(
	db *DB,
	getSql func(index, *DB) string,
	tables ...string,
) (err error) {
	defer csql.Safe(&err)

	fuzzy := db.IsFuzzyEnabled()
	batch, pending := "", false
	for _, idx := range indices {
		if idx.isFulltext() && !fuzzy {
			// Only show the error message if we're on PostgreSQL.
			if db.Driver == "postgres" {
				log.Printf("Skipping fulltext index '%s' since "+
					"the pg_trgm extension is not enabled.", idx.sqlName())
			}
			continue
		}
		if len(tables) > 0 && !fun.In(idx.table, tables) {
			continue
		}
		batch += getSql(idx, db) + "; "
		pending = true
	}
	if !pending {
		return
	}
	csql.Exec(db, batch)
	return
}
Esempio n. 4
0
// Get looks up the password hash currently stored for the user identified by
// id and returns it. Failures raised by the csql helpers are reported through
// err.
func (s *Store) Get(id string) (hash []byte, err error) {
	defer csql.Safe(&err)

	query := `
		SELECT hash FROM ` + SqlTableName + ` WHERE id = $1
		`
	csql.Scan(s.QueryRow(query, id), &hash)
	return
}
Esempio n. 5
0
// listMovieLinks reads movie link rows from r and adds one row to the "link"
// table (columns atom_id, link_type, link_atom_id, entity) for each row whose
// first field parses as a named attribute that refers to a known media
// entity. Rows mentioning "(VG)" in their first field are ignored.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listMovieLinks(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "link", "atom_id",
		"link_type", "link_atom_id", "entity")
	defer table.done()

	// movieLink is the parsed form of a single link field.
	type movieLink struct {
		kind   string
		atom   imdb.Atom
		entity imdb.EntityKind
	}

	// parseLink decodes a "name data" attribute into a movieLink. It reports
	// false (after logging why) if any step of the decoding fails.
	parseLink := func(az *atomizer, text []byte) (movieLink, bool) {
		attrName, data, ok := parseNamedAttr(text)
		if !ok {
			logf("Could not parse named attribute '%s'. Skipping.", text)
			return movieLink{}, false
		}
		id, ok := az.atomOnlyIfExist(data)
		if !ok {
			warnf("Could not find id for '%s'. Skipping.", data)
			return movieLink{}, false
		}
		ent, ok := parseMediaEntity(data)
		if !ok {
			logf("Could not find entity type for '%s'. Skipping.", data)
			return movieLink{}, false
		}
		return movieLink{unicode(attrName), id, ent.Type()}, true
	}

	videoGame := []byte("(VG)")
	listAttrRowIds(r, table.atoms, func(id imdb.Atom, line, ent, row []byte) {
		fields := splitListLine(row)
		if len(fields) == 0 || bytes.Contains(fields[0], videoGame) {
			return
		}
		link, ok := parseLink(table.atoms, fields[0])
		if !ok {
			return
		}
		table.add(line, id, link.kind, link.atom, link.entity.String())
	})
	return
}
Esempio n. 6
0
// listActors rebuilds the actor and credit tables from the two list readers
// given: ractress is processed first, followed by ractor. The actor and
// credit tables are truncated up front, while the atom and name tables are
// only added to. When everything has been flushed and committed, a summary of
// the number of actors/actresses and credits added is logged.
//
// Errors passed to csql.Panic are expected to surface through err via the
// deferred csql.Safe call (NOTE(review): presumably by recovering the panic;
// confirm against the csql package).
func listActors(db *imdb.DB, ractor, ractress io.ReadCloser) (err error) {
	defer csql.Safe(&err)

	logf("Reading actors list...")

	// PostgreSQL wants different transactions for each inserter.
	// SQLite can't handle them. The wrapper type here ensures that
	// PostgreSQL gets multiple transactions while SQLite only gets one.
	tx, err := db.Begin()
	csql.Panic(err)

	// One (wrapped) transaction per table that is written to below.
	txactor := wrapTx(db, tx)
	txcredit := txactor.another()
	txname := txactor.another()
	txatom := txactor.another()

	// Drop data from the actor and credit tables. They will be rebuilt below.
	// The key here is to leave the atom and name tables alone. Invariably,
	// they will contain stale data. But the only side effect, I think, is
	// taking up space.
	// (Stale data can be removed with 'goim clean'.)
	csql.Truncate(txactor, db.Driver, "actor")
	csql.Truncate(txcredit.Tx, db.Driver, "credit")

	// Create an inserter for each table, each bound to its own transaction.
	actIns, err := csql.NewInserter(txactor.Tx, db.Driver, "actor",
		"atom_id", "sequence")
	csql.Panic(err)
	credIns, err := csql.NewInserter(txcredit.Tx, db.Driver, "credit",
		"actor_atom_id", "media_atom_id", "character", "position", "attrs")
	csql.Panic(err)
	nameIns, err := csql.NewInserter(txname.Tx, db.Driver, "name",
		"atom_id", "name")
	csql.Panic(err)
	// A non-nil transaction is handed to newAtomizer, so the atomizer is
	// created with an inserter for the atom table.
	atoms, err := newAtomizer(db, txatom.Tx)
	csql.Panic(err)

	// Unfortunately, it looks like credits for an actor can appear in
	// multiple locations. (Or there are different actors that erroneously
	// have the same name.)
	// The same 'added' set is shared by both passes below.
	added := make(map[imdb.Atom]struct{}, 3000000)
	n1, nc1 := listActs(db, ractress, atoms, added, actIns, credIns, nameIns)
	n2, nc2 := listActs(db, ractor, atoms, added, actIns, credIns, nameIns)

	// Call Exec with no arguments on every inserter and close the atomizer
	// before any transaction is committed.
	// NOTE(review): presumably the argument-less Exec flushes buffered rows;
	// confirm against csql.Inserter.
	csql.Panic(actIns.Exec())
	csql.Panic(credIns.Exec())
	csql.Panic(nameIns.Exec())
	csql.Panic(atoms.Close())

	csql.Panic(txactor.Commit())
	csql.Panic(txcredit.Commit())
	csql.Panic(txname.Commit())
	csql.Panic(txatom.Commit())

	logf("Done. Added %d actors/actresses and %d credits.", n1+n2, nc1+nc2)
	return
}
Esempio n. 7
0
// listTaglines reads tagline items from r and adds each one to the "tagline"
// table (columns atom_id, tag). Items are located by listPrefixItems using
// the '#' and tab byte sequences.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listTaglines(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "tagline", "atom_id", "tag")
	defer table.done()

	listPrefixItems(r, table.atoms, []byte("#"), []byte("\t"),
		func(id imdb.Atom, item []byte) {
			table.add(item, id, unicode(item))
		})
	return
}
Esempio n. 8
0
// listRunningTimes reads running time rows from r and adds them to the
// "running_time" table (columns atom_id, country, minutes, attrs).
//
// The first field of a row is either "<minutes>" or "<country>:<minutes>".
// Rows whose minutes portion is not an integer are skipped without a log
// message. The second field, when present, is stored as attrs.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listRunningTimes(
	db *imdb.DB,
	atoms *atomizer,
	r io.ReadCloser,
) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "running_time",
		"atom_id", "country", "minutes", "attrs")
	defer table.done()

	// parse splits an optional "country:" prefix off text and converts the
	// remainder to a number of minutes.
	parse := func(text []byte) (string, int, bool) {
		country, runtime := "", text
		if colon := bytes.IndexByte(text, ':'); colon >= 0 {
			country = unicode(bytes.TrimSpace(text[:colon]))
			runtime = text[colon+1:]
		}
		minutes, convErr := strconv.Atoi(unicode(bytes.TrimSpace(runtime)))
		if convErr != nil {
			// There are a lot of these.
			// From the looks of it, IMDb's web site just ignores them.
			// It's almost like it's freeform text... Yikes.
			return "", 0, false
		}
		return country, minutes, true
	}

	listAttrRowIds(r, table.atoms, func(id imdb.Atom, line, ent, row []byte) {
		fields := splitListLine(row)
		if len(fields) == 0 {
			return
		}
		country, minutes, ok := parse(fields[0])
		if !ok {
			return
		}
		var attrs []byte
		if len(fields) > 1 {
			attrs = fields[1]
		}
		table.add(line, id, country, minutes, unicode(attrs))
	})
	return
}
Esempio n. 9
0
// listGenres reads genre rows from r and adds the lowercased first field of
// every non-empty row to the "genre" table (columns atom_id, name).
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listGenres(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "genre", "atom_id", "name")
	defer table.done()

	listAttrRowIds(r, table.atoms, func(id imdb.Atom, line, ent, row []byte) {
		if fields := splitListLine(row); len(fields) > 0 {
			genre := unicode(fields[0])
			table.add(line, id, strings.ToLower(genre))
		}
	})
	return
}
Esempio n. 10
0
// listAlternateVersions reads alternate version items from r and adds each
// one to the "alternate_version" table (columns atom_id, about). Items are
// located by listPrefixItems using the '#' and '-' byte sequences.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listAlternateVersions(
	db *imdb.DB,
	atoms *atomizer,
	r io.ReadCloser,
) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "alternate_version", "atom_id", "about")
	defer table.done()

	listPrefixItems(r, table.atoms, []byte("#"), []byte("-"),
		func(id imdb.Atom, item []byte) {
			table.add(item, id, unicode(item))
		})
	return
}
Esempio n. 11
0
// newAtomizer builds an atomizer for looking up and (optionally) creating
// atom identifiers. When tx is nil the atomizer has no inserter and is
// therefore read-only; attempting to write with it will cause a panic.
//
// A read-only atomizer may be shared between goroutines, but a read/write
// atomizer may NOT be used concurrently.
//
// For a read/write atomizer the caller owns the transaction and is
// responsible for closing it, which should happen immediately after calling
// atomizer.Close.
//
// This function is costly: every row of the atom table is read into memory.
func newAtomizer(db *imdb.DB, tx *sql.Tx) (az *atomizer, err error) {
	defer csql.Safe(&err)

	const initialCapacity = 1000000
	az = &atomizer{db, make(atomMap, initialCapacity), 0, nil}
	if writable := tx != nil; writable {
		inserter, insErr := csql.NewInserter(tx, db.Driver, "atom", "id", "hash")
		az.ins = inserter
		csql.Panic(insErr)
	}

	// Feed every existing atom to readRow, in ascending id order, then
	// advance nextId by one.
	csql.ForRow(
		csql.Query(db, "SELECT id, hash FROM atom ORDER BY id ASC"),
		az.readRow)
	az.nextId += 1
	return
}
Esempio n. 12
0
// listPlots reads plot summaries from r and adds them to the "plot" table
// (columns atom_id, entry, by).
//
// Lines are interpreted by prefix: "MV:" names the entity the following
// plots belong to, "PL:" lines are joined (space separated) into the current
// plot text, and "BY:" supplies the author and completes the current plot.
// Plots for entities that have no known atom are dropped with a warning.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listPlots(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "plot", "atom_id", "entry", "by")
	defer table.done()

	var (
		curAtom imdb.Atom
		curPlot []byte
		curBy   []byte
	)

	// flush stores the pending plot (if there is one for a known atom) and
	// always clears the pending plot text and author.
	flush := func(line []byte) {
		if curAtom > 0 && len(curPlot) > 0 {
			table.add(line, curAtom,
				unicode(bytes.TrimSpace(curPlot)),
				unicode(bytes.TrimSpace(curBy)))
		}
		curPlot, curBy = nil, nil
	}

	listLines(r, func(line []byte) {
		switch {
		case bytes.HasPrefix(line, []byte("MV:")):
			if len(curPlot) > 0 {
				flush(line)
			}
			entity := bytes.TrimSpace(line[3:])
			id, found := table.atoms.atomOnlyIfExist(entity)
			if !found {
				warnf("Could not find id for '%s'. Skipping.", entity)
				curAtom, curPlot, curBy = 0, nil, nil
				return
			}
			curAtom = id
		case len(line) == 0:
			// Blank lines carry no information.
		case bytes.HasPrefix(line, []byte("PL:")):
			curPlot = append(curPlot, bytes.TrimSpace(line[3:])...)
			curPlot = append(curPlot, ' ')
		case bytes.HasPrefix(line, []byte("BY:")):
			curBy = line[3:]
			flush(line)
		}
	})
	flush([]byte("UNKNOWN (last line?)"))
	return
}
Esempio n. 13
0
// listRatings reads rating lines from r and adds them to the "rating" table
// (columns atom_id, votes, rank). The rank is stored as an integer: ten times
// the parsed value, truncated by the int conversion.
//
// A data line has at least four whitespace separated fields; the second is
// the vote count, the third the rank, and everything from the fourth on is
// the entity name. Lines whose first field is "New" are skipped, and reading
// effectively stops at the first line starting with "REPORT FORMAT".
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listRatings(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "rating", "atom_id", "votes", "rank")
	defer table.done()

	// Set once the "REPORT FORMAT" line is seen; later lines are ignored.
	finished := false
	listLines(r, func(line []byte) {
		if finished {
			return
		}
		if bytes.HasPrefix(line, []byte("REPORT FORMAT")) {
			finished = true
			return
		}

		fields := bytes.Fields(line)
		if len(fields) < 4 || string(fields[0]) == "New" {
			return
		}

		entity := bytes.Join(fields[3:], []byte(" "))
		id, found := table.atoms.atomOnlyIfExist(entity)
		if !found {
			warnf("Could not find id for '%s'. Skipping.", entity)
			return
		}

		var votes int
		if perr := parseInt(fields[1], &votes); perr != nil {
			logf("Could not parse integer '%s' in: '%s'", fields[1], line)
			return
		}
		var rank float64
		if perr := parseFloat(fields[2], &rank); perr != nil {
			logf("Could not parse float '%s' in: '%s'", fields[2], line)
			return
		}
		table.add(line, id, votes, int(10*rank))
	})
	return
}
Esempio n. 14
0
// attrs loads the simple attribute rows belonging to entity e from tableName
// and returns them as a slice whose element type is the struct type that zero
// points to. Both the SELECT query and the result slice are built with
// reflection from that struct type.
//
// zero MUST be a pointer to a simple struct, i.e. a struct whose fields can
// all be encoded/decoded as declared by the 'database/sql' package. A field's
// column name is the value of its 'imdb_name' struct tag when set, and the
// lowercase field name otherwise.
//
// idColumn is compared against e.Ident() to select rows. extra is appended
// verbatim to the end of the query, which is useful for ORDER BY or LIMIT
// clauses.
func attrs(
	zero interface{},
	db csql.Queryer,
	e Entity,
	tableName string,
	idColumn string,
	extra string,
) (v interface{}, err error) {
	defer csql.Safe(&err)

	structType := reflect.ValueOf(zero).Elem().Type()
	n := structType.NumField()

	// Derive one column name per struct field.
	columns := make([]string, 0, n)
	for i := 0; i < n; i++ {
		field := structType.Field(i)
		name := field.Tag.Get("imdb_name")
		if name == "" {
			name = strings.ToLower(field.Name)
		}
		columns = append(columns, name)
	}

	// Publish an empty slice right away so that v already has the expected
	// type if querying fails below.
	result := reflect.MakeSlice(reflect.SliceOf(structType), 0, 10)
	v = result.Interface()

	query := sf("SELECT %s FROM %s WHERE %s = $1 %s",
		strings.Join(columns, ", "), tableName, idColumn, extra)
	csql.ForRow(csql.Query(db, query, e.Ident()), func(s csql.RowScanner) {
		// Scan into freshly allocated values, one per field.
		targets := make([]interface{}, n)
		for i := range targets {
			targets[i] = reflect.New(structType.Field(i).Type).Interface()
		}
		csql.Scan(s, targets...)

		// Copy the scanned values into a new struct and collect it.
		row := reflect.New(structType).Elem()
		for i, target := range targets {
			row.Field(i).Set(reflect.ValueOf(target).Elem())
		}
		result = reflect.Append(result, row)
	})
	// reflect.Append may have returned a new slice value, so refresh v.
	v = result.Interface()
	return
}
Esempio n. 15
0
// listLanguages reads language rows from r and adds them to the "language"
// table (columns atom_id, name, attrs). The first field of a row is the
// language name; the second field, when present, is stored as attrs. Rows
// without any fields are skipped.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listLanguages(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "language", "atom_id", "name", "attrs")
	defer table.done()

	listAttrRowIds(r, table.atoms, func(id imdb.Atom, line, ent, row []byte) {
		fields := splitListLine(row)
		switch len(fields) {
		case 0:
			return
		case 1:
			// No attributes on this row; nil attrs are still converted below.
			var none []byte
			table.add(line, id, unicode(fields[0]), unicode(none))
		default:
			table.add(line, id, unicode(fields[0]), unicode(fields[1]))
		}
	})
	return
}
Esempio n. 16
0
// listLiterature reads literature references from r and adds them to the
// "literature" table (columns atom_id, lit_type, ref). Every item must have
// the form "<type>:<reference>"; items without a colon are logged and
// skipped.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listLiterature(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "literature", "atom_id", "lit_type", "ref")
	defer table.done()

	listPrefixItems(r, table.atoms, []byte("MOVI:"), nil,
		func(id imdb.Atom, item []byte) {
			colon := bytes.IndexByte(item, ':')
			if colon < 0 {
				logf("Badly formatted literature reference (skipping): '%s'",
					item)
				return
			}
			litType := bytes.TrimSpace(item[:colon])
			ref := bytes.TrimSpace(item[colon+1:])
			table.add(ref, id, unicode(litType), unicode(ref))
		})
	return
}
Esempio n. 17
0
// listGoofs reads goof items from r and adds them to the "goof" table
// (columns atom_id, goof_type, entry). An item of the form "<type>:<text>" is
// split at the first colon; an item without a colon is stored whole with an
// empty goof type.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listGoofs(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "goof", "atom_id", "goof_type", "entry")
	defer table.done()

	listPrefixItems(r, table.atoms, []byte("#"), []byte("-"),
		func(id imdb.Atom, item []byte) {
			goofType, entry := "", item
			if colon := bytes.IndexByte(item, ':'); colon != -1 {
				goofType = unicode(bytes.TrimSpace(item[:colon]))
				entry = bytes.TrimSpace(item[colon+1:])
			}
			table.add(entry, id, goofType, unicode(entry))
		})
	return
}
Esempio n. 18
0
// listColorInfo reads color information rows from r and adds them to the
// "color_info" table (columns atom_id, color, attrs). The first field of a
// row must be exactly "Color" (stored as true) or "Black and White" (stored
// as false); anything else is logged and skipped. The second field, when
// present, is stored as attrs.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listColorInfo(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "color_info",
		"atom_id", "color", "attrs")
	defer table.done()

	// parse maps the textual color description onto a boolean.
	parse := func(text []byte) (isColor, ok bool) {
		switch string(text) {
		case "Color":
			return true, true
		case "Black and White":
			return false, true
		}
		logf("Could not parse '%s' as color information.", text)
		return false, false
	}

	listAttrRowIds(r, table.atoms, func(id imdb.Atom, line, ent, row []byte) {
		fields := splitListLine(row)
		if len(fields) == 0 {
			return
		}
		color, ok := parse(fields[0])
		if !ok {
			return
		}
		var attrs []byte
		if len(fields) > 1 {
			attrs = fields[1]
		}
		table.add(line, id, color, unicode(attrs))
	})
	return
}
Esempio n. 19
0
// listAkaTitles reads alternate title rows from r and adds them to the
// "aka_title" table (columns atom_id, title, attrs). The first field of a row
// must be a named attribute called "aka" whose data parses as a media entity;
// the entity's name becomes the title. The second field, when present, is
// stored as attrs.
//
// Rows that cannot be parsed are skipped; a log message is written unless the
// first field mentions "(VG)".
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listAkaTitles(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "aka_title", "atom_id", "title", "attrs")
	defer table.done()

	// parseTitle extracts the title from an "aka" named attribute.
	parseTitle := func(text []byte) (string, bool) {
		attrName, data, ok := parseNamedAttr(text)
		if !ok || !bytes.Equal(attrName, []byte("aka")) {
			return "", false
		}
		media, ok := parseMediaEntity(data)
		if !ok {
			return "", false
		}
		return media.Name(), true
	}

	listAttrRowIds(r, table.atoms, func(id imdb.Atom, line, ent, row []byte) {
		fields := splitListLine(row)
		if len(fields) == 0 {
			return
		}
		title, ok := parseTitle(fields[0])
		if !ok {
			if !bytes.Contains(fields[0], []byte("(VG)")) {
				logf("Could not parse aka title from '%s'", fields[0])
			}
			return
		}
		var attrs []byte
		if len(fields) > 1 {
			attrs = fields[1]
		}
		table.add(line, id, title, unicode(attrs))
	})
	return
}
Esempio n. 20
0
// Results runs the search described by s and returns every matching result.
//
// When fuzzy searching is enabled on the database, the similarity threshold
// is applied first. Any configured sub-searches (TV show, credits, cast) are
// then resolved through the searcher's chooser, in that order; the first one
// that fails aborts the search with its error. Finally the main query is
// executed, with the space-joined name as its only parameter when a name was
// given.
func (s *Searcher) Results() (rs []Result, err error) {
	defer csql.Safe(&err)

	// Set the similarity threshold first.
	if s.db.IsFuzzyEnabled() {
		csql.Exec(s.db, "SELECT set_limit($1)", s.similarThreshold)
	}

	if sub := s.subTvshow; sub != nil {
		if chooseErr := sub.choose(s, s.chooser); chooseErr != nil {
			return nil, chooseErr
		}
	}
	if sub := s.subCredits; sub != nil {
		if chooseErr := sub.choose(s, s.chooser); chooseErr != nil {
			return nil, chooseErr
		}
	}
	if sub := s.subCast; sub != nil {
		if chooseErr := sub.choose(s, s.chooser); chooseErr != nil {
			return nil, chooseErr
		}
	}

	var rows *sql.Rows
	if query := s.sql(); len(s.name) > 0 {
		rows = csql.Query(s.db, query, strings.Join(s.name, " "))
	} else {
		rows = csql.Query(s.db, query)
	}
	csql.ForRow(rows, func(scanner csql.RowScanner) {
		var (
			res    Result
			entity string
		)
		csql.Scan(scanner, &entity, &res.Id, &res.Name, &res.Year,
			&res.Similarity, &res.Attrs,
			&res.Rank.Votes, &res.Rank.Rank,
			&res.Credit.ActorId, &res.Credit.MediaId, &res.Credit.Character,
			&res.Credit.Position, &res.Credit.Attrs)
		res.Entity = imdb.Entities[entity]
		rs = append(rs, res)
	})
	return
}
Esempio n. 21
0
// listQuotes reads quotes from r and adds them to the "quote" table (columns
// atom_id, entry).
//
// A line starting with '#' names the entity the following quotes belong to,
// and an empty line ends the current quote. Every other line is appended to
// the current quote. Quotes for entities that have no known atom are dropped
// with a warning.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listQuotes(db *imdb.DB, atoms *atomizer, r io.ReadCloser) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "quote", "atom_id", "entry")
	defer table.done()

	var (
		curAtom  imdb.Atom
		curQuote []byte
	)

	// flush stores the pending quote (if there is one for a known atom) and
	// always clears the pending text.
	flush := func(line []byte) {
		if curAtom > 0 && len(curQuote) > 0 {
			table.add(line, curAtom, unicode(bytes.TrimSpace(curQuote)))
		}
		curQuote = nil
	}

	listLines(r, func(line []byte) {
		switch {
		case bytes.HasPrefix(line, []byte("#")):
			flush(line)
			entity := bytes.TrimSpace(line[1:])
			id, found := table.atoms.atomOnlyIfExist(entity)
			if !found {
				warnf("Could not find id for '%s'. Skipping.", entity)
				curAtom, curQuote = 0, nil
				return
			}
			curAtom = id
		case len(line) == 0:
			flush(line)
		default:
			// A line that starts with a space continues the previous line.
			// Any other line begins a new character's part of the quote, so
			// it is separated from the existing text by a new line
			// character; everything stays a single database entry.
			continuation := line[0] == ' '
			if !continuation && len(curQuote) > 0 {
				curQuote = append(curQuote, '\n')
			}
			curQuote = append(curQuote, bytes.TrimSpace(line)...)
			curQuote = append(curQuote, ' ')
		}
	})
	flush([]byte("UNKNOWN (last line?)"))
	return
}
Esempio n. 22
0
// listReleaseDates reads release date rows from r and adds them to the
// "release_date" table (columns atom_id, country, released, attrs).
//
// The first field of a row is either "<date>" or "<country>:<date>", where
// the date is written as "2 January 2006", "January 2006" or "2006". Rows
// whose date cannot be parsed are reported with pef and skipped. The second
// field, when present, is stored as attrs. Rows without any fields are
// skipped silently.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listReleaseDates(
	db *imdb.DB,
	atoms *atomizer,
	r io.ReadCloser,
) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "release_date",
		"atom_id", "country", "released", "attrs")
	defer table.done()

	// parseDate splits an optional "country:" prefix off text and parses the
	// remainder as a date, picking the layout from the number of
	// whitespace separated fields in it. On success the country and the date
	// (converted to UTC) are written through the pointers given.
	parseDate := func(text []byte, country *string, released *time.Time) bool {
		sep := bytes.IndexByte(text, ':')
		var date []byte
		if sep > -1 {
			*country = unicode(bytes.TrimSpace(text[:sep]))
			date = bytes.TrimSpace(text[sep+1:])
		} else {
			*country = ""
			date = bytes.TrimSpace(text)
		}

		var layout string
		switch spaces := len(bytes.Fields(date)); spaces {
		case 3:
			layout = "2 January 2006"
		case 2:
			layout = "January 2006"
		case 1:
			layout = "2006"
		default:
			pef("Too many fields in date '%s' (%d) in '%s'", date, spaces, text)
			return false
		}

		t, err := time.Parse(layout, unicode(date))
		if err != nil {
			pef("Could not parse date '%s': %s", date, err)
			return false
		}
		*released = t.UTC()
		return true
	}

	listAttrRowIds(r, table.atoms, func(id imdb.Atom, line, ent, row []byte) {
		var (
			country string
			date    time.Time
			attrs   string
		)

		rowFields := splitListLine(row)
		// Guard against rows without any fields: indexing rowFields[0]
		// below would otherwise panic with an index out of range error.
		if len(rowFields) == 0 {
			return
		}
		if !parseDate(rowFields[0], &country, &date) {
			pef("Could not extract date from '%s'. Skipping.", line)
			return
		}
		if len(rowFields) > 1 {
			attrs = unicode(rowFields[1])
		}
		table.add(line, id, country, date, attrs)
	})
	return
}
Esempio n. 23
0
// listMPAARatings reads MPAA rating entries from r and adds them to the
// "mpaa_rating" table (columns atom_id, rating, reason).
//
// A line starting with "MV: " names the entity the following lines belong
// to. Lines starting with "RE: " carry the rating and the reason: the first
// such line of an entry must begin with "Rated"/"rated" (or directly with
// "PG") followed by one of G, PG, PG-13, R or NC-17, and all "RE: " lines are
// joined (space separated) into the reason. Empty lines and lines starting
// with '-' are ignored. Entries with a missing or unrecognized rating, and
// entries for entities that have no known atom, are skipped.
//
// The atoms parameter is not consulted; the atomizer attached to the load
// table (table.atoms) is used instead.
func listMPAARatings(
	db *imdb.DB,
	atoms *atomizer,
	r io.ReadCloser,
) (err error) {
	defer csql.Safe(&err)
	table := startSimpleLoad(db, "mpaa_rating", "atom_id", "rating", "reason")
	defer table.done()

	var curAtom imdb.Atom
	var curRating string
	var curReason []byte
	var ok bool
	// reset forgets everything about the entry currently being read.
	reset := func() {
		curAtom, curRating, curReason = 0, "", nil
	}
	// add stores the pending entry, provided that both a rating and a reason
	// have been collected, and then resets the state.
	add := func(line []byte) {
		if len(curReason) > 0 && len(curRating) > 0 {
			curReason = bytes.TrimSpace(curReason)
			table.add(line, curAtom, curRating, unicode(curReason))
			reset()
		}
	}
	listLines(r, func(line []byte) {
		if len(line) == 0 || line[0] == '-' {
			return
		}
		if bytes.HasPrefix(line, []byte("MV: ")) {
			// A new entity begins; store whatever was pending first.
			add(line)
			entity := bytes.TrimSpace(line[3:])
			if curAtom, ok = table.atoms.atomOnlyIfExist(entity); !ok {
				warnf("Could not find id for '%s'. Skipping.", entity)
				reset()
			}
			return
		}
		// With curAtom reset to zero, the remaining lines of a skipped entry
		// are ignored here.
		if curAtom == 0 || !bytes.HasPrefix(line, []byte("RE: ")) {
			return
		}
		line = bytes.TrimSpace(line[3:])
		if len(curReason) == 0 {
			// This is the first "RE: " line of the entry, so it must start
			// with the rating.
			if bytes.HasPrefix(line, []byte("PG")) {
				// Weird corner case for "The Honeymooners (2005)". Bah.
				line = bytes.TrimSpace(line[2:])
				curRating = "PG"
			} else {
				if !bytes.HasPrefix(line, []byte("Rated ")) &&
					!bytes.HasPrefix(line, []byte("rated ")) {
					logf("Could not find rating in '%s'. Skipping.", line)
					reset()
					return
				}
				line = bytes.TrimSpace(line[5:])
				if bytes.HasPrefix(line, []byte("PG- 13")) {
					// Special case for malformed rating for
					// X-Men: Days of Future Past
					curRating = "PG-13"
				} else {
					nextSpace := bytes.IndexByte(line, ' ')
					if nextSpace == -1 {
						curRating = unicode(line)
					} else {
						curRating = unicode(line[:nextSpace])
						line = line[nextSpace+1:]
					}
				}
			}
			switch curRating {
			case "G", "PG", "PG-13", "R", "NC-17": // ok
			default:
				logf("Unrecognized rating '%s' in '%s'. Skipping.",
					curRating, line)
				reset()
				// Stop here. Falling through would append this line to the
				// reason that was just cleared, and that stale text would
				// make the next entry skip its rating detection (its first
				// "RE: " line would no longer see an empty reason).
				return
			}
		}
		curReason = append(curReason, line...)
		curReason = append(curReason, ' ')
	})
	add([]byte("UNKNOWN (last line?)"))
	return
}
Esempio n. 24
0
// listMovies rebuilds the movie, tvshow and episode tables from the list
// read from movies. The three tables are truncated up front, while the atom
// and name tables are only added to. Each line is classified with mediaType
// and inserted into the matching table; a name row is added only when the
// atom for the item did not exist before. A summary of how many movies, TV
// shows and episodes were added is logged at the end.
//
// Errors passed to csql.Panic are expected to surface through err via the
// deferred csql.Safe call (NOTE(review): presumably by recovering the panic;
// confirm against the csql package).
func listMovies(db *imdb.DB, movies io.ReadCloser) (err error) {
	defer csql.Safe(&err)

	logf("Reading movies list...")
	addedMovies, addedTvshows, addedEpisodes := 0, 0, 0

	// PostgreSQL wants different transactions for each inserter.
	// SQLite can't handle them. The wrapper type here ensures that
	// PostgreSQL gets multiple transactions while SQLite only gets one.
	tx, err := db.Begin()
	csql.Panic(err)

	// One (wrapped) transaction per table that is written to below.
	txmovie := wrapTx(db, tx)
	txtv := txmovie.another()
	txepisode := txmovie.another()
	txname := txmovie.another()
	txatom := txmovie.another()

	// Drop data from the movie, tvshow and episode tables. They will be
	// rebuilt below.
	// The key here is to leave the atom and name tables alone. Invariably,
	// they will contain stale data. But the only side effect, I think, is
	// taking up space.
	// (Stale data can be removed with 'goim clean'.)
	csql.Truncate(txmovie, db.Driver, "movie")
	csql.Truncate(txtv, db.Driver, "tvshow")
	csql.Truncate(txepisode, db.Driver, "episode")

	// Create an inserter for each table, each bound to its own transaction.
	mvIns, err := csql.NewInserter(txmovie.Tx, db.Driver, "movie",
		"atom_id", "year", "sequence", "tv", "video")
	csql.Panic(err)
	tvIns, err := csql.NewInserter(txtv.Tx, db.Driver, "tvshow",
		"atom_id", "year", "sequence", "year_start", "year_end")
	csql.Panic(err)
	epIns, err := csql.NewInserter(txepisode.Tx, db.Driver, "episode",
		"atom_id", "tvshow_atom_id", "year", "season", "episode_num")
	csql.Panic(err)
	nameIns, err := csql.NewInserter(txname.Tx, db.Driver, "name",
		"atom_id", "name")
	csql.Panic(err)
	// A non-nil transaction is handed to newAtomizer, so the atomizer is
	// created with an inserter for the atom table.
	atoms, err := newAtomizer(db, txatom.Tx)
	csql.Panic(err)

	// Finish every inserter, close the atomizer and commit all transactions
	// once the list has been processed. This function is deferred after
	// csql.Safe, so it runs before it, and it also runs when the line
	// callback below panics.
	// NOTE(review): that means the transactions are committed on the error
	// path as well; confirm that this is intended.
	defer func() {
		csql.Panic(mvIns.Exec())
		csql.Panic(tvIns.Exec())
		csql.Panic(epIns.Exec())
		csql.Panic(nameIns.Exec())
		csql.Panic(atoms.Close())

		csql.Panic(txmovie.Commit())
		csql.Panic(txtv.Commit())
		csql.Panic(txepisode.Commit())
		csql.Panic(txname.Commit())
		csql.Panic(txatom.Commit())

		logf("Done. Added %d movies, %d tv shows and %d episodes.",
			addedMovies, addedTvshows, addedEpisodes)
	}()

	listLines(movies, func(line []byte) {
		line = bytes.TrimSpace(line)
		fields := splitListLine(line)
		// At least two fields (the item and its value) are required.
		if len(fields) <= 1 {
			return
		}
		item, value := fields[0], fields[1]
		switch ent := mediaType(item); ent {
		case imdb.EntityMovie:
			m := imdb.Movie{}
			if !parseMovie(item, &m) {
				return
			}
			if existed, err := parseId(atoms, item, &m.Id); err != nil {
				csql.Panic(err)
			} else if !existed {
				// We only add a name when we add an atom.
				if err = nameIns.Exec(m.Id, m.Title); err != nil {
					logf("Full movie info (that failed to add): %#v", m)
					csql.Panic(ef("Could not add name '%s': %s", m, err))
				}
			}
			err := mvIns.Exec(m.Id, m.Year, m.Sequence, m.Tv, m.Video)
			if err != nil {
				logf("Full movie info (that failed to add): %#v", m)
				csql.Panic(ef("Could not add movie '%s': %s", m, err))
			}
			addedMovies++
		case imdb.EntityTvshow:
			tv := imdb.Tvshow{}
			if !parseTvshow(item, &tv) {
				return
			}
			// The value field is parsed as the show's year range.
			if !parseTvshowRange(value, &tv) {
				return
			}
			if existed, err := parseId(atoms, item, &tv.Id); err != nil {
				csql.Panic(err)
			} else if !existed {
				// We only add a name when we add an atom.
				if err = nameIns.Exec(tv.Id, tv.Title); err != nil {
					logf("Full tvshow info (that failed to add): %#v", tv)
					csql.Panic(ef("Could not add name '%s': %s", tv, err))
				}
			}
			err := tvIns.Exec(tv.Id, tv.Year, tv.Sequence,
				tv.YearStart, tv.YearEnd)
			if err != nil {
				logf("Full tvshow info (that failed to add): %#v", tv)
				csql.Panic(ef("Could not add tvshow '%s': %s", tv, err))
			}
			addedTvshows++
		case imdb.EntityEpisode:
			ep := imdb.Episode{}
			if !parseEpisode(atoms, item, &ep) {
				return
			}
			// The value field is parsed as the episode's year.
			if !parseEpisodeYear(value, &ep) {
				return
			}
			if existed, err := parseId(atoms, item, &ep.Id); err != nil {
				csql.Panic(err)
			} else if !existed {
				// We only add a name when we add an atom.
				if err = nameIns.Exec(ep.Id, ep.Title); err != nil {
					logf("Full episode info (that failed to add): %#v", ep)
					csql.Panic(ef("Could not add name '%s': %s", ep, err))
				}
			}
			err := epIns.Exec(ep.Id, ep.TvshowId, ep.Year,
				ep.Season, ep.EpisodeNum)
			if err != nil {
				logf("Full episode info (that failed to add): %#v", ep)
				csql.Panic(ef("Could not add episode '%s': %s", ep, err))
			}
			addedEpisodes++
		default:
			// Any other entity kind aborts the load.
			csql.Panic(ef("Unrecognized entity %s", ent))
		}
	})
	return
}