// runUserOp starts a transaction and creates the user if it doesn't // yet exist. func runUserOp(ctx Context, userID, opType int) error { return crdb.ExecuteTx(ctx.DB, func(tx *sql.Tx) error { switch opType { case createUserOp: return createUser(tx, userID) case createPhotoOp: return createPhoto(tx, userID) case createCommentOp: return createComment(tx, userID) case listPhotosOp: return listPhotos(tx, userID, nil) case listCommentsOp: _, err := listComments(tx, userID, nil) return err case updatePhotoOp: return updatePhoto(tx, userID) case updateCommentOp: return updateComment(tx, userID) case deletePhotoOp: return deletePhoto(tx, userID) case deleteCommentOp: return deleteComment(tx, userID) default: return errors.Errorf("unsupported op type: %d", opType) } }) }
func worker(db *sql.DB, l func(string, ...interface{}), gen func() postingRequest) { for { req := gen() l("running %v", req) if err := crdb.ExecuteTx(db, func(tx *sql.Tx) error { return doPosting(tx, req) }); err != nil { pqErr, ok := err.(*pq.Error) if ok { if pqErr.Code.Class() == pq.ErrorClass("23") { // Integrity violations. Note that (especially with Postgres) // the primary key will often be violated under congestion. l("%s", pqErr) continue } if pqErr.Code.Class() == pq.ErrorClass("40") { // Transaction rollback errors (e.g. Postgres // serializability restarts) l("%s", pqErr) continue } } log.Fatal(err) } else { if *verbose { l("success") } counter.Incr(1) } } }
// remove removes a node give its name and its parent ID. // If 'checkChildren' is true, fails if the node has children. func (cfs CFS) remove(parentID uint64, name string, checkChildren bool) error { const lookupSQL = `SELECT id FROM fs.namespace WHERE (parentID, name) = ($1, $2)` const deleteNamespace = `DELETE FROM fs.namespace WHERE (parentID, name) = ($1, $2)` const deleteInode = `DELETE FROM fs.inode WHERE id = $1` const deleteBlock = `DELETE FROM fs.block WHERE id = $1` err := crdb.ExecuteTx(cfs.db, func(tx *sql.Tx) error { // Start by looking up the node ID. var id uint64 if err := tx.QueryRow(lookupSQL, parentID, name).Scan(&id); err != nil { return err } // Check if there are any children. if checkChildren { if err := checkIsEmpty(tx, id); err != nil { return err } } // Delete all entries. if _, err := tx.Exec(deleteNamespace, parentID, name); err != nil { return err } if _, err := tx.Exec(deleteInode, id); err != nil { return err } if _, err := tx.Exec(deleteBlock, id); err != nil { return err } return nil }) return err }
// rename moves 'oldParentID/oldName' to 'newParentID/newName'. // If 'newParentID/newName' already exists, it is deleted. // See NOTE on node.go:Rename. func (cfs CFS) rename(oldParentID, newParentID uint64, oldName, newName string) error { if oldParentID == newParentID && oldName == newName { return nil } const deleteNamespace = `DELETE FROM fs.namespace WHERE (parentID, name) = ($1, $2)` const insertNamespace = `INSERT INTO fs.namespace VALUES ($1, $2, $3)` const updateNamespace = `UPDATE fs.namespace SET id = $1 WHERE (parentID, name) = ($2, $3)` const deleteInode = `DELETE FROM fs.inode WHERE id = $1` err := crdb.ExecuteTx(cfs.db, func(tx *sql.Tx) error { // Lookup source inode. srcObject, err := getInode(tx, oldParentID, oldName) if err != nil { return err } // Lookup destination inode. destObject, err := getInode(tx, newParentID, newName) if err != nil && err != sql.ErrNoRows { return err } // Check that the rename is allowed. if err := validateRename(tx, srcObject, destObject); err != nil { return err } // At this point we know the following: // - srcObject is not nil // - destObject may be nil. If not, its inode can be deleted. if destObject == nil { // No new object: use INSERT. if _, err := tx.Exec(deleteNamespace, oldParentID, oldName); err != nil { return err } if _, err := tx.Exec(insertNamespace, newParentID, newName, srcObject.ID); err != nil { return err } } else { // Destination exists. if _, err := tx.Exec(deleteNamespace, oldParentID, oldName); err != nil { return err } if _, err := tx.Exec(updateNamespace, srcObject.ID, newParentID, newName); err != nil { return err } if _, err := tx.Exec(deleteInode, destObject.ID); err != nil { return err } } return nil }) return err }
func main() { db, err := sql.Open("postgres", "postgresql://root@localhost:26257/scld?sslmode=disable") if err != nil { log.Fatal("error connection to the database: ", err) } if _, err := db.Exec(`CREATE DATABASE IF NOT EXISTS scld; CREATE TABLE IF NOT EXISTS track_likes ( id int NOT NULL DEFAULT unique_rowid(), -- note: random ID username string NOT NULL DEFAULT '', track string NOT NULL DEFAULT '', liked_at timestamp NOT NULL DEFAULT NOW(), -- note: what is NOW()? INDEX username_liked_at (username, liked_at), INDEX track_liked_at (track, liked_at), PRIMARY KEY (id, username, track) ); -- initial setup: /* TRUNCATE TABLE track_likes; INSERT INTO track_likes (username, track) VALUES ('Tobias', 'Call Me Maybe'), ('Marc', e'I\'m Just A Gigolo'), ('Spencer', 'Carl Me Maybe'), ('Peter', 'When Nothing Else Mattis'), ('Ben', 'Hips Don''t Lie') */ `); err != nil { log.Fatal(err) } var wg sync.WaitGroup for i := 0; i < 10; i++ { wg.Add(1) go func() { defer wg.Done() if err := crdb.ExecuteTx(db, func(tx *sql.Tx) error { _, err := tx.Exec(` UPDATE track_likes SET liked_at = NOW() WHERE id IN (SELECT id FROM track_likes ORDER BY liked_at ASC LIMIT 1);`) if err != nil { fmt.Println("had to restart!") } return err }); err != nil { panic(err) } fmt.Println("ran an update") }() } wg.Wait() }
func (z *zeroSum) worker() { r := newRand() zipf := z.accountDistribution(r) for { from := zipf.Uint64() to := zipf.Uint64() if from == to { continue } db := z.DB[z.RandNode(r.Intn)] err := crdb.ExecuteTx(db, func(tx *gosql.Tx) error { rows, err := tx.Query(`SELECT id, balance FROM accounts WHERE id IN ($1, $2)`, from, to) if err != nil { return err } var fromBalance, toBalance int64 for rows.Next() { var id uint64 var balance int64 if err = rows.Scan(&id, &balance); err != nil { log.Fatal(context.Background(), err) } switch id { case from: fromBalance = balance case to: toBalance = balance default: panic(fmt.Sprintf("got unexpected account %d", id)) } } upsert := `UPSERT INTO accounts VALUES ($1, $3), ($2, $4)` _, err = tx.Exec(upsert, to, from, toBalance+1, fromBalance-1) return err }) if err != nil { z.maybeLogError(err) } else { atomic.AddUint64(&z.stats.ops, 1) z.accounts.Lock() z.accounts.m[from] = struct{}{} z.accounts.m[to] = struct{}{} z.accounts.Unlock() } } }
// create inserts a new node. // parentID: inode ID of the parent directory. // name: name of the new node // node: new node func (cfs CFS) create(parentID uint64, name string, node *Node) error { inode := node.toJSON() const insertNode = `INSERT INTO fs.inode VALUES ($1, $2)` const insertNamespace = `INSERT INTO fs.namespace VALUES ($1, $2, $3)` err := crdb.ExecuteTx(cfs.db, func(tx *sql.Tx) error { if _, err := tx.Exec(insertNode, node.ID, inode); err != nil { return err } if _, err := tx.Exec(insertNamespace, parentID, name, node.ID); err != nil { return err } return nil }) return err }
func testMonotonicInsertsInner(t *testing.T, c cluster.Cluster, cfg cluster.TestConfig) { var clients []mtClient for i := 0; i < c.NumNodes(); i++ { clients = append(clients, mtClient{ID: i, DB: makePGClient(t, c.PGUrl(i))}) } // We will insert into this table by selecting MAX(val) and increasing by // one and expect that val and sts (the commit timestamp) are both // simultaneously increasing. if _, err := clients[0].Exec(` CREATE DATABASE mono; CREATE TABLE IF NOT EXISTS mono.mono (val INT, sts STRING, node INT, tb INT); INSERT INTO mono.mono VALUES(-1, '0', -1, -1)`); err != nil { t.Fatal(err) } var idGen uint64 invoke := func(client mtClient) { logPrefix := fmt.Sprintf("%03d.%03d: ", atomic.AddUint64(&idGen, 1), client.ID) l := func(msg string, args ...interface{}) { t.Logf(logPrefix+msg, args...) } l("begin") defer l("done") var exRow, insRow mtRow var attempt int if err := crdb.ExecuteTx(client.DB, func(tx *gosql.Tx) error { attempt++ l("attempt %d", attempt) if err := tx.QueryRow(`SELECT cluster_logical_timestamp()`).Scan( &insRow.sts, ); err != nil { l(err.Error()) return err } l("read max val") if err := tx.QueryRow(`SELECT MAX(val) AS m FROM mono.mono`).Scan( &exRow.val, ); err != nil { l(err.Error()) return err } l("read max row for val=%d", exRow.val) if err := tx.QueryRow(`SELECT sts, node, tb FROM mono.mono WHERE val = $1`, exRow.val, ).Scan( &exRow.sts, &exRow.node, &exRow.tb, ); err != nil { l(err.Error()) return err } l("insert") if err := tx.QueryRow(` INSERT INTO mono.mono (val, sts, node, tb) VALUES($1, $2, $3, $4) RETURNING val, sts, node, tb`, exRow.val+1, insRow.sts, client.ID, 0, ).Scan( &insRow.val, &insRow.sts, &insRow.node, &insRow.tb, ); err != nil { l(err.Error()) return err } l("commit") return nil }); err != nil { t.Errorf("%T: %v", err, err) } } verify := func() { client := clients[0] var numDistinct int if err := client.QueryRow("SELECT COUNT(DISTINCT(val)) FROM mono.mono").Scan( &numDistinct, ); err != nil { t.Fatal(err) } rows, err := client.Query("SELECT val, sts, node, tb FROM mono.mono ORDER BY val ASC, sts ASC") if err != nil { t.Fatal(err) } var results mtRows for rows.Next() { var row mtRow if err := rows.Scan(&row.val, &row.sts, &row.node, &row.tb); err != nil { t.Fatal(err) } results = append(results, row) } if !sort.IsSorted(results) { t.Errorf("results are not sorted:\n%s", results) } if numDistinct != len(results) { t.Errorf("'val' column is not unique: %d results, but %d distinct:\n%s", len(results), numDistinct, results) } } concurrency := 2 * c.NumNodes() sem := make(chan struct{}, concurrency) timer := time.After(cfg.Duration) defer verify() defer func() { // Now that consuming has stopped, fill up the semaphore (i.e. wait for // still-running goroutines to stop) for i := 0; i < concurrency; i++ { sem <- struct{}{} } }() for { select { case sem <- struct{}{}: case <-stopper: return case <-timer: return } go func(client mtClient) { invoke(client) <-sem }(clients[rand.Intn(c.NumNodes())]) } }
func moveMoney(db *sql.DB, aggr *measurement) { useSystemAccount := *contention == "high" for !transfersComplete() { var readDuration, writeDuration time.Duration var fromBalance, toBalance int from, to := rand.Intn(*numAccounts)+1, rand.Intn(*numAccounts)+1 if from == to { continue } if useSystemAccount { // Use the first account number we generated as a coin flip to // determine whether we're transferring money into or out of // the system account. if from > *numAccounts/2 { from = systemAccountID } else { to = systemAccountID } } amount := rand.Intn(*maxTransfer) start := time.Now() attempts := 0 if err := crdb.ExecuteTx(db, func(tx *sql.Tx) error { attempts++ if attempts > 1 { atomic.AddInt32(&aggr.retries, 1) } startRead := time.Now() rows, err := tx.Query(`SELECT id, balance FROM account WHERE id IN ($1, $2)`, from, to) if err != nil { return err } readDuration = time.Since(startRead) for rows.Next() { var id, balance int if err = rows.Scan(&id, &balance); err != nil { log.Fatal(err) } switch id { case from: fromBalance = balance case to: toBalance = balance default: panic(fmt.Sprintf("got unexpected account %d", id)) } } startWrite := time.Now() if fromBalance < amount { return nil } insert := `INSERT INTO transaction (id, txn_ref) VALUES ($1, $2);` txnID := atomic.AddInt32(&txnCount, 1) _, err = tx.Exec(insert, txnID, fmt.Sprintf("txn %d", txnID)) if err != nil { return err } insert = `INSERT INTO transaction_leg (account_id, amount, running_balance, txn_id) VALUES ($1, $2, $3, $4);` if _, err = tx.Exec(insert, from, -amount, fromBalance-amount, txnID); err != nil { return err } if _, err = tx.Exec(insert, to, amount, toBalance+amount, txnID); err != nil { return err } update := `UPDATE account SET balance = $1 WHERE id = $2;` if _, err = tx.Exec(update, toBalance+amount, to); err != nil { return err } if _, err = tx.Exec(update, fromBalance-amount, from); err != nil { return err } writeDuration = time.Since(startWrite) return nil }); err != nil { log.Printf("failed transaction: %v", err) continue } atomic.AddInt32(&successCount, 1) if fromBalance >= amount { atomic.AddInt64(&aggr.read, readDuration.Nanoseconds()) atomic.AddInt64(&aggr.write, writeDuration.Nanoseconds()) atomic.AddInt64(&aggr.total, time.Since(start).Nanoseconds()) } } }