// run is the CSP-style main loop of the keyserver. All code critical for safe
// persistence should be directly in run. All functions called from run should
// either interpret data and modify their mutable arguments OR interact with the
// network and disk, but not both.
func (ks *Keyserver) run() {
	defer close(ks.stopped)
	var step proto.KeyserverStep
	wb := ks.db.NewBatch()
	for {
		select {
		case <-ks.stop:
			return
		case stepEntry := <-ks.log.WaitCommitted():
			if stepEntry.ConfChange != nil {
				ks.log.ApplyConfChange(stepEntry.ConfChange)
			}
			stepBytes := stepEntry.Data
			if stepBytes == nil {
				continue // allow logs to skip slots for indexing purposes
			}
			if err := step.Unmarshal(stepBytes); err != nil {
				log.Panicf("invalid step pb in replicated log: %s", err)
			}
			// TODO: (for throughput) allow multiple steps per log entry
			// (pipelining). Maybe this would be better implemented at the log level?
			deferredIO := ks.step(&step, &ks.rs, wb)
			ks.rs.NextIndexLog++
			wb.Put(tableReplicaState, proto.MustMarshal(&ks.rs))
			if err := ks.db.Write(wb); err != nil {
				log.Panicf("sync step to db: %s", err)
			}
			wb.Reset()
			step.Reset()
			if deferredIO != nil {
				deferredIO()
			}
		case ks.leaderHint = <-ks.log.LeaderHintSet():
			ks.updateEpochProposer()
		case <-ks.minEpochIntervalTimer.C:
			ks.minEpochIntervalPassed = true
			ks.updateEpochProposer()
		case <-ks.maxEpochIntervalTimer.C:
			ks.maxEpochIntervalPassed = true
			ks.updateEpochProposer()
		}
	}
}
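
// Lifecycle sketch (illustrative, not part of this file): run is typically
// started in its own goroutine and shut down by closing ks.stop, then waiting
// on ks.stopped for the final iteration to finish. Only the two channels used
// by run above are assumed here; any Start/Stop wrapper around this pattern is
// hypothetical.
//
//	go ks.run()
//	// ... serve requests ...
//	close(ks.stop) // ask the loop to exit
//	<-ks.stopped   // wait until run has returned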

// step is called by run and changes the in-memory state. No i/o allowed.
func (ks *Keyserver) step(step *proto.KeyserverStep, rs *proto.ReplicaState, wb kv.Batch) (deferredIO func()) {
	// ks: &const
	// step, rs, wb: &mut
	switch step.Type.(type) {
	case *proto.KeyserverStep_Update:
		index := step.GetUpdate().Update.NewEntry.Index
		prevUpdate, err := ks.getUpdate(index, math.MaxUint64)
		if err != nil {
			log.Printf("getUpdate: %s", err)
			ks.wr.Notify(step.UID, updateOutput{Error: fmt.Errorf("internal error")})
			return
		}
		if err := ks.verifyUpdateDeterministic(prevUpdate, step.GetUpdate()); err != nil {
			ks.wr.Notify(step.UID, updateOutput{Error: err})
			return
		}
		latestTree := ks.merkletree.GetSnapshot(rs.LatestTreeSnapshot)

		// sanity check: compare previous version in Merkle tree vs in updates table
		prevEntryHashTree, _, err := latestTree.Lookup(index)
		if err != nil {
			ks.wr.Notify(step.UID, updateOutput{Error: fmt.Errorf("internal error")})
			return
		}
		var prevEntryHash []byte
		if prevUpdate != nil {
			prevEntryHash = make([]byte, 32)
			sha3.ShakeSum256(prevEntryHash, prevUpdate.Update.NewEntry.Encoding)
		}
		if !bytes.Equal(prevEntryHashTree, prevEntryHash) {
			log.Fatalf("ERROR: merkle tree and DB inconsistent for index %x: %x vs %x", index, prevEntryHashTree, prevEntryHash)
		}

		var entryHash [32]byte
		sha3.ShakeSum256(entryHash[:], step.GetUpdate().Update.NewEntry.Encoding)
		newTree, err := latestTree.BeginModification()
		if err != nil {
			ks.wr.Notify(step.UID, updateOutput{Error: fmt.Errorf("internal error")})
			return
		}
		if err := newTree.Set(index, entryHash[:]); err != nil {
			log.Printf("setting index '%x' gave error: %s", index, err)
			ks.wr.Notify(step.UID, updateOutput{Error: fmt.Errorf("internal error")})
			return
		}
		rs.LatestTreeSnapshot = newTree.Flush(wb).Nr
		epochNr := rs.LastEpochDelimiter.EpochNumber + 1
		wb.Put(tableUpdateRequests(index, epochNr), proto.MustMarshal(step.GetUpdate()))
		ks.wr.Notify(step.UID, updateOutput{Epoch: epochNr})

		rs.PendingUpdates = true
		ks.updateEpochProposer()

		if rs.LastEpochNeedsRatification {
			// We need to wait for the last epoch to appear in the verifier log before
			// inserting this update.
			wb.Put(tableUpdatesPendingRatification(rs.NextIndexLog), proto.MustMarshal(step.GetUpdate().Update))
		} else {
			// We can deliver the update to verifiers right away.
			return ks.verifierLogAppend(&proto.VerifierStep{Type: &proto.VerifierStep_Update{Update: step.GetUpdate().Update}}, rs, wb)
		}

	case *proto.KeyserverStep_EpochDelimiter:
		if step.GetEpochDelimiter().EpochNumber <= rs.LastEpochDelimiter.EpochNumber {
			return // a duplicate of this step has already been handled
		}
		rs.LastEpochDelimiter = *step.GetEpochDelimiter()
		log.Printf("epoch %d", step.GetEpochDelimiter().EpochNumber)

		rs.PendingUpdates = false
		ks.resetEpochTimers(rs.LastEpochDelimiter.Timestamp.Time())
		// rs.ThisReplicaNeedsToSignLastEpoch might already be true, if a majority
		// signed that did not include us. This will make us skip signing the last
		// epoch, but that's fine.
		rs.ThisReplicaNeedsToSignLastEpoch = true
		// However, it's not okay to see a new epoch delimiter before the previous
		// epoch has been ratified.
		if rs.LastEpochNeedsRatification {
			log.Panicf("new epoch delimiter but last epoch not ratified")
		}
		rs.LastEpochNeedsRatification = true
		ks.updateEpochProposer()
		deferredIO = ks.updateSignatureProposer

		snapshotNumberBytes := make([]byte, 8)
		binary.BigEndian.PutUint64(snapshotNumberBytes, rs.LatestTreeSnapshot)
		wb.Put(tableMerkleTreeSnapshot(step.GetEpochDelimiter().EpochNumber), snapshotNumberBytes)

		latestTree := ks.merkletree.GetSnapshot(rs.LatestTreeSnapshot)
		rootHash, err := latestTree.GetRootHash()
		if err != nil {
			log.Panicf("ks.latestTree.GetRootHash() failed: %s", err)
		}
		teh := &proto.EncodedTimestampedEpochHead{TimestampedEpochHead: proto.TimestampedEpochHead{
			Head: proto.EncodedEpochHead{EpochHead: proto.EpochHead{
				RootHash:            rootHash,
				PreviousSummaryHash: rs.PreviousSummaryHash,
				Realm:               ks.realm,
				Epoch:               step.GetEpochDelimiter().EpochNumber,
				IssueTime:           step.GetEpochDelimiter().Timestamp,
			}, Encoding: nil},
			Timestamp: step.GetEpochDelimiter().Timestamp,
		}, Encoding: nil}
		teh.Head.UpdateEncoding()
		teh.UpdateEncoding()
		if rs.PreviousSummaryHash == nil {
			rs.PreviousSummaryHash = make([]byte, 64)
		}
		sha3.ShakeSum256(rs.PreviousSummaryHash[:], teh.Head.Encoding)

		wb.Put(tableEpochHeads(step.GetEpochDelimiter().EpochNumber), proto.MustMarshal(teh))

	case *proto.KeyserverStep_ReplicaSigned:
		newSEH := step.GetReplicaSigned()
		epochNr := newSEH.Head.Head.Epoch
		// get epoch head
		tehBytes, err := ks.db.Get(tableEpochHeads(epochNr))
		if err != nil {
			log.Panicf("get tableEpochHeads(%d): %s", epochNr, err)
		}
		// compare epoch head to signed epoch head
		if got, want := tehBytes, newSEH.Head.Encoding; !bytes.Equal(got, want) {
			log.Panicf("replica signed different head: wanted %x, got %x", want, got)
		}

		// insert all the new signatures into the ratifications table (there should
		// actually only be one)
		newSehBytes := proto.MustMarshal(newSEH)
		for id := range newSEH.Signatures {
			// the entry might already exist in the DB (if the proposals got
			// duplicated), but it doesn't matter
			wb.Put(tableRatifications(epochNr, id), newSehBytes)
		}
		deferredIO = func() {
			// First write to DB, *then* notify subscribers. That way, if subscribers
			// start listening before searching the DB, they're guaranteed to see the
			// signature: either it's already in the DB, or they'll get notified. If
			// the order was reversed, they could miss the notification but still not
			// see anything in the DB.
			ks.signatureBroadcast.Publish(epochNr, newSEH)
		}

		if epochNr != rs.LastEpochDelimiter.EpochNumber {
			break
		}
		if rs.ThisReplicaNeedsToSignLastEpoch && newSEH.Signatures[ks.replicaID] != nil {
			rs.ThisReplicaNeedsToSignLastEpoch = false
			ks.updateEpochProposer()
			// updateSignatureProposer should in general be called after writes
			// have been flushed to db, but given ThisReplicaNeedsToSignLastEpoch =
			// false we know that updateSignatureProposer will not access the db.
			ks.updateSignatureProposer()
		}

		// get all existing ratifications for this epoch
		allSignatures := make(map[uint64][]byte)
		existingRatifications, err := ks.allRatificationsForEpoch(epochNr)
		if err != nil {
			log.Panicf("allRatificationsForEpoch(%d): %s", epochNr, err)
		}
		for _, seh := range existingRatifications {
			for id, sig := range seh.Signatures {
				allSignatures[id] = sig
			}
		}

		// check whether the epoch was already ratified
		wasRatified := coname.VerifyPolicy(ks.serverAuthorized, tehBytes, allSignatures)
		if wasRatified {
			break
		}
		for id, sig := range newSEH.Signatures {
			allSignatures[id] = sig
		}
		// check whether the epoch has now become ratified
		nowRatified := coname.VerifyPolicy(ks.serverAuthorized, tehBytes, allSignatures)
		if !nowRatified {
			break
		}
		if !rs.LastEpochNeedsRatification {
			log.Panicf("%x: thought last epoch was not already ratified, but it was", ks.replicaID)
		}
		rs.LastEpochNeedsRatification = false
		ks.updateEpochProposer()

		var teh proto.EncodedTimestampedEpochHead
		err = teh.Unmarshal(tehBytes)
		if err != nil {
			log.Panicf("invalid epoch head %d (%x): %s", epochNr, tehBytes, err)
		}
		allSignaturesSEH := &proto.SignedEpochHead{
			Head:       teh,
			Signatures: allSignatures,
		}

		oldDeferredIO := deferredIO
		deferredSendEpoch := ks.verifierLogAppend(&proto.VerifierStep{Type: &proto.VerifierStep_Epoch{Epoch: allSignaturesSEH}}, rs, wb)

		deferredSendUpdates := []func(){}
		iter := ks.db.NewIterator(kv.BytesPrefix([]byte{tableUpdatesPendingRatificationPrefix}))
		defer iter.Release()
		for iter.Next() {
			update := &proto.SignedEntryUpdate{}
			err := update.Unmarshal(iter.Value())
			if err != nil {
				log.Panicf("invalid pending update %x: %s", iter.Value(), err)
			}
			deferredSendUpdates = append(deferredSendUpdates, ks.verifierLogAppend(&proto.VerifierStep{Type: &proto.VerifierStep_Update{Update: update}}, rs, wb))
			wb.Delete(iter.Key())
		}

		deferredIO = func() {
			oldDeferredIO()
			// First, send the ratified epoch to verifiers
			deferredSendEpoch()
			// Then send updates that were waiting for that epoch to go out
			for _, f := range deferredSendUpdates {
				f()
			}
		}

	case *proto.KeyserverStep_VerifierSigned:
		rNew := step.GetVerifierSigned()
		for id := range rNew.Signatures {
			// Note: The signature *must* have been authenticated before being inserted
			// into the log, or else verifiers could just trample over everyone else's
			// signatures, including our own.
			dbkey := tableRatifications(rNew.Head.Head.Epoch, id)
			wb.Put(dbkey, proto.MustMarshal(rNew))
		}
		ks.wr.Notify(step.UID, nil)
		return func() {
			// As above, first write to DB, *then* notify subscribers.
			ks.signatureBroadcast.Publish(rNew.Head.Head.Epoch, rNew)
		}
	default:
		log.Panicf("unknown step pb in replicated log: %#v", step)
	}
	return
}
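
// deferredIO contract sketch (illustrative restatement of the run/step split
// above, not additional API): step never touches the network or disk directly.
// It mutates rs and stages durable writes in wb, and hands back a closure for
// any side effects. run persists the batch first and only then invokes the
// closure, so everything the closure publishes (verifier log appends,
// signatureBroadcast notifications) is already backed by the database.
// Simplified shape of that interaction in run, with bookkeeping elided:
//
//	deferredIO := ks.step(&step, &ks.rs, wb) // in-memory mutation only
//	wb.Put(tableReplicaState, proto.MustMarshal(&ks.rs))
//	if err := ks.db.Write(wb); err != nil { // durability point
//		log.Panicf("sync step to db: %s", err)
//	}
//	if deferredIO != nil {
//		deferredIO() // network/disk side effects strictly after the write
//	}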