/// <summary>
/// Drives a <c>QuorumCall</c> built over three settable futures and checks
/// response counting, waiting on response/success thresholds, the collected
/// successful results, and the timeout raised when more successes are
/// requested than futures exist.
/// </summary>
/// <exception cref="System.Exception"/>
public virtual void TestQuorums()
{
    // Create the three futures up front so they can be completed by name below.
    SettableFuture<string> first = SettableFuture.Create<string>();
    SettableFuture<string> second = SettableFuture.Create<string>();
    SettableFuture<string> third = SettableFuture.Create<string>();
    IDictionary<string, SettableFuture<string>> futures = ImmutableMap.Of("f1", first, "f2", second, "f3", third);
    QuorumCall<string, string> q = QuorumCall.Create(futures);

    // Nothing has completed yet.
    NUnit.Framework.Assert.AreEqual(0, q.CountResponses());

    // One success.
    first.Set("first future");
    // wait for 1 response
    q.WaitFor(1, 0, 0, 100000, "test");
    // wait for 1 success
    q.WaitFor(0, 1, 0, 100000, "test");
    NUnit.Framework.Assert.AreEqual(1, q.CountResponses());

    // One failure still counts as a response.
    second.SetException(new Exception("error"));
    NUnit.Framework.Assert.AreEqual(2, q.CountResponses());

    // Second success.
    third.Set("second future");
    // wait for 3 responses
    q.WaitFor(3, 0, 100, 100000, "test");
    // 2 successes
    q.WaitFor(0, 2, 100, 100000, "test");
    NUnit.Framework.Assert.AreEqual(3, q.CountResponses());

    // Only the two successful futures appear in the results.
    SortedDictionary<string, string> sortedResults = new SortedDictionary<string, string>(q.GetResults());
    string rendered = Joiner.On(",").WithKeyValueSeparator("=").Join(sortedResults);
    NUnit.Framework.Assert.AreEqual("f1=first future,f3=second future", rendered);

    // Asking for 4 successes out of 3 futures has to time out.
    bool timedOut = false;
    try
    {
        q.WaitFor(0, 4, 100, 10, "test");
    }
    catch (TimeoutException)
    {
        timedOut = true;
    }
    if (!timedOut)
    {
        NUnit.Framework.Assert.Fail("Didn't time out waiting for more responses than came back");
    }
}
/// <summary>Wait for a quorum of loggers to respond to the given call.</summary>
/// <remarks>
/// Wait for a quorum of loggers to respond to the given call. If a quorum
/// can't be achieved, throws a QuorumException.
/// <para>
/// The wait is bounded by <paramref name="timeoutMs"/>. After the wait, if
/// fewer than a majority of calls succeeded, the failure is raised through
/// <c>q.RethrowException</c>.
/// </para>
/// </remarks>
/// <param name="q">the quorum call</param>
/// <param name="timeoutMs">the number of millis to wait</param>
/// <param name="operationName">textual description of the operation, for logging</param>
/// <returns>a map of successful results</returns>
/// <exception cref="QuorumException">if a quorum doesn't respond with success</exception>
/// <exception cref="System.IO.IOException">if the thread is interrupted or times out
/// </exception>
internal virtual IDictionary<AsyncLogger, V> WaitForWriteQuorum<V>(QuorumCall<AsyncLogger, V> q, int timeoutMs, string operationName)
{
    int majority = GetMajoritySize();
    try
    {
        // Stop waiting as soon as:
        //   either all respond
        //   or we get a majority successes
        //   or we get a majority failures
        q.WaitFor(loggers.Count, majority, majority, timeoutMs, operationName);
    }
    catch (TimeoutException)
    {
        // Must precede the general handler: a broader catch placed first
        // would make this clause unreachable and mislabel timeouts.
        throw new IOException("Timed out waiting " + timeoutMs + "ms for a " + "quorum of nodes to respond.");
    }
    catch (Exception)
    {
        // Any other failure of the wait is treated as an interruption;
        // re-assert the interrupt flag before surfacing it as an IOException.
        Sharpen.Thread.CurrentThread().Interrupt();
        throw new IOException("Interrupted waiting " + timeoutMs + "ms for a " + "quorum of nodes to respond.");
    }

    if (q.CountSuccesses() < majority)
    {
        q.RethrowException("Got too many exceptions to achieve quorum size " + GetMajorityString());
    }
    return q.GetResults();
}
/// <summary>
/// Asks the loggers to finalize the segment covering
/// <paramref name="firstTxId"/> through <paramref name="lastTxId"/> and
/// waits for a write quorum, bounded by <c>finalizeSegmentTimeoutMs</c>.
/// </summary>
/// <param name="firstTxId">first transaction id of the segment</param>
/// <param name="lastTxId">last transaction id of the segment</param>
/// <exception cref="System.IO.IOException"/>
public override void FinalizeLogSegment(long firstTxId, long lastTxId)
{
    QuorumCall<AsyncLogger, Void> q = loggers.FinalizeLogSegment(firstTxId, lastTxId);
    // .NET composite formatting uses {0}/{1}; the Java-style "%s" placeholders
    // were emitted literally and the txids never appeared in the operation name.
    string operationName = string.Format("finalizeLogSegment({0}-{1})", firstTxId, lastTxId);
    loggers.WaitForWriteQuorum(q, finalizeSegmentTimeoutMs, operationName);
}
/// <summary>
/// Queries every logger's <c>IsFormatted</c> result and reports whether any
/// of them returned <c>true</c>.
/// </summary>
/// <returns>
/// <c>true</c> if at least one logger reported data; <c>false</c> if none did.
/// </returns>
/// <exception cref="System.IO.IOException">
/// if the wait times out or is interrupted, or if any logger call failed
/// </exception>
public virtual bool HasSomeData()
{
    QuorumCall<AsyncLogger, bool> call = loggers.IsFormatted();
    try
    {
        // Wait for a response from every logger, success or failure.
        call.WaitFor(loggers.Size(), 0, 0, HasdataTimeoutMs, "hasSomeData");
    }
    catch (TimeoutException)
    {
        // Must precede the general handler, otherwise it is unreachable.
        throw new IOException("Timed out waiting for response from loggers");
    }
    catch (Exception)
    {
        throw new IOException("Interrupted while determining if JNs have data");
    }

    if (call.CountExceptions() > 0)
    {
        call.RethrowException("Unable to check if JNs are ready for formatting");
    }

    // If any of the loggers returned with a non-empty manifest, then
    // we should prompt for format.
    foreach (bool hasData in call.GetResults().Values)
    {
        if (hasData)
        {
            return true;
        }
    }

    // Otherwise, none were formatted, we can safely format.
    return false;
}
/// <summary>
/// Collects edit log manifests from a quorum of loggers, opens an input
/// stream for every remote log listed in them, and hands the combined set to
/// <c>JournalSet.ChainAndMakeRedundantStreams</c>, which populates
/// <paramref name="streams"/>.
/// </summary>
/// <param name="streams">collection passed to the chaining step to receive the result</param>
/// <param name="fromTxnId">transaction id passed to the manifest request and the chaining step</param>
/// <param name="inProgressOk">passed through to the manifest request</param>
/// <exception cref="System.IO.IOException"/>
public virtual void SelectInputStreams(ICollection<EditLogInputStream> streams, long fromTxnId, bool inProgressOk)
{
    QuorumCall<AsyncLogger, RemoteEditLogManifest> manifestCall = loggers.GetEditLogManifest(fromTxnId, inProgressOk);
    IDictionary<AsyncLogger, RemoteEditLogManifest> manifests =
        loggers.WaitForWriteQuorum(manifestCall, selectInputStreamsTimeoutMs, "selectInputStreams");

    string manifestDump = Joiner.On("\n").WithKeyValueSeparator(": ").Join(manifests);
    Log.Debug("selectInputStream manifests:\n" + manifestDump);

    // Streams are queued using the JournalSet comparator before chaining.
    PriorityQueue<EditLogInputStream> candidates =
        new PriorityQueue<EditLogInputStream>(64, JournalSet.EditLogInputStreamComparator);

    foreach (KeyValuePair<AsyncLogger, RemoteEditLogManifest> entry in manifests)
    {
        AsyncLogger source = entry.Key;
        foreach (RemoteEditLog segment in entry.Value.GetLogs())
        {
            Uri fetchUrl = source.BuildURLToFetchLogs(segment.GetStartTxId());
            candidates.AddItem(
                EditLogFileInputStream.FromUrl(
                    connectionFactory,
                    fetchUrl,
                    segment.GetStartTxId(),
                    segment.GetEndTxId(),
                    segment.IsInProgress()));
        }
    }

    JournalSet.ChainAndMakeRedundantStreams(streams, candidates, fromTxnId);
}
/// <summary>
/// Starts a new unique epoch, finds the highest last-segment txid reported
/// in the epoch responses, recovers that segment if one was reported, and
/// then marks this instance as the active writer.
/// </summary>
/// <exception cref="System.IO.IOException"/>
public override void RecoverUnfinalizedSegments()
{
    Preconditions.CheckState(!isActiveWriter, "already active writer");
    Log.Info("Starting recovery process for unclosed journal segments...");

    IDictionary<AsyncLogger, QJournalProtocolProtos.NewEpochResponseProto> epochResponses = CreateNewUniqueEpoch();
    Log.Info("Successfully started new epoch " + loggers.GetEpoch());
    if (Log.IsDebugEnabled())
    {
        Log.Debug("newEpoch(" + loggers.GetEpoch() + ") responses:\n" + QuorumCall.MapToString(epochResponses));
    }

    // long.MinValue doubles as the "no segment reported" sentinel.
    long newestSegmentTxId = long.MinValue;
    foreach (QJournalProtocolProtos.NewEpochResponseProto response in epochResponses.Values)
    {
        if (!response.HasLastSegmentTxId())
        {
            continue;
        }
        long reported = response.GetLastSegmentTxId();
        if (reported > newestSegmentTxId)
        {
            newestSegmentTxId = reported;
        }
    }

    // On a completely fresh system, none of the journals have any
    // segments, so there's nothing to recover.
    bool anySegmentReported = newestSegmentTxId != long.MinValue;
    if (anySegmentReported)
    {
        RecoverUnclosedSegment(newestSegmentTxId);
    }

    isActiveWriter = true;
}
/// <summary>
/// Sends the ready portion of the double-buffer to the loggers, waits for a
/// write quorum, and then records the last transaction of the batch as
/// committed. Does nothing when no bytes are ready.
/// </summary>
/// <param name="durable">not read by this implementation</param>
/// <exception cref="System.IO.IOException"/>
protected internal override void FlushAndSync(bool durable)
{
    int readyByteCount = buf.CountReadyBytes();
    if (readyByteCount <= 0)
    {
        return;
    }

    int readyTxnCount = buf.CountReadyTxns();
    long firstReadyTxId = buf.GetFirstReadyTxId();
    System.Diagnostics.Debug.Assert(readyTxnCount > 0);

    // Copy from our double-buffer into a new byte array. This is for
    // two reasons:
    // 1) The IPC code has no way of specifying to send only a slice of
    //    a larger array.
    // 2) because the calls to the underlying nodes are asynchronous, we
    //    need a defensive copy to avoid accidentally mutating the buffer
    //    before it is sent.
    DataOutputBuffer outgoing = new DataOutputBuffer(readyByteCount);
    buf.FlushTo(outgoing);
    System.Diagnostics.Debug.Assert(outgoing.GetLength() == readyByteCount);
    byte[] payload = outgoing.GetData();
    System.Diagnostics.Debug.Assert(payload.Length == outgoing.GetLength());

    QuorumCall<AsyncLogger, Void> sendCall = loggers.SendEdits(segmentTxId, firstReadyTxId, readyTxnCount, payload);
    loggers.WaitForWriteQuorum(sendCall, writeTimeoutMs, "sendEdits");

    // Since we successfully wrote this batch, let the loggers know. Any future
    // RPCs will thus let the loggers know of the most recent transaction, even
    // if a logger has fallen behind.
    long lastFlushedTxId = firstReadyTxId + readyTxnCount - 1;
    loggers.SetCommittedTxId(lastFlushedTxId);
}
/// <summary>
/// Calls <c>IsFormatted</c> on every logger in <c>loggers</c> and wraps the
/// per-logger futures in a single <c>QuorumCall</c>.
/// </summary>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, bool> IsFormatted()
{
    IDictionary<AsyncLogger, ListenableFuture<bool>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        ListenableFuture<bool> pending = target.IsFormatted();
        pendingByLogger[target] = pending;
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Calls <c>DiscardSegments(startTxId)</c> on every logger in <c>loggers</c>
/// and wraps the per-logger futures in a single <c>QuorumCall</c>.
/// </summary>
/// <param name="startTxId">transaction id forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, Void> DiscardSegments(long startTxId)
{
    IDictionary<AsyncLogger, ListenableFuture<Void>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.DiscardSegments(startTxId);
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Calls <c>Format(nsInfo)</c> on every logger in <c>loggers</c> and wraps
/// the per-logger futures in a single <c>QuorumCall</c>.
/// </summary>
/// <param name="nsInfo">namespace info forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
internal virtual QuorumCall<AsyncLogger, Void> Format(NamespaceInfo nsInfo)
{
    IDictionary<AsyncLogger, ListenableFuture<Void>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.Format(nsInfo);
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Calls <c>FinalizeLogSegment(firstTxId, lastTxId)</c> on every logger in
/// <c>loggers</c> and wraps the per-logger futures in a single
/// <c>QuorumCall</c>.
/// </summary>
/// <param name="firstTxId">first transaction id forwarded to each logger</param>
/// <param name="lastTxId">last transaction id forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, Void> FinalizeLogSegment(long firstTxId, long lastTxId)
{
    IDictionary<AsyncLogger, ListenableFuture<Void>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        ListenableFuture<Void> pending = target.FinalizeLogSegment(firstTxId, lastTxId);
        pendingByLogger[target] = pending;
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Calls <c>StartLogSegment(txid, layoutVersion)</c> on every logger in
/// <c>loggers</c> and wraps the per-logger futures in a single
/// <c>QuorumCall</c>.
/// </summary>
/// <param name="txid">transaction id forwarded to each logger</param>
/// <param name="layoutVersion">layout version forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, Void> StartLogSegment(long txid, int layoutVersion)
{
    IDictionary<AsyncLogger, ListenableFuture<Void>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        ListenableFuture<Void> pending = target.StartLogSegment(txid, layoutVersion);
        pendingByLogger[target] = pending;
    }
    return QuorumCall.Create(pendingByLogger);
}
> NewEpoch(NamespaceInfo nsInfo, long epoch) { /* Calls NewEpoch(epoch) on every logger in `loggers` and wraps the per-logger futures in one QuorumCall. NOTE(review): nsInfo is not read in this body - confirm that is intended. */ IDictionary <AsyncLogger, ListenableFuture <QJournalProtocolProtos.NewEpochResponseProto > > calls = Maps.NewHashMap(); foreach (AsyncLogger logger in loggers) { calls[logger] = logger.NewEpoch(epoch); } return(QuorumCall.Create(calls)); }
> GetJournalState() { /* Calls GetJournalState() on every logger in `loggers` and wraps the per-logger futures in one QuorumCall keyed by logger. */ IDictionary <AsyncLogger, ListenableFuture <QJournalProtocolProtos.GetJournalStateResponseProto > > calls = Maps.NewHashMap(); foreach (AsyncLogger logger in loggers) { calls[logger] = logger.GetJournalState(); } return(QuorumCall.Create(calls)); }
/// <summary>
/// Calls <c>GetJournalCTime</c> on every logger in <c>loggers</c> and wraps
/// the per-logger futures in a single <c>QuorumCall</c>.
/// </summary>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, long> GetJournalCTime()
{
    IDictionary<AsyncLogger, ListenableFuture<long>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.GetJournalCTime();
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Calls <c>DoRollback</c> on every logger in <c>loggers</c> and wraps the
/// per-logger futures in a single <c>QuorumCall</c>.
/// </summary>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, Void> DoRollback()
{
    IDictionary<AsyncLogger, ListenableFuture<Void>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.DoRollback();
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Starts a new log segment at <paramref name="txId"/> on the loggers, waits
/// for a write quorum, and returns an output stream for the segment.
/// Requires that this instance is already the active writer.
/// </summary>
/// <param name="txId">first transaction id of the new segment</param>
/// <param name="layoutVersion">layout version forwarded to the loggers</param>
/// <returns>a new <c>QuorumOutputStream</c> for the segment</returns>
/// <exception cref="System.IO.IOException"/>
public override EditLogOutputStream StartLogSegment(long txId, int layoutVersion)
{
    Preconditions.CheckState(isActiveWriter, "must recover segments before starting a new one");

    QuorumCall<AsyncLogger, Void> startCall = loggers.StartLogSegment(txId, layoutVersion);
    string operationName = "startLogSegment(" + txId + ")";
    loggers.WaitForWriteQuorum(startCall, startSegmentTimeoutMs, operationName);

    EditLogOutputStream segmentStream = new QuorumOutputStream(loggers, txId, outputBufferCapacity, writeTxnsTimeoutMs);
    return segmentStream;
}
/// <summary>
/// Calls <c>AcceptRecovery(log, fromURL)</c> on every logger in
/// <c>loggers</c> and wraps the per-logger futures in a single
/// <c>QuorumCall</c>.
/// </summary>
/// <param name="log">segment state forwarded to each logger</param>
/// <param name="fromURL">URL forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
internal virtual QuorumCall<AsyncLogger, Void> AcceptRecovery(QJournalProtocolProtos.SegmentStateProto log, Uri fromURL)
{
    IDictionary<AsyncLogger, ListenableFuture<Void>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.AcceptRecovery(log, fromURL);
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Calls <c>SendEdits</c> with the given batch on every logger in
/// <c>loggers</c> and wraps the per-logger futures in a single
/// <c>QuorumCall</c>.
/// </summary>
/// <param name="segmentTxId">segment transaction id forwarded to each logger</param>
/// <param name="firstTxnId">first transaction id of the batch</param>
/// <param name="numTxns">number of transactions in the batch</param>
/// <param name="data">batch bytes forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, Void> SendEdits(long segmentTxId, long firstTxnId, int numTxns, byte[] data)
{
    IDictionary<AsyncLogger, ListenableFuture<Void>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.SendEdits(segmentTxId, firstTxnId, numTxns, data);
    }
    return QuorumCall.Create(pendingByLogger);
}
> PrepareRecovery(long segmentTxId) { /* Calls PrepareRecovery(segmentTxId) on every logger in `loggers` and wraps the per-logger futures in one QuorumCall keyed by logger. */ IDictionary <AsyncLogger, ListenableFuture <QJournalProtocolProtos.PrepareRecoveryResponseProto > > calls = Maps.NewHashMap(); foreach (AsyncLogger logger in loggers) { ListenableFuture <QJournalProtocolProtos.PrepareRecoveryResponseProto> future = logger .PrepareRecovery(segmentTxId); calls[logger] = future; } return(QuorumCall.Create(calls)); }
/// <summary>
/// Calls <c>CanRollBack</c> with the given arguments on every logger in
/// <c>loggers</c> and wraps the per-logger futures in a single
/// <c>QuorumCall</c>.
/// </summary>
/// <param name="storage">storage info forwarded to each logger</param>
/// <param name="prevStorage">previous storage info forwarded to each logger</param>
/// <param name="targetLayoutVersion">layout version forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, bool> CanRollBack(StorageInfo storage, StorageInfo prevStorage, int targetLayoutVersion)
{
    IDictionary<AsyncLogger, ListenableFuture<bool>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.CanRollBack(storage, prevStorage, targetLayoutVersion);
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Calls <c>GetEditLogManifest(fromTxnId, inProgressOk)</c> on every logger
/// in <c>loggers</c> and wraps the per-logger futures in a single
/// <c>QuorumCall</c>.
/// </summary>
/// <param name="fromTxnId">transaction id forwarded to each logger</param>
/// <param name="inProgressOk">flag forwarded to each logger</param>
/// <returns>a quorum call keyed by logger</returns>
public virtual QuorumCall<AsyncLogger, RemoteEditLogManifest> GetEditLogManifest(long fromTxnId, bool inProgressOk)
{
    IDictionary<AsyncLogger, ListenableFuture<RemoteEditLogManifest>> pendingByLogger = Maps.NewHashMap();
    foreach (AsyncLogger target in loggers)
    {
        pendingByLogger[target] = target.GetEditLogManifest(fromTxnId, inProgressOk);
    }
    return QuorumCall.Create(pendingByLogger);
}
/// <summary>
/// Asks every logger whether a rollback is possible and returns the answer,
/// which must be identical across all loggers.
/// </summary>
/// <param name="storage">storage info forwarded to the loggers</param>
/// <param name="prevStorage">previous storage info forwarded to the loggers</param>
/// <param name="targetLayoutVersion">layout version forwarded to the loggers</param>
/// <returns>the result shared by all loggers</returns>
/// <exception cref="System.IO.IOException">
/// if the wait times out or is interrupted, if any logger call failed, or if
/// the loggers returned differing results
/// </exception>
public override bool CanRollBack(StorageInfo storage, StorageInfo prevStorage, int targetLayoutVersion)
{
    QuorumCall<AsyncLogger, bool> call = loggers.CanRollBack(storage, prevStorage, targetLayoutVersion);

    // Only the wait itself is guarded. Previously the general catch came
    // first (making the timeout handler unreachable) and also wrapped the
    // checks below, so their IOExceptions were relabelled "Interrupted".
    try
    {
        call.WaitFor(loggers.Size(), loggers.Size(), 0, CanRollBackTimeoutMs, "canRollBack");
    }
    catch (TimeoutException)
    {
        throw new IOException("Timed out waiting for canRollBack() " + "response");
    }
    catch (Exception)
    {
        throw new IOException("Interrupted waiting for canRollBack() " + "response");
    }

    if (call.CountExceptions() > 0)
    {
        call.RethrowException("Could not check if roll back possible for" + " one or more JournalNodes");
    }

    // Either they all return the same thing or this call fails, so we can
    // just return the first result.
    try
    {
        DFSUtil.AssertAllResultsEqual(call.GetResults().Values);
    }
    catch (Exception ae)
    {
        throw new IOException("Results differed for canRollBack", ae);
    }

    foreach (bool result in call.GetResults().Values)
    {
        return result;
    }

    // Only reached if the result map is empty.
    throw new Exception("Unreachable code.");
}
/// <summary>
/// Asks every logger to roll back and waits for all of them to respond.
/// </summary>
/// <exception cref="System.IO.IOException">
/// if the wait times out or is interrupted, or if any logger call failed
/// </exception>
public override void DoRollback()
{
    QuorumCall<AsyncLogger, Void> call = loggers.DoRollback();

    // Only the wait itself is guarded. Previously the general catch came
    // first (making the timeout handler unreachable) and also swallowed the
    // exception raised by RethrowException below.
    try
    {
        call.WaitFor(loggers.Size(), loggers.Size(), 0, RollBackTimeoutMs, "doRollback");
    }
    catch (TimeoutException)
    {
        // Messages previously named doFinalize(), a copy-paste slip.
        throw new IOException("Timed out waiting for doRollback() response");
    }
    catch (Exception)
    {
        throw new IOException("Interrupted waiting for doRollback() response");
    }

    if (call.CountExceptions() > 0)
    {
        call.RethrowException("Could not perform rollback of one or more JournalNodes");
    }
}
/// <summary>
/// Asks every logger to upgrade using <paramref name="storage"/> and waits
/// for all of them to respond.
/// </summary>
/// <param name="storage">storage forwarded to the loggers</param>
/// <exception cref="System.IO.IOException">
/// if the wait times out or is interrupted, or if any logger call failed
/// </exception>
public override void DoUpgrade(Storage storage)
{
    QuorumCall<AsyncLogger, Void> call = loggers.DoUpgrade(storage);

    // Only the wait itself is guarded. Previously the general catch came
    // first (making the timeout handler unreachable) and also swallowed the
    // exception raised by RethrowException below.
    try
    {
        call.WaitFor(loggers.Size(), loggers.Size(), 0, UpgradeTimeoutMs, "doUpgrade");
    }
    catch (TimeoutException)
    {
        throw new IOException("Timed out waiting for doUpgrade() response");
    }
    catch (Exception)
    {
        throw new IOException("Interrupted waiting for doUpgrade() response");
    }

    if (call.CountExceptions() > 0)
    {
        call.RethrowException("Could not perform upgrade of one or more JournalNodes");
    }
}
/// <summary>
/// Asks every logger to format with <paramref name="nsInfo"/> and waits for
/// all of them to respond.
/// </summary>
/// <param name="nsInfo">namespace info forwarded to the loggers</param>
/// <exception cref="System.IO.IOException">
/// if the wait times out or is interrupted, or if any logger call failed
/// </exception>
public override void Format(NamespaceInfo nsInfo)
{
    QuorumCall<AsyncLogger, Void> call = loggers.Format(nsInfo);
    try
    {
        call.WaitFor(loggers.Size(), loggers.Size(), 0, FormatTimeoutMs, "format");
    }
    catch (TimeoutException)
    {
        // Must precede the general handler, otherwise it is unreachable.
        throw new IOException("Timed out waiting for format() response");
    }
    catch (Exception)
    {
        throw new IOException("Interrupted waiting for format() response");
    }

    if (call.CountExceptions() > 0)
    {
        call.RethrowException("Could not format one or more JournalNodes");
    }
}
/// <summary>
/// Asks every logger to discard segments starting at
/// <paramref name="startTxId"/> and waits for all of them to respond.
/// </summary>
/// <param name="startTxId">transaction id forwarded to the loggers</param>
/// <exception cref="System.IO.IOException">
/// if the wait times out or is interrupted, or if any logger call failed
/// </exception>
public override void DiscardSegments(long startTxId)
{
    QuorumCall<AsyncLogger, Void> call = loggers.DiscardSegments(startTxId);

    // Only the wait itself is guarded. Previously the general catch came
    // first (making the timeout handler unreachable) and also swallowed the
    // exception raised by RethrowException below.
    try
    {
        call.WaitFor(loggers.Size(), loggers.Size(), 0, DiscardSegmentsTimeoutMs, "discardSegments");
    }
    catch (TimeoutException)
    {
        throw new IOException("Timed out waiting for discardSegments() response");
    }
    catch (Exception)
    {
        throw new IOException("Interrupted waiting for discardSegments() response");
    }

    if (call.CountExceptions() > 0)
    {
        call.RethrowException("Could not perform discardSegments of one or more JournalNodes");
    }
}
/// <summary>
/// Fetches the journal CTime from every logger and returns it; all loggers
/// must report the same value.
/// </summary>
/// <returns>the CTime shared by all loggers</returns>
/// <exception cref="System.IO.IOException">
/// if the wait times out or is interrupted, if any logger call failed, or if
/// the loggers returned differing results
/// </exception>
public override long GetJournalCTime()
{
    QuorumCall<AsyncLogger, long> call = loggers.GetJournalCTime();

    // Only the wait itself is guarded. Previously the general catch came
    // first (making the timeout handler unreachable) and also wrapped the
    // checks below, so their IOExceptions were relabelled "Interrupted".
    try
    {
        call.WaitFor(loggers.Size(), loggers.Size(), 0, GetJournalCtimeTimeoutMs, "getJournalCTime");
    }
    catch (TimeoutException)
    {
        throw new IOException("Timed out waiting for getJournalCTime() " + "response");
    }
    catch (Exception)
    {
        throw new IOException("Interrupted waiting for getJournalCTime() " + "response");
    }

    if (call.CountExceptions() > 0)
    {
        // Message repaired: it previously read "Could not journal CTime for one more JournalNodes".
        call.RethrowException("Could not get journal CTime for one or more JournalNodes");
    }

    // Either they all return the same thing or this call fails, so we can
    // just return the first result.
    try
    {
        DFSUtil.AssertAllResultsEqual(call.GetResults().Values);
    }
    catch (Exception ae)
    {
        throw new IOException("Results differed for getJournalCTime", ae);
    }

    foreach (long result in call.GetResults().Values)
    {
        return result;
    }

    // Only reached if the result map is empty.
    throw new Exception("Unreachable code.");
}
/// <summary>Run recovery/synchronization for a specific segment.</summary>
/// <remarks>
/// Run recovery/synchronization for a specific segment.
/// Postconditions:
/// <ul>
/// <li>This segment will be finalized on a majority
/// of nodes.</li>
/// <li>All nodes which contain the finalized segment will
/// agree on the length.</li>
/// </ul>
/// The method runs three quorum steps in order: prepareRecovery,
/// acceptRecovery, and finalizeLogSegment. It returns early, doing nothing,
/// when no prepare response carries a segment state.
/// </remarks>
/// <param name="segmentTxId">the starting txid of the segment</param>
/// <exception cref="System.IO.IOException"/>
private void RecoverUnclosedSegment(long segmentTxId)
{
    Preconditions.CheckArgument(segmentTxId > 0);
    Log.Info("Beginning recovery of unclosed segment starting at txid " + segmentTxId);

    // Step 1. Prepare recovery
    QuorumCall<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepare =
        loggers.PrepareRecovery(segmentTxId);
    IDictionary<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> prepareResponses =
        loggers.WaitForWriteQuorum(prepare, prepareRecoveryTimeoutMs, "prepareRecovery(" + segmentTxId + ")");
    Log.Info("Recovery prepare phase complete. Responses:\n" + QuorumCall.MapToString(prepareResponses));

    // Determine the logger who either:
    // a) Has already accepted a previous proposal that's higher than any
    //    other
    //
    //  OR, if no such logger exists:
    //
    // b) Has the longest log starting at this transaction ID
    // TODO: we should collect any "ties" and pass the URL for all of them
    // when syncing, so we can tolerate failure during recovery better.
    KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> bestEntry =
        Sharpen.Collections.Max(prepareResponses, SegmentRecoveryComparator.Instance);
    AsyncLogger bestLogger = bestEntry.Key;
    QJournalProtocolProtos.PrepareRecoveryResponseProto bestResponse = bestEntry.Value;

    // Log the above decision, check invariants.
    if (bestResponse.HasAcceptedInEpoch())
    {
        Log.Info("Using already-accepted recovery for segment " + "starting at txid " + segmentTxId + ": " + bestEntry);
    }
    else if (bestResponse.HasSegmentState())
    {
        Log.Info("Using longest log: " + bestEntry);
    }
    else
    {
        // None of the responses to prepareRecovery() had a segment at the given
        // txid. This can happen for example in the following situation:
        // - 3 JNs: JN1, JN2, JN3
        // - writer starts segment 101 on JN1, then crashes before
        //   writing to JN2 and JN3
        // - during newEpoch(), we saw the segment on JN1 and decide to
        //   recover segment 101
        // - before prepare(), JN1 crashes, and we only talk to JN2 and JN3,
        //   neither of which has any entry for this log.
        // In this case, it is allowed to do nothing for recovery, since the
        // segment wasn't started on a quorum of nodes.
        // Sanity check: we should only get here if none of the responses had
        // a log. This should be a postcondition of the recovery comparator,
        // but a bug in the comparator might cause us to get here.
        foreach (QJournalProtocolProtos.PrepareRecoveryResponseProto resp in prepareResponses.Values)
        {
            System.Diagnostics.Debug.Assert(!resp.HasSegmentState(), "One of the loggers had a response, but no best logger " + "was found.");
        }
        Log.Info("None of the responders had a log to recover: " + QuorumCall.MapToString(prepareResponses));
        return;
    }

    QJournalProtocolProtos.SegmentStateProto logToSync = bestResponse.GetSegmentState();
    System.Diagnostics.Debug.Assert(segmentTxId == logToSync.GetStartTxId());

    // Sanity check: none of the loggers should be aware of a higher
    // txid than the txid we intend to truncate to
    foreach (KeyValuePair<AsyncLogger, QJournalProtocolProtos.PrepareRecoveryResponseProto> e in prepareResponses)
    {
        AsyncLogger logger = e.Key;
        QJournalProtocolProtos.PrepareRecoveryResponseProto resp = e.Value;
        if (resp.HasLastCommittedTxId() && resp.GetLastCommittedTxId() > logToSync.GetEndTxId())
        {
            throw new Exception("Decided to synchronize log to " + logToSync + " but logger " + logger + " had seen txid " + resp.GetLastCommittedTxId() + " committed");
        }
    }

    // Step 2. Have the loggers accept the chosen segment, fetched from the best logger.
    Uri syncFromUrl = bestLogger.BuildURLToFetchLogs(segmentTxId);
    QuorumCall<AsyncLogger, Void> accept = loggers.AcceptRecovery(logToSync, syncFromUrl);
    loggers.WaitForWriteQuorum(accept, acceptRecoveryTimeoutMs, "acceptRecovery(" + TextFormat.ShortDebugString(logToSync) + ")");

    // Step 3. Finalize.
    // If one of the loggers above missed the synchronization step above, but
    // we send a finalize() here, that's OK. It validates the log before
    // finalizing. Hence, even if it is not "in sync", it won't incorrectly
    // finalize.
    QuorumCall<AsyncLogger, Void> finalize = loggers.FinalizeLogSegment(logToSync.GetStartTxId(), logToSync.GetEndTxId());
    // .NET composite formatting uses {0}/{1}; the Java-style "%s" placeholders
    // were emitted literally and the txids never appeared in the operation name.
    string finalizeOperation = string.Format("finalizeLogSegment({0}-{1})", logToSync.GetStartTxId(), logToSync.GetEndTxId());
    loggers.WaitForWriteQuorum(finalize, finalizeSegmentTimeoutMs, finalizeOperation);
}