In open_revs, do not count errors in quorum threshold calculation
authorNick Vatamaniuc <vatamane@apache.org>
Thu, 12 Jan 2017 18:50:20 +0000 (13:50 -0500)
committerNick Vatamaniuc <vatamane@apache.org>
Fri, 20 Jan 2017 15:56:14 +0000 (10:56 -0500)
Previously quorum check looked just at the number of replies and decided quorum
was met even if it only received errors. For example, if 2 nodes are in
maintance mode it might receive this sequence of replies:

  `rexi_EXIT, rexi_EXIT, ok`

In that case after the first two it would decide quorum (r=2) was met, return
what it had so far ([]) and kill the remaining worker, who was about to return
a valid revision.

The fix is to keep track of error replies and subtract them when deciding if
quorum was met.

COUCHDB-3271

src/fabric_doc_open_revs.erl

index 573cda7..5393f92 100644 (file)
@@ -24,6 +24,7 @@
     worker_count,
     workers,
     reply_count = 0,
+    reply_error_count = 0,
     r,
     revs,
     latest,
@@ -60,13 +61,15 @@ go(DbName, Id, Revs, Options) ->
 
 handle_message({rexi_DOWN, _, {_,NodeRef},_}, _Worker, #state{workers=Workers}=State) ->
     NewState = State#state{
-        workers = lists:keydelete(NodeRef, #shard.node, Workers)
+        workers = lists:keydelete(NodeRef, #shard.node, Workers),
+        reply_error_count = State#state.reply_error_count + 1
     },
     handle_message({ok, []}, nil, NewState);
 
 handle_message({rexi_EXIT, _}, Worker, #state{workers=Workers}=State) ->
     NewState = State#state{
-        workers = lists:delete(Worker, Workers)
+        workers = lists:delete(Worker, Workers),
+        reply_error_count = State#state.reply_error_count + 1
     },
     handle_message({ok, []}, nil, NewState);
 
@@ -80,18 +83,22 @@ handle_message({ok, RawReplies}, Worker, State) ->
         r = R,
         revs = Revs,
         latest = Latest,
-        repair = InRepair
+        repair = InRepair,
+        reply_error_count = ReplyErrorCount
     } = State,
 
     IsTree = Revs == all orelse Latest,
 
+    % Do not count error replies when checking quorum
+    QuorumReplies = ReplyCount + 1 - ReplyErrorCount >= R,
+
     {NewReplies, QuorumMet, Repair} = case IsTree of
         true ->
             {NewReplies0, AllInternal, Repair0} =
                     tree_replies(PrevReplies, tree_sort(RawReplies)),
             NumLeafs = couch_key_tree:count_leafs(PrevReplies),
             SameNumRevs = length(RawReplies) == NumLeafs,
-            QMet = AllInternal andalso SameNumRevs andalso ReplyCount + 1 >= R,
+            QMet = AllInternal andalso SameNumRevs andalso QuorumReplies,
             {NewReplies0, QMet, Repair0};
         false ->
             {NewReplies0, MinCount} = dict_replies(PrevReplies, RawReplies),
@@ -306,6 +313,7 @@ open_doc_revs_test_() ->
             check_ancestor_counted_in_quorum(),
             check_not_found_counts_for_descendant(),
             check_worker_error_skipped(),
+            check_quorum_only_counts_valid_responses(),
             check_not_found_replies_are_removed_when_doc_found(),
             check_not_found_returned_when_one_of_docs_not_found(),
             check_not_found_returned_when_doc_not_found()
@@ -469,6 +477,20 @@ check_worker_error_skipped() ->
     end).
 
 
+check_quorum_only_counts_valid_responses() ->
+    ?_test(begin
+        S0 = state0(revs(), true),
+        Msg1 = {rexi_EXIT, reason},
+        Msg2 = {rexi_EXIT, reason},
+        Msg3 = {ok, [foo1(), bar1(), baz1()]},
+        Expect = {stop, [bar1(), baz1(), foo1()]},
+
+        {ok, S1} = handle_message(Msg1, w1, S0),
+        {ok, S2} = handle_message(Msg2, w2, S1),
+        ?assertEqual(Expect, handle_message(Msg3, w3, S2))
+    end).
+
+
 check_not_found_replies_are_removed_when_doc_found() ->
     ?_test(begin
         Replies = replies_to_dict([foo1(), bar1(), fooNF()]),