k-choices error handling: detect empty joins, finer fail granularity

This commit is contained in:
Trolli Schmittlauch 2020-09-25 02:03:42 +02:00
parent 3b6d129bfc
commit 7a87d86c32

View file

@ -221,7 +221,9 @@ kChoicesNodeJoin nodeSTM bootstrapNode = do
-- edge case: however small the target is, at least join 1 vs
-- kChoicesVsJoin until target is met
joinedVss <- vsJoins vs0STM (totalCapacity ownLoadStats) (vservers node) joinLoadTarget (fromIntegral initialJoins) nodeSTM
liftIO . atomically . modifyTVar' nodeSTM $ \node' -> node'
if HMap.null joinedVss
then throwError "k-choices join unsuccessful, no vserver joined"
else liftIO . atomically . modifyTVar' nodeSTM $ \node' -> node'
{ vservers = HMap.union (vservers node') joinedVss }
pure ()
@ -231,13 +233,14 @@ kChoicesNodeJoin nodeSTM bootstrapNode = do
vsJoins _ _ vsmap _ 0 _ = pure vsmap
vsJoins queryVsSTM capacity vsmap remainingTargetLoad remainingJoins nodeSTM'
| remainingTargetLoad <= 0 = pure vsmap
| otherwise = (do
| otherwise = do
(acquiredLoad, (newNid, newVs)) <- kChoicesVsJoin queryVsSTM bootstrapNode capacity vsmap nodeSTM' remainingTargetLoad
-- on successful vserver join add the new VS to node and recurse
vsJoins queryVsSTM capacity (HMap.insert newNid newVs vsmap) (remainingTargetLoad - acquiredLoad) (pred remainingJoins) nodeSTM'
)
-- TODO: decide on whether and how to catch errors
-- on error, just reduce the amount of tries and retry
`catchError` (\e -> liftIO (putStrLn e) >> vsJoins queryVsSTM capacity vsmap remainingTargetLoad (pred remainingJoins) nodeSTM')
-- error cause 1: not a single queried node has responded -> indicates permanent failure
-- error cause 2: only a certain join failed; just ignore that join target for now. Problem: the same
-- target will be chosen again at the next attempt
@ -284,6 +287,8 @@ kChoicesVsJoin queryVsSTM bootstrapNode capacity activeVss nodeSTM remainingTarg
$ segmentLoads
-- join at min cost
joinedNode <- fediChordJoinNewVs nodeSTM vsId toJoinOn
-- idea: a single join failure shall not make the whole process fail
--`catchError`
pure (mincost, joinedNode)
-- Possible optimisation: