Make key lookups fail after request exhaustion instead of providing default

Returning the own node as a default does not make sense in all contexts:
Especially for bootstrap joining this can be harmful, so signalling
instead that the lookup failed makes distinguishing on a case by case
basis possible.

Also contributes to #57
This commit is contained in:
Trolli Schmittlauch 2020-08-25 12:51:33 +02:00
parent 6c5e40f8ad
commit b23201a49c
2 changed files with 53 additions and 36 deletions

View file

@ -49,6 +49,8 @@ import Control.Concurrent.STM.TQueue
import Control.Concurrent.STM.TVar import Control.Concurrent.STM.TVar
import Control.Exception import Control.Exception
import Control.Monad (foldM, forM, forM_, void, when) import Control.Monad (foldM, forM, forM_, void, when)
import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Except (MonadError(..), runExceptT)
import qualified Data.ByteString as BS import qualified Data.ByteString as BS
import Data.Either (rights) import Data.Either (rights)
import Data.Foldable (foldl', foldr') import Data.Foldable (foldl', foldr')
@ -533,9 +535,10 @@ requestJoin toJoinOn ownStateSTM = do
-- | Send a 'QueryID' 'Request' for getting the node that handles a certain key ID. -- | Send a 'QueryID' 'Request' for getting the node that handles a certain key ID.
requestQueryID :: LocalNodeState s -- ^ NodeState of the querying node requestQueryID :: (MonadIO m, MonadError String m)
=> LocalNodeState s -- ^ NodeState of the querying node
-> NodeID -- ^ target key ID to look up -> NodeID -- ^ target key ID to look up
-> IO RemoteNodeState -- ^ the node responsible for handling that key -> m RemoteNodeState -- ^ the node responsible for handling that key
-- 1. do a local lookup for the l closest nodes -- 1. do a local lookup for the l closest nodes
-- 2. create l sockets -- 2. create l sockets
-- 3. send a message async concurrently to all l nodes -- 3. send a message async concurrently to all l nodes
@ -543,23 +546,23 @@ requestQueryID :: LocalNodeState s -- ^ NodeState of the querying node
-- 5. repeat until FOUND (problem: new entries not necessarily already in cache, explicitly compare with closer results) -- 5. repeat until FOUND (problem: new entries not necessarily already in cache, explicitly compare with closer results)
-- TODO: deal with lookup failures -- TODO: deal with lookup failures
requestQueryID ns targetID = do requestQueryID ns targetID = do
firstCacheSnapshot <- readTVarIO . nodeCacheSTM $ ns firstCacheSnapshot <- liftIO . readTVarIO . nodeCacheSTM $ ns
-- TODO: make maxAttempts configurable -- TODO: make maxAttempts configurable
queryIdLookupLoop firstCacheSnapshot ns 50 targetID queryIdLookupLoop firstCacheSnapshot ns 50 targetID
-- | like 'requestQueryID, but allows passing of a custom cache, e.g. for joining -- | like 'requestQueryID, but allows passing of a custom cache, e.g. for joining
queryIdLookupLoop :: NodeCache -> LocalNodeState s -> Int -> NodeID -> IO RemoteNodeState queryIdLookupLoop :: (MonadIO m, MonadError String m) => NodeCache -> LocalNodeState s -> Int -> NodeID -> m RemoteNodeState
-- return node itself as default fallback value against infinite recursion. -- return node itself as default fallback value against infinite recursion.
-- TODO: consider using an Either instead of a default value -- TODO: consider using an Either instead of a default value
queryIdLookupLoop _ ns 0 _ = pure $ toRemoteNodeState ns queryIdLookupLoop _ ns 0 _ = throwError "exhausted maximum lookup attempts"
queryIdLookupLoop cacheSnapshot ns maxAttempts targetID = do queryIdLookupLoop cacheSnapshot ns maxAttempts targetID = do
let localResult = queryLocalCache ns cacheSnapshot (lNumBestNodes ns) targetID let localResult = queryLocalCache ns cacheSnapshot (lNumBestNodes ns) targetID
-- FOUND can only be returned if targetID is owned by local node -- FOUND can only be returned if targetID is owned by local node
case localResult of case localResult of
FOUND thisNode -> pure thisNode FOUND thisNode -> pure thisNode
FORWARD nodeSet -> do FORWARD nodeSet -> do
responseEntries <- sendQueryIdMessages targetID ns Nothing (remoteNode <$> Set.elems nodeSet) responseEntries <- liftIO $ sendQueryIdMessages targetID ns Nothing (remoteNode <$> Set.elems nodeSet)
now <- getPOSIXTime now <- liftIO getPOSIXTime
-- check for a FOUND and return it -- check for a FOUND and return it
case responseEntries of case responseEntries of
FOUND foundNode -> pure foundNode FOUND foundNode -> pure foundNode

View file

@ -166,6 +166,7 @@ fediChordBootstrapJoin nsSTM bootstrapNode = do
currentlyResponsible <- liftEither lookupResp currentlyResponsible <- liftEither lookupResp
liftIO . putStrLn $ "Trying to join on " <> show (getNid currentlyResponsible) liftIO . putStrLn $ "Trying to join on " <> show (getNid currentlyResponsible)
-- 2. then send a join to the currently responsible node -- 2. then send a join to the currently responsible node
liftIO $ putStrLn "send a bootstrap Join"
joinResult <- liftIO $ requestJoin currentlyResponsible nsSTM joinResult <- liftIO $ requestJoin currentlyResponsible nsSTM
liftEither joinResult liftEither joinResult
@ -244,26 +245,24 @@ bootstrapQueryId nsSTM (bootstrapHost, bootstrapPort) targetID = do
Just (FORWARD resultset) -> foldr' (addCacheEntryPure now) cacheAcc resultset Just (FORWARD resultset) -> foldr' (addCacheEntryPure now) cacheAcc resultset
) )
initCache resp initCache resp
currentlyResponsible <- queryIdLookupLoop bootstrapCache ns 50 $ getNid ns currentlyResponsible <- runExceptT $ queryIdLookupLoop bootstrapCache ns 50 $ getNid ns
pure $ Right currentlyResponsible pure currentlyResponsible
-- | join a node to the DHT using the global node cache -- | join a node to the DHT using the global node cache
-- node's position. -- node's position.
fediChordVserverJoin :: Service s (RealNodeSTM s) fediChordVserverJoin :: (MonadError String m, MonadIO m, Service s (RealNodeSTM s))
=> LocalNodeStateSTM s -- ^ the local 'NodeState' => LocalNodeStateSTM s -- ^ the local 'NodeState'
-> IO (Either String (LocalNodeStateSTM s)) -- ^ the joined 'NodeState' after a -> m (LocalNodeStateSTM s) -- ^ the joined 'NodeState' after a
-- successful join, otherwise an error message -- successful join, otherwise an error message
fediChordVserverJoin nsSTM = do fediChordVserverJoin nsSTM = do
ns <- readTVarIO nsSTM ns <- liftIO $ readTVarIO nsSTM
-- 1. get routed to the currently responsible node -- 1. get routed to the currently responsible node
currentlyResponsible <- requestQueryID ns $ getNid ns currentlyResponsible <- requestQueryID ns $ getNid ns
putStrLn $ "Trying to join on " <> show (getNid currentlyResponsible) liftIO . putStrLn $ "Trying to join on " <> show (getNid currentlyResponsible)
-- 2. then send a join to the currently responsible node -- 2. then send a join to the currently responsible node
joinResult <- requestJoin currentlyResponsible nsSTM joinResult <- liftIO $ requestJoin currentlyResponsible nsSTM
case joinResult of liftEither joinResult
Left err -> pure . Left $ "Error joining on " <> err
Right joinedNS -> pure . Right $ joinedNS
fediChordVserverLeave :: (MonadError String m, MonadIO m, Service s (RealNodeSTM s)) => LocalNodeState s -> m () fediChordVserverLeave :: (MonadError String m, MonadIO m, Service s (RealNodeSTM s)) => LocalNodeState s -> m ()
fediChordVserverLeave ns = do fediChordVserverLeave ns = do
@ -323,7 +322,7 @@ joinOnNewEntriesThread nsSTM = loop
pure () pure ()
-- otherwise try joining -- otherwise try joining
FORWARD _ -> do FORWARD _ -> do
joinResult <- fediChordVserverJoin nsSTM joinResult <- runExceptT $ fediChordVserverJoin nsSTM
either either
-- on join failure, sleep and retry -- on join failure, sleep and retry
-- TODO: make delay configurable -- TODO: make delay configurable
@ -504,18 +503,26 @@ stabiliseThread nsSTM = forever $ do
-- try looking up additional neighbours if list too short -- try looking up additional neighbours if list too short
forM_ [(length $ predecessors updatedNs)..(kNeighbours updatedNs)] (\_ -> do forM_ [(length $ predecessors updatedNs)..(kNeighbours updatedNs)] (\_ -> do
ns' <- readTVarIO nsSTM ns' <- readTVarIO nsSTM
nextEntry <- requestQueryID ns' $ pred . getNid $ lastDef (toRemoteNodeState ns') (predecessors ns') nextEntry <- runExceptT . requestQueryID ns' $ pred . getNid $ lastDef (toRemoteNodeState ns') (predecessors ns')
atomically $ do either
(const $ pure ())
(\entry -> atomically $ do
latestNs <- readTVar nsSTM latestNs <- readTVar nsSTM
writeTVar nsSTM $ addPredecessors [nextEntry] latestNs writeTVar nsSTM $ addPredecessors [entry] latestNs
)
nextEntry
) )
forM_ [(length $ successors updatedNs)..(kNeighbours updatedNs)] (\_ -> do forM_ [(length $ successors updatedNs)..(kNeighbours updatedNs)] (\_ -> do
ns' <- readTVarIO nsSTM ns' <- readTVarIO nsSTM
nextEntry <- requestQueryID ns' $ succ . getNid $ lastDef (toRemoteNodeState ns') (successors ns') nextEntry <- runExceptT . requestQueryID ns' $ succ . getNid $ lastDef (toRemoteNodeState ns') (successors ns')
atomically $ do either
(const $ pure ())
(\entry -> atomically $ do
latestNs <- readTVar nsSTM latestNs <- readTVar nsSTM
writeTVar nsSTM $ addSuccessors [nextEntry] latestNs writeTVar nsSTM $ addSuccessors [entry] latestNs
)
nextEntry
) )
newNs <- readTVarIO nsSTM newNs <- readTVarIO nsSTM
@ -638,7 +645,7 @@ requestMapPurge :: MVar RequestMap -> IO ()
requestMapPurge mapVar = forever $ do requestMapPurge mapVar = forever $ do
rMapState <- takeMVar mapVar rMapState <- takeMVar mapVar
now <- getPOSIXTime now <- getPOSIXTime
putMVar mapVar $ Map.filter (\entry@(RequestMapEntry _ _ ts) -> putMVar mapVar $ Map.filter (\(RequestMapEntry _ _ ts) ->
now - ts < responsePurgeAge now - ts < responsePurgeAge
) rMapState ) rMapState
threadDelay $ round responsePurgeAge * 2 * 10^6 threadDelay $ round responsePurgeAge * 2 * 10^6
@ -757,7 +764,7 @@ getKeyResponsibility nodeSTM lookupKey = do
-- new entry. -- new entry.
-- If no vserver is active in the DHT, 'Nothing' is returned. -- If no vserver is active in the DHT, 'Nothing' is returned.
updateLookupCache :: RealNodeSTM s -> NodeID -> IO (Maybe (String, PortNumber)) updateLookupCache :: RealNodeSTM s -> NodeID -> IO (Maybe (String, PortNumber))
updateLookupCache nodeSTM lookupKey = do updateLookupCache nodeSTM keyToLookup = do
(node, lookupSource) <- atomically $ do (node, lookupSource) <- atomically $ do
node <- readTVar nodeSTM node <- readTVar nodeSTM
let firstVs = headMay (vservers node) let firstVs = headMay (vservers node)
@ -767,18 +774,25 @@ updateLookupCache nodeSTM lookupKey = do
pure (node, lookupSource) pure (node, lookupSource)
maybe (do maybe (do
-- if no local node available, delete cache entry and return Nothing -- if no local node available, delete cache entry and return Nothing
atomically $ modifyTVar' (lookupCacheSTM node) $ Map.delete lookupKey atomically $ modifyTVar' (lookupCacheSTM node) $ Map.delete keyToLookup
pure Nothing pure Nothing
) )
(\n -> do (\n -> do
-- start a lookup from the node, update the cache with the lookup result and return it -- start a lookup from the node, update the cache with the lookup result and return it
newResponsible <- requestQueryID n lookupKey -- TODO: better retry management, because having no vserver joined yet should
let newEntry = (getDomain newResponsible, getServicePort newResponsible) -- be treated differently than other reasons for not getting a result.
newResponsible <- runExceptT $ requestQueryID n keyToLookup
either
(const $ pure Nothing)
(\result -> do
let newEntry = (getDomain result, getServicePort result)
now <- getPOSIXTime now <- getPOSIXTime
-- atomic update against lost updates -- atomic update against lost updates
atomically $ modifyTVar' (lookupCacheSTM node) $ atomically $ modifyTVar' (lookupCacheSTM node) $
Map.insert lookupKey (CacheEntry False newEntry now) Map.insert keyToLookup (CacheEntry False newEntry now)
pure $ Just newEntry pure $ Just newEntry
)
newResponsible
) lookupSource ) lookupSource