re-structure cacheVerifyThread to work on a RealNode and iterate over all joined vservers

contributes to #34
Trolli Schmittlauch 2020-09-21 02:11:43 +02:00
parent 68de73d919
commit 33ae904d17
2 changed files with 135 additions and 116 deletions
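In short, the per-node maintenance threads no longer receive a single LocalNodeStateSTM but the whole RealNodeSTM and run their per-vserver work for every joined vserver; the cache verify thread additionally blocks until the first vserver has joined. A condensed sketch of that shared shape (illustrative only: maintenanceThread is not a name from this commit, and it assumes the module's existing imports from Control.Monad, Control.Concurrent and Control.Concurrent.STM):

    -- schematic shape shared by the restructured threads in the diff below
    maintenanceThread :: RealNodeSTM s -> IO ()
    maintenanceThread nodeSTM = forever $ do
        node <- readTVarIO nodeSTM
        forM_ (vservers node) $ \vsSTM -> do
            ns <- readTVarIO vsSTM
            -- per-vserver maintenance work on ns goes here
            pure ()
        threadDelay . confStabiliseInterval . nodeConfig $ node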


@@ -269,7 +269,7 @@ handleIncomingRequest nsSTM sendQ msgSet sourceAddr = do
         Nothing -> pure ()
         Just aPart -> do
             let (SockAddrInet6 _ _ sourceIP _) = sourceAddr
-            queueAddEntries (Identity $ RemoteCacheEntry (sender aPart) now) ns
+            queueAddEntries (Identity $ RemoteCacheEntry (sender aPart) now) (cacheWriteQueue ns)
     -- distinguish on whether and how to respond. If responding, pass message to response generating function and write responses to send queue
     maybe (pure ()) (
         mapM_ (\resp -> atomically $ writeTQueue sendQ (resp, sourceAddr))
@@ -542,7 +542,7 @@ requestJoin toJoinOn ownStateSTM = do
                 writeTVar ownStateSTM newState
                 pure (cacheInsertQ, newState)
             -- execute the cache insertions
-            mapM_ (\f -> f joinedState) cacheInsertQ
+            mapM_ (\f -> f (cacheWriteQueue joinedState)) cacheInsertQ
             if responses == Set.empty
                 then pure . Left $ "join error: got no response from " <> show (getNid toJoinOn)
                 else do
@@ -624,7 +624,7 @@ sendQueryIdMessages targetID ns lParam targets = do
                     _ -> Set.empty
             -- forward entries to global cache
-            queueAddEntries entrySet ns
+            queueAddEntries entrySet (cacheWriteQueue ns)
             -- return accumulated QueryResult
             pure $ case acc of
                 -- once a FOUND as been encountered, return this as a result
@@ -670,7 +670,7 @@ requestStabilise ns neighbour = do
                 )
                 ([],[]) respSet
         -- update successfully responded neighbour in cache
-        maybe (pure ()) (\p -> queueUpdateVerifieds (Identity $ senderID p) ns) $ headMay (Set.elems respSet)
+        maybe (pure ()) (\p -> queueUpdateVerifieds (Identity $ senderID p) (cacheWriteQueue ns)) $ headMay (Set.elems respSet)
         pure $ if null responsePreds && null responseSuccs
             then Left "no neighbours returned"
             else Right (responsePreds, responseSuccs)
@@ -832,24 +832,24 @@ sendRequestTo timeoutMillis numAttempts msgIncomplete sock = do

 -- | enqueue a list of RemoteCacheEntries to be added to the global NodeCache
 queueAddEntries :: Foldable c => c RemoteCacheEntry
-                -> LocalNodeState s
+                -> TQueue (NodeCache -> NodeCache)
                 -> IO ()
-queueAddEntries entries ns = do
+queueAddEntries entries cacheQ = do
     now <- getPOSIXTime
-    forM_ entries $ \entry -> atomically $ writeTQueue (cacheWriteQueue ns) $ addCacheEntryPure now entry
+    forM_ entries $ \entry -> atomically $ writeTQueue cacheQ $ addCacheEntryPure now entry

 -- | enque a list of node IDs to be deleted from the global NodeCache
 queueDeleteEntries :: Foldable c
                    => c NodeID
-                   -> LocalNodeState s
+                   -> TQueue (NodeCache -> NodeCache)
                    -> IO ()
-queueDeleteEntries ids ns = forM_ ids $ atomically . writeTQueue (cacheWriteQueue ns) . deleteCacheEntry
+queueDeleteEntries ids cacheQ = forM_ ids $ atomically . writeTQueue cacheQ . deleteCacheEntry

 -- | enque a single node ID to be deleted from the global NodeCache
 queueDeleteEntry :: NodeID
-                 -> LocalNodeState s
+                 -> TQueue (NodeCache -> NodeCache)
                  -> IO ()
 queueDeleteEntry toDelete = queueDeleteEntries $ Identity toDelete
@@ -858,11 +858,11 @@ queueDeleteEntry toDelete = queueDeleteEntries $ Identity toDelete
 -- global 'NodeCache'.
 queueUpdateVerifieds :: Foldable c
                      => c NodeID
-                     -> LocalNodeState s
+                     -> TQueue (NodeCache -> NodeCache)
                      -> IO ()
-queueUpdateVerifieds nIds ns = do
+queueUpdateVerifieds nIds cacheQ = do
     now <- getPOSIXTime
-    forM_ nIds $ \nid' -> atomically $ writeTQueue (cacheWriteQueue ns) $
+    forM_ nIds $ \nid' -> atomically $ writeTQueue cacheQ $
         markCacheEntryAsVerified (Just now) nid'

 -- | retry an IO action at most *i* times until it delivers a result
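Since the queue helpers above now take the TQueue (NodeCache -> NodeCache) itself instead of a whole LocalNodeState s, a caller can enqueue cache modifications with whichever write queue it holds. A minimal sketch of the two call shapes this enables (queueAddEntries, queueDeleteEntry, cacheWriteQueue and globalCacheWriteQueue are taken from this commit; the bindings ns, node, aPart, now and staleNode are illustrative):

    -- per-vserver call sites keep passing that vserver's queue, as in the hunks above:
    queueAddEntries (Identity $ RemoteCacheEntry (sender aPart) now) (cacheWriteQueue ns)
    -- code that only holds the RealNode can write to the shared global queue directly:
    queueDeleteEntry (getNid staleNode) (globalCacheWriteQueue node)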


@@ -366,59 +366,72 @@ nodeCacheWriter nodeSTM = do
 -- | Periodically iterate through cache, clean up expired entries and verify unverified ones
-nodeCacheVerifyThread :: LocalNodeStateSTM s -> IO ()
-nodeCacheVerifyThread nsSTM = forever $ do
-    -- get cache
-    (ns, cache, maxEntryAge) <- atomically $ do
-        ns <- readTVar nsSTM
-        cache <- readTVar $ nodeCacheSTM ns
-        maxEntryAge <- confMaxNodeCacheAge . nodeConfig <$> readTVar (parentRealNode ns)
-        pure (ns, cache, maxEntryAge)
+nodeCacheVerifyThread :: RealNodeSTM s -> IO ()
+nodeCacheVerifyThread nodeSTM = forever $ do
+    (node, firstVSSTM) <- atomically $ do
+        node <- readTVar nodeSTM
+        case headMay (HMap.elems $ vservers node) of
+            -- wait until first VS is joined
+            Nothing  -> retry
+            Just vs' -> pure (node, vs')
+    let
+        maxEntryAge = confMaxNodeCacheAge $ nodeConfig node
+        cacheQ = globalCacheWriteQueue node
+    cache <- readTVarIO $ globalNodeCacheSTM node
+    -- always use the first active VS as a sender for operations like Ping
+    firstVS <- readTVarIO firstVSSTM

     -- iterate entries:
     -- for avoiding too many time syscalls, get current time before iterating.
     now <- getPOSIXTime
-    forM_ (nodeCacheEntries cache) (\(CacheEntry validated node ts) ->
+    forM_ (nodeCacheEntries cache) (\(CacheEntry validated cacheNode ts) ->
         -- case too old: delete (future work: decide whether pinging and resetting timestamp is better)
         if (now - ts) > maxEntryAge
             then
-                queueDeleteEntry (getNid node) ns
+                queueDeleteEntry (getNid cacheNode) cacheQ
             -- case unverified: try verifying, otherwise delete
             else if not validated
                 then do
                     -- marking as verified is done by 'requestPing' as well
-                    pong <- requestPing ns node
+                    pong <- requestPing firstVS cacheNode
                     either (\_->
-                        queueDeleteEntry (getNid node) ns
+                        queueDeleteEntry (getNid cacheNode) cacheQ
                         )
                         (\vss ->
-                            if node `notElem` vss
-                                then queueDeleteEntry (getNid node) ns
+                            if cacheNode `notElem` vss
+                                then queueDeleteEntry (getNid cacheNode) firstVS
                                 -- after verifying a node, check whether it can be a closer neighbour
-                                else do
-                                    if node `isPossiblePredecessor` ns
+                                -- do this for each node
+                                -- TODO: optimisation: place all LocalNodeStates on the cache ring and check whether any of them is the predecessor/ successor
+                                else forM_ (vservers node) (\nsSTM -> do
+                                    ns <- readTVarIO nsSTM
+                                    if cacheNode `isPossiblePredecessor` ns
                                         then atomically $ do
                                             ns' <- readTVar nsSTM
-                                            writeTVar nsSTM $ addPredecessors [node] ns'
+                                            writeTVar nsSTM $ addPredecessors [cacheNode] ns'
                                         else pure ()
-                                    if node `isPossibleSuccessor` ns
+                                    if cacheNode `isPossibleSuccessor` ns
                                         then atomically $ do
                                             ns' <- readTVar nsSTM
-                                            writeTVar nsSTM $ addSuccessors [node] ns'
+                                            writeTVar nsSTM $ addSuccessors [cacheNode] ns'
                                         else pure ()
+                                    )
                             ) pong
                 else pure ()
         )

     -- check the cache invariant per slice and, if necessary, do a single lookup to the
     -- middle of each slice not verifying the invariant
-    latestNs <- readTVarIO nsSTM
-    latestCache <- readTVarIO $ nodeCacheSTM latestNs
-    let nodesToQuery targetID = case queryLocalCache latestNs latestCache (lNumBestNodes latestNs) targetID of
-            FOUND node -> [node]
-            FORWARD nodeSet -> remoteNode <$> Set.elems nodeSet
-    forM_ (checkCacheSliceInvariants latestNs latestCache) (\targetID ->
-        forkIO $ sendQueryIdMessages targetID latestNs (Just (1 + jEntriesPerSlice latestNs)) (nodesToQuery targetID) >> pure ()    -- ask for 1 entry more than j because of querying the middle
-        )
+    latestNode <- readTVarIO nodeSTM
+    forM_ (vservers latestNode) (\nsSTM -> do
+        latestNs <- readTVarIO nsSTM
+        latestCache <- readTVarIO $ nodeCacheSTM latestNs
+        let nodesToQuery targetID = case queryLocalCache latestNs latestCache (lNumBestNodes latestNs) targetID of
+                FOUND node -> [node]
+                FORWARD nodeSet -> remoteNode <$> Set.elems nodeSet
+        forM_ (checkCacheSliceInvariants latestNs latestCache) (\targetID ->
+            forkIO $ sendQueryIdMessages targetID latestNs (Just (1 + jEntriesPerSlice latestNs)) (nodesToQuery targetID) >> pure ()    -- ask for 1 entry more than j because of querying the middle
+            )
+        )

     threadDelay $ fromEnum (maxEntryAge / 20) `div` 10^6 -- convert from pico to milliseconds
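The initial atomically block above is what makes the verify thread wait for the first joined vserver: retry aborts the transaction and blocks until a read TVar changes, so no polling loop is needed. Isolated as a small helper (a hedged sketch: waitForFirstVS is an illustrative name and the RealNode/LocalNodeStateSTM types are assumed as used in the diff; headMay is from the safe package, HMap a qualified Data.HashMap import as above):

    waitForFirstVS :: RealNodeSTM s -> IO (RealNode s, LocalNodeStateSTM s)
    waitForFirstVS nodeSTM = atomically $ do
        node <- readTVar nodeSTM
        case headMay (HMap.elems $ vservers node) of
            -- no vserver joined yet: block until the node TVar changes
            Nothing  -> retry
            Just vs' -> pure (node, vs')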
@@ -482,90 +482,93 @@ checkCacheSliceInvariants ns
 -- | Periodically send @StabiliseRequest' s to the closest neighbour nodes, until
 -- one responds, and get their neighbours for maintaining the own neighbour lists.
 -- If necessary, request new neighbours.
-stabiliseThread :: Service s (RealNodeSTM s) => LocalNodeStateSTM s -> IO ()
-stabiliseThread nsSTM = forever $ do
-    oldNs <- readTVarIO nsSTM
-
-    -- iterate through the same snapshot, collect potential new neighbours
-    -- and nodes to be deleted, and modify these changes only at the end of
-    -- each stabilise run.
-    -- This decision makes iterating through a potentially changing list easier.
-
-    -- don't contact all neighbours unless the previous one failed/ Left ed
-    predStabilise <- stabiliseClosestResponder oldNs predecessors 1 []
-    succStabilise <- stabiliseClosestResponder oldNs predecessors 1 []
-
-    let
-        (predDeletes, predNeighbours) = either (const ([], [])) id predStabilise
-        (succDeletes, succNeighbours) = either (const ([], [])) id succStabilise
-        allDeletes = predDeletes <> succDeletes
-        allNeighbours = predNeighbours <> succNeighbours
-
-    -- now actually modify the node state's neighbours
-    updatedNs <- atomically $ do
-        newerNsSnap <- readTVar nsSTM
-        let
-            -- sorting and taking only k neighbours is taken care of by the
-            -- setSuccessors/ setPredecessors functions
-            newPreds = (predecessors newerNsSnap \\ allDeletes) <> allNeighbours
-            newSuccs = (successors newerNsSnap \\ allDeletes) <> allNeighbours
-            newNs = setPredecessors newPreds . setSuccessors newSuccs $ newerNsSnap
-        writeTVar nsSTM newNs
-        pure newNs
-
-    -- delete unresponding nodes from cache as well
-    mapM_ (atomically . writeTQueue (cacheWriteQueue updatedNs) . deleteCacheEntry . getNid) allDeletes
-
-    -- try looking up additional neighbours if list too short
-    forM_ [(length $ predecessors updatedNs)..(kNeighbours updatedNs)] (\_ -> do
-        ns' <- readTVarIO nsSTM
-        nextEntry <- runExceptT . requestQueryID ns' $ pred . getNid $ lastDef (toRemoteNodeState ns') (predecessors ns')
-        either
-            (const $ pure ())
-            (\entry -> atomically $ do
-                latestNs <- readTVar nsSTM
-                writeTVar nsSTM $ addPredecessors [entry] latestNs
-            )
-            nextEntry
-        )
-
-    forM_ [(length $ successors updatedNs)..(kNeighbours updatedNs)] (\_ -> do
-        ns' <- readTVarIO nsSTM
-        nextEntry <- runExceptT . requestQueryID ns' $ succ . getNid $ lastDef (toRemoteNodeState ns') (successors ns')
-        either
-            (const $ pure ())
-            (\entry -> atomically $ do
-                latestNs <- readTVar nsSTM
-                writeTVar nsSTM $ addSuccessors [entry] latestNs
-            )
-            nextEntry
-        )
-
-    newNs <- readTVarIO nsSTM
-
-    let
-        oldPredecessor = headDef (toRemoteNodeState oldNs) $ predecessors oldNs
-        newPredecessor = headMay $ predecessors newNs
-    -- manage need for service data migration:
-    maybe (pure ()) (\newPredecessor' ->
-        when (
-            isJust newPredecessor
-            && oldPredecessor /= newPredecessor'
-            -- case: predecessor has changed in some way => own responsibility has changed in some way
-            -- case 1: new predecessor is further away => broader responsibility, but new pred needs to push the data
-            -- If this is due to a node leaving without transfering its data, try getting it from a redundant copy
-            -- case 2: new predecessor is closer, it takes some of our data but somehow didn't join on us => push data to it
-            && isInOwnResponsibilitySlice newPredecessor' oldNs) $ do
-                ownService <- nodeService <$> (liftIO . readTVarIO $ parentRealNode newNs)
-                migrationResult <- migrateData ownService (getNid newNs) (getNid oldPredecessor) (getNid newPredecessor') (getDomain newPredecessor', fromIntegral $ getServicePort newPredecessor')
-                -- TODO: deal with migration failure, e.g retry
-                pure ()
-        )
-        newPredecessor
-
-    stabiliseDelay <- confStabiliseInterval . nodeConfig <$> readTVarIO (parentRealNode newNs)
-    threadDelay stabiliseDelay
+stabiliseThread :: Service s (RealNodeSTM s) => RealNodeSTM s -> IO ()
+stabiliseThread nodeSTM = forever $ do
+    node <- readTVarIO nodeSTM
+    forM_ (vservers node) (\nsSTM -> do
+        oldNs <- readTVarIO nsSTM
+
+        -- iterate through the same snapshot, collect potential new neighbours
+        -- and nodes to be deleted, and modify these changes only at the end of
+        -- each stabilise run.
+        -- This decision makes iterating through a potentially changing list easier.
+
+        -- don't contact all neighbours unless the previous one failed/ Left ed
+        predStabilise <- stabiliseClosestResponder oldNs predecessors 1 []
+        succStabilise <- stabiliseClosestResponder oldNs predecessors 1 []
+
+        let
+            (predDeletes, predNeighbours) = either (const ([], [])) id predStabilise
+            (succDeletes, succNeighbours) = either (const ([], [])) id succStabilise
+            allDeletes = predDeletes <> succDeletes
+            allNeighbours = predNeighbours <> succNeighbours
+
+        -- now actually modify the node state's neighbours
+        updatedNs <- atomically $ do
+            newerNsSnap <- readTVar nsSTM
+            let
+                -- sorting and taking only k neighbours is taken care of by the
+                -- setSuccessors/ setPredecessors functions
+                newPreds = (predecessors newerNsSnap \\ allDeletes) <> allNeighbours
+                newSuccs = (successors newerNsSnap \\ allDeletes) <> allNeighbours
+                newNs = setPredecessors newPreds . setSuccessors newSuccs $ newerNsSnap
+            writeTVar nsSTM newNs
+            pure newNs
+
+        -- delete unresponding nodes from cache as well
+        mapM_ (atomically . writeTQueue (cacheWriteQueue updatedNs) . deleteCacheEntry . getNid) allDeletes
+
+        -- try looking up additional neighbours if list too short
+        forM_ [(length $ predecessors updatedNs)..(kNeighbours updatedNs)] (\_ -> do
+            ns' <- readTVarIO nsSTM
+            nextEntry <- runExceptT . requestQueryID ns' $ pred . getNid $ lastDef (toRemoteNodeState ns') (predecessors ns')
+            either
+                (const $ pure ())
+                (\entry -> atomically $ do
+                    latestNs <- readTVar nsSTM
+                    writeTVar nsSTM $ addPredecessors [entry] latestNs
+                )
+                nextEntry
+            )
+
+        forM_ [(length $ successors updatedNs)..(kNeighbours updatedNs)] (\_ -> do
+            ns' <- readTVarIO nsSTM
+            nextEntry <- runExceptT . requestQueryID ns' $ succ . getNid $ lastDef (toRemoteNodeState ns') (successors ns')
+            either
+                (const $ pure ())
+                (\entry -> atomically $ do
+                    latestNs <- readTVar nsSTM
+                    writeTVar nsSTM $ addSuccessors [entry] latestNs
+                )
+                nextEntry
+            )
+
+        newNs <- readTVarIO nsSTM
+
+        let
+            oldPredecessor = headDef (toRemoteNodeState oldNs) $ predecessors oldNs
+            newPredecessor = headMay $ predecessors newNs
+        -- manage need for service data migration:
+        maybe (pure ()) (\newPredecessor' ->
+            when (
+                isJust newPredecessor
+                && oldPredecessor /= newPredecessor'
+                -- case: predecessor has changed in some way => own responsibility has changed in some way
+                -- case 1: new predecessor is further away => broader responsibility, but new pred needs to push the data
+                -- If this is due to a node leaving without transfering its data, try getting it from a redundant copy
+                -- case 2: new predecessor is closer, it takes some of our data but somehow didn't join on us => push data to it
+                && isInOwnResponsibilitySlice newPredecessor' oldNs) $ do
+                    ownService <- nodeService <$> (liftIO . readTVarIO $ parentRealNode newNs)
+                    migrationResult <- migrateData ownService (getNid newNs) (getNid oldPredecessor) (getNid newPredecessor') (getDomain newPredecessor', fromIntegral $ getServicePort newPredecessor')
+                    -- TODO: deal with migration failure, e.g retry
+                    pure ()
+            )
+            newPredecessor
+        )
+
+    threadDelay . confStabiliseInterval . nodeConfig $ node
     where
         -- | send a stabilise request to the n-th neighbour
         -- (specified by the provided getter function) and on failure retry
@@ -636,8 +652,11 @@ fediMainThreads sock nodeSTM = do
     -- all get cancelled
     concurrently_
         (fediMessageHandler sendQ recvQ nodeSTM) $
-            concurrently_ (stabiliseThread nsSTM) $
-            concurrently_ (nodeCacheVerifyThread nsSTM) $
+            -- decision whether to [1] launch 1 thread per VS or [2] let a single
+            -- thread process all VSes sequentially:
+            -- choose option 2 for the sake of limiting concurrency in simulation scenario
+            concurrently_ (stabiliseThread nodeSTM) $
+            concurrently_ (nodeCacheVerifyThread nodeSTM) $
            concurrently_ (convergenceSampleThread nsSTM) $
            concurrently_ (lookupCacheCleanup $ parentRealNode ns) $
            concurrently_
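The new comment records the design choice: each maintenance task gets one shared thread that walks all vservers sequentially (option 2), rather than one thread per vserver, to keep concurrency bounded in the simulation scenario. For contrast, the per-vserver variant (option 1) could have looked roughly like this (illustrative sketch only; stabiliseVS is a hypothetical per-vserver worker, forConcurrently_ comes from Control.Concurrent.Async, the async package already used here):

    -- option 1, not taken by this commit: spawn one stabilise worker per vserver
    forConcurrently_ (vservers node) $ \vsSTM ->
        stabiliseVS vsSTM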