diff --git a/Hash2Pub.cabal b/Hash2Pub.cabal index 8970aaa..b343df3 100644 --- a/Hash2Pub.cabal +++ b/Hash2Pub.cabal @@ -46,7 +46,7 @@ category: Network extra-source-files: CHANGELOG.md common deps - build-depends: base ^>=4.12.0.0, containers ^>=0.6.0.1, bytestring, utf8-string ^>=1.0.1.1, network ^>=2.8.0.1, time ^>=1.8.0.2, cmdargs ^>= 0.10, cryptonite ^>= 0.25, memory, async, stm, asn1-encoding, asn1-types, asn1-parse, publicsuffix, network-byte-order, safe, iproute, mtl, random, servant, servant-server, servant-client, warp, text, unordered-containers, hashable, unicode-transforms, http-client, http-types, unbounded-delays + build-depends: base ^>=4.12.0.0, containers ^>=0.6.0.1, bytestring, utf8-string ^>=1.0.1.1, network ^>=2.8.0.1, time ^>=1.8.0.2, cmdargs ^>= 0.10, cryptonite ^>= 0.25, memory, async, stm, asn1-encoding, asn1-types, asn1-parse, publicsuffix, network-byte-order, safe, iproute, mtl, random, servant, servant-server, servant-client, warp, text, unordered-containers, hashable, unicode-transforms, http-client, http-types, unbounded-delays, dlist, formatting ghc-options: -Wall -Wpartial-fields -O2 diff --git a/app/Experiment.hs b/app/Experiment.hs index ffa8869..a999dea 100644 --- a/app/Experiment.hs +++ b/app/Experiment.hs @@ -33,7 +33,7 @@ parseSchedule = fmap (parseEntry . Txt.split (== ';')) . Txt.lines where parseEntry [delayT, contactT, tag] = (read $ Txt.unpack delayT, tag, read $ Txt.unpack contactT) - parseEntry _ = error "invalid schedule input format" + parseEntry entry = error $ "invalid schedule input format: " <> show entry executeSchedule :: Int -- ^ speedup factor -> [(Int, Hashtag, (String, Int))] -- ^ [(delay in microseconds, hashtag, (hostname, port))] diff --git a/app/Main.hs b/app/Main.hs index c08cd3c..eac223d 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -54,12 +54,12 @@ readConfig = do bootstrapHost : bootstrapPortString : _ -> [(bootstrapHost, read bootstrapPortString)] _ -> [] - fConf = FediChordConf { - confDomain = confDomainString + fConf = FediChordConf + { confDomain = confDomainString , confIP = toHostAddress6 . read $ ipString , confDhtPort = read portString , confBootstrapNodes = confBootstrapNodes' - , confStabiliseInterval = 60 * 10^6 + , confStabiliseInterval = 80 * 10^6 , confBootstrapSamplingInterval = 180 * 10^6 `div` speedup , confMaxLookupCacheAge = 300 / fromIntegral speedup , confJoinAttemptsInterval = 60 * 10^6 `div` speedup @@ -67,11 +67,14 @@ readConfig = do , confResponsePurgeAge = 60 / fromIntegral speedup , confRequestTimeout = 5 * 10^6 `div` speedup , confRequestRetries = 3 - } - sConf = ServiceConf { - confSubscriptionExpiryTime = fromIntegral $ 2*3600 `div` speedup + } + sConf = ServiceConf + { confSubscriptionExpiryTime = 24*3600 / fromIntegral speedup , confServicePort = read servicePortString , confServiceHost = confDomainString - } + , confLogfilePath = "../simulationData/logs/" <> confDomainString <> ".log" + , confSpeedupFactor = speedup + , confStatsEvalDelay = 120 * 10^6 `div` speedup + } pure (fConf, sConf) diff --git a/src/Hash2Pub/DHTProtocol.hs b/src/Hash2Pub/DHTProtocol.hs index 8258ca3..c86c0f1 100644 --- a/src/Hash2Pub/DHTProtocol.hs +++ b/src/Hash2Pub/DHTProtocol.hs @@ -587,7 +587,6 @@ sendQueryIdMessages targetID ns lParam targets = do nodeConf <- nodeConfig <$> readTVarIO (parentRealNode ns) let srcAddr = confIP nodeConf - -- ToDo: make attempts and timeout configurable queryThreads <- mapM (\resultNode -> async $ bracket (mkSendSocket srcAddr (getDomain resultNode) (getDhtPort resultNode)) close ( sendRequestTo (confRequestTimeout nodeConf) (confRequestRetries nodeConf) (lookupMessage targetID ns Nothing) )) targets @@ -866,7 +865,7 @@ mkServerSocket ip port = do sockAddr <- addrAddress <$> resolve (Just $ show . fromHostAddress6 $ ip) (Just port) sock <- socket AF_INET6 Datagram defaultProtocol setSocketOption sock IPv6Only 1 - bind sock sockAddr + bind sock sockAddr `catch` (\e -> putStrLn $ "Caught exception while bind " <> show sock <> " " <> show sockAddr <> ": " <> show (e :: SomeException)) pure sock -- | create a UDP datagram socket, connected to a destination. @@ -882,6 +881,6 @@ mkSendSocket srcIp dest destPort = do setSocketOption sendSock IPv6Only 1 -- bind to the configured local IP to make sure that outgoing packets are sent from -- this source address - bind sendSock srcAddr + bind sendSock srcAddr `catch` (\e -> putStrLn $ "Caught exception while mkSendSocket bind " <> show sendSock <> " " <> show srcAddr <> ": " <> show (e :: SomeException)) connect sendSock destAddr pure sendSock diff --git a/src/Hash2Pub/FediChord.hs b/src/Hash2Pub/FediChord.hs index 33044fa..9f14a1e 100644 --- a/src/Hash2Pub/FediChord.hs +++ b/src/Hash2Pub/FediChord.hs @@ -345,7 +345,6 @@ nodeCacheWriter nsSTM = -- | Periodically iterate through cache, clean up expired entries and verify unverified ones nodeCacheVerifyThread :: LocalNodeStateSTM s -> IO () nodeCacheVerifyThread nsSTM = forever $ do - putStrLn "cache verify run: begin" -- get cache (ns, cache, maxEntryAge) <- atomically $ do ns <- readTVar nsSTM @@ -398,7 +397,6 @@ nodeCacheVerifyThread nsSTM = forever $ do forkIO $ sendQueryIdMessages targetID latestNs (Just (1 + jEntriesPerSlice latestNs)) (nodesToQuery targetID) >> pure () -- ask for 1 entry more than j because of querying the middle ) - putStrLn "cache verify run: end" threadDelay $ fromEnum (maxEntryAge / 20) `div` 10^6 -- convert from pico to milliseconds @@ -465,7 +463,6 @@ stabiliseThread :: Service s (RealNodeSTM s) => LocalNodeStateSTM s -> IO () stabiliseThread nsSTM = forever $ do oldNs <- readTVarIO nsSTM - putStrLn "stabilise run: begin" -- iterate through the same snapshot, collect potential new neighbours -- and nodes to be deleted, and modify these changes only at the end of @@ -544,7 +541,6 @@ stabiliseThread nsSTM = forever $ do ) newPredecessor - putStrLn "stabilise run: end" stabiliseDelay <- confStabiliseInterval . nodeConfig <$> readTVarIO (parentRealNode newNs) threadDelay stabiliseDelay where diff --git a/src/Hash2Pub/FediChordTypes.hs b/src/Hash2Pub/FediChordTypes.hs index 3b563e6..4ce20a7 100644 --- a/src/Hash2Pub/FediChordTypes.hs +++ b/src/Hash2Pub/FediChordTypes.hs @@ -457,6 +457,12 @@ data ServiceConf = ServiceConf -- ^ listening port for service , confServiceHost :: String -- ^ hostname of service + , confLogfilePath :: String + -- ^ where to store the (measurement) log file + , confStatsEvalDelay :: Int + -- ^ delay between statistic rate measurement samplings, in microseconds + , confSpeedupFactor :: Int + -- While the speedup factor needs to be already included in all } class DHT d where diff --git a/src/Hash2Pub/PostService.hs b/src/Hash2Pub/PostService.hs index 89c14d2..ffeef17 100644 --- a/src/Hash2Pub/PostService.hs +++ b/src/Hash2Pub/PostService.hs @@ -12,25 +12,31 @@ import Control.Concurrent import Control.Concurrent.Async import Control.Concurrent.STM import Control.Exception (Exception (..), try) -import Control.Monad (foldM, forM_, forever, void, when) +import Control.Monad (foldM, forM, forM_, forever, unless, + void, when) import Control.Monad.IO.Class (liftIO) import Data.Bifunctor import qualified Data.ByteString.Lazy.UTF8 as BSUL import qualified Data.ByteString.UTF8 as BSU +import qualified Data.DList as D +import Data.Either (lefts, rights) import qualified Data.HashMap.Strict as HMap import qualified Data.HashSet as HSet -import Data.Maybe (isJust) +import Data.Maybe (fromJust, isJust) import Data.String (fromString) import Data.Text.Lazy (Text) import qualified Data.Text.Lazy as Txt +import qualified Data.Text.Lazy.IO as TxtI import Data.Text.Normalize (NormalizationMode (NFC), normalize) import Data.Time.Clock.POSIX import Data.Typeable (Typeable) import qualified Network.HTTP.Client as HTTP import qualified Network.HTTP.Types as HTTPT +import System.IO import System.Random import Text.Read (readEither) +import Formatting (fixed, format, int, (%)) import qualified Network.Wai.Handler.Warp as Warp import Servant import Servant.Client @@ -38,7 +44,9 @@ import Servant.Client import Hash2Pub.FediChordTypes import Hash2Pub.PostService.API import Hash2Pub.RingMap +import Hash2Pub.Utils +import Debug.Trace data PostService d = PostService { serviceConf :: ServiceConf @@ -49,13 +57,16 @@ data PostService d = PostService -- ^ for each tag store the subscribers + their queue , ownSubscriptions :: TVar (HMap.HashMap NodeID POSIXTime) -- ^ tags subscribed by the own node have an assigned lease time - , ownPosts :: TVar (HSet.HashSet Text) - -- ^ just store the existence of posts for saving memory, , relayInQueue :: TQueue (Hashtag, PostID, PostContent) -- ^ Queue for processing incoming posts of own instance asynchronously , postFetchQueue :: TQueue PostID + -- ^ queue of posts to be fetched , migrationsInProgress :: TVar (HMap.HashMap NodeID (MVar ())) , httpMan :: HTTP.Manager + , statsQueue :: TQueue StatsEvent + , loadStats :: TVar RelayStats + -- ^ current load stats, replaced periodically + , logFileHandle :: Handle } deriving (Typeable) @@ -79,26 +90,38 @@ instance DHT d => Service PostService d where threadVar <- newTVarIO =<< myThreadId -- own thread ID as placeholder subscriberVar <- newTVarIO emptyRMap ownSubsVar <- newTVarIO HMap.empty - ownPostVar <- newTVarIO HSet.empty + --ownPostVar <- newTVarIO HSet.empty relayInQueue' <- newTQueueIO postFetchQueue' <- newTQueueIO migrationsInProgress' <- newTVarIO HMap.empty httpMan' <- HTTP.newManager HTTP.defaultManagerSettings + statsQueue' <- newTQueueIO + loadStats' <- newTVarIO emptyStats + loggingFile <- openFile (confLogfilePath conf) WriteMode + hSetBuffering loggingFile LineBuffering let - thisService = PostService { - serviceConf = conf + thisService = PostService + { serviceConf = conf , baseDHT = dht , serviceThread = threadVar , subscribers = subscriberVar , ownSubscriptions = ownSubsVar - , ownPosts = ownPostVar + --, ownPosts = ownPostVar , relayInQueue = relayInQueue' , postFetchQueue = postFetchQueue' , migrationsInProgress = migrationsInProgress' , httpMan = httpMan' - } + , statsQueue = statsQueue' + , loadStats = loadStats' + , logFileHandle = loggingFile + } port' = fromIntegral (confServicePort conf) warpSettings = Warp.setPort port' . Warp.setHost (fromString . confServiceHost $ conf) $ Warp.defaultSettings + -- log a start message, this also truncates existing files + TxtI.hPutStrLn loggingFile $ Txt.unlines + [ "# Starting mock relay implementation" + , "#time stamp ; relay receive rate ;relay delivery rate ;instance publish rate ;instance fetch rate ;total subscriptions" + ] -- Run 'concurrently_' from another thread to be able to return the -- 'PostService'. -- Terminating that parent thread will make all child threads terminate as well. @@ -106,7 +129,11 @@ instance DHT d => Service PostService d where concurrently_ -- web server (Warp.runSettings warpSettings $ postServiceApplication thisService) - (processIncomingPosts thisService) + $ concurrently + -- background processing workers + (launchWorkerThreads thisService) + -- statistics/ measurements + (launchStatsThreads thisService) -- update thread ID after fork atomically $ writeTVar threadVar servThreadID pure thisService @@ -150,6 +177,7 @@ postServer service = relayInbox service :<|> tagUnsubscribe service +-- | delivery endpoint: receive posts of a handled tag and enqueue them for relaying relayInbox :: DHT d => PostService d -> Hashtag -> Text -> Handler NoContent relayInbox serv tag posts = do let @@ -166,8 +194,10 @@ relayInbox serv tag posts = do -- if noone subscribed to the tag, nothing needs to be done (pure ()) -- otherwise enqueue posts into broadcast queue of the tag - (\queue -> + (\queue -> do liftIO $ forM_ postIDs (atomically . writeTChan queue) + -- report the received post for statistic purposes + liftIO . atomically . writeTQueue (statsQueue serv) $ StatsEvent RelayReceiveEvent (length postIDs) (hashtagToId tag) ) broadcastChan pure NoContent @@ -178,6 +208,7 @@ newtype UnhandledTagException = UnhandledTagException String instance Exception UnhandledTagException +-- | delivery endpoint: receives a list of subscribers of tags and their outstanding queues for migration subscriptionDelivery :: DHT d => PostService d -> Integer -> Text -> Handler Text subscriptionDelivery serv senderID subList = do let @@ -223,27 +254,30 @@ subscriptionDelivery serv senderID subList = do enqueueSubscription subscriberSTM (normaliseTag tag) sub postList leaseTime +-- | endpoint for fetching a post by its ID postFetch :: PostService d -> Text -> Handler Text -postFetch serv postID = do - postSet <- liftIO . readTVarIO . ownPosts $ serv - if HSet.member postID postSet - -- decision: always return the same placeholder post - then pure placeholderPost - else throwError $ err404 { errBody = "No post found with this ID" } +postFetch serv _ = do + -- decision: for saving memory do not store published posts, just + -- pretend there is a post for each requested ID + liftIO . atomically . writeTQueue (statsQueue serv) $ StatsEvent IncomingPostFetchEvent 1 0 -- tag fetched for is irrelevant + pure placeholderPost +-- | endpoint for fetching multiple posts of this instance by their IDs postMultiFetch :: PostService d -> Text -> Handler Text postMultiFetch serv postIDs = do - let idList = Txt.lines postIDs - postSet <- liftIO . readTVarIO . ownPosts $ serv - -- look up existence of all given post IDs, fail if even one is missing - foldM (\response postID -> - if HSet.member postID postSet - then pure $ placeholderPost <> "\n" <> response - else throwError $ err404 { errBody = "No post found with this ID" } + let + idList = Txt.lines postIDs + -- decision: for saving memory do not store published posts, just + -- pretend there is a post for each requested ID + response = foldl (\response' _ -> + placeholderPost <> "\n" <> response' ) "" idList + liftIO . atomically . writeTQueue (statsQueue serv) $ StatsEvent IncomingPostFetchEvent (length idList) 0 -- tag fetched for is irrelevant + pure response +-- | delivery endpoint: inbox for initially publishing a post at an instance postInbox :: PostService d -> Text -> Handler NoContent postInbox serv post = do -- extract contained hashtags @@ -251,8 +285,7 @@ postInbox serv post = do containedTags = fmap (normaliseTag . Txt.tail) . filter ((==) '#' . Txt.head) . Txt.words $ post -- generate post ID postId <- liftIO $ Txt.pack . show <$> (randomRIO (0, 2^(128::Integer)-1) :: IO Integer) - -- add ID to own posts - liftIO . atomically $ modifyTVar' (ownPosts serv) (HSet.insert postId) + -- decision: for saving memory do not store published post IDs, just deliver a post for any requested ID -- enqueue a relay job for each tag liftIO $ forM_ (containedTags :: [Text]) (\tag -> atomically $ writeTQueue (relayInQueue serv) (tag, postId, post) @@ -260,6 +293,7 @@ postInbox serv post = do pure NoContent +-- | delivery endpoint: receive postIDs of a certain subscribed hashtag tagDelivery :: PostService d -> Text -> Text -> Handler Text tagDelivery serv hashtag posts = do let postIDs = Txt.lines posts @@ -271,6 +305,8 @@ tagDelivery serv hashtag posts = do pure () pure $ "Received a postID for tag " <> hashtag + +-- | receive subscription requests to a handled hashtag tagSubscribe :: DHT d => PostService d -> Text -> Maybe Text -> Handler Integer tagSubscribe serv hashtag origin = do responsible <- liftIO $ isResponsibleFor (baseDHT serv) (hashtagToId hashtag) @@ -287,9 +323,11 @@ tagSubscribe serv hashtag origin = do let leaseTime = now + confSubscriptionExpiryTime (serviceConf serv) -- setup subscription entry _ <- liftIO . atomically $ setupSubscriberChannel (subscribers serv) hashtag (BSU.toString $ HTTP.host req, HTTP.port req) leaseTime + --liftIO . putStrLn $ "just got a subscription to " <> Txt.unpack hashtag pure $ round leaseTime +-- | receive and handle unsubscription requests regarding a handled tag tagUnsubscribe :: DHT d => PostService d -> Text -> Maybe Text -> Handler Text tagUnsubscribe serv hashtag origin = do responsible <- liftIO $ isResponsibleFor (baseDHT serv) (hashtagToId hashtag) @@ -310,15 +348,15 @@ tagUnsubscribe serv hashtag origin = do clientAPI :: Proxy PostServiceAPI clientAPI = Proxy -relayInboxClient :: Text -> Text -> ClientM NoContent -relayInboxClient :<|> subscriptionDeliveryClient - :<|> postFetchClient - :<|> postMultiFetchClient - :<|> postInboxClient - :<|> tagDeliveryClient - :<|> tagSubscribeClient - :<|> tagUnsubscribeClient - = client clientAPI +relayInboxClient + :<|> subscriptionDeliveryClient + :<|> postFetchClient + :<|> postMultiFetchClient + :<|> postInboxClient + :<|> tagDeliveryClient + :<|> tagSubscribeClient + :<|> tagUnsubscribeClient + = client clientAPI -- | Deliver the subscriber list of all hashtags in the interval [fromTag, toTag] @@ -388,10 +426,12 @@ clientSubscribeTo serv tag = do Left (FailureResponse _ fresp) |(HTTPT.statusCode . responseStatusCode $ fresp) == 410 && allowRetry -> do -- responsibility gone, force new lookup newRes <- forceLookupKey (baseDHT serv) (Txt.unpack tag) + --putStrLn $ "failed subscribing to " <> Txt.unpack tag <> " on " <> foundHost doSubscribe newRes False Left err -> pure . Left . show $ err Right lease -> do atomically . modifyTVar' (ownSubscriptions serv) $ HMap.insert (hashtagToId tag) (fromInteger lease) + --putStrLn $ "just subscribed to " <> Txt.unpack tag <> " on " <> foundHost pure . Right $ lease ) lookupResponse @@ -525,15 +565,37 @@ lookupTagSubscriptions :: Hashtag -> RingMap NodeID a -> Maybe a lookupTagSubscriptions tag = rMapLookup (hashtagToId tag) --- normalise the unicode representation of a string to NFC +-- normalise the unicode representation of a string to NFC and convert to lower case normaliseTag :: Text -> Text -normaliseTag = Txt.fromStrict . normalize NFC . Txt.toStrict +normaliseTag = Txt.toLower . Txt.fromStrict . normalize NFC . Txt.toStrict -- | convert a hashtag to its representation on the DHT hashtagToId :: Hashtag -> NodeID hashtagToId = genKeyID . Txt.unpack + +readUpToTChan :: Int -> TChan a -> STM [a] +readUpToTChan 0 _ = pure [] +readUpToTChan n chan = do + readFromChan <- tryReadTChan chan + case readFromChan of + Nothing -> pure [] + Just val -> do + moreReads <- readUpToTChan (pred n) chan + pure (val:moreReads) + + +readUpToTQueue :: Int -> TQueue a -> STM [a] +readUpToTQueue 0 _ = pure [] +readUpToTQueue n q = do + readFromQueue <- tryReadTQueue q + case readFromQueue of + Nothing -> pure [] + Just val -> do + moreReads <- readUpToTQueue (pred n) q + pure (val:moreReads) + -- | define how to convert all showable types to PlainText -- No idea what I'm doing with these overlappable instances though ¯\_(ツ)_/¯ -- TODO: figure out how this overlapping stuff actually works https://downloads.haskell.org/~ghc/latest/docs/html/users_guide/glasgow_exts.html#instance-overlap @@ -545,36 +607,78 @@ instance {-# OVERLAPPABLE #-} Read a => MimeUnrender PlainText a where -- ====== worker threads ====== +-- TODO: make configurable +numParallelDeliveries = 10 + +launchWorkerThreads :: DHT d => PostService d -> IO () +launchWorkerThreads serv = concurrently_ + (processIncomingPosts serv) + $ concurrently_ + (purgeSubscriptionsThread serv) + $ concurrently_ + (fetchTagPosts serv) + (relayWorker serv) + + +-- | periodically remove expired subscription entries from relay subscribers +purgeSubscriptionsThread :: PostService d -> IO () +purgeSubscriptionsThread serv = forever $ do + -- read config + now <- getPOSIXTime + let + purgeInterval = confSubscriptionExpiryTime (serviceConf serv) / 10 + -- no need to atomically lock this, as newly incoming subscriptions do not + -- need to be purged + tagMap <- readTVarIO $ subscribers serv + forM_ tagMap $ \(subscriberMapSTM, _, _) -> + -- but each subscriberMap needs to be modified atomically + atomically . modifyTVar' subscriberMapSTM $ HMap.filter (\(_, ts) -> ts > now) + threadDelay $ fromEnum purgeInterval `div` 10^6 + + -- | process the pending relay inbox of incoming posts from the internal queue: -- Look up responsible relay node for given hashtag and forward post to it processIncomingPosts :: DHT d => PostService d -> IO () processIncomingPosts serv = forever $ do -- blocks until available - -- TODO: process multiple in parallel - (tag, pID, pContent) <- atomically . readTQueue $ relayInQueue serv - let pIdUri = "http://" <> (Txt.pack . confServiceHost . serviceConf $ serv) <> ":" <> (fromString . show . confServicePort . serviceConf $ serv) <> "/post/" <> pID - lookupRes <- lookupKey (baseDHT serv) (Txt.unpack tag) - case lookupRes of - -- no vserver active => wait and retry - Nothing -> threadDelay $ 10 * 10^6 - Just (responsibleHost, responsiblePort) -> do - resp <- runClientM (relayInboxClient tag $ pIdUri <> "," <> pContent) (mkClientEnv (httpMan serv) (BaseUrl Http responsibleHost (fromIntegral responsiblePort) "")) - case resp of - Left err -> do - putStrLn $ "Error: " <> show err - -- 410 error indicates outdated responsibility mapping - -- Simplification: just invalidate the mapping entry on all errors, force a re-lookup and re-queue the post - -- TODO: keep track of maximum retries - _ <- forceLookupKey (baseDHT serv) (Txt.unpack tag) - atomically . writeTQueue (relayInQueue serv) $ (tag, pID, pContent) - Right _ -> do - -- TODO: stats - -- idea for the experiment: each post publication makes the initial posting instance subscribe to all contained tags - now <- getPOSIXTime - subscriptionStatus <- HMap.lookup (hashtagToId tag) <$> readTVarIO (ownSubscriptions serv) - -- if not yet subscribed or subscription expires within 2 minutes, (re)subscribe to tag - when (maybe False (\subLease -> now - subLease < 120) subscriptionStatus) $ - void $ clientSubscribeTo serv tag + deliveriesToProcess <- atomically $ do + readResult <- readUpToTQueue numParallelDeliveries $ relayInQueue serv + if null readResult + then retry + else pure readResult + runningJobs <- forM deliveriesToProcess $ \(tag, pID, pContent) -> async $ do + let pIdUri = "http://" <> (Txt.pack . confServiceHost . serviceConf $ serv) <> ":" <> (fromString . show . confServicePort . serviceConf $ serv) <> "/post/" <> pID + lookupRes <- lookupKey (baseDHT serv) (Txt.unpack tag) + case lookupRes of + -- no vserver active => wait and retry + Nothing -> threadDelay (10 * 10^6) >> pure (Left "no vserver active") + Just (responsibleHost, responsiblePort) -> do + resp <- runClientM (relayInboxClient tag $ pIdUri <> "," <> pContent) (mkClientEnv (httpMan serv) (BaseUrl Http responsibleHost (fromIntegral responsiblePort) "")) + case resp of + Left err -> do + -- 410 error indicates outdated responsibility mapping + -- Simplification: just invalidate the mapping entry on all errors, force a re-lookup and re-queue the post + -- TODO: keep track of maximum retries + _ <- forceLookupKey (baseDHT serv) (Txt.unpack tag) + atomically . writeTQueue (relayInQueue serv) $ (tag, pID, pContent) + pure . Left $ "Error: " <> show err + Right _ -> do + -- idea for the experiment: each post publication makes the initial posting instance subscribe to all contained tags + now <- getPOSIXTime + subscriptionStatus <- HMap.lookup (hashtagToId tag) <$> readTVarIO (ownSubscriptions serv) + -- if not yet subscribed or subscription expires within 5 minutes, (re)subscribe to tag + when (maybe True (\subLease -> now - subLease < 300) subscriptionStatus) $ + void $ clientSubscribeTo serv tag + + -- for evaluation, return the tag of the successfully forwarded post + pure $ Right tag + + -- collect async results + results <- mapM waitCatch runningJobs + -- report the count of published posts for statistics + atomically . writeTQueue (statsQueue serv) $ StatsEvent PostPublishEvent (length . rights $ results) 0 -- hashtag published to doesn't matter + pure () + -- | process the pending fetch jobs of delivered post IDs: Delivered posts are tried to be fetched from their URI-ID @@ -588,14 +692,193 @@ fetchTagPosts serv = forever $ do resp <- try $ HTTP.httpLbs fetchReq (httpMan serv) :: IO (Either HTTP.HttpException (HTTP.Response BSUL.ByteString)) case resp of Right response -> - if HTTPT.statusCode (HTTP.responseStatus response) == 200 - then - -- success, TODO: statistics - putStrLn "post fetch success" - else - -- TODO error handling, retry - pure () + -- TODO error handling, retry + --if HTTPT.statusCode (HTTP.responseStatus response) == 200 + -- then + -- -- success, TODO: statistics + -- else + pure () Left _ -> -- TODO error handling, retry pure () + +relayWorker :: PostService d -> IO () +relayWorker serv = forever $ do + -- atomically (to be able to retry) fold a list of due delivery actions + jobsToProcess <- atomically $ do + subscriptionMap <- readTVar $ subscribers serv + jobList <- D.toList <$> foldM (\jobAcc (subscriberMapSTM, _, tag) -> do + subscriberMap <- readTVar subscriberMapSTM + foldM (\jobAcc' ((subHost, subPort), (postChan, _)) -> do + postsToDeliver <- readUpToTChan 500 postChan + let postDeliveryAction = runClientM (tagDeliveryClient tag (Txt.unlines postsToDeliver)) (mkClientEnv (httpMan serv) (BaseUrl Http subHost (fromIntegral subPort) "")) + -- append relay push job to job list + pure $ if not (null postsToDeliver) + then jobAcc' `D.snoc` (do + deliveryResult <- postDeliveryAction + either + (const $ pure ()) + -- on successful push, record that event for statistics + (const . atomically . writeTQueue (statsQueue serv) $ StatsEvent RelayDeliveryEvent (length postsToDeliver) (hashtagToId tag)) + deliveryResult + pure deliveryResult + ) + else jobAcc' + ) jobAcc $ HMap.toList subscriberMap + ) D.empty subscriptionMap + -- if no relay jobs, then retry + if null jobList + then retry + else pure jobList + + -- when processing the list, send several deliveries in parallel + forM_ (chunksOf numParallelDeliveries jobsToProcess) $ \jobset -> do + runningJobs <- mapM async jobset + -- so far just dropping failed attempts, TODO: retry mechanism + results <- mapM waitCatch runningJobs + let + successfulResults = rights results + unsuccessfulResults = lefts results + unless (null unsuccessfulResults) $ putStrLn ("ERR: " <> show (length unsuccessfulResults) <> " failed deliveries!") + putStrLn $ "successfully relayed " <> show (length successfulResults) + pure () + + +-- ======= statistics/measurement and logging ======= + +data StatsEventType = PostPublishEvent + | RelayReceiveEvent + | RelayDeliveryEvent + | IncomingPostFetchEvent + deriving (Enum, Show, Eq) + +-- | Represents measurement event of a 'StatsEventType' with a count relevant for a certain key +data StatsEvent = StatsEvent StatsEventType Int NodeID + deriving (Show, Eq) + + +-- | measured rates of relay performance +-- TODO: maybe include other metrics in here as well, like number of subscribers? +data RelayStats = RelayStats + { relayReceiveRates :: RingMap NodeID Double + -- ^ rate of incoming posts in the responsibility of this relay + , relayDeliveryRates :: RingMap NodeID Double + -- ^ rate of relayed outgoing posts + , postFetchRate :: Double -- no need to differentiate between tags + -- ^ number of post-fetches delivered + , postPublishRate :: Double + -- ^ rate of initially publishing posts through this instance + } + deriving (Show, Eq) + + + +launchStatsThreads :: PostService d -> IO () +launchStatsThreads serv = do + -- create shared accumulator + sharedAccum <- newTVarIO emptyStats + concurrently_ + (accumulateStatsThread sharedAccum $ statsQueue serv) + (evaluateStatsThread serv sharedAccum) + + +-- | Read stats events from queue and add them to a shared accumulator. +-- Instead of letting the events accumulate in the queue and allocate linear memory, immediately fold the result. +accumulateStatsThread :: TVar RelayStats -> TQueue StatsEvent -> IO () +accumulateStatsThread statsAccumulator statsQ = forever $ do + -- blocks until stats event arrives + event <- atomically $ readTQueue statsQ + -- add the event number to current accumulator + atomically $ modifyTVar' statsAccumulator $ statsAdder event + + +-- | add incoming stats events to accumulator value +statsAdder :: StatsEvent -> RelayStats -> RelayStats +statsAdder event stats = case event of + StatsEvent PostPublishEvent num _ -> + stats {postPublishRate = fromIntegral num + postPublishRate stats} + StatsEvent RelayReceiveEvent num key -> + stats {relayReceiveRates = sumIfEntryExists key (fromIntegral num) (relayReceiveRates stats)} + StatsEvent RelayDeliveryEvent num key -> + stats {relayDeliveryRates = sumIfEntryExists key (fromIntegral num) (relayDeliveryRates stats)} + StatsEvent IncomingPostFetchEvent num _ -> + stats {postFetchRate = fromIntegral num + postFetchRate stats} + where + sumIfEntryExists = addRMapEntryWith (\newVal oldVal -> + let toInsert = fromJust $ extractRingEntry newVal + in + case oldVal of + KeyEntry n -> KeyEntry (n + toInsert) + ProxyEntry pointer (Just (KeyEntry n)) -> ProxyEntry pointer (Just (KeyEntry $ n + toInsert)) + ProxyEntry pointer Nothing -> ProxyEntry pointer (Just newVal) + _ -> error "RingMap nested too deeply" + ) + + +-- Periodically exchange the accumulated statistics with empty ones, evaluate them +-- and make them the current statistics of the service. +evaluateStatsThread :: PostService d -> TVar RelayStats -> IO () +evaluateStatsThread serv statsAcc = getPOSIXTime >>= loop + where + loop previousTs = do + threadDelay $ confStatsEvalDelay (serviceConf serv) + -- get and reset the stats accumulator + summedStats <- atomically $ do + stats <- readTVar statsAcc + writeTVar statsAcc emptyStats + pure stats + -- as the transaction might retry several times, current time needs to + -- be read afterwards + now <- getPOSIXTime + -- evaluate stats rate and replace server stats + -- persistently store in a TVar so it can be retrieved later by the DHT + let timePassed = (now - previousTs) * fromIntegral (confSpeedupFactor $ serviceConf serv) + rateStats = evaluateStats timePassed summedStats + atomically $ writeTVar (loadStats serv) rateStats + -- and now what? write a log to file + -- format: total relayReceiveRates;total relayDeliveryRates;postFetchRate;postPublishRate; subscriberSum + -- later: current (reported) load, target load + subscriberSum <- sumSubscribers + TxtI.hPutStrLn (logFileHandle serv) $ + format (fixed 9 % ";" % fixed 20 % ";" % fixed 20 % ";" % fixed 20 % ";" % fixed 20 % ";" % int ) + (realToFrac now :: Double) + (sum . relayReceiveRates $ rateStats) + (sum . relayDeliveryRates $ rateStats) + (postPublishRate rateStats) + (postFetchRate rateStats) + subscriberSum + loop now + + sumSubscribers = do + tagMap <- readTVarIO $ subscribers serv + foldM (\subscriberSum (subscriberMapSTM, _, _) -> do + subscriberMap <- readTVarIO subscriberMapSTM + pure $ subscriberSum + HMap.size subscriberMap + ) + 0 tagMap + + +-- | Evaluate the accumulated statistic events: Currently mostly calculates the event +-- rates by dividing through the collection time frame +evaluateStats :: POSIXTime -> RelayStats -> RelayStats +evaluateStats timeInterval summedStats = + -- first sum all event numbers, then divide through number of seconds passed to + -- get rate per second + RelayStats + { relayReceiveRates = (/ intervalSeconds) <$> relayReceiveRates summedStats + , relayDeliveryRates = (/ intervalSeconds) <$> relayDeliveryRates summedStats + , postPublishRate = postPublishRate summedStats / intervalSeconds + , postFetchRate = postFetchRate summedStats / intervalSeconds + } + where + intervalSeconds = realToFrac timeInterval + + +emptyStats :: RelayStats +emptyStats = RelayStats + { relayReceiveRates = emptyRMap + , relayDeliveryRates = emptyRMap + , postFetchRate = 0 + , postPublishRate = 0 + } diff --git a/src/Hash2Pub/RingMap.hs b/src/Hash2Pub/RingMap.hs index ae1ec15..a2fe3ae 100644 --- a/src/Hash2Pub/RingMap.hs +++ b/src/Hash2Pub/RingMap.hs @@ -25,6 +25,29 @@ instance (Bounded k, Ord k, Eq a) => Eq (RingMap k a) where instance (Bounded k, Ord k, Show k, Show a) => Show (RingMap k a) where show rmap = shows ("RingMap " :: String) (show $ getRingMap rmap) + +instance (Bounded k, Ord k) => Functor (RingMap k) where + -- | map a function over all payload values of a 'RingMap' + fmap f = RingMap . Map.map traversingF . getRingMap + where + traversingF (KeyEntry a) = KeyEntry (f a) + traversingF (ProxyEntry pointer (Just entry)) = ProxyEntry pointer (Just $ traversingF entry) + traversingF (ProxyEntry pointer Nothing) = ProxyEntry pointer Nothing + + +instance (Bounded k, Ord k) => Foldable (RingMap k) where + foldr f initVal = Map.foldr traversingFR initVal . getRingMap + where + traversingFR (KeyEntry a) acc = f a acc + traversingFR (ProxyEntry _ Nothing) acc = acc + traversingFR (ProxyEntry _ (Just entry)) acc = traversingFR entry acc + foldl f initVal = Map.foldl traversingFL initVal . getRingMap + where + traversingFL acc (KeyEntry a) = f acc a + traversingFL acc (ProxyEntry _ Nothing) = acc + traversingFL acc (ProxyEntry _ (Just entry)) = traversingFL acc entry + + -- | entry of a 'RingMap' that holds a value and can also -- wrap around the lookup direction at the edges of the name space. data RingEntry k a = KeyEntry a @@ -133,7 +156,7 @@ rMapLookupPred :: (Bounded k, Ord k, Num k) rMapLookupPred = lookupWrapper Map.lookupLT Map.lookupLE Backwards addRMapEntryWith :: (Bounded k, Ord k) - => (RingEntry k a -> RingEntry k a -> RingEntry k a) + => (RingEntry k a -> RingEntry k a -> RingEntry k a) -- ^ f new_value mold_value -> k -- ^ key -> a -- ^ value -> RingMap k a