process incoming posts in parallel

This commit is contained in:
Trolli Schmittlauch 2020-09-09 18:50:45 +02:00
parent 12fcd13754
commit e3a8912360

View file

@ -12,7 +12,8 @@ import Control.Concurrent
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Concurrent.STM import Control.Concurrent.STM
import Control.Exception (Exception (..), try) import Control.Exception (Exception (..), try)
import Control.Monad (foldM, forM_, forever, void, when) import Control.Monad (foldM, forM, forM_, forever, void,
when)
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Data.Bifunctor import Data.Bifunctor
import qualified Data.ByteString.Lazy.UTF8 as BSUL import qualified Data.ByteString.Lazy.UTF8 as BSUL
@ -560,6 +561,28 @@ normaliseTag = Txt.fromStrict . normalize NFC . Txt.toStrict
hashtagToId :: Hashtag -> NodeID hashtagToId :: Hashtag -> NodeID
hashtagToId = genKeyID . Txt.unpack hashtagToId = genKeyID . Txt.unpack
readUpToTChan :: Int -> TChan a -> STM [a]
readUpToTChan 0 _ = pure []
readUpToTChan n chan = do
readFromChan <- tryReadTChan chan
case readFromChan of
Nothing -> pure []
Just val -> do
moreReads <- readUpToTChan (pred n) chan
pure (val:moreReads)
readUpToTQueue :: Int -> TQueue a -> STM [a]
readUpToTQueue 0 _ = pure []
readUpToTQueue n q = do
readFromQueue <- tryReadTQueue q
case readFromQueue of
Nothing -> pure []
Just val -> do
moreReads <- readUpToTQueue (pred n) q
pure (val:moreReads)
-- | define how to convert all showable types to PlainText -- | define how to convert all showable types to PlainText
-- No idea what I'm doing with these overlappable instances though ¯\_(ツ)_/¯ -- No idea what I'm doing with these overlappable instances though ¯\_(ツ)_/¯
-- TODO: figure out how this overlapping stuff actually works https://downloads.haskell.org/~ghc/latest/docs/html/users_guide/glasgow_exts.html#instance-overlap -- TODO: figure out how this overlapping stuff actually works https://downloads.haskell.org/~ghc/latest/docs/html/users_guide/glasgow_exts.html#instance-overlap
@ -571,36 +594,50 @@ instance {-# OVERLAPPABLE #-} Read a => MimeUnrender PlainText a where
-- ====== worker threads ====== -- ====== worker threads ======
-- TODO: make configurable
numParallelDeliveries = 10
-- | process the pending relay inbox of incoming posts from the internal queue: -- | process the pending relay inbox of incoming posts from the internal queue:
-- Look up responsible relay node for given hashtag and forward post to it -- Look up responsible relay node for given hashtag and forward post to it
processIncomingPosts :: DHT d => PostService d -> IO () processIncomingPosts :: DHT d => PostService d -> IO ()
processIncomingPosts serv = forever $ do processIncomingPosts serv = forever $ do
-- blocks until available -- blocks until available
-- TODO: process multiple in parallel deliveriesToProcess <- atomically $ do
(tag, pID, pContent) <- atomically . readTQueue $ relayInQueue serv readResult <- readUpToTQueue numParallelDeliveries $ relayInQueue serv
let pIdUri = "http://" <> (Txt.pack . confServiceHost . serviceConf $ serv) <> ":" <> (fromString . show . confServicePort . serviceConf $ serv) <> "/post/" <> pID if null readResult
lookupRes <- lookupKey (baseDHT serv) (Txt.unpack tag) then retry
case lookupRes of else pure readResult
-- no vserver active => wait and retry runningJobs <- forM deliveriesToProcess $ \(tag, pID, pContent) -> async $ do
Nothing -> threadDelay $ 10 * 10^6 let pIdUri = "http://" <> (Txt.pack . confServiceHost . serviceConf $ serv) <> ":" <> (fromString . show . confServicePort . serviceConf $ serv) <> "/post/" <> pID
Just (responsibleHost, responsiblePort) -> do lookupRes <- lookupKey (baseDHT serv) (Txt.unpack tag)
resp <- runClientM (relayInboxClient tag $ pIdUri <> "," <> pContent) (mkClientEnv (httpMan serv) (BaseUrl Http responsibleHost (fromIntegral responsiblePort) "")) case lookupRes of
case resp of -- no vserver active => wait and retry
Left err -> do Nothing -> threadDelay (10 * 10^6) >> pure (Left "no vserver active")
putStrLn $ "Error: " <> show err Just (responsibleHost, responsiblePort) -> do
-- 410 error indicates outdated responsibility mapping resp <- runClientM (relayInboxClient tag $ pIdUri <> "," <> pContent) (mkClientEnv (httpMan serv) (BaseUrl Http responsibleHost (fromIntegral responsiblePort) ""))
-- Simplification: just invalidate the mapping entry on all errors, force a re-lookup and re-queue the post case resp of
-- TODO: keep track of maximum retries Left err -> do
_ <- forceLookupKey (baseDHT serv) (Txt.unpack tag) -- 410 error indicates outdated responsibility mapping
atomically . writeTQueue (relayInQueue serv) $ (tag, pID, pContent) -- Simplification: just invalidate the mapping entry on all errors, force a re-lookup and re-queue the post
Right _ -> do -- TODO: keep track of maximum retries
-- TODO: stats _ <- forceLookupKey (baseDHT serv) (Txt.unpack tag)
-- idea for the experiment: each post publication makes the initial posting instance subscribe to all contained tags atomically . writeTQueue (relayInQueue serv) $ (tag, pID, pContent)
now <- getPOSIXTime pure . Left $ "Error: " <> show err
subscriptionStatus <- HMap.lookup (hashtagToId tag) <$> readTVarIO (ownSubscriptions serv) Right _ -> do
-- if not yet subscribed or subscription expires within 2 minutes, (re)subscribe to tag -- idea for the experiment: each post publication makes the initial posting instance subscribe to all contained tags
when (maybe False (\subLease -> now - subLease < 120) subscriptionStatus) $ now <- getPOSIXTime
void $ clientSubscribeTo serv tag subscriptionStatus <- HMap.lookup (hashtagToId tag) <$> readTVarIO (ownSubscriptions serv)
-- if not yet subscribed or subscription expires within 2 minutes, (re)subscribe to tag
when (maybe False (\subLease -> now - subLease < 120) subscriptionStatus) $
void $ clientSubscribeTo serv tag
-- for evaluation, return the tag of the successfully forwarded post
pure $ Right tag
-- collect async results
results <- mapM waitCatch runningJobs
-- TODO: statistics
-- | process the pending fetch jobs of delivered post IDs: Delivered posts are tried to be fetched from their URI-ID -- | process the pending fetch jobs of delivered post IDs: Delivered posts are tried to be fetched from their URI-ID
@ -626,10 +663,6 @@ fetchTagPosts serv = forever $ do
pure () pure ()
-- TODO: make configurable
numParallelDeliveries = 10
-- TODO: paralellelisation
relayWorker :: PostService d -> IO () relayWorker :: PostService d -> IO ()
relayWorker serv = forever $ do relayWorker serv = forever $ do
-- atomically (to be able to retry) fold a list of due delivery actions -- atomically (to be able to retry) fold a list of due delivery actions
@ -638,7 +671,7 @@ relayWorker serv = forever $ do
jobList <- D.toList <$> foldM (\jobAcc (subscriberMapSTM, _, tag) -> do jobList <- D.toList <$> foldM (\jobAcc (subscriberMapSTM, _, tag) -> do
subscriberMap <- readTVar subscriberMapSTM subscriberMap <- readTVar subscriberMapSTM
foldM (\jobAcc' ((subHost, subPort), (postChan, _)) -> do foldM (\jobAcc' ((subHost, subPort), (postChan, _)) -> do
postsToDeliver <- readUpTo 500 postChan postsToDeliver <- readUpToTChan 500 postChan
-- append fetch job to job list -- append fetch job to job list
pure $ if not (null postsToDeliver) pure $ if not (null postsToDeliver)
then jobAcc' `D.snoc` runClientM (tagDeliveryClient tag (Txt.unlines postsToDeliver)) (mkClientEnv (httpMan serv) (BaseUrl Http subHost (fromIntegral subPort) "")) then jobAcc' `D.snoc` runClientM (tagDeliveryClient tag (Txt.unlines postsToDeliver)) (mkClientEnv (httpMan serv) (BaseUrl Http subHost (fromIntegral subPort) ""))
@ -658,16 +691,6 @@ relayWorker serv = forever $ do
-- TODO: stats -- TODO: stats
pure () pure ()
where
readUpTo :: Int -> TChan a -> STM [a]
readUpTo 0 _ = pure []
readUpTo n chan = do
readFromChan <- tryReadTChan chan
case readFromChan of
Nothing -> pure []
Just val -> do
moreReads <- readUpTo (pred n) chan
pure (val:moreReads)
-- ======= statistics/measurement and logging ======= -- ======= statistics/measurement and logging =======