process incoming posts in parallel
This commit is contained in:
parent
12fcd13754
commit
e3a8912360
|
@ -12,7 +12,8 @@ import Control.Concurrent
|
||||||
import Control.Concurrent.Async
|
import Control.Concurrent.Async
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
import Control.Exception (Exception (..), try)
|
import Control.Exception (Exception (..), try)
|
||||||
import Control.Monad (foldM, forM_, forever, void, when)
|
import Control.Monad (foldM, forM, forM_, forever, void,
|
||||||
|
when)
|
||||||
import Control.Monad.IO.Class (liftIO)
|
import Control.Monad.IO.Class (liftIO)
|
||||||
import Data.Bifunctor
|
import Data.Bifunctor
|
||||||
import qualified Data.ByteString.Lazy.UTF8 as BSUL
|
import qualified Data.ByteString.Lazy.UTF8 as BSUL
|
||||||
|
@ -560,6 +561,28 @@ normaliseTag = Txt.fromStrict . normalize NFC . Txt.toStrict
|
||||||
hashtagToId :: Hashtag -> NodeID
|
hashtagToId :: Hashtag -> NodeID
|
||||||
hashtagToId = genKeyID . Txt.unpack
|
hashtagToId = genKeyID . Txt.unpack
|
||||||
|
|
||||||
|
|
||||||
|
readUpToTChan :: Int -> TChan a -> STM [a]
|
||||||
|
readUpToTChan 0 _ = pure []
|
||||||
|
readUpToTChan n chan = do
|
||||||
|
readFromChan <- tryReadTChan chan
|
||||||
|
case readFromChan of
|
||||||
|
Nothing -> pure []
|
||||||
|
Just val -> do
|
||||||
|
moreReads <- readUpToTChan (pred n) chan
|
||||||
|
pure (val:moreReads)
|
||||||
|
|
||||||
|
|
||||||
|
readUpToTQueue :: Int -> TQueue a -> STM [a]
|
||||||
|
readUpToTQueue 0 _ = pure []
|
||||||
|
readUpToTQueue n q = do
|
||||||
|
readFromQueue <- tryReadTQueue q
|
||||||
|
case readFromQueue of
|
||||||
|
Nothing -> pure []
|
||||||
|
Just val -> do
|
||||||
|
moreReads <- readUpToTQueue (pred n) q
|
||||||
|
pure (val:moreReads)
|
||||||
|
|
||||||
-- | define how to convert all showable types to PlainText
|
-- | define how to convert all showable types to PlainText
|
||||||
-- No idea what I'm doing with these overlappable instances though ¯\_(ツ)_/¯
|
-- No idea what I'm doing with these overlappable instances though ¯\_(ツ)_/¯
|
||||||
-- TODO: figure out how this overlapping stuff actually works https://downloads.haskell.org/~ghc/latest/docs/html/users_guide/glasgow_exts.html#instance-overlap
|
-- TODO: figure out how this overlapping stuff actually works https://downloads.haskell.org/~ghc/latest/docs/html/users_guide/glasgow_exts.html#instance-overlap
|
||||||
|
@ -571,36 +594,50 @@ instance {-# OVERLAPPABLE #-} Read a => MimeUnrender PlainText a where
|
||||||
|
|
||||||
-- ====== worker threads ======
|
-- ====== worker threads ======
|
||||||
|
|
||||||
|
-- TODO: make configurable
|
||||||
|
numParallelDeliveries = 10
|
||||||
|
|
||||||
|
|
||||||
-- | process the pending relay inbox of incoming posts from the internal queue:
|
-- | process the pending relay inbox of incoming posts from the internal queue:
|
||||||
-- Look up responsible relay node for given hashtag and forward post to it
|
-- Look up responsible relay node for given hashtag and forward post to it
|
||||||
processIncomingPosts :: DHT d => PostService d -> IO ()
|
processIncomingPosts :: DHT d => PostService d -> IO ()
|
||||||
processIncomingPosts serv = forever $ do
|
processIncomingPosts serv = forever $ do
|
||||||
-- blocks until available
|
-- blocks until available
|
||||||
-- TODO: process multiple in parallel
|
deliveriesToProcess <- atomically $ do
|
||||||
(tag, pID, pContent) <- atomically . readTQueue $ relayInQueue serv
|
readResult <- readUpToTQueue numParallelDeliveries $ relayInQueue serv
|
||||||
let pIdUri = "http://" <> (Txt.pack . confServiceHost . serviceConf $ serv) <> ":" <> (fromString . show . confServicePort . serviceConf $ serv) <> "/post/" <> pID
|
if null readResult
|
||||||
lookupRes <- lookupKey (baseDHT serv) (Txt.unpack tag)
|
then retry
|
||||||
case lookupRes of
|
else pure readResult
|
||||||
-- no vserver active => wait and retry
|
runningJobs <- forM deliveriesToProcess $ \(tag, pID, pContent) -> async $ do
|
||||||
Nothing -> threadDelay $ 10 * 10^6
|
let pIdUri = "http://" <> (Txt.pack . confServiceHost . serviceConf $ serv) <> ":" <> (fromString . show . confServicePort . serviceConf $ serv) <> "/post/" <> pID
|
||||||
Just (responsibleHost, responsiblePort) -> do
|
lookupRes <- lookupKey (baseDHT serv) (Txt.unpack tag)
|
||||||
resp <- runClientM (relayInboxClient tag $ pIdUri <> "," <> pContent) (mkClientEnv (httpMan serv) (BaseUrl Http responsibleHost (fromIntegral responsiblePort) ""))
|
case lookupRes of
|
||||||
case resp of
|
-- no vserver active => wait and retry
|
||||||
Left err -> do
|
Nothing -> threadDelay (10 * 10^6) >> pure (Left "no vserver active")
|
||||||
putStrLn $ "Error: " <> show err
|
Just (responsibleHost, responsiblePort) -> do
|
||||||
-- 410 error indicates outdated responsibility mapping
|
resp <- runClientM (relayInboxClient tag $ pIdUri <> "," <> pContent) (mkClientEnv (httpMan serv) (BaseUrl Http responsibleHost (fromIntegral responsiblePort) ""))
|
||||||
-- Simplification: just invalidate the mapping entry on all errors, force a re-lookup and re-queue the post
|
case resp of
|
||||||
-- TODO: keep track of maximum retries
|
Left err -> do
|
||||||
_ <- forceLookupKey (baseDHT serv) (Txt.unpack tag)
|
-- 410 error indicates outdated responsibility mapping
|
||||||
atomically . writeTQueue (relayInQueue serv) $ (tag, pID, pContent)
|
-- Simplification: just invalidate the mapping entry on all errors, force a re-lookup and re-queue the post
|
||||||
Right _ -> do
|
-- TODO: keep track of maximum retries
|
||||||
-- TODO: stats
|
_ <- forceLookupKey (baseDHT serv) (Txt.unpack tag)
|
||||||
-- idea for the experiment: each post publication makes the initial posting instance subscribe to all contained tags
|
atomically . writeTQueue (relayInQueue serv) $ (tag, pID, pContent)
|
||||||
now <- getPOSIXTime
|
pure . Left $ "Error: " <> show err
|
||||||
subscriptionStatus <- HMap.lookup (hashtagToId tag) <$> readTVarIO (ownSubscriptions serv)
|
Right _ -> do
|
||||||
-- if not yet subscribed or subscription expires within 2 minutes, (re)subscribe to tag
|
-- idea for the experiment: each post publication makes the initial posting instance subscribe to all contained tags
|
||||||
when (maybe False (\subLease -> now - subLease < 120) subscriptionStatus) $
|
now <- getPOSIXTime
|
||||||
void $ clientSubscribeTo serv tag
|
subscriptionStatus <- HMap.lookup (hashtagToId tag) <$> readTVarIO (ownSubscriptions serv)
|
||||||
|
-- if not yet subscribed or subscription expires within 2 minutes, (re)subscribe to tag
|
||||||
|
when (maybe False (\subLease -> now - subLease < 120) subscriptionStatus) $
|
||||||
|
void $ clientSubscribeTo serv tag
|
||||||
|
|
||||||
|
-- for evaluation, return the tag of the successfully forwarded post
|
||||||
|
pure $ Right tag
|
||||||
|
|
||||||
|
-- collect async results
|
||||||
|
results <- mapM waitCatch runningJobs
|
||||||
|
-- TODO: statistics
|
||||||
|
|
||||||
|
|
||||||
-- | process the pending fetch jobs of delivered post IDs: Delivered posts are tried to be fetched from their URI-ID
|
-- | process the pending fetch jobs of delivered post IDs: Delivered posts are tried to be fetched from their URI-ID
|
||||||
|
@ -626,10 +663,6 @@ fetchTagPosts serv = forever $ do
|
||||||
pure ()
|
pure ()
|
||||||
|
|
||||||
|
|
||||||
-- TODO: make configurable
|
|
||||||
numParallelDeliveries = 10
|
|
||||||
|
|
||||||
-- TODO: paralellelisation
|
|
||||||
relayWorker :: PostService d -> IO ()
|
relayWorker :: PostService d -> IO ()
|
||||||
relayWorker serv = forever $ do
|
relayWorker serv = forever $ do
|
||||||
-- atomically (to be able to retry) fold a list of due delivery actions
|
-- atomically (to be able to retry) fold a list of due delivery actions
|
||||||
|
@ -638,7 +671,7 @@ relayWorker serv = forever $ do
|
||||||
jobList <- D.toList <$> foldM (\jobAcc (subscriberMapSTM, _, tag) -> do
|
jobList <- D.toList <$> foldM (\jobAcc (subscriberMapSTM, _, tag) -> do
|
||||||
subscriberMap <- readTVar subscriberMapSTM
|
subscriberMap <- readTVar subscriberMapSTM
|
||||||
foldM (\jobAcc' ((subHost, subPort), (postChan, _)) -> do
|
foldM (\jobAcc' ((subHost, subPort), (postChan, _)) -> do
|
||||||
postsToDeliver <- readUpTo 500 postChan
|
postsToDeliver <- readUpToTChan 500 postChan
|
||||||
-- append fetch job to job list
|
-- append fetch job to job list
|
||||||
pure $ if not (null postsToDeliver)
|
pure $ if not (null postsToDeliver)
|
||||||
then jobAcc' `D.snoc` runClientM (tagDeliveryClient tag (Txt.unlines postsToDeliver)) (mkClientEnv (httpMan serv) (BaseUrl Http subHost (fromIntegral subPort) ""))
|
then jobAcc' `D.snoc` runClientM (tagDeliveryClient tag (Txt.unlines postsToDeliver)) (mkClientEnv (httpMan serv) (BaseUrl Http subHost (fromIntegral subPort) ""))
|
||||||
|
@ -658,16 +691,6 @@ relayWorker serv = forever $ do
|
||||||
-- TODO: stats
|
-- TODO: stats
|
||||||
pure ()
|
pure ()
|
||||||
|
|
||||||
where
|
|
||||||
readUpTo :: Int -> TChan a -> STM [a]
|
|
||||||
readUpTo 0 _ = pure []
|
|
||||||
readUpTo n chan = do
|
|
||||||
readFromChan <- tryReadTChan chan
|
|
||||||
case readFromChan of
|
|
||||||
Nothing -> pure []
|
|
||||||
Just val -> do
|
|
||||||
moreReads <- readUpTo (pred n) chan
|
|
||||||
pure (val:moreReads)
|
|
||||||
|
|
||||||
-- ======= statistics/measurement and logging =======
|
-- ======= statistics/measurement and logging =======
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue