split up stats summing and evaluating, launch threads
This commit is contained in:
parent
c823e6357a
commit
5c338b9cd7
|
@ -19,7 +19,7 @@ import qualified Data.ByteString.Lazy.UTF8 as BSUL
|
||||||
import qualified Data.ByteString.UTF8 as BSU
|
import qualified Data.ByteString.UTF8 as BSU
|
||||||
import qualified Data.HashMap.Strict as HMap
|
import qualified Data.HashMap.Strict as HMap
|
||||||
import qualified Data.HashSet as HSet
|
import qualified Data.HashSet as HSet
|
||||||
import Data.Maybe (isJust, fromJust)
|
import Data.Maybe (fromJust, isJust)
|
||||||
import Data.String (fromString)
|
import Data.String (fromString)
|
||||||
import Data.Text.Lazy (Text)
|
import Data.Text.Lazy (Text)
|
||||||
import qualified Data.Text.Lazy as Txt
|
import qualified Data.Text.Lazy as Txt
|
||||||
|
@ -54,9 +54,11 @@ data PostService d = PostService
|
||||||
, relayInQueue :: TQueue (Hashtag, PostID, PostContent)
|
, relayInQueue :: TQueue (Hashtag, PostID, PostContent)
|
||||||
-- ^ Queue for processing incoming posts of own instance asynchronously
|
-- ^ Queue for processing incoming posts of own instance asynchronously
|
||||||
, postFetchQueue :: TQueue PostID
|
, postFetchQueue :: TQueue PostID
|
||||||
|
-- ^ queue of posts to be fetched
|
||||||
, migrationsInProgress :: TVar (HMap.HashMap NodeID (MVar ()))
|
, migrationsInProgress :: TVar (HMap.HashMap NodeID (MVar ()))
|
||||||
, httpMan :: HTTP.Manager
|
, httpMan :: HTTP.Manager
|
||||||
, statsQueue :: TQueue StatsEvent
|
, statsQueue :: TQueue StatsEvent
|
||||||
|
, loadStats :: TVar RelayStats
|
||||||
}
|
}
|
||||||
deriving (Typeable)
|
deriving (Typeable)
|
||||||
|
|
||||||
|
@ -86,6 +88,7 @@ instance DHT d => Service PostService d where
|
||||||
migrationsInProgress' <- newTVarIO HMap.empty
|
migrationsInProgress' <- newTVarIO HMap.empty
|
||||||
httpMan' <- HTTP.newManager HTTP.defaultManagerSettings
|
httpMan' <- HTTP.newManager HTTP.defaultManagerSettings
|
||||||
statsQueue' <- newTQueueIO
|
statsQueue' <- newTQueueIO
|
||||||
|
loadStats' <- newTVarIO emptyStats
|
||||||
let
|
let
|
||||||
thisService = PostService
|
thisService = PostService
|
||||||
{ serviceConf = conf
|
{ serviceConf = conf
|
||||||
|
@ -99,6 +102,7 @@ instance DHT d => Service PostService d where
|
||||||
, migrationsInProgress = migrationsInProgress'
|
, migrationsInProgress = migrationsInProgress'
|
||||||
, httpMan = httpMan'
|
, httpMan = httpMan'
|
||||||
, statsQueue = statsQueue'
|
, statsQueue = statsQueue'
|
||||||
|
, loadStats = loadStats'
|
||||||
}
|
}
|
||||||
port' = fromIntegral (confServicePort conf)
|
port' = fromIntegral (confServicePort conf)
|
||||||
warpSettings = Warp.setPort port' . Warp.setHost (fromString . confServiceHost $ conf) $ Warp.defaultSettings
|
warpSettings = Warp.setPort port' . Warp.setHost (fromString . confServiceHost $ conf) $ Warp.defaultSettings
|
||||||
|
@ -109,7 +113,11 @@ instance DHT d => Service PostService d where
|
||||||
concurrently_
|
concurrently_
|
||||||
-- web server
|
-- web server
|
||||||
(Warp.runSettings warpSettings $ postServiceApplication thisService)
|
(Warp.runSettings warpSettings $ postServiceApplication thisService)
|
||||||
(processIncomingPosts thisService)
|
$ concurrently
|
||||||
|
-- post queue processing
|
||||||
|
(processIncomingPosts thisService)
|
||||||
|
-- statistics/ measurements
|
||||||
|
(launchStatsThreads thisService)
|
||||||
-- update thread ID after fork
|
-- update thread ID after fork
|
||||||
atomically $ writeTVar threadVar servThreadID
|
atomically $ writeTVar threadVar servThreadID
|
||||||
pure thisService
|
pure thisService
|
||||||
|
@ -606,63 +614,64 @@ fetchTagPosts serv = forever $ do
|
||||||
-- ======= statistics/measurement and logging =======
|
-- ======= statistics/measurement and logging =======
|
||||||
|
|
||||||
data StatsEventType = PostPublishEvent
|
data StatsEventType = PostPublishEvent
|
||||||
-- ^ initial publishing of a post by an instance
|
| RelayReceiveEvent
|
||||||
| RelayReceiveEvent
|
| RelayDeliveryEvent
|
||||||
-- ^ receiving of posts because of being the responsible relay, for estimation of \tau_t
|
| IncomingPostFetchEvent
|
||||||
-- TODO: record for which hashtag
|
deriving (Enum, Show, Eq)
|
||||||
| RelayDeliveryEvent
|
|
||||||
-- ^ delivering (or at least attempt) a post to a subscriber
|
|
||||||
| IncomingPostFetchEvent
|
|
||||||
-- ^ another instance fetches a post from this instance
|
|
||||||
deriving (Enum, Show, Eq)
|
|
||||||
|
|
||||||
-- | Represents measurement event of a 'StatsEventType' with a count relevant for a certain key
|
-- | Represents measurement event of a 'StatsEventType' with a count relevant for a certain key
|
||||||
data StatsEvent = StatsEvent StatsEventType Int NodeID
|
data StatsEvent = StatsEvent StatsEventType Int NodeID
|
||||||
|
|
||||||
-- TODO: make delay configurable
|
|
||||||
statsAccuDelay = 300000
|
|
||||||
|
|
||||||
-- | periodically flush the stats queue and accumulate all events inside
|
-- | measured rates of relay performance
|
||||||
accumulateStatsThread :: TQueue StatsEvent -> IO ()
|
-- TODO: maybe include other metrics in here as well, like number of subscribers?
|
||||||
accumulateStatsThread statsQ = getPOSIXTime >>= flushLoop
|
data RelayStats = RelayStats
|
||||||
where
|
{ relayReceiveRates :: RingMap NodeID Double
|
||||||
flushLoop previousRun = do
|
-- ^ rate of incoming posts in the responsibility of this relay
|
||||||
now <- getPOSIXTime
|
, relayDeliveryRates :: RingMap NodeID Double
|
||||||
-- TODO: instead of letting the events accumulate in the queue and allocate linear memory, immediately fold the result
|
-- ^ rate of relayed outgoing posts
|
||||||
-- but how to achieve the periodicity when blocking on a queue?
|
, postFetchRate :: Double -- no need to differentiate between tags
|
||||||
-- idea: let another thread periodically exchange the RelayStats, modify it atomically (Konzept "unterm Arsch wegziehen")
|
-- ^ number of post-fetches delivered
|
||||||
threadDelay statsAccuDelay
|
, postPublishRate :: Double
|
||||||
latestEvents <- atomically $ flushTQueue statsQ
|
-- ^ rate of initially publishing posts through this instance
|
||||||
-- accumulate the events
|
|
||||||
-- and now what? write a log to file, probably as a forkIO
|
|
||||||
-- persistently store in a TVar so it can be retrieved later by the DHT
|
|
||||||
flushLoop now
|
|
||||||
|
|
||||||
|
|
||||||
accumulateStats :: POSIXTime -> [StatsEvent] -> RelayStats
|
|
||||||
accumulateStats timeInterval events =
|
|
||||||
-- first sum all event numbers, then divide through number of seconds passed to
|
|
||||||
-- get rate per second
|
|
||||||
RelayStats
|
|
||||||
{ relayReceiveRates = mapRMap (/ intervalSeconds) $ relayReceiveRates summedStats
|
|
||||||
, relayDeliveryRates = mapRMap (/ intervalSeconds) $ relayDeliveryRates summedStats
|
|
||||||
, postPublishRate = postPublishRate summedStats / intervalSeconds
|
|
||||||
, postFetchRate = postFetchRate summedStats / intervalSeconds
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
-- TODO: make delay configurable
|
||||||
|
statsEvalDelay = 300000
|
||||||
|
|
||||||
|
|
||||||
|
launchStatsThreads :: PostService d -> IO ()
|
||||||
|
launchStatsThreads serv = do
|
||||||
|
-- create shared accumulator
|
||||||
|
sharedAccum <- newTVarIO emptyStats
|
||||||
|
concurrently_
|
||||||
|
(accumulateStatsThread sharedAccum $ statsQueue serv)
|
||||||
|
(evaluateStatsThread serv sharedAccum)
|
||||||
|
|
||||||
|
|
||||||
|
-- | Read stats events from queue and add them to a shared accumulator.
|
||||||
|
-- Instead of letting the events accumulate in the queue and allocate linear memory, immediately fold the result.
|
||||||
|
accumulateStatsThread :: TVar RelayStats -> TQueue StatsEvent -> IO ()
|
||||||
|
accumulateStatsThread statsAccumulator statsQ = forever $ do
|
||||||
|
-- blocks until stats event arrives
|
||||||
|
event <- atomically $ readTQueue statsQ
|
||||||
|
-- add the event number to current accumulator
|
||||||
|
atomically $ modifyTVar' statsAccumulator $ statsAdder event
|
||||||
|
|
||||||
|
|
||||||
|
-- | add incoming stats events to accumulator value
|
||||||
|
statsAdder :: StatsEvent -> RelayStats -> RelayStats
|
||||||
|
statsAdder event stats = case event of
|
||||||
|
StatsEvent PostPublishEvent num _ ->
|
||||||
|
stats {postPublishRate = fromIntegral num + postPublishRate stats}
|
||||||
|
StatsEvent RelayReceiveEvent num key ->
|
||||||
|
stats {relayReceiveRates = sumIfEntryExists key (fromIntegral num) (relayReceiveRates stats)}
|
||||||
|
StatsEvent RelayDeliveryEvent num key ->
|
||||||
|
stats {relayDeliveryRates = sumIfEntryExists key (fromIntegral num) (relayDeliveryRates stats)}
|
||||||
|
StatsEvent IncomingPostFetchEvent num _ ->
|
||||||
|
stats {postFetchRate = fromIntegral num + postFetchRate stats}
|
||||||
where
|
where
|
||||||
intervalSeconds = fromIntegral (fromEnum timeInterval) / 10^12
|
|
||||||
summedStats = foldl (\stats event -> case event of
|
|
||||||
StatsEvent PostPublishEvent num _ ->
|
|
||||||
stats {postPublishRate = fromIntegral num + postPublishRate stats}
|
|
||||||
StatsEvent RelayReceiveEvent num key ->
|
|
||||||
stats {relayReceiveRates = sumIfEntryExists key (fromIntegral num) (relayReceiveRates stats)}
|
|
||||||
StatsEvent RelayDeliveryEvent num key ->
|
|
||||||
stats {relayDeliveryRates = sumIfEntryExists key (fromIntegral num) (relayDeliveryRates stats)}
|
|
||||||
StatsEvent IncomingPostFetchEvent num _ ->
|
|
||||||
stats {postFetchRate = fromIntegral num + postFetchRate stats}
|
|
||||||
)
|
|
||||||
emptyStats
|
|
||||||
events
|
|
||||||
sumIfEntryExists = addRMapEntryWith (\newVal oldVal ->
|
sumIfEntryExists = addRMapEntryWith (\newVal oldVal ->
|
||||||
let toInsert = fromJust $ extractRingEntry newVal
|
let toInsert = fromJust $ extractRingEntry newVal
|
||||||
in
|
in
|
||||||
|
@ -673,8 +682,45 @@ accumulateStats timeInterval events =
|
||||||
_ -> error "RingMap nested too deeply"
|
_ -> error "RingMap nested too deeply"
|
||||||
)
|
)
|
||||||
|
|
||||||
-- idea: first just sum with foldl, and then map the time division over all values
|
|
||||||
|
|
||||||
|
-- Periodically exchange the accumulated statistics with empty ones, evaluate them
|
||||||
|
-- and make them the current statistics of the service.
|
||||||
|
evaluateStatsThread :: PostService d -> TVar RelayStats -> IO ()
|
||||||
|
evaluateStatsThread serv statsAcc = getPOSIXTime >>= loop
|
||||||
|
where
|
||||||
|
loop previousTs = do
|
||||||
|
threadDelay statsEvalDelay
|
||||||
|
-- get and reset the stats accumulator
|
||||||
|
summedStats <- atomically $ do
|
||||||
|
stats <- readTVar statsAcc
|
||||||
|
writeTVar statsAcc emptyStats
|
||||||
|
pure stats
|
||||||
|
-- as the transaction might retry several times, current time needs to
|
||||||
|
-- be read afterwards
|
||||||
|
now <- getPOSIXTime
|
||||||
|
-- evaluate stats rate and replace server stats
|
||||||
|
atomically . writeTVar (loadStats serv) . evaluateStats (now - previousTs) $ summedStats
|
||||||
|
-- idea: let another thread periodically exchange the RelayStats, modify it atomically (Konzept "unterm Arsch wegziehen")
|
||||||
|
-- and now what? write a log to file, probably as a forkIO
|
||||||
|
-- persistently store in a TVar so it can be retrieved later by the DHT
|
||||||
|
loop now
|
||||||
|
|
||||||
|
|
||||||
|
-- | Evaluate the accumulated statistic events: Currently mostly calculates the event
|
||||||
|
-- rates by dividing through the collection time frame
|
||||||
|
evaluateStats :: POSIXTime -> RelayStats -> RelayStats
|
||||||
|
evaluateStats timeInterval summedStats =
|
||||||
|
-- first sum all event numbers, then divide through number of seconds passed to
|
||||||
|
-- get rate per second
|
||||||
|
RelayStats
|
||||||
|
{ relayReceiveRates = mapRMap (/ intervalSeconds) $ relayReceiveRates summedStats
|
||||||
|
, relayDeliveryRates = mapRMap (/ intervalSeconds) $ relayDeliveryRates summedStats
|
||||||
|
, postPublishRate = postPublishRate summedStats / intervalSeconds
|
||||||
|
, postFetchRate = postFetchRate summedStats / intervalSeconds
|
||||||
|
}
|
||||||
|
where
|
||||||
|
-- TODO: take speedup into account
|
||||||
|
intervalSeconds = fromIntegral (fromEnum timeInterval) / 10^12
|
||||||
|
|
||||||
|
|
||||||
emptyStats :: RelayStats
|
emptyStats :: RelayStats
|
||||||
|
@ -684,16 +730,3 @@ emptyStats = RelayStats
|
||||||
, postFetchRate = 0
|
, postFetchRate = 0
|
||||||
, postPublishRate = 0
|
, postPublishRate = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
-- | measured rates of relay performance
|
|
||||||
-- TODO: maybe include other metrics in here as well, like number of subscribers?
|
|
||||||
data RelayStats = RelayStats
|
|
||||||
{ relayReceiveRates :: RingMap NodeID Double
|
|
||||||
-- ^ rate of incoming posts in the responsibility of this relay
|
|
||||||
, relayDeliveryRates :: RingMap NodeID Double
|
|
||||||
-- ^ rate of relayed outgoing posts
|
|
||||||
, postFetchRate :: Double -- no need to differentiate between tags
|
|
||||||
-- ^ number of post-fetches delivered
|
|
||||||
, postPublishRate :: Double
|
|
||||||
-- ^ rate of initially publishing posts through this instance
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in a new issue