Hash2Pub/src/Hash2Pub/DHTProtocol.hs

271 lines
11 KiB
Haskell
Raw Normal View History

{-# LANGUAGE OverloadedStrings #-}
module Hash2Pub.DHTProtocol
( QueryResponse (..)
2020-05-15 10:52:48 +02:00
, queryLocalCache
, addCacheEntry
, addCacheEntryPure
, deleteCacheEntry
2020-05-15 10:52:48 +02:00
, markCacheEntryAsVerified
, RemoteCacheEntry(..)
, toRemoteCacheEntry
, remoteNode_
, Action(..)
, ActionPayload(..)
, FediChordMessage(..)
, maximumParts
)
where
import Data.Maybe (maybe, fromMaybe)
import qualified Data.Set as Set
import qualified Data.Map as Map
import Data.Time.Clock.POSIX
2020-05-12 21:24:56 +02:00
import Network.Socket hiding (send, sendTo, recv, recvFrom)
import Network.Socket.ByteString
import System.Timeout
import Control.Monad.State.Strict
import Hash2Pub.FediChord
( NodeID
, NodeState (..)
, getSuccessors
, putSuccessors
, getPredecessors
, putPredecessors
, cacheGetNodeStateUnvalidated
, NodeCache
, CacheEntry(..)
, cacheLookup
, cacheLookupSucc
, cacheLookupPred
, localCompare
)
import Debug.Trace (trace)
-- === queries ===
data QueryResponse = FORWARD (Set.Set RemoteCacheEntry) -- ^return closest nodes from local cache.
-- whole cache entry is returned for making
-- the entry time stamp available to the
-- protocol serialiser
| FOUND NodeState -- ^node is the responsible node for queried ID
deriving (Show, Eq)
-- TODO: evaluate more fine-grained argument passing to allow granular locking
-- | look up an ID to either claim responsibility for it or return the closest l nodes from the local cache
2020-05-15 10:52:48 +02:00
queryLocalCache :: NodeState -> NodeCache -> Int -> NodeID -> QueryResponse
queryLocalCache ownState nCache lBestNodes targetID
-- as target ID falls between own ID and first predecessor, it is handled by this node
| (targetID `localCompare` ownID) `elem` [LT, EQ] && not (null preds) && (targetID `localCompare` head preds == GT) = FOUND ownState
-- my interpretation: the "l best next hops" are the l-1 closest preceding nodes and
-- the closest succeeding node (like with the p initiated parallel queries
| otherwise = FORWARD $ closestSuccessor `Set.union` closestPredecessors
where
preds = fromMaybe [] $ getPredecessors ownState
ownID = nid ownState
closestSuccessor :: Set.Set RemoteCacheEntry
closestSuccessor = maybe Set.empty Set.singleton $ toRemoteCacheEntry =<< cacheLookupSucc targetID nCache
closestPredecessors :: Set.Set RemoteCacheEntry
closestPredecessors = closestPredecessor (lBestNodes-1) $ nid ownState
closestPredecessor :: (Integral n, Show n) => n -> NodeID -> Set.Set RemoteCacheEntry
closestPredecessor 0 _ = Set.empty
closestPredecessor remainingLookups lastID
| remainingLookups < 0 = Set.empty
| otherwise =
let result = cacheLookupPred lastID nCache
in
case toRemoteCacheEntry =<< result of
Nothing -> Set.empty
Just nPred@(RemoteCacheEntry ns ts) -> Set.insert nPred $ closestPredecessor (remainingLookups-1) (nid ns)
-- === protocol serialisation data types
data Action =
QueryID
| Join
| Leave
| Stabilise
| Ping
deriving (Show, Eq, Enum)
data FediChordMessage =
Request {
requestID :: Integer
, sender :: NodeState
, parts :: Integer
, part :: Integer
-- ^ part starts at 0
, action :: Action
, payload :: Maybe ActionPayload
}
| Response {
responseTo :: Integer
, senderID :: NodeID
, parts :: Integer
, part :: Integer
, action :: Action
, payload :: Maybe ActionPayload
} deriving (Show, Eq)
data ActionPayload =
QueryIDRequestPayload {
queryTargetID :: NodeID
, queryLBestNodes :: Integer
}
| JoinRequestPayload
| LeaveRequestPayload {
leaveSuccessors :: [NodeID]
, leavePredecessors :: [NodeID]
}
| StabiliseRequestPayload
| PingRequestPayload
| QueryIDResponsePayload {
queryResult :: QueryResponse
}
| JoinResponsePayload {
joinSuccessors :: [NodeID]
, joinPredecessors :: [NodeID]
, joinCache :: [RemoteCacheEntry]
}
| LeaveResponsePayload
| StabiliseResponsePayload {
stabiliseSuccessors :: [NodeID]
, stabilisePredecessors :: [NodeID]
}
| PingResponsePayload {
pingNodeStates :: [NodeState]
}
deriving (Show, Eq)
-- | global limit of parts per message used when (de)serialising messages.
-- Used to limit the impact of DOS attempts with partial messages.
maximumParts :: Num a => a
maximumParts = 150
-- | dedicated data type for cache entries sent to or received from the network,
-- as these have to be considered as unvalidated. Also helps with separation of trust.
data RemoteCacheEntry = RemoteCacheEntry NodeState POSIXTime
deriving (Show, Eq)
instance Ord RemoteCacheEntry where
(RemoteCacheEntry ns1 _) `compare` (RemoteCacheEntry ns2 _) = nid ns1 `compare` nid ns2
toRemoteCacheEntry :: CacheEntry -> Maybe RemoteCacheEntry
toRemoteCacheEntry (NodeEntry _ ns ts) = Just $ RemoteCacheEntry ns ts
toRemoteCacheEntry (ProxyEntry _ (Just entry@NodeEntry{})) = toRemoteCacheEntry entry
toRemoteCacheEntry _ = Nothing
-- helper function for use in tests
remoteNode_ :: RemoteCacheEntry -> NodeState
remoteNode_ (RemoteCacheEntry ns _) = ns
-- cache operations
-- | update or insert a 'RemoteCacheEntry' into the cache,
-- converting it to a local 'CacheEntry'
addCacheEntry :: RemoteCacheEntry -- ^ a remote cache entry received from network
-> NodeCache -- ^ node cache to insert to
-> IO NodeCache -- ^ new node cache with the element inserted
addCacheEntry entry cache = do
now <- getPOSIXTime
return $ addCacheEntryPure now entry cache
-- | pure version of 'addCacheEntry' with current time explicitly specified as argument
addCacheEntryPure :: POSIXTime -- ^ current time
-> RemoteCacheEntry -- ^ a remote cache entry received from network
-> NodeCache -- ^ node cache to insert to
-> NodeCache -- ^ new node cache with the element inserted
addCacheEntryPure now (RemoteCacheEntry ns ts) cache =
let
-- TODO: limit diffSeconds to some maximum value to prevent malicious nodes from inserting entries valid nearly until eternity
timestamp' = if ts <= now then ts else now
newCache = Map.insertWith insertCombineFunction (nid ns) (NodeEntry False ns timestamp') cache
insertCombineFunction newVal@(NodeEntry newValidationState newNode newTimestamp) oldVal =
case oldVal of
ProxyEntry n _ -> ProxyEntry n (Just newVal)
NodeEntry oldValidationState _ oldTimestamp -> NodeEntry oldValidationState newNode (max oldTimestamp newTimestamp)
in
newCache
-- | delete the node with given ID from cache
deleteCacheEntry :: NodeID -- ^ID of the node to be deleted
-> NodeCache -- ^cache to delete from
-> NodeCache -- ^cache without the specified element
deleteCacheEntry = Map.update modifier
where
modifier (ProxyEntry idPointer _) = Just (ProxyEntry idPointer Nothing)
modifier NodeEntry {} = Nothing
-- | Mark a cache entry as verified after pinging it, possibly bumping its timestamp.
markCacheEntryAsVerified :: Maybe POSIXTime -- ^ the (current) timestamp to be
-- given to the entry, or Nothing
-> NodeID -- ^ which node to mark
-> NodeCache -- ^ current node cache
-> NodeCache -- ^ new NodeCache with the updated entry
markCacheEntryAsVerified timestamp = Map.adjust adjustFunc
where
adjustFunc (NodeEntry _ ns ts) = NodeEntry True ns $ fromMaybe ts timestamp
adjustFunc (ProxyEntry _ (Just entry)) = adjustFunc entry
adjustFunc entry = entry
2020-05-15 10:52:48 +02:00
-- ====== message send and receive operations ======
requestQueryID :: NodeState -> NodeID -> IO NodeState
-- 1. do a local lookup for the l closest nodes
-- 2. create l sockets
-- 3. send a message async concurrently to all l nodes
-- 4. collect the results, insert them into cache
-- 5. repeat until FOUND (problem: new entries not necessarily already in cache, explicitly compare with closer results)
requestQueryID ns targetID = do
cacheSnapshot <- readIORef $ getNodeCacheRef ns
let localResult = queryLocalCache ns cacheSnapshot (fromMaybe 1 $ getLNumBestNodes ns) targetID
-- FOUND can only be returned if targetID is owned by local node
case localResult of
FOUND thisNode -> return thisNode
FORWARD nodeSet ->
sockets <- mapM (\resultNode -> mkSendSocket (domain result) (dhtPort resultNode)) $ Set.toList nodeSet
-- ToDo: process results immediately instead of waiting for the last one to finish, see https://stackoverflow.com/a/38815224/9198613
responses = mapM
sendRequestTo :: Int -- ^ timeout in seconds
-> Int -- ^ number of retries
-> FediChordMessage -- ^ the message to be sent
-> Socket -- ^ connected socket to use for sending
-> IO (Set.Set FediChordMessage) -- ^ responses
sendRequestTo timeout attempts msg sock = do
let requests = serialiseMessage 1200 msg
-- ToDo: make attempts and timeout configurable
attempts 3 . timeout 5000 $ do
where
-- state reingeben: state = noch nicht geackte messages, result = responses
sendAndAck :: Socket -> StateT (Map.Map Integer BS.ByteString) IO (Set.Set FediChordMessage)
sendAndAck sock = do
remainingSends <- get
sendMany sock $ Map.elems remainingSends
-- timeout pro receive socket, danach catMaybes
-- wichtig: Pakete können dupliziert werden, dh es können mehr ACKs als gesendete parts ankommen
replicateM
-- idea: send all parts at once
-- Set/ Map with unacked parts
-- then recv with timeout for |unackedParts| attempts, receive acked parts from set/ map
-- how to manage individual retries? nested "attempts"
2020-05-15 10:52:48 +02:00
-- | retry an IO action at most *i* times until it delivers a result
attempts :: Int -- ^ number of retries *i*
-> IO (Maybe a) -- ^ action to retry
-> IO (Maybe a) -- ^ result after at most *i* retries
attempts 0 _ = return Nothing
attempts i action = do
actionResult <- action
case actionResult of
Nothing -> attempts (i-1) action
Just res -> return $ Just res