paper_hashtag_federation/literature.bib


@online{lainpleroma.soykaf.comActivityPubPleroma2018,
  langid = {american},
  title = {{{ActivityPub}} in {{Pleroma}}},
  url = {https://blog.soykaf.com/post/activity-pub-in-pleroma/},
  abstract = {Implementing ActivityPub in Pleroma},
  urldate = {2018-11-04},
  date = {2018-03-04T11:35:09+01:00},
  keywords = {unread},
  author = {{lain@pleroma.soykaf.com}},
  file = {/home/spiollinux/Zotero/storage/K9KTYMZY/activity-pub-in-pleroma.html},
  annotation = {\begin{itemize}

\item signatures not specified in AP

\begin{itemize}

\item ~~~ 2 kinds of signatures in Mastodon:

\begin{itemize}

\item HTTP signatures

\item Linked Data Signatures

\end{itemize}

\item ~~~ Pleroma: only

\end{itemize}

\end{itemize}

\subsection{visibility of posts}

Another thing you'll have to implement is Mastodon's visibility model. Mastodon signals post visibility by the addresses it puts into the to and cc fields of the activity. \textbf{These visibility suggestions are not enforced by any encryption. Other servers are expected to respect them, but there's no guarantee.}

Mastodon has four visibility modes:

\begin{itemize}

\item Public. This is signalled by having the special public address (\href{https://www.w3.org/ns/activitystreams#Public}{https://www.w3.org/ns/activitystreams\#Public}) in the to field.

\item Unlisted. This is signalled by having the special public address in the cc field.

\item Private (aka ``Friends Only''). This is signalled by NOT having the public address in the recipient fields, but having the user's follower address in the to field.

\item Direct. This is signalled by having just the recipients' IDs in the to field and NO public address.

\end{itemize}

~}
}

@online{ActivityPubOneProtocol,
  title = {{{ActivityPub}} - One Protocol to Rule Them All?},
  url = {https://schub.io/blog/2018/02/01/activitypub-one-protocol-to-rule-them-all.html},
  urldate = {2018-11-04},
  date = {2018-02-01},
  keywords = {unread},
  author = {Schubert, Dennis},
  file = {/home/spiollinux/Zotero/storage/ERD7TREQ/activitypub-one-protocol-to-rule-them-all.html},
  annotation = {\begin{itemize}

\item encryption is not specified in AP, https is optional

\end{itemize}}
}

@online{SocialCGActivityPubAuthentication,
  title = {{{SocialCG}}/{{ActivityPub}}/{{Authentication Authorization}}},
  url = {https://www.w3.org/wiki/index.php?title=SocialCG/ActivityPub/Authentication_Authorization&oldid=105248},
  abstract = {best practices for authentication in ActivityPub (not part of the standard)},
  journaltitle = {W3C Wiki},
  type = {wiki page},
  urldate = {2018-11-04},
  file = {/home/spiollinux/Zotero/storage/54X2KL4N/Authentication_Authorization.html}
}

@online{SecurityVocabularyHttps,
  title = {The {{Security Vocabulary}}},
  url = {https://web-payments.org/vocabs/security#publicKey},
  abstract = {describes the security vocabulary for web services working with linked data;
part of PaySwarm standardization, still WIP},
  urldate = {2018-11-04},
  keywords = {website},
  file = {/home/spiollinux/Zotero/storage/CNJAIU4F/security.html},
  annotation = {is this really the latest published standard?}
}

@report{I-D.cavage-http-signatures,
  title = {Signing {{HTTP Messages}}},
  url = {https://datatracker.ietf.org/doc/html/draft-cavage-http-signatures-10},
  number = {draft-cavage-http-signatures-10},
  institution = {{IETF Secretariat}},
  type = {Internet-Draft},
  date = {2018-05},
  keywords = {unread},
  author = {Cavage, Mark and Sporny, Manu},
  howpublished = {Working Draft},
  annotation = {\texttt{keyId} field is an opaque string that the server can
   use to look up the component they need to validate the signature.  It
   could be an SSH key fingerprint, a URL to machine-readable key data,
   an LDAP DN, etc.  Management of keys and assignment of \texttt{keyId} is out
   of scope for this document.}
}

@online{TwisterP2PMicroblogging,
  langid = {american},
  title = {Twister | {{P2P}} Microblogging Platform},
  url = {http://twister.net.co/},
  shorttitle = {Twister},
  urldate = {2018-11-04},
  keywords = {DHT,Kademlia,Social Network,bitcoin,website,related work},
  file = {/home/spiollinux/Zotero/storage/CLWQI7EV/twister.net.co.html},
  annotation = {\begin{itemize}

\item all posts are stored in a Kademlia DHT

\begin{itemize}

\item what are the lookup keys?

\item consistency?

\end{itemize}

\end{itemize}}
}

@online{NearlyAllDHT,
  title = {Nearly All {{DHT}} Implementations Vulnerable to `merge' Bug},
  url = {http://blog.notdot.net/2008/6/Nearly-all-DHT-implementations-vulnerable-to-merge-bug},
  journaltitle = {Nick's Blog},
  urldate = {2018-11-04},
  date = {2008-06},
  keywords = {unread},
  file = {/home/spiollinux/Zotero/storage/KX2FT2YT/Nearly-all-DHT-implementations-vulnerable-to-merge-bug.html}
}

@article{freitasTwisterP2PMicroblogging2013,
  archivePrefix = {arXiv},
  eprinttype = {arxiv},
  eprint = {1312.7152},
  primaryClass = {cs},
  title = {Twister - a {{P2P}} Microblogging Platform},
  url = {http://arxiv.org/abs/1312.7152},
  abstract = {This paper proposes a new microblogging architecture based on peer-to-peer networks overlays. The proposed platform is comprised of three mostly independent overlay networks. The first provides distributed user registration and authentication and is based on the Bitcoin protocol. The second one is a Distributed Hash Table (DHT) overlay network providing key/value storage for user resources and tracker location for the third network. The last network is a collection of possibly disjoint ``swarms'' of followers, based on the Bittorrent protocol, which can be used for efficient near-instant notification delivery to many users. By leveraging from existing and proven technologies, twister provides a new microblogging platform offering security, scalability and privacy features. A mechanism provides incentive for entities that contribute processing time to run the user registration network, rewarding such entities with the privilege of sending a single unsolicited (``promoted'') message to the entire network. The number of unsolicited messages per day is defined in order to not upset users.},
  urldate = {2018-11-04},
  date = {2013-12-26},
  keywords = {Computer Science - Cryptography and Security,Computer Science - Networking and Internet Architecture,related work},
  author = {Freitas, Miguel},
  file = {/home/spiollinux/Zotero/storage/IEGC9UPK/Freitas - 2013 - twister - a P2P microblogging platform.pdf;/home/spiollinux/Zotero/storage/EUHUKS6U/1312.html},
  annotation = {section on hashtags:

~

Just like mentioning, hashtags must be detected in the content of new messages
being posted to the network. A copy of the message is sent to a special
address of multi-value list storage:\\
IDhashtagt = H ([hashtagt, ``hashtag'', ``multi''])\\
This is pretty much the same mechanism of downward message thread navigation
except for an additional feature: a hashtag creates a new swarm similar
to IDswarm j . Neighbors of such IDhashtagt are be forced to join this virtual
swarm which has no sequential content (file). Posts that include the hashtag are
DHT routed to a neighbor member of the swarm, from which they are broadcast
to the swarm's members.
This swarm is therefore just used to create a distributed tracker and broadcast
mechanism for users willing to monitor such hashtags. New members joining
the swarm may also request the last messages from the multi-valued storage
(DHT network), without guarantee of completeness.

~

~

~

Swarm is used for posts only. Both public and private (Direct Messages).

~

DHT storage provides no guarantees

BitTorrent tracker storage: neighbours responsible for converting DHT messages to bittorrent file-like content; itself is read-only

\begin{itemize}

\item redundancy of responsible neighbours possible?

\item what is this read-only file-like structure? storage guarantees?

\begin{itemize}

\item ``members can choose to only hold the latest messages'' -{$>$} so no storage guarantees, consistency, explicit sharding

\end{itemize}

\end{itemize}}
}

@online{SocialrelayPublicPost,
  title = {Social-Relay: {{Public}} Post Relay for the {{Diaspora}} Federated Social Network Protocol},
  journaltitle = {GitLab},
  langid = {english},
  url = {https://git.feneas.org/jaywink/social-relay},
  urldate = {2018-11-04},
  abstract = {Public post relay for the Diaspora federated social network protocol},
  keywords = {Diaspora,relay,code,related work},
  file = {/home/spiollinux/Zotero/storage/J49F8QDG/social-relay.html}
}

@online{PublicPostFederation2017,
  title = {Public Post Federation},
  journaltitle = {diaspora* Discourse},
  date = {2017-09-05T01:38:35+00:00},
  langid = {english},
  url = {https://discourse.diasporafoundation.org/t/public-post-federation/264/111},
  urldate = {2018-11-04},
  abstract = {I will only address the claim of better scaling now, and agree on disagreeing on everything else.  The worker distributing public entities generates the payload exactly once, then leverages typhoeus to actually do the requests. typhoeus internally is calling libcurl-multi multithreaded, with a concurrency we define in the config. If you want, you can say that this thing does one job: deliver and deliver fast. Delivering requests is not a bottleneck anymore and it hasn't been in quite a while. I...},
  keywords = {unread,Diaspora,relay,related work},
  file = {/home/spiollinux/Zotero/storage/5JW2W3RZ/111.html}
}

@online{RelayServersPublic,
  title = {Relay Servers for Public Posts},
  url = {https://wiki.diasporafoundation.org/Relay_servers_for_public_posts},
  journaltitle = {Diaspora* Project Wiki},
  urldate = {2018-11-04},
  keywords = {Diaspora,relay,related work},
  file = {/home/spiollinux/Zotero/storage/DD5B8HJH/Relay_servers_for_public_posts.html}
}

@online{ActivityPubRocksHttps,
  title = {{{ActivityPub Rocks}}!},
  url = {https://activitypub.rocks/implementation-report/},
  urldate = {2018-11-04},
  keywords = {unread},
  file = {/home/spiollinux/Zotero/storage/PQ2H3F7B/implementation-report.html}
}

@misc{webberActivityPubDecentralizedDistributed,
  title = {{{ActivityPub}}: {{From Decentralized}} to {{Distributed}}},
  url = {https://github.com/WebOfTrustInfo/rebooting-the-web-of-trust-fall2017/raw/master/final-documents/activitypub-decentralized-distributed.pdf},
  date = {2017},
  keywords = {unread},
  author = {Webber, Christopher Lemmer and Sporny, Manu},
  file = {/home/spiollinux/Zotero/storage/MVL5J77P/activitypub-decentralized-distributed.pdf},
  annotation = {\begin{itemize}

\item introduces decentralized identifiers and E2E crypto

\end{itemize}}
}

@online{RWOTWhitePapers,
  title = {{{RWOT}}: {{White Papers}}},
  url = {https://www.weboftrust.info/papers.html},
  urldate = {2018-11-04},
  keywords = {unread},
  file = {/home/spiollinux/Zotero/storage/3K2BUKBX/papers.html}
}

@report{LinkedDataSignatures,
  author = {Longley, David and Sporny, Manu and Allen, Christopher},
  title = {Linked {{Data Signatures}} 1.0},
  institution = {{W3C Digital Verification Community Group}},
  type = {Community Draft},
  date = {2018-11-04},
  url = {https://w3c-dvcg.github.io/ld-signatures/},
  urldate = {2018-11-04},
  keywords = {Authentication,unread,ActivityPub,signatures},
  file = {/home/spiollinux/Zotero/storage/7D5JC4JN/ld-signatures.html}
}

@report{snellActivityVocabulary,
  langid = {english},
  title = {Activity {{Vocabulary}}},
  url = {https://www.w3.org/TR/activitystreams-vocabulary/},
  institution = {{W3C Social Web Working Group}},
  type = {W3C Recommendation},
  urldate = {2018-11-05},
  keywords = {unread},
  author = {Snell, James M.},
  file = {/home/spiollinux/Zotero/storage/MWJARNGE/activitystreams-vocabulary.html}
}

@online{MastodonPubrelay,
  title = {Mastodon / Pub-Relay},
  journaltitle = {GitLab},
  langid = {english},
  url = {https://source.joinmastodon.org/mastodon/pub-relay},
  urldate = {2018-11-05},
  abstract = {A service-type ActivityPub actor that will re-broadcast anything sent to it to anyone who subscribes to it.},
  keywords = {unread,related work},
  file = {/home/spiollinux/Zotero/storage/AX86PS3Z/pub-relay.html}
}

@online{LinkedDataSignaturesa,
  langid = {english},
  title = {Linked {{Data Signatures}} + Public Key {{URI}} {$\cdot$} {{Issue}} \#203 {$\cdot$} {{w3c/activitypub}}},
  url = {https://github.com/w3c/activitypub/issues/203},
  abstract = {9.2 says to include a ``link to the public key'' in the actors profile object, but nowhere is it mentioned what name this should have in the profile object, AFAICT. When Linked Data Signatu...},
  journaltitle = {GitHub},
  urldate = {2018-11-05},
  keywords = {unread},
  file = {/home/spiollinux/Zotero/storage/IEUK5ATJ/203.html}
}

@online{LitePubSpecificationDraft,
  title = {{{LitePub}} Specification Draft},
  file = {/home/spiollinux/Zotero/storage/H5TEPPTC/overview.html},
  url = {https://litepub.social/litepub/overview.html},
  urldate = {2018-11-11}
}

@misc{closeACLsDon,
  title = {{{ACLs}} Don't},
  url = {http://waterken.sourceforge.net/aclsdont/current.pdf},
  abstract = {The ACL model is unable to make correct access
decisions for interactions involving more than two
principals, since required information is not retained
across message sends. Though this deficiency has long
been documented in the published literature, it is
not widely understood. This logic error in the ACL
model is exploited by both the clickjacking and Cross-Site
Request Forgery attacks that affect many Web
applications.},
  urldate = {2018-11-11},
  keywords = {unread},
  author = {Close, Tyler},
  file = {/home/spiollinux/Zotero/storage/AA86NIVA/current.pdf}
}

@inproceedings{DecentralizedPublicKey,
  title = {Decentralized {{Public Key Infrastructure}}},
  url = {https://github.com/WebOfTrustInfo/rebooting-the-web-of-trust/raw/master/final-documents/dpki.pdf},
  eventtitle = {Rebooting the {{Web}} of {{Trust}}},
  booktitle = {Rebooting the {{Web}} of {{Trust}}},
  urldate = {2018-11-04},
  keywords = {unread},
  file = {/home/spiollinux/Zotero/storage/X27QDC2K/dpki.pdf}
}

@report{robinsonDiasporaSocialRelay,
  author = {Robinson, Jason},
  title = {Diaspora* {{Social Relay}} Design Concept},
  url = {https://raw.githubusercontent.com/jaywink/social-relay/master/docs/relays.md},
  urldate = {2018-11-11},
  file = {/home/spiollinux/Zotero/storage/8LBDH2VK/relays.txt},
  keywords = {unread,related work}
}

@inproceedings{Scribe-NGC,
  location = {{UCL, London, UK}},
  title = {Scribe: {{The}} Design of a Large-Scale Event Notification Infrastructure},
  volume = {2233},
  booktitle = {Networked {{Group Communication}}, {{Third International COST264 Workshop}} ({{NGC}}'2001)},
  series = {Lecture {{Notes}} in {{Computer Science}}},
  date = {2001-11},
  pages = {30--43},
  keywords = {unread,DHT,Pastry,related work},
  author = {Rowstron, Antony and Kermarrec, Anne-Marie and Castro, Miguel and Druschel, Peter},
  editor = {Crowcroft, Jon and Hofmann, Markus},
  file = {/home/spiollinux/Zotero/storage/LNYASSPU/scribe.pdf}
}

@inproceedings{PAST-Hotos,
  location = {{Schloss Elmau, Germany}},
  title = {{{PAST}}: {{A Persistent}} and {{Anonymous Store}}},
  booktitle = {{{HotOS VIII}}},
  date = {2001-05},
  keywords = {unread,DHT,Pastry,related work},
  author = {Druschel, Peter and Rowstron, Antony},
  file = {/home/spiollinux/Zotero/storage/ICE9WUDR/Druschel und Rowstron - 2001 - PAST A Persistent and Anonymous Store.pdf}
}

@inproceedings{Pastry-Middleware,
  location = {{Heidelberg, Germany}},
  title = {Pastry: {{Scalable}}, Distributed Object Location and Routing for Large-Scale Peer-to-Peer Systems},
  booktitle = {{{IFIP}}/{{ACM International Conference}} on {{Distributed Systems Platforms}} ({{Middleware}})},
  date = {2001-11},
  pages = {329--350},
  keywords = {unread,DHT,Pastry,related work},
  author = {Rowstron, Antony and Druschel, Peter},
  file = {/home/spiollinux/Zotero/storage/WRBE2SX3/pastry.pdf},
  annotation = {takes into account network locality; it seeks to minimize the distance messages travel, according to a scalar proximity metric like the number of IP routing hops.

~

There are already some distributed messaging or storage systems built on-top of pastry

~

unmaintained since 2009}
}

@inproceedings{SplitStream-SOSP,
  author = {Castro, Miguel and Druschel, Peter and Kermarrec, Anne-Marie and Nandi, Animesh and Rowstron, Antony and Singh, Atul},
  title = {{{SplitStream}}: {{High}}-Bandwidth Multicast in a Cooperative Environment},
  booktitle = {19th {{ACM Symposium}} on {{Operating Systems Principles}} ({{SOSP}}'03)},
  location = {{Bolton Landing, New York, USA}},
  date = {2003-10},
  keywords = {unread,related work},
  file = {/home/spiollinux/Zotero/storage/Z3H57LAW/splitstream-sosp.pdf},
  annotation = {achieves higher throughput by splitting up/ sharding multicast data streams over multiple paths

~

wants to avoid high load on middle nodes}
}

@inproceedings{mozoTagIndexedDHT2006,
  title = {Tag {{Indexed DHT}} for {{Scalable Search Infrastructure}} in {{Social Network Applications}}},
  doi = {10.1109/P2P.2006.43},
  abstract = {Social applications associate a set of user defined keywords named tags when publishing social objects in order to locate them later. We propose T-DHT, a hybrid unstructured-structured DHT based approach, to cope with the high demanding requirements of social applications, in a fully scalable, distributed and balanced way. T-DHT behaves as a structured DHT when publishing ``tag, social-object'' associations, and as an unstructured filter driven network when searching for the social-objects by means of any tag combination. The publishing process stores tag information across node links in order to drive adequately the later search operations in at most O(Log(N)). The search process takes at most O(logN) node hops for any tag combination and uses the previously stored node link tag information. Although T-DHT has been devised to build a scalable social application infrastructure, it can also be applied to solve general peer-to-peer keyword search problems},
  eventtitle = {Sixth {{IEEE International Conference}} on {{Peer}}-to-{{Peer Computing}} ({{P2P}}'06)},
  booktitle = {Sixth {{IEEE International Conference}} on {{Peer}}-to-{{Peer Computing}} ({{P2P}}'06)},
  date = {2006-09},
  pages = {77--78},
  keywords = {unread,computational complexity,distributed hash table,distributed processing,Ethernet networks,file organisation,Filters,hybrid unstructured-structured DHT,indexing,Information filtering,information retrieval,Keyword search,node link tag information,Peer to peer computing,peer-to-peer keyword search,Publishing,Routing,Scalability,scalable search infrastructure,scalable social application infrastructure,search operation process,social network application,Social network services,social object publishing,social object search,social sciences computing,Switches,tag indexed DHT,unstructured filter driven network,user defined keyword,related work},
  author = {Mozo, Alberto and Salvach{\'u}a, Joaqu{\'i}n},
  file = {/home/spiollinux/Zotero/storage/ZFTAMNNU/t-dht.pdf;/home/spiollinux/Zotero/storage/3II6ZVET/1698596.html}
}

@inproceedings{paul2017lilliput,
  title = {Lilliput: {{A Storage Service}} for {{Lightweight Peer}}-to-{{Peer Online Social Networks}}},
  booktitle = {26th {{International Conference}} on {{Computer Communications}} and {{Networks}} ({{ICCCN}})},
  date = {2017},
  keywords = {DHT,Social Network,related work},
  author = {Paul, Thomas and Lochschmidt, Niklas and Salah, Hani and Datta, Anwitaman and Strufe, Thorsten},
  file = {/home/spiollinux/Zotero/storage/36XSSCDU/000.pdf},
  organization = {{IEEE}},
  annotation = {\begin{itemize}

\item focuses on high churn rate/ volatility of nodes, which is not necessary for my use case

\begin{itemize}

\item are some of the presented techniques still useful?

\end{itemize}

\item replica groups/ data overlays can form the groups responsible for a hashtag (redundancy) ???

\begin{itemize}

\item small overlays, using flooding within the group

\end{itemize}

\item consistency even with frequent updates

\item security goals: confidentiality mostly irrelevant for us

\item has \textbf{no concept of keywords, only (user) profiles}

\begin{itemize}

\item we could try to emulate a keyword by re-purposing profile = hashtag/ keyword, but do semantics then still match?

\item what arg-key to use? has to be unique per profile/ keyword

\end{itemize}

\item ={$>$} not really usable for us, except for redundancy

\end{itemize}}
}

@online{twitterincNumbersTwitterStatistics2011,
  author = {{Twitter Inc.}},
  title = {\#numbers: {{Twitter}} Statistics from 2009},
  date = {2011-03-14},
  langid = {english},
  url = {https://blog.twitter.com/official/en_us/a/2011/numbers.html},
  urldate = {2018-11-24},
  keywords = {statistics},
  file = {/home/spiollinux/Zotero/storage/BZGLNP2K/numbers.html}
}

@online{TwitterUsageStatistics,
  langid = {english},
  title = {Twitter {{Usage Statistics}}},
  url = {http://www.internetlivestats.com/twitter-statistics/},
  journaltitle = {Internet Live Stats},
  abstract = {How many tweets are sent per second, minute, day, month, and year? Twitter statistics with live counter showing estimated current tweets in real time. Current and historical numbers, growth rates, and curious facts. Charts, infographics, data, and interesting info.},
  urldate = {2018-11-24},
  file = {/home/spiollinux/Zotero/storage/YJQV524K/twitter-statistics.html}
}

@inproceedings{perfittMegaphoneFaultTolerant2010,
  title = {Megaphone: {{Fault Tolerant}}, {{Scalable}}, and {{Trustworthy P2P Microblogging}}},
  doi = {10.1109/ICIW.2010.77},
  shorttitle = {Megaphone},
  abstract = {Micro-blogging, or the posting of weblogs entries that have a small number of characters (160 characters or less), has recently become more mainstream. Services that implement micro-blogging such as Twitter are usually based on the client-server model. This limits their scalability and fault tolerance. In this paper, we present a new secure microblogging system that is based on a peer-to-peer network. The network is arranged based on user certificates and is scalable, does not have a single point of failure, and does not depend on a single vendor's proprietary service. The paper outlines the protocol specifics and provides implementation details for a secure, scaleable microblogging system.},
  eventtitle = {2010 {{Fifth International Conference}} on {{Internet}} and {{Web Applications}} and {{Services}}},
  booktitle = {2010 {{Fifth International Conference}} on {{Internet}} and {{Web Applications}} and {{Services}}},
  date = {2010-05},
  pages = {469--477},
  keywords = {Internet,unread,DHT,Social Network,Pastry,Peer to peer computing,Bandwidth,Computer science,Fault tolerance,fault tolerant,fault tolerant computing,Information services,megaphone,Message passing,microblogging,multicast tree,nodes,Organizing,P2P,peer-to-peer computing,peer-to-peer network,scaleable microblogging system,trustworthy P2P microblogging,Web services,Web sites,Weblogs entries,PubSub,multicast,scribe,related work},
  author = {Perfitt, T. and Englert, B.},
  file = {/home/spiollinux/Zotero/storage/JR6BPLD3/Perfitt und Englert - 2010 - Megaphone Fault Tolerant, Scalable, and Trustwort.pdf;/home/spiollinux/Zotero/storage/XZBNTZAX/5476495.html},
  annotation = {\begin{itemize}

\item ``Poster'' at the top of a multicast tree could be the node responsible for the hashtag(slice)

\item utilizing the described following of a user for following a hashtag?

\begin{itemize}

\item difference: ID of node != hash(hashtag) -{$>$} so not the node itself, but a neighbour

\item hash(hashtag) as group-ID of multicast group

\item join message sent towards that ID

\end{itemize}

\item problem: caching always increases the number of parties needed to be trusted, right?

\item in addition to this, strategy on how to choose the tag responsibles is needed

\begin{itemize}

\item probably just by IP?

\end{itemize}

\item session keys are unnecessary for public posts

\item LAST X for retrieving old messages

\begin{itemize}

\item in our use case better use a time period (start + end timestamp)

\end{itemize}

\item do we need a session key? using TLS between nodes might be nice

\begin{itemize}

\item but which certs? Is opportunistic encryption still better than nothing?

\item each node is supposed to be run together with an instance (additional ones might be possible) -{$>$} domain name + cert anyways

\end{itemize}

\end{itemize}}
}

@article{terpstraPeertoPeerApproachContentBased,
  langid = {english},
  title = {A {{Peer}}-to-{{Peer Approach}} to {{Content}}-{{Based Publish}}/{{Subscribe}}},
  abstract = {Publish/subscribe systems are successfully used to decouple distributed applications. However, their efficiency is closely tied to the topology of the underlying network, the design of which has been neglected. Peer-to-peer network topologies can offer inherently bounded delivery depth, load sharing, and self-organisation. In this paper, we present a content-based publish/subscribe system routed over a peer-to-peer topology graph. The implications of combining these approaches are explored and a particular implementation using elements from Rebeca and Chord is proven correct.},
  pagetotal = {8},
  keywords = {unread,DHT,PubSub,multicast,related work,Chord,REBECA},
  author = {Terpstra, Wesley W and Behnel, Stefan and Fiege, Ludger and Zeidler, Andreas and Buchmann, Alejandro P},
  file = {/home/spiollinux/Zotero/storage/YXAPRET9/Terpstra et al. - A Peer-to-Peer Approach to Content-Based PublishS.pdf},
  annotation = {Our contribution in this paper is to take the
graph topology and management of a peer-to-peer network
and couple it with the highly flexible routing of a
publish/subscribe system. Of particular interest, our network
preserves the use of fully general filters

\begin{itemize}

\item creates multiple multicast trees on a Chord DHT to avoid overload of subtrees

\end{itemize}}
}

@article{mozoScalableTagSearch2008,
  langid = {english},
  title = {Scalable Tag Search in Social Network Applications},
  volume = {31},
  issn = {0140-3664},
  url = {http://linkinghub.elsevier.com/retrieve/pii/S0140366407003222},
  doi = {10.1016/j.comcom.2007.08.035},
  number = {3},
  journaltitle = {Computer Communications},
  urldate = {2018-12-10},
  date = {2008-02},
  pages = {423--436},
  keywords = {DHT,Social Network,distributed hash table,bloom filter,store and query,search,related work},
  author = {Mozo, Alberto and Salvach{\'u}a, Joaqu{\'i}n},
  file = {/home/spiollinux/Zotero/storage/ATNQRKIX/Mozo und Salvachúa - 2008 - Scalable tag search in social network applications.pdf},
  annotation = {\begin{itemize}

\item focus on also finding tag conjunctions efficiently (out-of-scope for my research)

\item aims to solve ``hot-spot'' problem of popular hashtags overloading a node

\item no subscription, only store-and-query

\item redundancy, security?

\item node responsible for a tag still stores reference of tags (and their posts), but in a bloom filter

\item links of a node: how many of them?

\item nodes other than the ``responsible'' one can directly fulfill the task of pointing towards the destination node

\begin{itemize}

\item -{$>$} potential attack vector of non-responsible nodes just lying/ pointing to a subset only?

\item this even eliminates the need for an attacker to obtain the DHT ID responsible for the tag

\end{itemize}

\item related work mentioned in introduction might be interesting, although suffering from hot-spot problems

\begin{itemize}

\item for single key words: 28, 29?

\end{itemize}

\item strategies on how to deal with hot-spot around the tag are BULLSHIT, especially for a single-tag use case

\item fact that posts themselves are spread all over the DHT, this approach is bad for subscription

\begin{itemize}

\item subscription to responsible node possible, but this removes optimizations and generates many locate packets

\item or does the node itself store at least the direct reference? but then: hot-spot

\end{itemize}

\end{itemize}}
}

@software{RWOT5BostonMassachusetts2018,
  title = {{{RWOT5}} in {{Boston}}, {{Massachusetts}} ({{October}} 2017)},
  url = {https://github.com/WebOfTrustInfo/rebooting-the-web-of-trust-fall2017},
  organization = {{Web of Trust Info}},
  urldate = {2018-12-17},
  date = {2018-12-11T04:24:35Z},
  origdate = {2017-04-28T22:02:00Z}
}

@online{blog!ActivityPubWorseBetter2019,
  langid = {english},
  title = {{{ActivityPub}}: {{The}} ``{{Worse Is Better}}'' {{Approach}} to {{Federated Social Networking}}},
  url = {https://blog.dereferenced.org/activitypub-the-worse-is-better-approach-to-federated-social-networking},
  shorttitle = {{{ActivityPub}}},
  abstract = {This is the first article in a series that will be a fairly critical review of ActivityPub from a trust \& safety perspective.  Stay tun...},
  journaltitle = {kaniini's blog!},
  urldate = {2019-01-13},
  date = {2019-01-07T01:36:48Z},
  author = {{kaniini}},
  file = {/home/spiollinux/Zotero/storage/684D45MA/activitypub-the-worse-is-better-approach-to-federated-social-networking.html}
}

@online{GroupsOStatusCommunity,
  title = {Groups - {{OStatus Community Group}}},
  keywords = {related work},
  file = {/home/spiollinux/Zotero/storage/ZJ7NRYA2/Groups.html},
  url = {https://www.w3.org/community/ostatus/wiki/Groups},
  urldate = {2019-02-18}
}

@online{GroupsGnusocialNet,
  title = {Groups - Gnusocial.Net},
  keywords = {related work},
  file = {/home/spiollinux/Zotero/storage/7KAHMLU6/groups.html},
  url = {https://gnusocial.net/doc/groups},
  urldate = {2019-02-18}
}

@online{PleromaRelay,
  title = {Pleroma / Relay},
  journaltitle = {GitLab},
  langid = {english},
  url = {https://git.pleroma.social/pleroma/relay},
  urldate = {2019-02-18},
  abstract = {Generic LitePub relay (works with all LitePub consumers and Mastodon)},
  keywords = {relay,related work},
  file = {/home/spiollinux/Zotero/storage/HA4C3UZA/relay.html}
}

@article{urdanetaSurveyDHTSecurity2011,
  langid = {english},
  title = {A Survey of {{DHT}} Security Techniques},
  volume = {43},
  issn = {0360-0300},
  url = {http://portal.acm.org/citation.cfm?doid=1883612.1883615},
  doi = {10.1145/1883612.1883615},
  number = {2},
  journaltitle = {ACM Computing Surveys},
  urldate = {2019-02-18},
  date = {2011-01-01},
  pages = {1-49},
  keywords = {security,unread,DHT,Kademlia,Pastry,Chord,survey},
  author = {Urdaneta, Guido and Pierre, Guillaume and van Steen, Maarten},
  file = {/home/spiollinux/Zotero/storage/8NLLR9TH/Urdaneta et al. - 2011 - A survey of DHT security techniques.pdf},
  annotation = {\begin{itemize}

\item crucial aspect: robust and secure assignment of node identifiers to keep fraction of malicious nodes small and prevent nodes from choosing their location

\begin{itemize}

\item to prevent Sybil and Eclipse attacks

\end{itemize}

\item main 2 problems:

\begin{itemize}

\item creating malicious nodes

\item isolating benign nodes from each other

\end{itemize}

\item for scalability: small number of links to peers compared to total number of nodes

\item Sybil attack: create many malicious nodes

\begin{itemize}

\item breaks the pre-assumption of defenses that malicious nodes are a minority

\item example for single attacker with many IPs: botnet

\item 3.6 Danezis et al.: interesting approach to assume that malicious nodes are preferably added through other malicious nodes, then prefer diverse, infrequently used paths; problem: assumes social relationships between nodes

\item 3.9: computational puzzles have to be solved periodically, problems: difficulty level that fits all nodes, power consumption

\end{itemize}

\item Eclipse attack: try to control sufficient fraction of neighbouring nodes to poison the routing table of a node and isolate it

\begin{itemize}

\item can be utilized for routing and storing attacks

\item defense is successful, if malicious nodes cannot concentrate in the neighbourhood (fraction is similarly high as in whole network)

\item Castro: use 2 routing tables: one optimized one (with proximity information) + 1 verified one for fallback

\item \textbf{Chord:} constrains identifiers of nodes usable in routing tables, works with random \& stable nodeIDs if attackers are spread over DHT

\end{itemize}

\item routing attacks: refuse to forward or forward to wrong node (maybe even DoS an unrelated server)

\begin{itemize}

\item prefer iterative routing, as this can be checked better

\item assign nodeIDs in verifiable, hard to force way

\item identities with public keys

\end{itemize}

\item storage attacks: bogus responses/ deny existence

\begin{itemize}

\item redundancy/ replication: consult at least 2 replicas

\item try to make data self-certifying: only consult redundant nodes if integrity check fails

\item 5.4 Sanchez: divide Chord finger table into multiple rings that are routed independently

\item 5.6 Harvesf and Blough: place replicas at equally-spaced locations on Chord ring, for more diverse routes to them

\item normal Chord stores replicas at numerically close nodes

\item data redundancy is not enough, if nodes can choose their own position in key space

\item redundant routing for reliable locatability of keys

\end{itemize}

\end{itemize}}
}

@report{lemmerwebberActivityPub2018,
  author      = {Lemmer Webber, Christopher and Tallon, Jessica and Shepherd, Erin and Guy, Amy and Prodromou, Evan},
  title       = {{{ActivityPub}}},
  type        = {W3C Recommendation},
  institution = {{W3C Social Web Working Group}},
  date        = {2018-01-23},
  url         = {https://www.w3.org/TR/2018/REC-activitypub-20180123/},
  urldate     = {2019-03-02},
  langid      = {english},
  abstract    = {The ActivityPub protocol is a decentralized social networking protocol based upon the [ActivityStreams] 2.0 data format. It provides a client to server API for creating, updating and deleting content, as well as a federated server to server API for delivering notifications and content.},
  file        = {/home/spiollinux/Zotero/storage/CYAKSDHL/REC-activitypub-20180123.html}
}

@online{twitterinc.Twitter2019,
  author  = {{Twitter Inc.}},
  title   = {Twitter},
  type    = {Social Network},
  date    = {2019-03-02},
  url     = {https://twitter.com/},
  urldate = {2019-03-02},
  file    = {/home/spiollinux/Zotero/storage/9EDJTRHC/twitter.com.html}
}

@report{RFC2119,
  title = {Key Words for Use in {{RFCs}} to {{Indicate Requirement Levels}}},
  url = {http://www.rfc-editor.org/rfc/rfc2119.txt},
  number = {14},
  institution = {{RFC Editor}},
  type = {BCP},
  date = {1997-03},
  keywords = {RFC,meta},
  author = {Bradner, Scott},
  file = {/home/spiollinux/Zotero/storage/77ETI3DG/rfc2119.html},
  issn = {2070-1721},
  howpublished = {Internet Requests for Comments},
  note = {RFC 2119}
}

@inproceedings{DBLP:conf/iptps/ByersCM03,
  title = {Simple {{Load Balancing}} for {{Distributed Hash Tables}}},
  booktitle = {{{IPTPS}}},
  date = {2003},
  pages = {80-87},
  keywords = {load balancing},
  author = {Byers, John W. and Considine, Jeffrey and Mitzenmacher, Michael},
  file = {/home/spiollinux/Zotero/storage/FY37N94A/simple_load_balancing.ps},
  ee = {http://springerlink.metapress.com/openurl.asp?genre=article\&issn=0302-9743\&volume=2735\&spage=80},
  crossref = {DBLP:conf/iptps/2003},
  bibsource = {DBLP, http://dblp.uni-trier.de},
  annotation = {\begin{itemize}

\item only deals with balancing differently-sized arcs of the key space with equally-sized items

\begin{itemize}

\item this is not the problem here, as different hashtags will have different loads ={$>$} different items with different sizes/ loads

\end{itemize}

\end{itemize}}
}

@incollection{raoLoadBalancingStructured2003,
  author      = {Rao, Ananth and Lakshminarayanan, Karthik and Surana, Sonesh and Karp, Richard and Stoica, Ion},
  editor      = {Kaashoek, M. Frans and Stoica, Ion},
  editorb     = {Goos, Gerhard and Hartmanis, Juris and van Leeuwen, Jan},
  editorbtype = {redactor},
  title       = {Load {{Balancing}} in {{Structured P2P Systems}}},
  booktitle   = {Peer-to-{{Peer Systems II}}},
  volume      = {2735},
  pages       = {68-79},
  publisher   = {{Springer Berlin Heidelberg}},
  location    = {{Berlin, Heidelberg}},
  date        = {2003},
  isbn        = {978-3-540-40724-9 978-3-540-45172-3},
  doi         = {10.1007/978-3-540-45172-3_6},
  url         = {http://link.springer.com/10.1007/978-3-540-45172-3_6},
  urldate     = {2019-03-14},
  langid      = {english},
  options     = {useprefix=true},
  keywords    = {load balancing},
  abstract    = {Most P2P systems that provide a DHT abstraction distribute objects among ``peer nodes'' by choosing random identifiers for the objects. This could result in an O(log N) imbalance. Besides, P2P systems can be highly heterogeneous, i.e. they may consist of peers that range from old desktops behind modem lines to powerful servers connected to the Internet through high-bandwidth lines. In this paper, we address the problem of load balancing in such P2P systems.},
  file        = {/home/spiollinux/Zotero/storage/975MJEHQ/Rao et al. - 2003 - Load Balancing in Structured P2P Systems.pdf},
  annotation  = {\begin{itemize}

\item uses virtual servers

\item but these virtual servers are moved around arbitrarily with no precautions against sybil or node-id attacks

\end{itemize}}
}

@inproceedings{riecheThermaldissipationbasedApproachBalancing2004,
  author     = {Rieche, S. and Petrak, L. and Wehrle, K.},
  title      = {A Thermal-Dissipation-Based Approach for Balancing Data Load in Distributed Hash Tables},
  booktitle  = {29th {{Annual IEEE International Conference}} on {{Local Computer Networks}}},
  eventtitle = {29th {{Annual IEEE International Conference}} on {{Local Computer Networks}}},
  date       = {2004-11},
  pages      = {15-23},
  doi        = {10.1109/LCN.2004.10},
  keywords   = {unread,Peer to peer computing,fault tolerant computing,load balancing,client-server systems,Data engineering,data load balancing,distributed databases,distributed hash tables,Engineering management,fault tolerance,hash function,Load management,P2P systems,peer-to-peer systems,Protocols,resource allocation,storage management,Systems engineering and theory,table lookup,Thermal engineering,Thermal loading,Thermal management,thermal-dissipation-based approach,Virtual prototyping},
  abstract   = {A major objective of peer-to-peer (P2P) systems is the management of large amounts of data distributed across many systems. Distributed hash tables (DHT) are designed for highly scalable, self-organizing, and efficient distribution and lookup of data, whereby data is stored globally persistent. The range of values of the corresponding hash function is partitioned and each interval is assigned to a node of the DHT. Because the assignment of data to nodes is based on hash functions, one assumes that the respective data load is distributed evenly across all participating nodes. However most DHT show difficulties with load balancing as we demonstrate in this paper. As a solution for this problem, we present a new and very simple approach for balancing stored data between peers in a fashion analogous to the dissipation of heat energy in materials. We compare this algorithm with other approaches for load balancing and present results based on simulations and a prototype implementation. This new algorithm improves the distribution of load in DHT without requiring major changes of the DHT themselves. In addition, we show that the fault tolerance of peer-to-peer systems is increased by the proposed algorithm.},
  file       = {/home/spiollinux/Zotero/storage/QNDRCXA3/Rieche et al. - 2004 - A thermal-dissipation-based approach for balancing.pdf;/home/spiollinux/Zotero/storage/ZH3HTFFV/login.html}
}

@misc{kargerSimpleEfficientLoad,
  langid = {english},
  title = {Simple {{Efficient Load Balancing Algorithms}} for {{Peer}}-to-{{Peer Systems}}},
  abstract = {Load balancing is a critical issue for the efficient operation of peer-to-peer networks. We give two new load-balancing protocols whose provable performance guarantees are within a constant factor of optimal. Our protocols refine the consistent hashing data structure that underlies the Chord (and Koorde) P2P network. Both preserve Chord's logarithmic query time and near-optimal data migration cost.},
  keywords = {unread,load balancing},
  author = {Karger, David R. and Ruhl, Matthias},
  file = {/home/spiollinux/Zotero/storage/ZHQLC38W/Karger und Ruhl - Simple Efficient Load Balancing Algorithms for Pee.pdf},
  annotation = {\begin{itemize}

\item virtual servers, but only one of them active per node at each point of time

\item As in the original Chord protocol, our scheme gives each real node only a small number of ``legitimate'' addresses on the Chord ring, preserving Chord's (limited) protection against address spoofing by malicious nodes trying to disrupt the routing layer.

\item but for item balancing (which is needed) nodes can again migrate to arbitrary IDs :(

\end{itemize}}
}

@article{stoicaChordScalablePeertopeer2003,
  title = {Chord: A Scalable Peer-to-Peer Lookup Protocol for {{Internet}} Applications},
  volume = {11},
  issn = {1063-6692},
  doi = {10.1109/TNET.2002.808407},
  shorttitle = {Chord},
  abstract = {A fundamental problem that confronts peer-to-peer applications is the efficient location of the node that stores a desired data item. This paper presents Chord, a distributed lookup protocol that addresses this problem. Chord provides support for just one operation: given a key, it maps the key onto a node. Data location can be easily implemented on top of Chord by associating a key with each data item, and storing the key/data pair at the node to which the key maps. Chord adapts efficiently as nodes join and leave the system, and can answer queries even if the system is continuously changing. Results from theoretical analysis and simulations show that Chord is scalable: Communication cost and the state maintained by each node scale logarithmically with the number of Chord nodes.},
  number = {1},
  journaltitle = {IEEE/ACM Transactions on Networking},
  date = {2003-02},
  pages = {17-32},
  keywords = {Internet,unread,Application software,DHT,file organisation,Peer to peer computing,Routing,Computer science,Chord,Protocols,table lookup,Analytical models,Centralized control,Chord routing protocol,consistent hashing,Costs,data location,distributed protocol,Internet applications,lookup protocol,Network servers,peer-to-peer applications,protocols,routing protocols},
  author = {Stoica, I. and Morris, R. and Liben-Nowell, D. and Karger, D. R. and Kaashoek, M. F. and Dabek, F. and Balakrishnan, H.},
  file = {/home/spiollinux/Zotero/storage/4HG3R5ZT/Stoica et al. - 2003 - Chord a scalable peer-to-peer lookup protocol for.pdf;/home/spiollinux/Zotero/storage/769BWPJT/login.html},
  annotation = {\begin{itemize}

\item just one operation: mapping a key to a node

\item assumption load balancing: due to consistent randomized hashing, each node gets roughly the same number of keys

\begin{itemize}

\item not relevant for this project, as keys get different amounts of load

\end{itemize}

\item steady state: O(log N) known routing entries, O(log N) messages for routing

\item Pastry and Tapestry take network topology into account, but are more complex e.g. regarding joins

\item correctness robust against partially wrong routing information

\item lookup(key) returns IP address of the responsible node -{$>$} reverse-DNS record?

\item successor(key): first node with ID {$\geq$} key

\item routing (finger) table with up to len(ID) entries \textendash{} but only log(n) of them differ -{$>$} optimized storage possible

\begin{itemize}

\item i-th entry is first node that succeeds n by at least {$2^{i-1} \bmod 2^m$}

\item first finger = successor

\end{itemize}

\item stabilization protocol: run periodically in background

\begin{itemize}

\item join() alone does not notify rest of network

\item stabilize asks successor for its predecessor and informs successor about existence

\end{itemize}

\item fix\_fingers(): periodically run

\begin{itemize}

\item initialises finger table

\item also updates finger table with new nodes

\end{itemize}

\item check\_predecessor(): run periodically to get rid of failed predecessor

\item n.notify(n'): n is notified by n' that it thinks to be its predecessor

\item if successor not yet correct or keys haven't migrated: temporary failure, needs to be handled by application layer

\item for robustness: not 1 successor, but list of r next successors

\item as Chord keeps track of \{r\}, these nodes can be used for redundant storage

\item stabilize: update immediate successor + 1 other entry from successor list or finger table

\item latency optimization: maintain list of fingers + their successors, choose the geographically/ latency closest one

\begin{itemize}

\item needs recursive querying because nodes themselves have the latency information only locally

\end{itemize}

\end{itemize}}
}

@incollection{maymounkovKademliaPeertoPeerInformation2002,
  author      = {Maymounkov, Petar and Mazi\`eres, David},
  editor      = {Druschel, Peter and Kaashoek, Frans and Rowstron, Antony},
  editorb     = {Goos, Gerhard and Hartmanis, Juris and van Leeuwen, Jan},
  editorbtype = {redactor},
  title       = {Kademlia: {{A Peer}}-to-{{Peer Information System Based}} on the {{XOR Metric}}},
  shorttitle  = {Kademlia},
  booktitle   = {Peer-to-{{Peer Systems}}},
  volume      = {2429},
  pages       = {53-65},
  publisher   = {{Springer Berlin Heidelberg}},
  location    = {{Berlin, Heidelberg}},
  date        = {2002},
  isbn        = {978-3-540-44179-3 978-3-540-45748-0},
  doi         = {10.1007/3-540-45748-8_5},
  url         = {http://link.springer.com/10.1007/3-540-45748-8_5},
  urldate     = {2019-03-18},
  langid      = {english},
  options     = {useprefix=true},
  keywords    = {unread,DHT,Kademlia},
  abstract    = {We describe a peer-to-peer distributed hash table with provable consistency and performance in a fault-prone environment. Our system routes queries and locates nodes using a novel XOR-based metric topology that simplifies the algorithm and facilitates our proof. The topology has the property that every message exchanged conveys or reinforces useful contact information. The system exploits this information to send parallel, asynchronous query messages that tolerate node failures without imposing timeout delays on users.},
  file        = {/home/spiollinux/Zotero/storage/TDFCQ2R5/Maymounkov und Mazières - 2002 - Kademlia A Peer-to-Peer Information System Based .pdf}
}

@incollection{stevensFirstCollisionFull2017,
  author    = {Stevens, Marc and Bursztein, Elie and Karpman, Pierre and Albertini, Ange and Markov, Yarik},
  editor    = {Katz, Jonathan and Shacham, Hovav},
  title     = {The {{First Collision}} for {{Full SHA}}-1},
  booktitle = {Advances in {{Cryptology}} \textendash{} {{CRYPTO}} 2017},
  volume    = {10401},
  pages     = {570-596},
  publisher = {{Springer International Publishing}},
  location  = {{Cham}},
  date      = {2017},
  isbn      = {978-3-319-63687-0 978-3-319-63688-7},
  doi       = {10.1007/978-3-319-63688-7_19},
  url       = {http://link.springer.com/10.1007/978-3-319-63688-7_19},
  urldate   = {2019-03-21},
  langid    = {english},
  abstract  = {SHA-1 is a widely used 1995 NIST cryptographic hash function standard that was officially deprecated by NIST in 2011 due to fundamental security weaknesses demonstrated in various analyses and theoretical attacks.},
  file      = {/home/spiollinux/Zotero/storage/JJ5WHI94/Stevens et al. - 2017 - The First Collision for Full SHA-1.pdf}
}

@online{NodeInfoSpecification,
  title   = {{{NodeInfo}} Specification},
  file    = {/home/spiollinux/Zotero/storage/FCWSIM89/nodeinfo.diaspora.software.html},
  url     = {http://nodeinfo.diaspora.software/},
  urldate = {2019-03-22}
}

@online{IPv6GlobalUnicast,
  title = {{{IPv6 Global Unicast Address Assignments}}},
  organization = {{IANA}},
  url = {https://www.iana.org/assignments/ipv6-unicast-address-assignments/ipv6-unicast-address-assignments.xhtml},
  urldate = {2019-03-22},
  file = {/home/spiollinux/Zotero/storage/K92CLIKY/ipv6-unicast-address-assignments.html}
}

@article{zhuEfficientProximityawareLoad2005,
  langid = {english},
  title = {Efficient, Proximity-Aware Load Balancing for {{DHT}}-Based {{P2P}} Systems},
  volume = {16},
  issn = {1045-9219},
  url = {http://ieeexplore.ieee.org/document/1401878/},
  doi = {10.1109/TPDS.2005.46},
  abstract = {Many solutions have been proposed to tackle the load balancing issue in DHT-based P2P systems. However, all these solutions either ignore the heterogeneity nature of the system, or reassign loads among nodes without considering proximity relationships, or both. In this paper, we present an efficient, proximity-aware load balancing scheme by using the concept of virtual servers. To the best of our knowledge, this is the first work to use proximity information in load balancing. In particular, our main contributions are: 1) Relying on a self-organized, fully distributed k-ary tree structure constructed on top of a DHT, load balance is achieved by aligning those two skews in load distribution and node capacity inherent in P2P systems\textemdash{}that is, have higher capacity nodes carry more loads; 2) proximity information is used to guide virtual server reassignments such that virtual servers are reassigned and transferred between physically close heavily loaded nodes and lightly loaded nodes, thereby minimizing the load movement cost and allowing load balancing to perform efficiently; and 3) our simulations show that our proximity-aware load balancing scheme reduces the load movement cost by 11-65 percent for all the combinations of two representative network topologies, two node capacity profiles, and two load distributions of virtual servers. Moreover, we achieve virtual server reassignments in O(log N) time.},
  number = {4},
  journaltitle = {IEEE Transactions on Parallel and Distributed Systems},
  urldate = {2019-03-22},
  date = {2005-04},
  pages = {349-361},
  keywords = {load balancing,survey},
  author = {Zhu, Y. and Hu, Y.},
  file = {/home/spiollinux/Zotero/storage/M4WDG39V/Zhu und Hu - 2005 - Efficient, proximity-aware load balancing for DHT-.pdf},
  annotation = {survey section looks worth reading

~

\begin{itemize}

\item also moves virtual servers without any security measures

\end{itemize}}
}

@inproceedings{godfreyLoadBalancingDynamic2004,
  langid = {english},
  location = {{Hong Kong, China}},
  title = {Load Balancing in Dynamic Structured {{P2P}} Systems},
  volume = {4},
  isbn = {978-0-7803-8355-5},
  url = {http://ieeexplore.ieee.org/document/1354648/},
  doi = {10.1109/INFCOM.2004.1354648},
  abstract = {Most P2P systems that provide a DHT abstraction distribute objects randomly among ``peer nodes'' in a way that results in some nodes having {$\Theta$}(log N) times as many objects as the average node. Further imbalance may result due to nonuniform distribution of objects in the identifier space and a high degree of heterogeneity in object loads and node capacities. Additionally, a node's load may vary greatly over time since the system can be expected to experience continuous insertions and deletions of objects, skewed object arrival patterns, and continuous arrival and departure of nodes.},
  eventtitle = {{{IEEE INFOCOM}} 2004},
  booktitle = {{{IEEE INFOCOM}} 2004},
  publisher = {{IEEE}},
  urldate = {2019-03-22},
  date = {2004},
  pages = {2253-2262},
  keywords = {load balancing},
  author = {Godfrey, B. and Lakshminarayanan, K. and Surana, S. and Karp, R. and Stoica, I.},
  file = {/home/spiollinux/Zotero/storage/45P3SZFG/Godfrey et al. - 2004 - Load balancing in dynamic structured P2P systems.pdf},
  annotation = {uses the concept of virtual servers

differentiates between bandwidth and storage

\begin{itemize}

\item assumes only 1 bottle-neck resource, multi-resource balancing left for future work

\item movement cost

\item proposes transferring virtual servers \textendash{} but how???

\end{itemize}}
}

@incollection{yuLeopardLocalityAware2005,
  langid = {english},
  location = {{Berlin, Heidelberg}},
  title = {Leopard: {{A Locality Aware Peer}}-to-{{Peer System}} with {{No Hot Spot}}},
  volume = {3462},
  isbn = {978-3-540-25809-4 978-3-540-32017-3},
  url = {http://link.springer.com/10.1007/11422778_3},
  shorttitle = {Leopard},
  abstract = {Recent research [7, 12, 2] has shown that Internet hosts can be efficiently (i.e., without excessive measurements) mapped to a virtual (Euclidean) coordinate system, where the geometric distance between any two nodes in this virtual space approximates their real IP network distance (latency). Based on this result, in this paper, we propose an alternative approach that inherently incorporates a virtual coordinate system into a P2P network. In our system, called Leopard, a node is assigned a coordinate in the so-called node geo space as it joins the network, and obtains neighbor relationships that reflects network proximity from the beginning. The object id space and the node geo space are then ``weaved'' together via a novel technique called geographically-scoped hashing. Through analysis and simulation, we show three major desirable properties of Leopard to exemplify the power of this paradigm shift: i) a constant routing stretch, i.e., IP level network latency of object look-up is proportional to the distance between a requesting node and the target object; ii) always locates a near-by copy when multiple copies exist; and iii) effectively handles ``flash crowd'' traffic with near optimal load balancing.},
  booktitle = {{{NETWORKING}} 2005. {{Networking Technologies}}, {{Services}}, and {{Protocols}}; {{Performance}} of {{Computer}} and {{Communication Networks}}; {{Mobile}} and {{Wireless Communications Systems}}},
  publisher = {{Springer Berlin Heidelberg}},
  urldate = {2019-03-22},
  date = {2005},
  pages = {27-39},
  keywords = {unread,load balancing,flash crowd,proximity-aware,geographically-scoped hashing},
  author = {Yu, Yinzhe and Lee, Sanghwan and Zhang, Zhi-Li},
  editor = {Boutaba, Raouf and Almeroth, Kevin and Puigjaner, Ramon and Shen, Sherman and Black, James P.},
  editorb = {Hutchison, David and Kanade, Takeo and Kittler, Josef and Kleinberg, Jon M. and Mattern, Friedemann and Mitchell, John C. and Naor, Moni and Nierstrasz, Oscar and Pandu Rangan, C. and Steffen, Bernhard and Sudan, Madhu and Terzopoulos, Demetri and Tygar, Doug and Vardi, Moshe Y. and Weikum, Gerhard},
  editorbtype = {redactor},
  file = {/home/spiollinux/Zotero/storage/FAASRFHH/Yu et al. - 2005 - Leopard A Locality Aware Peer-to-Peer System with.pdf},
  doi = {10.1007/11422778_3}
}

@online{prodromouTagsPubHome2017,
  author     = {Prodromou, Evan},
  title      = {Tags.Pub - {{Home}} ({{Archived}})},
  shorttitle = {Tags.Pub - {{Home}} ({{Archived}})},
  date       = {2017-12-20},
  url        = {https://web.archive.org/web/20171220163330/https://tags.pub/},
  urldate    = {2019-03-25},
  keywords   = {ActivityPub,hashtags},
  file       = {/home/spiollinux/Zotero/storage/WTRRJ6RY/tags.html}
}

@inproceedings{dabekWideareaCooperativeStorage2001,
  langid = {english},
  title = {Wide-Area Cooperative Storage with {{CFS}}},
  abstract = {The Cooperative File System (CFS) is a new peer-to-peer read-only storage system that provides provable guarantees for the efficiency, robustness, and load-balance of file storage and retrieval. CFS does this with a completely decentralized architecture that can scale to large systems. CFS servers provide a distributed hash table (DHash) for block storage. CFS clients interpret DHash blocks as a file system. DHash distributes and caches blocks at a fine granularity to achieve load balance, uses replication for robustness, and decreases latency with server selection. DHash finds blocks using the Chord location protocol, which operates in time logarithmic in the number of servers.},
  booktitle = {{{SOSP}}},
  date = {2001},
  pagetotal = {14},
  keywords = {Chord,load balancing},
  author = {Dabek, Frank and Kaashoek, M. Frans and Karger, David and Morris, Robert and Stoica, Ion},
  file = {/home/spiollinux/Zotero/storage/KYZBYWJE/Dabek et al. - Wide-area cooperative storage with CFS.pdf},
  annotation = {introduces ``virtual servers'' on Chord}
}

@inproceedings{ledlieDistributedSecureLoad2005,
  langid = {english},
  location = {{Miami, FL, USA}},
  title = {Distributed, Secure Load Balancing with Skew, Heterogeneity, and Churn},
  volume = {2},
  isbn = {978-0-7803-8968-7},
  url = {http://nrs.harvard.edu/urn-3:HUL.InstRepos:2962660},
  doi = {10.1109/INFCOM.2005.1498366},
  abstract = {Numerous proposals exist for load balancing in peer-to-peer (p2p) networks. Some focus on namespace balancing, making the distance between nodes as uniform as possible. This technique works well under ideal conditions, but not under those found empirically. Instead, researchers have found heavy-tailed query distributions (skew), high rates of node join and leave (churn), and wide variation in node network and storage capacity (heterogeneity). Other approaches tackle these less-than-ideal conditions, but give up on important security properties. We propose an algorithm that both facilitates good performance and does not dilute security. Our algorithm, k-Choices, achieves load balance by greedily matching nodes' target workloads with actual applied workloads through limited sampling, and limits any fundamental decrease in security by basing each nodes' set of potential identifiers on a single certificate. Our algorithm compares favorably to four others in trace-driven simulations. We have implemented our algorithm and found that it improved aggregate throughput by 20\% in a widely heterogeneous system in our experiments.},
  eventtitle = {Proceedings {{IEEE}} 24th {{Annual Joint Conference}} of the {{IEEE Computer}} and {{Communications Societies}}},
  booktitle = {Proceedings {{IEEE}} 24th {{Annual Joint Conference}} of the {{IEEE Computer}} and {{Communications Societies}}},
  publisher = {{IEEE}},
  urldate = {2019-04-04},
  date = {2005},
  pages = {1419-1430},
  keywords = {Chord,load balancing},
  author = {Ledlie, J. and Seltzer, M.},
  file = {/home/spiollinux/Zotero/storage/VCTAD9SK/Ledlie und Seltzer - 2005 - Distributed, secure load balancing with skew, hete.pdf},
  annotation = {He shows that validating nodes must verify all other nodes' credentials simultaneously, an act that may exceed the verifier's resources. A system may acquire a low level of security by requiring that IDs be based on the hash of the node's IP address

~

\begin{itemize}

\item although a centralised certification agency does not suit the fediverse's architecture, instead of the certified number a number derived from domain and IP can be used as a seed for ID generation

\item
Each node can then use this number to generate its own IDs using an ID-generating hash function h. [\ldots{}]\\
k-Choices creates a set of verifiable IDs by generating each k = h(xcert + c) where c has a well-known bound

\item ~
At each VS insertion, k-Choices minimizes the discrepancy between work and capacity by sampling from a small set of potential IDs. By limiting the number of potential IDs, k-Choices is practical for networks containing malicious participants.

\item estimating costs of join: needs estimation of weight shifted from another node

\begin{itemize}

\item how to estimate this?

\item estimation easy, if all data items have equal size and are distributed equally

\item but if sizes of items differ heavily, required to rely on information of other nodes about what weight they have

\item
We assume that nodes do not lie or that Distributed Algorithmic Mechanism Design techniques could be used to encourage the truthfulness of the information they provide about load

\end{itemize}

\item load metric: find appropriate way of quantifying load even of out-of-band direct ActivityPub communication

\begin{itemize}

\item 2 kinds of load:

\begin{itemize}

\item storage usage

\item network load

\end{itemize}

\item paper: adds 1 load unit for each query, but in my case majority of network load probably comes from communication outside of DHT overlay. That has to be quantified as well

\item bookkeeping of load needs to be done per VS/ per key for proper cost estimation/ guessing

\end{itemize}

\item they claim their approach is suited for routing load balancing, not for storage load balancing. But I think it can be useful for storage imbalance as well

\end{itemize}}
}

@book{tange_ole_2018_1146014,
  author    = {Tange, Ole},
  title     = {{{GNU Parallel}} 2018},
  publisher = {{Ole Tange}},
  date      = {2018-04},
  doi       = {10.5281/zenodo.1146014},
  url       = {https://doi.org/10.5281/zenodo.1146014}
}

@software{schubertDiasporaTagUsage,
  author   = {Schubert, Dennis},
  title    = {Diaspora* {{Tag Usage Data}} for Pod.Geraspora.De},
  url      = {https://0b101010.codes/research-things/open-data/tree/master/diaspora/tag-usage},
  abstract = {Contains a list of posts associated with authors, pods, and used tags. Allows analysis about how posts and tags spread, and how tags are used in general.}
}

@software{DiasporaProject,
  title   = {The Diaspora* {{Project}}},
  file    = {/home/spiollinux/Zotero/storage/52S85ZVN/diasporafoundation.org.html},
  url     = {https://diasporafoundation.org/},
  urldate = {2019-05-06}
}

@software{Friendica,
  title = {Friendica \textendash{} {{A Decentralized Social Network}}},
  url = {https://friendi.ca/},
  shorttitle = {Friendica},
  urldate = {2019-05-07},
  file = {/home/spiollinux/Zotero/storage/T865AFPA/friendi.ca.html}
}

@article{artigasNovelMethodologyConstructing2005,
  title = {A Novel Methodology for Constructing Secure Multipath Overlays},
  volume = {9},
  issn = {1089-7801},
  doi = {10.1109/MIC.2005.117},
  abstract = {One technique for securely delivering data in structured overlays is to increase the number of disjoint paths among peers. However, most overlays do not offer a substrate to accommodate multiple paths. The binary-equivalence relationship-based Cyclone methodology is decoupled from the overlay architecture, which insulates it from such limitations. It fortifies existing routing algorithms to defend against data-forwarding attacks.},
  number = {6},
  journaltitle = {IEEE Internet Computing},
  date = {2005-11},
  pages = {50--57},
  keywords = {telecommunication security,security of data,Peer to peer computing,P2P,peer-to-peer computing,routing protocols,Availability,binary-equivalence relationship,Buildings,Concrete,Cyclone methodology,Cyclones,Data security,data-forwarding attack,Insulation,overlay architecture,peer to peer networks,Proposals,Robustness,routing algorithm,Routing protocols,secure multipath overlay,secure routing,structured overlay,redundancy,routing attacks},
  author = {Artigas, M. S. and Lopez, P. G. and Skarmeta, A. F. G.},
  file = {/home/spiollinux/Zotero/storage/2DAKI37C/Artigas et al. - 2005 - A novel methodology for constructing secure multip.pdf;/home/spiollinux/Zotero/storage/87NDUBRF/1541946.html},
  annotation = {\begin{itemize}

\item good ``problem statement'' section describing routing attacks

\end{itemize}}
}

@inproceedings{harvesfEffectReplicaPlacement2006,
  title = {The {{Effect}} of {{Replica Placement}} on {{Routing Robustness}} in {{Distributed Hash Tables}}},
  doi = {10.1109/P2P.2006.44},
  abstract = {To achieve higher efficiency over their unstructured counterparts, structured peer-to-peer systems hold each node responsible for serving a specified set of keys and correctly routing lookups. Unfortunately, malicious participants can abuse these responsibilities to deny access to a set of keys or misroute lookups. We look to address both of these problems through replica placement. Using Chord as an example, we present an equally-spaced replication scheme and prove that it can be tuned to produce any desired number of disjoint routes. To be specific, we prove that d disjoint routes can be produced by placing 2d-1 replicas around a fully populated Chord ring in an equally-spaced fashion. In this situation, we also prove that there exists a route to at least one replica, which contains only uncompromised nodes, even if an attacker controls more than a quarter of the contiguous identifier space in the system. Simulation experiments demonstrate that this scheme performs better than previously proposed replica placement schemes in rings that are sparsely populated, populated in clusters, or populated partially by compromised nodes.},
  eventtitle = {Sixth {{IEEE International Conference}} on {{Peer}}-to-{{Peer Computing}} ({{P2P}}'06)},
  booktitle = {Sixth {{IEEE International Conference}} on {{Peer}}-to-{{Peer Computing}} ({{P2P}}'06)},
  date = {2006-09},
  pages = {57--66},
  keywords = {telecommunication security,security of data,distributed hash table,Peer to peer computing,Routing,Scalability,peer-to-peer computing,Buildings,Robustness,redundancy,routing attacks,Chord ring,Control systems,disjoint route,Distributed computing,Domain Name System,equally-spaced replication scheme,Internet telephony,replica placement,Resilience,routing lookup,routing robustness,structured peer-to-peer system,telecommunication network routing},
  author = {Harvesf, C. and Blough, D. M.},
  file = {/home/spiollinux/Zotero/storage/TBHDFZFI/Harvesf und Blough - 2006 - The Effect of Replica Placement on Routing Robustn.pdf;/home/spiollinux/Zotero/storage/MESX9INF/1698591.html}
}

@inproceedings{borisovComputationalPuzzlesSybil2006,
  langid = {english},
  location = {{Cambridge, UK}},
  title = {Computational {{Puzzles}} as {{Sybil Defenses}}},
  isbn = {978-0-7695-2679-9},
  url = {http://ieeexplore.ieee.org/document/1698607/},
  doi = {10.1109/P2P.2006.10},
  abstract = {We consider the problem of defending against Sybil attacks using computational puzzles. A fundamental difficulty in such defenses is enforcing that puzzle solutions not be reused by attackers over time. We propose a fully decentralized scheme to enforce this by continually distributing locally generated challenges that are then incorporated into the puzzle solutions.},
  eventtitle = {Sixth {{IEEE International Conference}} on {{Peer}}-to-{{Peer Computing}} ({{P2P}}'06)},
  booktitle = {Sixth {{IEEE International Conference}} on {{Peer}}-to-{{Peer Computing}} ({{P2P}}'06)},
  publisher = {{IEEE}},
  urldate = {2019-06-06},
  date = {2006-09},
  pages = {171--176},
  keywords = {Sybil attacks},
  author = {Borisov, N.},
  file = {/home/spiollinux/Zotero/storage/ZTIGQ3YU/Borisov - 2006 - Computational Puzzles as Sybil Defenses.pdf}
}

@inproceedings{DingerHartenstein2006_1000004170,
  langid = {english},
  title = {Defending the {{Sybil Attack}} in {{P2P Networks}}: {{Taxonomy}}, {{Challenges}} and a {{Proposal}} for {{Self}}-{{Registration}}},
  isbn = {0-7695-2567-9},
  doi = {10.1109/ARES.2006.45},
  booktitle = {1st {{International Conference}} on {{Availability}}, {{Reliability}} and {{Security}} ({{ARES}} 2006)},
  venue = {Vienna, Austria},
  eventdate = {2006-04-20/2006-04-22},
  publisher = {{IEEE Computer Society}},
  location = {{Los Alamitos, CA}},
  date = {2006},
  pages = {756--763},
  keywords = {Sybil attacks,Chord},
  author = {Dinger, Jochen and Hartenstein, Hannes},
  file = {/home/spiollinux/Zotero/storage/QS8UNAQJ/Dinger und Hartenstein - 2006 - Defending the Sybil attack in P2P networks taxono.pdf;/home/spiollinux/Zotero/storage/6B76XP4A/1625383.html}
}

@report{ledlieHarvardTechnicalReport2004,
  langid = {english},
  title = {Distributed, Secure Load Balancing with Skew, Heterogeneity, and Churn},
  type = {techreport},
  number = {TR-31-04},
  institution = {{Harvard University}},
  url = {https://people.csail.mit.edu/ledlie/lb/kchoices05-tr.pdf},
  abstract = {Numerous proposals exist for load balancing in peer-to-peer (p2p) networks. Some focus on namespace balancing, making the distance between nodes as uniform as possible. This technique works well under ideal conditions, but not under those found empirically. Instead, researchers have found heavy-tailed query distributions (skew), high rates of node join and leave (churn), and wide variation in node network and storage capacity (heterogeneity). Other approaches tackle these less-than-ideal conditions, but give up on important security properties. We propose an algorithm that both facilitates good performance and does not dilute security. Our algorithm, kChoices, achieves load balance by greedily matching nodes' target workloads with actual applied workloads through limited sampling, and limits any fundamental decrease in security by basing each nodes' set of potential identifiers on a single certificate. Our algorithm compares favorably to four others in trace-driven simulations. We have implemented our algorithm and found that it improved aggregate throughput by 20\% in a widely heterogeneous system in our experiments.},
  urldate = {2019-07-10},
  date = {2004-12},
  author = {Ledlie, J. and Seltzer, M.},
  file = {/home/spiollinux/Zotero/storage/PI7EAURK/Ledlie und Seltzer - 2005 - Distributed, secure load balancing with skew, hete.pdf}
}

@online{DistributedSecureLoad,
  title = {Distributed, {{Secure Load Balancing}} with {{Skew}}, {{Heterogeneity}}, and {{Churn}}},
  url = {https://people.csail.mit.edu/ledlie/lb/},
  urldate = {2019-07-11}
}

@inproceedings{webberBearcaps2019,
  title = {Bearcaps},
  url = {https://github.com/cwebber/rwot9-prague},
  booktitle = {{{RWOT9}} in {{Prague}}, {{The Czech Republic}} ({{September}} 2019)},
  eventtitle = {{{RWOT}} 9},
  venue = {{Prague}},
  urldate = {2019-07-18},
  date = {2019-07-01T00:58:29Z},
  author = {Webber, Christopher Lemmer},
  origdate = {2019-07-01T00:58:26Z}
}

@report{snellActivityStreams,
  langid = {english},
  title = {Activity {{Streams}} 2.0},
  url = {https://www.w3.org/TR/activitystreams-core/},
  institution = {{W3C Social Web Working Group}},
  type = {W3C Recommendation},
  urldate = {2019-07-18},
  date = {2017-05-23},
  author = {Snell, James M. and Prodromou, Evan},
  file = {/home/spiollinux/Zotero/storage/W4YPCF2E/activitystreams-core.html}
}