% paper_hashtag_federation/talk-slides.tex
%
% 492 lines
% 15 KiB
% TeX

% $Header$
% use lualatex for compilation
\documentclass[aspectratio=169,navbar=false]{beamer}
% This file is a solution template for:
% - Talk at a conference/colloquium.
% - Talk length is about 20min.
% - Style is ornate.
% Copyright 2004 by Till Tantau <tantau@users.sourceforge.net>.
%
% In principle, this file can be redistributed and/or modified under
% the terms of the GNU Public License, version 2.
%
% However, this file is supposed to be a template to be modified
% for your own needs. For this reason, if you use this file as a
% template and not specifically distribute it as part of a another
% package/program, I grant the extra permission to freely copy and
% modify this file as you see fit and even to delete this copyright
% notice.
\mode<presentation>
{
\usetheme[cd2018,noddc,darktitlepage]{tud}
\usecolortheme{tud}
% or ...
%\setbeamercovered{transparent}
% or whatever (possibly just delete it)
}
% notes on 2nd screen:
\usepackage{pgfpages}
\setbeameroption{show notes on second screen}
\usepackage[british]{babel}
% or whatever
\usepackage[backend=biber, sorting=none]{biblatex}
\usepackage{ccicons}
\usepackage{wrapfig}
\usepackage{ifluatex}
\ifluatex
\usepackage{fontspec}
%\setmainfont{TeX Gyre Pagella}
%\RequirePackage{unicode-math}
%\setmathfont{XITS Math}
%\setmainfont{Open Sans}
%\setsansfont{Open Sans}
%\setmathfont[range={it}]{Open Sans:style=Italic}
%\setmathfont[range={it}]{Open Sans}
\else
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
% Or whatever. Note that the encoding and the font should match. If T1
% does not look nice, try deleting the line with the fontenc.
\fi
\title[Decentralised Hashtag Federation] % (optional, use only with long paper titles)
{Decentralised Hashtag Search and Subscription
for Federated Social Networks}
\author
{Trolli Schmittlauch}
% - Give the names in the same order as they appear in the paper.
% - Use the \inst{?} command only if the authors have different
% affiliation.
\institute[] % (optional, but mostly needed)
{
Department of Computer Science\\
Technical University Dresden
}
\date[APConf 2019] % (optional, should be abbreviation of conference name)
{ActivityPubConf 2019}
\datecity{Prague}
% - Either use conference name or its abbreviation.
% - Not really informative to the audience, more for people (including
% yourself) who are reading the slides online
%\subject{Privacy}
% This is only inserted into the PDF information catalog. Can be left
% out.
% If you have a file called "university-logo-filename.xxx", where xxx
% is a graphic format that can be processed by latex or pdflatex,
% resp., then you can add a logo as follows:
% \pgfdeclareimage[height=0.5cm]{university-logo}{university-logo-filename}
% \logo{\pgfuseimage{university-logo}}
% Delete this, if you do not want the table of contents to pop up at
% the beginning of each subsection:
%\AtBeginSubsection[]
%{
% \begin{frame}<beamer>{Outline}
% \tableofcontents[currentsection,currentsubsection]
% \end{frame}
%}
% If you wish to uncover everything in a step-wise fashion, uncomment
% the following command:
%\beamerdefaultoverlayspecification{<+->}
\begin{document}
\maketitle
\note{introduce myself:\\
known as schmittlauch on the Internet\\
student of Computer Science @ TU Dresden\\
interest in federated systems and unusual social networks\\
presenting my work on a study paper from this year}
\begin{frame}{Outline}
\tableofcontents
% You might wish to add the option [pausesections]
\end{frame}
% Structuring a talk is a difficult task and the following structure
% may not be suitable. Here are some rules that apply for this
% solution:
% - Exactly two or three sections (other than the summary).
% - At *most* three subsections per section.
% - Talk about 30s to 2min per frame. So there should be between about
% 15 and 30 frames, all told.
% - A conference audience is likely to know very little of what you
% are going to talk about. So *simplify*!
% - In a 20min talk, getting the main ideas across is hard
% enough. Leave out details, even if it means being less precise than
% you think necessary.
% - If you omit details that are vital to the proof/implementation,
% just say so once. Everybody will be happy with that.
\section{Motivation}
\begin{frame}{Welcome to ActivityPubConf!}{Motivation}
\only<1>{
\includegraphics[width=\textwidth]{figures/toot_nohashtags.png}
\note{Who has been posting about this Conference?}
}
\only<2>{
\includegraphics[width=\textwidth]{figures/toot_hashtags.png}
\note{And who used \#ActivityPubConf?}
}
\end{frame}
\subsection{Importance of \#Hashtags}
\begin{frame}{Importance of \#Hashtags}{}
Hashtags are used for marking posts about certain topics or events:
\note{mark topics of posts, make them discoverable by content. No full text search in fediverse}
\begin{columns}
\begin{column}{0.47\textwidth}
\begin{itemize}
\item<1-> \textbf{events}: \#ActivityPubConf, \#CCCamp19
\item<2-> \textbf{political topics}: \#SaveTheInternet
\item<3-> \textbf{general topics}: \#mastoadmin, \#Tusky
\item<4-> \textbf{ongoing demonstrations}: \#GeziPark, \#WomensMarch
\item<5-> \textbf{social movements}: \#MeToo
\end{itemize}
\end{column}
\begin{column}{0.5\textwidth}
\begin{overlayarea}{\textwidth}{0.6\paperheight}
\centering
\only<1>{
\includegraphics[width=\textwidth]{figures/APConfLogo.png}
}
\only<2>{
\includegraphics[height=0.58\paperheight]{figures/hashtag_savetheinternet.jpg}\\
\tiny{\href{https://www.flickr.com/photos/8183946@N05/14733648892}{``Obama in the Backseat: Rally to Save the Internet''} by \href{https://www.flickr.com/photos/8183946@N05}{Free Press Pics} is licensed under \href{https://creativecommons.org/licenses/by-sa/2.0/?ref=ccsearch&atype=rich}{CC BY-SA 2.0} \ccbysa}
}
\only<3>{
\includegraphics[height=0.6\paperheight]{figures/Elephant_Friend_(Greeting).png}
}
\only<4>{
\includegraphics[width=\textwidth]{figures/hashtag_gezipark.jpg}
}
\only<5>{
\includegraphics[height=0.6\paperheight]{figures/hashtag_metoo.jpg}\\
\tiny{\href{https://www.flickr.com/photos/50612692@N04/28039368079}{``IMG\_4263''} by \href{https://www.flickr.com/photos/50612692@N04}{GGAADD} is licensed under \href{https://creativecommons.org/licenses/by-sa/2.0/?ref=ccsearch&atype=rich}{CC BY-SA 2.0} \ccbysa}
}
\end{overlayarea}
\end{column}
\end{columns}
\end{frame}
\subsection{State of Hashtags in the Fediverse}
\begin{frame}{State of Hashtags on the Fediverse}{}
{\centering \Large Hashtags are used in the fediverse\par}
\pause
\vspace{2em}
{\large But do they behave as expected?}
\end{frame}
\begin{frame}
\begin{columns}
\begin{column}{0.5\textwidth}
\begin{figure}
\includegraphics[height=0.65\paperheight]{figures/{activitypubconf_toot.matereal.eu}.png}
\caption{\#activitypubconf on the single-user instance \textit{toot.matereal.eu}}
\end{figure}
\end{column}
\begin{column}{0.5\textwidth}
\begin{figure}
\includegraphics[height=0.65\paperheight]{figures/{activitypubconf_mastodon.social}.png}
\caption{\#activitypubconf on the large instance \textit{mastodon.social}}
\end{figure}
\end{column}
\end{columns}
\end{frame}
\begin{frame}{State of Hashtags on the Fediverse}{Fragmentation}
\begin{itemize}
\item fragmented view on hashtag posts depending on user's instance
\item hashtag search only on locally known posts
\item Result: incentive to cluster on large nodes \(\Rightarrow\) centralisation
\end{itemize}
\end{frame}
\begin{frame}{Reason}{Push-Federation}
\begin{columns}
\begin{column}{0.55\textwidth}
\includegraphics[height=0.6\paperheight]{figures/push_federation.pdf}
\end{column}
\begin{column}{0.45\textwidth}
\begin{itemize}
\only<1>{\item subscription to \texttt{@alice@cyber.space} by contacting instance \texttt{cyber.space}}
\only<2>{\item all future posts by alice are delivered to instances of subscribers, but \textit{not} instances without any subscriber}
\only<3>{\item other ways for posts to reach an instance:\\ boosts, thread resolution}
\end{itemize}
\end{column}
\end{columns}
\end{frame}
\begin{frame}{Current Solutions}
\begin{itemize}
\item Mastodon PubRelay or Pleroma lite-pub relay:
\begin{itemize}
\item centralised actor relaying all incoming posts
\item single point of failure, which relay to choose?
\item relaying all incoming posts \(\Rightarrow\) huge load on small instances
\item only access to posts sent after initial subscription
\end{itemize}
\item Diaspora* SocialRelay
\begin{itemize}
\item similar, but allows subscribing to certain tags only
\end{itemize}
\end{itemize}
\end{frame}
\section{System Architecture}
\begin{frame}{System Architecture}{Goals}
\begin{itemize}
\item \textbf{relay \& subscribe}: instances can subscribe to all public posts of a hashtag
\item \textbf{store \& query}: instances can retrieve 1 year of history for a hashtag without subscription
\item fully decentralised, no single point of authority for all tags
\end{itemize}
\end{frame}
\begin{frame}{System Architecture}{adding a DHT backend to the fediverse}
core idea: distribute responsibility for tags among instances using a \textbf{D}istributed \textbf{H}ash \textbf{T}able, \note{distribute responsibility for posts of a hashtag = relaying \& storage}
based on Chord
\note[item]{DHT: structured P2P networks providing efficient (log N) key-value storage and lookup}
\note[item]{self-organising, no central authority}
\end{frame}
\begin{frame}{System Architecture}{adding a DHT backend to the fediverse}
\begin{columns}
\begin{column}{0.4\textwidth}
\begin{itemize}
\item calculate hash value of keys and node IDs
\item place these hashes onto the same circular name space
\item each node keeps routing table of \(\log(\mathit{\#number\_nodes})\) entries\note[item]{joining and leaving covered in paper}
\end{itemize}
\end{column}
\begin{column}{0.6\textwidth}
\includegraphics[height=0.62\paperheight]{figures/finger_table_routing_1.pdf}
\end{column}
\end{columns}
\end{frame}
\begin{frame}{System Architecture}{adding a DHT backend to the fediverse}
\begin{columns}
\begin{column}{0.55\textwidth}
\begin{itemize}
\item next nodeID \(\geq\) \texttt{hash(hashtag)} (mod keyspace size) is responsible for handling posts containing \texttt{hashtag}
\item DHT used for iterative lookup of responsible relay/ storage node
\end{itemize}
\end{column}
\begin{column}{0.45\textwidth}
\includegraphics[height=0.62\paperheight]{figures/finger_table_routing_2.pdf}
\end{column}
\end{columns}
\end{frame}
\begin{frame}{Publishing, Relaying and Storage}{lifecycle of posts}
\begin{enumerate}
\item publishing instance looks up responsible relay instance on DHT for each included hashtag
\item publishing instance sends post to responsible relay instance
\item relay instance looks up responsible storage node on DHT
\item relay instance verifies incoming post's signature, then relays post URI (ID) to all subscribers + storage node\note[item]{only post ID relayed, but not full post content. Reasons: LDSignatures not supported everywhere, deniability \& revocation}
\item subscribing instances can now retrieve the full authenticated post from received post URI
\end{enumerate}
\end{frame}
\begin{frame}{Publishing, Relaying and Storage}
\begin{itemize}
\item separate DHTs for relay and storage instances
\item all actions after DHT lookup supposed to be done using ActivityPub via HTTPS
\item subscription to hashtags/ querying posts is done at the responsible instance
\end{itemize}
\end{frame}
\begin{frame}
\note{so far so easy. But load distribution issues}
\begin{itemize}
\item node ID determines set of hashtags handled by instance
\item problem: for security reasons, nodes \textbf{must not} choose their IDs freely
\item Can instances be overloaded by their assigned hashtag posts?
\end{itemize}
\end{frame}
\begin{frame}{Distribution of Posts per Tag}
\includegraphics[width=0.49\textwidth]{statistics/twitter_hashtags_total.png}
~
\includegraphics[width=0.49\textwidth]{statistics/geraspora_hashtags_total.png}
\note{analysis of a 1 month dump of Twitter, Geraspora (Diaspora) and Friendica posts\\
Twitter: 70\% of hashtags used just once\\
note the logarithmic axis!}
distribution of posts per hashtag follows a steep power law
\note{So what if a small node gets several large hashtags? \(\Rightarrow\) need for load balancing}
\end{frame}
\begin{frame}{Load Balancing}{of hashtags between nodes}
\begin{itemize}
\item \textit{k-choices} algorithm by Ledlie and Seltzer
\item each node can choose from \(\kappa\) possible IDs
\item nodes have a \textbf{capacity} and choose set of active IDs according to lowest mismatch of own and neighbour node capacity
\item querying load of potential IDs before joining, periodic re-balancing
\item independent load balancing of relay and storage nodes due to independent DHTs
\end{itemize}
% for Kolloquium, add simulation result
\end{frame}
\begin{frame}{Redundancy}
\begin{columns}
\begin{column}{0.55\textwidth}
\begin{itemize}
\item store redundant copies of hashtag data at equal distances on Chord ring\note{resilience against node failure, allows data validation through cross-checking}
\item default redundancy: \(2^2 = 4\), scalable in powers of 2
\item \textbf{relay nodes}: hot standby nodes take over in overload situations (load spikes)
\item \textbf{storage nodes}: overloaded nodes can split stored posts by content hash and double redundancy set
\end{itemize}
\end{column}
\begin{column}{0.45\textwidth}
\includegraphics[width=\textwidth]{figures/redundancy_ring.pdf}
\end{column}
\end{columns}
\end{frame}
% TODO: why even still use classic push federation?
\section{Discussion}
\begin{frame}{Discussion}{I need YOUR feedback}
I want feedback from all of you, no matter whether it's from a \textit{\LARGE technical} or from a \textit{\LARGE social perspective}.
\end{frame}
\subsection{Social Considerations}
\subsection{Technical Considerations}
% TODO (technical considerations to cover):
% - CloudFlare
% - load and capacity factor
% - performance: batching, exponential back-off, no relayable sigs
% - security: node ID derivation scheme
\section{Summary}
\begin{frame}{Summary}
% Keep the summary *very short*.
\begin{itemize}
\item
things can be \alert{highlighted}.
\end{itemize}
% The following outlook is optional.
\vskip0pt plus.5fill
\begin{itemize}
\item
Outlook
\end{itemize}
\end{frame}
% All of the following is optional and typically not needed.
\appendix
\section<presentation>*{\appendixname}
\subsection<presentation>*{For Further Reading}
\begin{frame}[allowframebreaks]
\frametitle{References}
%\bibliography{literature}
\end{frame}
\begin{frame}{Complete Paper}
\centering
\includegraphics[height=0.45\paperheight]{figures/qr_paper.png}\\
\url{https://git.orlives.de/schmittlauch/paper_hashtag_federation/src/branch/master/paper_hashtag_federation.pdf}
\end{frame}
\begin{frame}
\centering\huge Thank you for your attention!
%\includegraphics[height=0.5\textheight]{figures/nomnompingu.png}\tiny\footnote{CC-BY-SA 3.0 by Elektroll}
\end{frame}
\end{document}