- dropping all history from the university paper in case it contains sensitive data
110 lines
3.5 KiB
Python
Executable file
110 lines
3.5 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
|
|
import numpy as np
|
|
from matplotlib import pyplot as plt, rcParams
|
|
import csv
|
|
import hashlib
|
|
from ipaddress import IPv6Address, AddressValueError
|
|
from collections import deque
|
|
from itertools import chain
|
|
from operator import itemgetter
|
|
|
|
from gmpy2 import mpfr, get_context, cos as gcos, sin as gsin, ceil as gceil
|
|
|
|
# Width of a node ID in bits.
ID_BITS = 256

# mpfr floats are given ID_BITS significant bits because the 256-bit IDs
# are converted to an (mpfr) float before they are divided.
get_context().precision = ID_BITS

# Radius of the circle that represents the namespace.
RAD = 50

# Center of that circle.
C = np.array([RAD + 1, RAD + 1], dtype=object)

# Largest value a node ID can take (all ID_BITS bits set).
MAX_ID = mpfr((1 << ID_BITS) - 1)
|
|
|
|
|
|
def calc_nodeID(hostname: str, ipaddr: str, vserver: int = 0) -> tuple:
    """Derive the 256-bit node ID of a (virtual) server.

    The ID is the concatenation of three byte strings::

        hash(net + vserver_bytes)[bits 0..63]
        hash(servername + vserver_bytes)[bits 0..127]
        hash(net + vserver_bytes)[bits 64..127]

    where ``hash`` is SHAKE-128 with a 16-byte digest, ``net`` is the /64
    network prefix of *ipaddr* and ``servername`` is *hostname* with the
    decimal vserver number appended, encoded with the ``idna`` codec.

    Args:
        hostname: domain name of the instance.
        ipaddr: IPv6 address of the instance in textual form.
        vserver: number of the virtual server on that instance.

    Returns:
        A tuple ``(node_id, ipaddr, hostname)``; ``node_id`` is the ID as a
        non-negative integer smaller than ``2**256``.

    Raises:
        ValueError: if *ipaddr* is not a valid IPv6 address.
    """
    try:
        ip = IPv6Address(ipaddr)
    except AddressValueError as err:
        raise ValueError(f"invalid IP addr {ipaddr}") from err

    # Represent the vserver number in as few bytes as needed; note that
    # vserver 0 therefore becomes the empty byte string.
    vserver_bytes = vserver.to_bytes((vserver.bit_length() + 7) // 8, 'big')

    # The /64 network part is the first 8 bytes of the 16-byte address.
    # (Slicing only 7 bytes would make neighbouring /64 networks collide.)
    net_bytes = int(ip).to_bytes(16, 'big')[:8]

    # The domain name is converted to its canonical punycode representation.
    servername_bytes = (hostname + str(vserver)).encode("idna")

    net_hash = hashlib.shake_128(net_bytes + vserver_bytes).digest(16)
    vservername_hash = hashlib.shake_128(
        servername_bytes + vserver_bytes).digest(16)

    # The name hash is sandwiched between the two halves of the net hash.
    nID = net_hash[:8] + vservername_hash + net_hash[8:]
    return (int.from_bytes(nID, 'big'), ipaddr, hostname)
|
|
|
|
|
|
def calc_histogram(data, bins, ofile, end=None, start=0):
    """Bin *data* by its first element and dump every bin to *ofile*.

    The range ``[start, end)`` is cut into bins of equal width, the width
    being ``(end - start) / bins`` rounded up to a whole number. For each
    bin a header (bin start and entry count) followed by the entries of
    that bin is written to *ofile*.

    Args:
        data: iterable of indexable records; element 0 is the value that is
            binned, elements 1 and 2 are written to the dump alongside it.
        bins: number of bins to divide the range into; must be positive.
        ofile: path of the text file that receives the per-bin dump.
        end: upper limit of the range; defaults to the largest value found
            in *data*.
        start: lower limit of the range.

    Returns:
        A tuple ``(histx, histy)`` of two lists: the center of each bin
        (as int) and the number of entries that fell into it.

    Raises:
        ValueError: if *bins* is not positive.
        IndexError: if *end* is not given and *data* is empty.

    Note:
        Entries whose value is below *start* are not discarded; they stay
        at the head of the queue, so neither they nor any later entry is
        assigned to a bin.
    """
    if bins <= 0:
        # a negative bin count would yield a negative bin width and the
        # loop below would never terminate
        raise ValueError("bins must be a positive number")

    sdata = deque(sorted(data, key=itemgetter(0)))
    # compare against None so that an explicit end of 0 is honoured
    if end is None:
        end = sdata[-1][0]

    binsize = gceil((mpfr(end) - mpfr(start)) / bins)
    histx = []
    histy = []

    # the context manager guarantees the dump is flushed and closed
    with open(ofile, "wt") as histfile:
        while start < end:
            binlimit = start + binsize
            thisbin = []
            # data is sorted, so the bin's members are at the queue's head
            while sdata and start <= sdata[0][0] < binlimit:
                thisbin.append(sdata.popleft())
            count = len(thisbin)

            # plot in the center of the bin
            histx.append(int(start + 0.5 * binsize))
            histy.append(count)

            print("================", start, count, "================",
                  sep="\n", file=histfile)
            for inst in thisbin:
                print(inst[1], inst[2], inst[0], sep=", ", file=histfile)
            print("\n", file=histfile)

            start = binlimit

    return (histx, histy)
|
|
|
|
|
|
def write_gnuplot_histdata(data, filename):
    """Write histogram data as tab-separated ``x<TAB>y`` lines.

    Args:
        data: pair of equally long sequences ``(xs, ys)``, e.g. the return
            value of a histogram calculation.
        filename: path of the text file to (over)write.
    """
    with open(filename, "tw") as datafile:
        datafile.writelines(
            "\t".join((str(x), str(y))) + "\n"
            for x, y in zip(*data)
        )
|
|
|
|
|
|
def histogram_plots():
    """Write the per-bin debug dump of the vserver-0 node ID distribution.

    Reads ``instance_scanner/instance_ips.csv`` (rows of ``hostname, ips``),
    computes the node ID of every instance from the first address in its
    ``ips`` column and bins the IDs into 256 bins between 0 and ``MAX_ID``.
    """
    with open("instance_scanner/instance_ips.csv", newline='') as csvfile:
        # A set is collected because the instance data contains some
        # instances twice -- once with the canonicalized and once with the
        # unicode domain name -- which would produce duplicate IDs.
        unique_ids = set()
        for hn, ips in csv.reader(csvfile):
            # SECURITY NOTE(review): eval() executes whatever expression the
            # CSV column contains. Only feed this trusted scanner output;
            # consider ast.literal_eval if the column is a plain literal.
            first_ip = eval(ips)[0]
            unique_ids.add(calc_nodeID(hn, first_ip))

        # histogram of the distribution of the vserver-0 IDs
        ids_single_vserver = sorted(unique_ids, key=itemgetter(0))
        calc_histogram(
            ids_single_vserver,
            256,
            end=MAX_ID,
            ofile="figures/debug_num_single_vs.dat",
        )
|
|
|
|
|
|
def main():
    """Script entry point: generate the histogram output files."""
    histogram_plots()
|
|
|
|
|
|
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|