remove broken hashtag counting from Twitter data dump parsing script

2020-07-09 02:21:31 +02:00 · 2020-07-09 02:21:31 +02:00 · 44e7d8c3cd
commit 44e7d8c3cd
parent 97b0694410
1 changed files with 0 additions and 5 deletions
--- a/statistics/twitter/extract_twitter_hashtags.sh
+++ b/statistics/twitter/extract_twitter_hashtags.sh
@@ -6,7 +6,6 @@
 #####################

 HISTFILE="./twitter_hashtag_history.csv"
-DISTFILE="./twitter_hashtag_numbers.txt"

 # extract hashtags and time stamp in parallel

@@ -15,7 +14,3 @@ echo "$@" | parallel -d ' ' -j+0 --eta "echo \"parsing {}\"; zcat {} |jq -r 'if
 # merge all temporary history files to one
 cat tmp_twitter_history_* > ${HISTFILE}
 # rm "tmp_twitter_history_*"
-
-# count hashtag occurence
-awk -F, '{print tolower($2)}' "${HISTFILE}" | sort | uniq -c > ${DISTFILE}
-# let's hope that sort can manage such huge files in memory