diff --git a/statistics/twitter/extract_twitter_hashtags.sh b/statistics/twitter/extract_twitter_hashtags.sh index 10d3c15..4b2edbb 100755 --- a/statistics/twitter/extract_twitter_hashtags.sh +++ b/statistics/twitter/extract_twitter_hashtags.sh @@ -6,7 +6,6 @@ ##################### HISTFILE="./twitter_hashtag_history.csv" -DISTFILE="./twitter_hashtag_numbers.txt" # extract hashtags and time stamp in parallel @@ -15,7 +14,3 @@ echo "$@" | parallel -d ' ' -j+0 --eta "echo \"parsing {}\"; zcat {} |jq -r 'if # merge all temporary history files to one cat tmp_twitter_history_* > ${HISTFILE} # rm "tmp_twitter_history_*" - -# count hashtag occurence -awk -F, '{print tolower($2)}' "${HISTFILE}" | sort | uniq -c > ${DISTFILE} -# let's hope that sort can manage such huge files in memory