This analysis is based on a blog post and code by Jessica Peterka-Bonetta.
I made only small customizations to the code.
First, let’s reload our dataset.
# One-time setup, if the package is missing: install.packages("streamR")
library(streamR)

# Relative path to the JSON file holding the streamed tweets.
# Alternative absolute location:
# file <- "~/Dropbox (UNC Charlotte)/summer-2017-social-media-workshop/data/stream/stream_got.json"
file <- "../data/stream/stream_got.json"

# See ?parseTweets for the available arguments.
tweets <- parseTweets(tweets = file)

# Convert the tweet text from UTF-8 to ASCII. With sub = "byte", every byte
# that cannot be converted is written out as its hex code in the form <xx>.
tweets[["text"]] <- iconv(
  tweets[["text"]],
  from = "UTF-8",
  to = "ASCII",
  sub = "byte"
)
Next, let’s create a dictionary for all of the emojis.
library(tidyverse)
library(stringr)
library(Unicode)

# Project-local helpers (presumably where emojis_matching() is defined --
# confirm against emoji_functions.R).
source("./emoji_functions.R")

# The emoji dictionary is read as a semicolon-delimited file.
emoji_file <- "../data/emojis.csv"

# Read the dictionary, add a lower-cased copy of the EN column as
# `description`, and convert the `unicode` column with as.u_char().
emDict <- readr::read_delim(file = emoji_file, delim = ";") %>%
  mutate(
    description = tolower(EN),
    unicode = as.u_char(unicode)
  )
Then, let’s match the emojis in the tweets and count which ones are used most often.
# Count how often each dictionary emoji occurs across all tweets.
# Patterns to search for come from the dictionary's `ftu8` column
# (presumably the byte-encoded form matching the <xx> text encoding --
# confirm against emojis.csv).
matchto <- emDict$ftu8

# Use the lower-cased `description` column built when the dictionary was
# loaded, not the raw `EN` column, so labels have consistent case everywhere.
description <- emDict$description

# emojis_matching() is expected to return one row per match with at least the
# columns `description` and `count`; sum the counts per emoji and sort so the
# most frequent emoji comes first.
rank <- emojis_matching(tweets$text, matchto, description) %>%
  group_by(description) %>%
  summarise(n = sum(count)) %>%
  arrange(desc(n))

head(rank, n = 10)
## # A tibble: 10 x 2
## description n
## <chr> <int>
## 1 face with tears of joy 73
## 2 raising hands 49
## 3 smiling face with heart-eyes 24
## 4 loudly crying face 21
## 5 face screaming in fear 17
## 6 pile of poo 13
## 7 fire 9
## 8 red heart 9
## 9 crown 7
## 10 dragon 7
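We can then do an emoji “sentiment analysis”.
To do this, we use the Emoji Sentiment Ranking site (the URL in the code below), which provides a sentiment score for each emoji; a tweet’s score is then computed from the emojis it contains.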
library(xml2)
library(rvest)

# Reference website holding the emoji sentiment ranking table.
url <- "http://kt.ijs.si/data/Emoji_sentiment_ranking/index.html"

# Download the page, pull its table(s) into a data frame and drop the two
# columns that are not used below.
emojis_raw <- read_html(url) %>%
  html_table() %>%
  data.frame() %>%
  select(-Image.twemoji., -Sentiment.bar.c.i..95..)

# Rename the remaining ten columns.
names(emojis_raw) <- c(
  "char", "unicode", "occurrences", "position", "negative", "neutral",
  "positive", "sentiment_score", "description", "block"
)

# Convert `unicode` with as.u_char() so it can be matched against emDict,
# and lower-case the descriptions.
emojis <- emojis_raw %>%
  mutate(
    unicode = as.u_char(unicode),
    description = tolower(description)
  )

# Join with emDict on `unicode` to pick up the dictionary's encoding columns.
# Both inputs have a `description` column, so merge() suffixes them .x / .y.
emojis_merged <- merge(emojis, emDict, by = "unicode")
# Inputs for the matcher, taken from the merged sentiment/dictionary table.
new_matchto <- emojis_merged[["ftu8"]]
new_description <- emojis_merged[["description.x"]]
sentiment <- emojis_merged[["sentiment_score"]]

# Match emojis in every tweet, weight each match's sentiment score by its
# count, then aggregate per tweet text. `tweet_count` is the number of
# matched rows that fall into each text group.
sentiments <- tweets$text %>%
  emojis_matching(new_matchto, new_description, sentiment) %>%
  mutate(sentiment = count * as.numeric(sentiment)) %>%
  group_by(text) %>%
  summarise(
    sentiment_score = sum(sentiment),
    tweet_count = n()
  ) %>%
  filter(!is.na(sentiment_score)) %>%
  arrange(desc(sentiment_score))

# Ten most positive tweets by summed emoji sentiment.
head(sentiments, n = 10)
## # A tibble: 10 x 3
## text
## <chr>
## 1 RT @LordSnow: Daenerys has finally made it to Westeros <ed><a0><bd><ed><b9>
## 2 RT @ItsGoTQuote: Some of the #GameofThrones cast for Time Magazine <ed><a0>
## 3 <ed><a0><bd><ed><b8><82><ed><a0><bd><ed><b8><82><ed><a0><bd><ed><b8><82><ed
## 4 "RT @francy_flo: TELL THEM QUEEN <ed><a0><bd><ed><b1><91><e2><9d><84><ef><b
## 5 It warms my heart <ed><a0><bd><ed><b2><9c><ed><a0><bd><ed><b2><99><ed><a0><
## 6 <ed><a0><bd><ed><b8><ad><ed><a0><bd><ed><b8><ad><ed><a0><bd><ed><b8><ad><ed
## 7 RT @darth_daisy_: <ed><a0><bd><ed><b8><ad><ed><a0><bd><ed><b8><ad><ed><a0><
## 8 RT @Rawrist: #GameofThrones is getting intense this season. <ed><a0><bd><ed
## 9 "RT @Emilie__Caron: When Arya Stark commits a mass murder: \n\n#GoTS7\n#Gam
## 10 RT @LordSnow: I am a humble king <ed><a0><bd><ed><b8><89> #GameOfThrones ht
## # ... with 2 more variables: sentiment_score <dbl>, tweet_count <int>