Rewrote the entire script to show the top positive and negative changes. See PDF.
This commit is contained in:
parent
46b9eec0e5
commit
4880da7235
|
|
@ -1,28 +1,31 @@
|
|||
# Load Libraries
|
||||
# For graphing
|
||||
library("ggplot2")
|
||||
library("tidyverse")
|
||||
library("dplyr")
|
||||
# For the %>% operator, but you can
|
||||
# skip loading this library
|
||||
# skip loading tidyverse
|
||||
# and just use `|>` as
|
||||
# a pipe operator.
|
||||
library("tidyverse")
|
||||
|
||||
|
||||
# Note: Other code is below to do an alternative method,
|
||||
# but the uncommented method is superior.
|
||||
|
||||
# Load CSVs using code.
|
||||
#load CSVs using code.
|
||||
# Earlier scrape (Aug 24); its count column becomes `n.x` after the merge.
# The script later reads the `word` and `n` columns of this file.
df1 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 24 2023 18:11:19.csv")
|
||||
# Later scrape (Aug 25); its count column becomes `n.y` after the merge.
df2 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 25 2023 10:51:42.csv")
|
||||
|
||||
|
||||
# Merge the two data frames and take the difference between day 1 and day 2 by
|
||||
# subtracting data frames from each other.
|
||||
# n.x = df1
|
||||
# n.y = df2
|
||||
# Full outer join on `word`: all = TRUE keeps words that were seen on only
# one of the two days, leaving the other day's count (n.x or n.y) as NA.
df_merged <- merge(df1, df2, by = "word", all = TRUE)
# A word missing from one day was mentioned 0 times that day. Substitute 0
# before subtracting so that newly appearing or vanished words get a real
# difference instead of NA.
n_day1 <- replace(df_merged$n.x, is.na(df_merged$n.x), 0)
n_day2 <- replace(df_merged$n.y, is.na(df_merged$n.y), 0)
# result = day 2 count minus day 1 count.
df_merged$result <- n_day2 - n_day1
|
||||
|
||||
|
||||
|
||||
# Feel free to add more "non-words," or "noise,"
|
||||
# Feel free to add more "non-words," or "noise"
|
||||
# to this list as you see fit.
|
||||
df_difference_filter <- df_merged %>%
|
||||
filter(!word == "de"
|
||||
|
|
@ -46,30 +49,31 @@ df_difference_filter <- df_merged %>%
|
|||
& !word == "bb"
|
||||
& !word == "op")
|
||||
|
||||
# Bar graph of difference
|
||||
df_difference_filter %>%
|
||||
# Replace any missing (NA) difference with zero.
df_difference_filter$result <- replace(
  df_difference_filter$result,
  is.na(df_difference_filter$result),
  0
)
|
||||
|
||||
|
||||
# Get the 20 rows with the smallest (most negative) `result` values.
# `wt = result` is passed explicitly so the ranking does not silently depend
# on `result` being the last column of the data frame.
df_bottom <- df_difference_filter %>%
  top_n(-20, wt = result)
|
||||
|
||||
# Get the 20 rows with the largest (most positive) `result` values.
# `wt = result` is passed explicitly so the ranking does not silently depend
# on `result` being the last column of the data frame.
df_top <- df_difference_filter %>%
  top_n(20, wt = result)
|
||||
|
||||
# Stack the top and bottom selections into one data frame for plotting.
# With few rows or many tied values the two selections can contain the same
# rows; unique() drops those duplicates so no word is drawn twice.
df_merged2 <- unique(rbind(df_top, df_bottom))
|
||||
|
||||
# Bar graph of the difference between Day 2 and Day 1.
|
||||
df_merged2 %>%
|
||||
top_n(40) %>%
|
||||
mutate(word = reorder(word, result)) %>%
|
||||
drop_na() %>%
|
||||
ggplot(aes(word, result)) +
|
||||
geom_col() +
|
||||
geom_bar(stat = "identity") +
|
||||
labs(
|
||||
title = "Difference of Word Count from Day 1 - Day 2",
|
||||
title = "Difference of Word Count from Day 2 - Day 1",
|
||||
x = "Words",
|
||||
y = "Count") +
|
||||
y = "Count",
|
||||
caption = "Positive integers = More mentions on day 2
|
||||
Negative integers = Less mentions on day 2.") +
|
||||
coord_flip()
|
||||
|
||||
|
||||
# Assign CSV to data frame if loading in manually with the GUI/IDE.
|
||||
# df1 <- `Aug.24.2023.18:11:19`
|
||||
# df2 <- `Aug.25.2023.10:51:42`
|
||||
|
||||
# Sort data alphabetically.
|
||||
# But I don't need to do that.
|
||||
# sort.df1 <- with(df1, df1[order(df1$word) , ])
|
||||
# sort.df2 <- with(df2, df2[order(df2$word) , ])
|
||||
|
||||
# Take 20,000 rows of count data.
|
||||
# But I don't need to do that.
|
||||
# df1_ <- sort.df1[1:20000,2]
|
||||
# df2_ <- sort.df2[1:20000,2]
|
||||
|
|
|
|||
Loading…
Reference in New Issue