Rewrote the entire script to show the top positive and negative changes. See the PDF.

2023-08-25 14:27:46 -03:00 · 2023-08-25 14:27:46 -03:00 · 4880da7235
parent 46b9eec0e5
commit 4880da7235
1 changed files with 30 additions and 26 deletions
--- a/Day.R
+++ b/Day.R
@ -1,28 +1,31 @@
 # Load Libraries
-# For graphing
 library("ggplot2")
+library("tidyverse")
+library("dplyr")
 # For the %>% operator, but you can
-# skip loading this library
+# skip loading tidyverse 
 # and just use `|>` as 
 # a pipe operator.
-library("tidyverse")
+

 # Note: Other code is below to do an alternative method,
 #       but the uncommented method is superior.

-# Load CSVs using code. 
+# Load CSVs using code.
 df1 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 24 2023 18:11:19.csv")
 df2 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 25 2023 10:51:42.csv")


 # Merge data frame, and take difference b/w day 1 and day 2
 # subtracting data frames from each other.
+# n.x = word count from df1 (day 1)
+# n.y = word count from df2 (day 2)
 df_merged <- merge(df1, df2, by="word", all=TRUE)
 df_merged$result <- df_merged$n.y - df_merged$n.x



-# Feel free to add more "non-words," or "noise," 
+# Feel free to add more "non-words" or "noise"
 # to this list as you see fit.
 df_difference_filter <- df_merged %>% 
  filter(!word == "de"
@ -46,30 +49,31 @@ df_difference_filter <- df_merged %>%
         & !word == "bb"
         & !word == "op")

-# Bar graph of difference
-df_difference_filter %>% 
+# Replace NA differences (word missing from one of the days) with zero
+df_difference_filter$result[is.na(df_difference_filter$result)] <- 0
+
+
+# Get bottom 20 (negative) numbers
+df_bottom <- df_difference_filter %>% 
+  top_n(-20)
+
+# Get top 20 (positive) numbers
+df_top <- df_difference_filter %>% 
+  top_n(20)
+
+# Bind into new data frame
+df_merged2 <- rbind(df_top, df_bottom)
+
+# Bar graph of the difference between Day 2 and Day 1.
+df_merged2 %>% 
  top_n(40) %>% 
  mutate(word = reorder(word, result)) %>% 
-  drop_na() %>% 
  ggplot(aes(word, result)) +
-  geom_col() + 
+  geom_bar(stat = "identity") + 
  labs(
-    title = "Difference of Word Count from Day 1 - Day 2",
+    title = "Difference of Word Count from Day 2 - Day 1",
    x = "Words",
-    y = "Count") +
+    y = "Count",
+    caption = "Positive integers = More mentions on day 2
+     Negative integers = Less mentions on day 2.") +
  coord_flip()
-
-
-# Assign CSV to data frame if loading in manually with the GUI/IDE.
-# df1 <- `Aug.24.2023.18:11:19`
-# df2 <- `Aug.25.2023.10:51:42`
-
-# Sort data alphabetically.
-# But I don't need to do that.
-# sort.df1 <- with(df1,  df1[order(df1$word) , ])
-# sort.df2 <- with(df2,  df2[order(df2$word) , ])
-
-# Take 20,000 rows of count data.
-# But I don't need to do that.
-# df1_ <- sort.df1[1:20000,2]
-# df2_ <- sort.df2[1:20000,2]