From 4880da7235ef7d66786ce9b823ac3350b0788959 Mon Sep 17 00:00:00 2001
From: Lucky <66523959+l-ucky@users.noreply.github.com>
Date: Fri, 25 Aug 2023 14:27:46 -0300
Subject: [PATCH] Rewrote entire script to show top positive, and negative
 changes. See PDF

---
 ...e Between Data Frame Observations by Day.R | 56 ++++++++++---------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/Difference Between Data Frame Observations by Day.R b/Difference Between Data Frame Observations by Day.R
index a935859..5d39926 100644
--- a/Difference Between Data Frame Observations by Day.R	
+++ b/Difference Between Data Frame Observations by Day.R	
@@ -1,28 +1,31 @@
 # Load Libraries
-# For graphing
 library("ggplot2")
+library("tidyverse")
+library("dplyr")
 # For the %>% operator, but you can
-# skip loading this library
+# skip loading tidyverse 
 # and just use `|>` as 
 # a pipe operator.
-library("tidyverse")
+
 
 # Note: Other code is below to do an alternative method,
 #       but the uncommented method is superior.
 
-# Load CSVs using code. 
+# Load CSVs using code.
 df1 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 24 2023 18:11:19.csv")
 df2 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 25 2023 10:51:42.csv")
 
 
 # Merge data frame, and take difference b/w day 1 and day 2
 # subtracting data frames from each other.
+# After the merge, n.x holds the count from df1 (day 1)
+# and n.y holds the count from df2 (day 2).
 df_merged <- merge(df1, df2, by="word", all=TRUE)
 df_merged$result <- df_merged$n.y - df_merged$n.x
 
 
 
-# Feel free to add more "non-words," or "noise," 
+# Feel free to add more "non-words," or "noise" 
 # to this list as you see fit.
 df_difference_filter <- df_merged %>% 
   filter(!word == "de"
@@ -46,30 +49,31 @@ df_difference_filter <- df_merged %>%
          & !word == "bb"
          & !word == "op")
 
-# Bar graph of difference
-df_difference_filter %>% 
+# Replace NA results (words that appear in only one of the two days) with zero.
+df_difference_filter$result[is.na(df_difference_filter$result)] <- 0
+
+
+# Get the bottom 20 rows by result (the most negative changes).
+df_bottom <- df_difference_filter %>% 
+  top_n(-20)
+
+# Get the top 20 rows by result (the most positive changes).
+df_top <- df_difference_filter %>% 
+  top_n(20)
+
+# Bind into new data frame
+df_merged2 <- rbind(df_top, df_bottom)
+
+# Bar graph of the difference between day 2 and day 1 (day 2 - day 1).
+df_merged2 %>% 
   top_n(40) %>% 
   mutate(word = reorder(word, result)) %>% 
-  drop_na() %>% 
   ggplot(aes(word, result)) +
-  geom_col() + 
+  geom_bar(stat = "identity") + 
   labs(
-    title = "Difference of Word Count from Day 1 - Day 2",
+    title = "Difference of Word Count from Day 2 - Day 1",
     x = "Words",
-    y = "Count") +
+    y = "Count",
+    caption = "Positive integers = More mentions on day 2
+     Negative integers = Less mentions on day 2.") +
   coord_flip()
-
-
-# Assign CSV to data frame if loading in manually with the GUI/IDE.
-# df1 <- `Aug.24.2023.18:11:19`
-# df2 <- `Aug.25.2023.10:51:42`
-
-# Sort data alphabetically.
-# But I don't need to do that.
-# sort.df1 <- with(df1,  df1[order(df1$word) , ])
-# sort.df2 <- with(df2,  df2[order(df2$word) , ])
-
-# Take 20,000 rows of count data.
-# But I don't need to do that.
-# df1_ <- sort.df1[1:20000,2]
-# df2_ <- sort.df2[1:20000,2]