From 4880da7235ef7d66786ce9b823ac3350b0788959 Mon Sep 17 00:00:00 2001
From: Lucky <66523959+l-ucky@users.noreply.github.com>
Date: Fri, 25 Aug 2023 14:27:46 -0300
Subject: [PATCH] Rewrote entire script to show top positive, and negative changes. See PDF

---
 Reviewer note: this copy of the patch was recovered from a dump in which all
 line breaks and indentation had been collapsed. Records were re-split using
 the diff markers and the hunk line counts (-1,28 +1,31 and -46,30 +49,31).
 Leading indentation and the placement of the seven blank context lines in the
 first hunk are best-effort reconstructions; verify against the pre-image
 (blob a935859) before relying on a clean apply.

 ...e Between Data Frame Observations by Day.R | 56 ++++++++++---------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/Difference Between Data Frame Observations by Day.R b/Difference Between Data Frame Observations by Day.R
index a935859..5d39926 100644
--- a/Difference Between Data Frame Observations by Day.R
+++ b/Difference Between Data Frame Observations by Day.R
@@ -1,28 +1,31 @@
 # Load Libraries
-# For graphing
 library("ggplot2")
+library("tidyverse")
+library("dplyr")
 
 # For the %>% operator, but you can
-# skip loading this library
+# skip loading tidyverse
 # and just use `|>` as
 # a pipe operator.
-library("tidyverse")
+
 
 
 # Note: Other code is below to do an alternative method,
 # but the uncommented method is superior.
 
 
-# Load CSVs using code.
+#load CSVs using code.
 df1 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 24 2023 18:11:19.csv")
 df2 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 25 2023 10:51:42.csv")
 
 # Merge data frame, and take difference b/w day 1 and day 2
 # subtracting data frames from each other.
+# n.x = df1
+# n.y = df2
 df_merged <- merge(df1, df2, by="word", all=TRUE)
 df_merged$result <- df_merged$n.y - df_merged$n.x
 
-# Feel free to add more "non-words," or "noise,"
+# Feel free to add more "non-words," or "noise"
 # to this list as you see fit.
 df_difference_filter <- df_merged %>%
   filter(!word == "de"
@@ -46,30 +49,31 @@ df_difference_filter <- df_merged %>%
          & !word == "bb"
          & !word == "op")
 
-# Bar graph of difference
-df_difference_filter %>%
+# assign NA to Zero
+df_difference_filter$result[is.na(df_difference_filter$result)] <- 0
+
+
+# Get bottom 20 (negative) numbers
+df_bottom <- df_difference_filter %>%
+  top_n(-20)
+
+# Get top 20 (positive) numbers
+df_top <- df_difference_filter %>%
+  top_n(20)
+
+# Bind into new data frame
+df_merged2 <- rbind(df_top, df_bottom)
+
+# bar graph of difference between Day 2, and Day 1.
+df_merged2 %>%
   top_n(40) %>%
   mutate(word = reorder(word, result)) %>%
-  drop_na() %>%
   ggplot(aes(word, result)) +
-  geom_col() +
+  geom_bar(stat = "identity") +
   labs(
-    title = "Difference of Word Count from Day 1 - Day 2",
+    title = "Difference of Word Count from Day 2 - Day 1",
     x = "Words",
-    y = "Count") +
+    y = "Count",
+    caption = "Positive integers = More mentions on day 2
+    Negative integers = Less mentions on day 2.") +
   coord_flip()
-
-
-# Assign CSV to data frame if loading in manually with the GUI/IDE.
-# df1 <- `Aug.24.2023.18:11:19`
-# df2 <- `Aug.25.2023.10:51:42`
-
-# Sort data alphabetically.
-# But I don't need to do that.
-# sort.df1 <- with(df1, df1[order(df1$word) , ])
-# sort.df2 <- with(df2, df2[order(df2$word) , ])
-
-# Take 20,000 rows of count data.
-# But I don't need to do that.
-# df1_ <- sort.df1[1:20000,2]
-# df2_ <- sort.df2[1:20000,2]