From 4e448c2d1a0833e4db982aa1a53f12ab30ad0691 Mon Sep 17 00:00:00 2001 From: Lucky <66523959+l-ucky@users.noreply.github.com> Date: Fri, 25 Aug 2023 13:29:30 -0300 Subject: [PATCH] Create Difference Between Data Frame Observations by Day.R --- ...e Between Data Frame Observations by Day.R | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 Difference Between Data Frame Observations by Day.R diff --git a/Difference Between Data Frame Observations by Day.R b/Difference Between Data Frame Observations by Day.R new file mode 100644 index 0000000..cab8729 --- /dev/null +++ b/Difference Between Data Frame Observations by Day.R @@ -0,0 +1,75 @@ +# Load Libraries +# For graphing +library("ggplot2") +# For the %>% operator, but you can +# skip loading this library +# and just use `|>` as +# a pipe operator. +library("tidyverse") + +# Note: Other code is below to do an alternative method, +# but the uncommented method is superior. + +#load CSVs using code. +df1 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 24 2023 18:11:19.csv") +df2 <- read.csv("~/Documents/Stats/4Chan Scraper/Aug 25 2023 10:51:42.csv") + + +# Merge data frame, and take difference b/w day 1 and day 2 +# subtracting data frames from each other. +df_merged <- merge(df1, df2, by="word", all=TRUE) +df_merged$result <- df_merged$n.y - df_merged$n.x + + + +# Feel free to add more "non-words," or "noise" +# to this list as you see fit. +df_difference_filter <- df_merged %>% + filter(!word == "de" + & !word == "je" + & !word == "een" + & !word == "dat" + & !word == "en" + & !word == "eu" + & !word == "te" + & !word == "tu" + & !word == "niet" + & !word == "van" + & !word == "niet" + & !word == "ik" + & !word == "ze" + & !word == "om" + & !word == "met" + & !word == "uk" + & !word == "qt" + & !word == "wat" + & !word == "bb" + & !word == "op") + +# bar graph of difference +df_difference_filter %>% + top_n(40) %>% + mutate(word = reorder(word, result)) %>% + drop_na() %>% + ggplot(aes(word, result)) + + geom_col() + + labs( + title = "difference of word count from day 1 - day 2", + x = "Words", + y = "Count") + + coord_flip() + + +#Assign CSV to data frame if loading in manually with the GUI/IDE. +# df1 <- `Aug.24.2023.18:11:19` +# df2 <- `Aug.25.2023.10:51:42` + +# Sort data alphabetically. +# But I don't need to do that. +# sort.df1 <- with(df1, df1[order(df1$word) , ]) +# sort.df2 <- with(df2, df2[order(df2$word) , ]) + +# Take 20,000 rows of count data. +# But I don't need to do that. +# df1_ <- sort.df1[1:20000,2] +# df2_ <- sort.df2[1:20000,2]