From 8cab01c84032c3d234064fc88fdee6c6291df269 Mon Sep 17 00:00:00 2001 From: Lucky <66523959+l-ucky@users.noreply.github.com> Date: Fri, 1 Sep 2023 23:06:36 -0300 Subject: [PATCH] Removed commented-out code, and author notes. --- scripts/4chan pol ngram Scraper v2.R | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/scripts/4chan pol ngram Scraper v2.R b/scripts/4chan pol ngram Scraper v2.R index 2ecf6cb..f7c1aab 100644 --- a/scripts/4chan pol ngram Scraper v2.R +++ b/scripts/4chan pol ngram Scraper v2.R @@ -111,22 +111,9 @@ threads_tibble <- tibble(txt = threads) tidy_pol <- threads_tibble %>% unnest_tokens(word, txt, format = "text", token = "ngrams", n = 2, to_lower = TRUE) - - - tidy_pol_fixed <- tidy_pol %>% filter(str_detect(word, "([a-z]{3,} [a-z]{3,})")) -# Failures -# tidy_pol_fixed2 <- tidy_pol_fixed %>% -# filter(str_detect(word, "[_]{1,}")) -# tidy_pol_fixed <- tidy_pol %>% -# filter(str_detect(word, "([\\w\\S_])")) -# tidy_pol_fixed <- tidy_pol %>% -# filter(!grepl("[a-z] [a-z]", word)) -# tidy_pol_fixed <- tidy_pol %>% -# filter(str_detect(word, "([\\d-] [\\S-])")) - tidy_pol_fixed_separated <- tidy_pol_fixed %>% separate(word, into = c("word1", "word2"), sep = " ") %>% @@ -517,7 +504,7 @@ tidy_pol_fixed_separated <- tidy_pol_fixed %>% & !grepl('[0-9]', word2)) #### below will replace a word with another word #### -#tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "niggers", "nigger") +tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "niggers", "nigger") tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "jews", "jew") tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "jewish", "jew") tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "woman", "women") @@ -557,7 +544,7 @@ tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "c tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "masks", "mask") tidy_pol_fixed_separated$word1 <- str_replace(tidy_pol_fixed_separated$word1, "threadsstop", "threads stop") -#tidy_pol_fixed_separated$word2 <- str_replace(tidy_pol_fixed_separated$word2, "niggers", "nigger") +tidy_pol_fixed_separated$word2 <- str_replace(tidy_pol_fixed_separated$word2, "niggers", "nigger") tidy_pol_fixed_separated$word2 <- str_replace(tidy_pol_fixed_separated$word2, "jews", "jew") tidy_pol_fixed_separated$word2 <- str_replace(tidy_pol_fixed_separated$word2, "jewish", "jew") tidy_pol_fixed_separated$word2 <- str_replace(tidy_pol_fixed_separated$word2, "woman", "women")