plot_tfidf_ngrams.Rd
Plot the n-grams with the highest TF-IDFs
plot_tfidf_ngrams(tfidf_ngrams, title = NULL)
Argument | Description
---|---
tfidf_ngrams | A data frame from `calc_tfidf_ngrams()`.
title | Optional plot title. Defaults to `NULL`.
A ggplot (`ggplot2::geom_col`).
library(experienceAnalysis)

# Jane Austen books; `book` identifies the novel each line of `text` belongs to
books <- janeaustenr::austen_books()

# Collapse each book's lines into a single string. Only the `text` column is
# pasted: pasting whole data-frame rows would also deparse the `book` factor
# into its integer codes, injecting a meaningless "4 4" bigram into Emma that
# would otherwise have to be filtered out before plotting.
emma <- paste(books$text[books$book == "Emma"], collapse = " ")
pp <- paste(books$text[books$book == "Pride & Prejudice"], collapse = " ")

# Make data frame with books Emma and Pride & Prejudice
x <- data.frame(
  text = c(emma, pp),
  book = c("Emma", "Pride & Prejudice")
)

# Compute the top bigrams for Emma and plot them
calc_tfidf_ngrams(
  x,
  target_col_name = "book",
  text_col_name = "text",
  filter_class = "Emma",
  ngrams_type = "Bigrams",
  number_of_ngrams = 5
) %>%
  plot_tfidf_ngrams(title = "Bigrams with highest TF-IDFs in Emma")