# Word-frequency and dispersion analysis of melville.txt and austen.txt.
################################################
# Loading and Processing Moby Dick
################################################

# Read the text file with one element per line of the file.
text_v <- scan("data/plainText/melville.txt", what = "character", sep = "\n")

# Hard-coded line range selecting the portion of the file to analyse.
# NOTE(review): presumably this trims front/back matter around the novel
# body -- confirm the indices against the actual data file.
novel_v <- text_v[408:18576]

# Collapse the selected lines into one lower-cased string so the text can be
# tokenized in a single pass.
novel <- paste(novel_v, collapse = " ")
novel_lower <- tolower(novel)

# Split on every non-word character. Adjacent delimiters (e.g. ", ") yield
# empty-string tokens, which are removed below.
moby_words_l <- strsplit(novel_lower, "\\W")
moby_words_v <- unlist(moby_words_l)

# Drop the empty tokens. The length guard matters: when there are no blanks,
# `x[-integer(0)]` selects nothing and would silently empty the whole vector.
blanks <- which(moby_words_v == "")
if (length(blanks) > 0) {
  moby_words_v <- moby_words_v[-blanks]
}

# Count each word type, order from most to least frequent, and convert the
# counts to relative frequencies (share of all tokens).
moby_counts <- table(moby_words_v)
sorted_moby_t <- sort(moby_counts, decreasing = TRUE)
moby_freq_t <- sorted_moby_t / length(moby_words_v)

# Show and plot the ten most frequent words.
moby_freq_t[1:10]
plot(moby_freq_t[1:10], type = "l")

################################################
# Loading and Processing S and S
################################################

# Same pipeline as above, applied to the Austen text.
austen_v <- scan("data/plainText/austen.txt", what = "character", sep = "\n")

# Hard-coded line range selecting the portion of the file to analyse.
# NOTE(review): presumably the novel body -- confirm against the data file.
austen_novel_v <- austen_v[17:10609]

novel_a <- paste(austen_novel_v, collapse = " ")
novel_lower_a <- tolower(novel_a)

austen_words_l <- strsplit(novel_lower_a, "\\W")
austen_words_v <- unlist(austen_words_l)

# `blanks` is reused here and now holds the Austen blank positions.
# Guarded for the same reason as above: a negative empty index drops all
# elements.
blanks <- which(austen_words_v == "")
if (length(blanks) > 0) {
  austen_words_v <- austen_words_v[-blanks]
}

austen_counts <- table(austen_words_v)
sorted_austen_t <- sort(austen_counts, decreasing = TRUE)
austen_freq_t <- sorted_austen_t / length(austen_words_v)

# Show and plot the ten most frequent words.
austen_freq_t[1:10]
plot(austen_freq_t[1:10], type = "l")

################################################
# Dispersion Plotting
################################################

# Token positions at which each word occurs in the Moby Dick word vector.
whale_positions_v <- which(moby_words_v == "whale")
ahab_positions_v <- which(moby_words_v == "ahab")

# Indicator vector over every token position: 1 where "whale" occurs, else 0.
dispersion <- rep(0, length(moby_words_v))
dispersion[whale_positions_v] <- 1

# Stack the two dispersion plots vertically (2 rows, 1 column). This layout
# setting is left in place afterwards and affects later plots on the device.
par(mfrow = c(2, 1))
plot(dispersion, type = "h")

# Same indicator vector for "ahab".
dispersion_a <- rep(0, length(moby_words_v))
dispersion_a[ahab_positions_v] <- 1
plot(dispersion_a, type = "h")