# Day One Code

################################################
# Loading and Processing Moby Dick
################################################
# Read the source file as a character vector, one element per
# newline-separated record.
text_v <- scan("data/plainText/melville.txt", what = "character", sep = "\n")

# Keep only elements 408 through 18576 -- presumably the body of the novel
# with front and back matter trimmed off (TODO confirm against the data file).
# Indexing past the end of text_v would silently produce NA elements, which
# paste() below would turn into literal "NA" tokens that get counted as
# words, so fail loudly if the file is shorter than expected.
if (length(text_v) < 18576L) {
  stop(
    "melville.txt has ", length(text_v),
    " elements; expected at least 18576",
    call. = FALSE
  )
}
novel_v <- text_v[408:18576]

# Collapse everything into one string and lowercase it so that word counts
# are case-insensitive.
novel <- paste(novel_v, collapse = " ")
novel_lower <- tolower(novel)

# Split on every non-word character; strsplit() returns a list, so flatten
# it into a plain character vector of tokens.
moby_words_l <- strsplit(novel_lower, "\\W")
moby_words_v <- unlist(moby_words_l)

# Adjacent separators leave empty-string tokens behind; drop them.
# The removal is guarded because a negative index built from an empty
# which() result selects nothing: x[-integer(0)] would empty the vector
# instead of leaving it untouched.
blanks <- which(moby_words_v == "")
if (length(blanks) > 0) {
  moby_words_v <- moby_words_v[-blanks]
}

# Count each distinct word, order most frequent first, and convert the
# counts to relative frequencies (share of all tokens).
moby_counts <- table(moby_words_v)
sorted_moby_t <- sort(moby_counts, decreasing = TRUE)
moby_freq_t <- sorted_moby_t / length(moby_words_v)

# Print and plot the ten most frequent words.
moby_freq_t[1:10]
plot(moby_freq_t[1:10], type = "l")

################################################
# Loading and Processing Sense and Sensibility
################################################

# Read the source file as a character vector, one element per
# newline-separated record.
austen_v <- scan("data/plainText/austen.txt", what = "character", sep = "\n")

# Keep only elements 17 through 10609 -- presumably the body of the novel
# with front and back matter trimmed off (TODO confirm against the data file).
# Indexing past the end of austen_v would silently produce NA elements, which
# paste() below would turn into literal "NA" tokens that get counted as
# words, so fail loudly if the file is shorter than expected.
if (length(austen_v) < 10609L) {
  stop(
    "austen.txt has ", length(austen_v),
    " elements; expected at least 10609",
    call. = FALSE
  )
}
austen_novel_v <- austen_v[17:10609]

# Collapse everything into one string and lowercase it so that word counts
# are case-insensitive.
novel_a <- paste(austen_novel_v, collapse = " ")
novel_lower_a <- tolower(novel_a)

# Split on every non-word character; strsplit() returns a list, so flatten
# it into a plain character vector of tokens.
austen_words_l <- strsplit(novel_lower_a, "\\W")
austen_words_v <- unlist(austen_words_l)

# Adjacent separators leave empty-string tokens behind; drop them.
# The removal is guarded because a negative index built from an empty
# which() result selects nothing: x[-integer(0)] would empty the vector
# instead of leaving it untouched.
blanks <- which(austen_words_v == "")
if (length(blanks) > 0) {
  austen_words_v <- austen_words_v[-blanks]
}

# Count each distinct word, order most frequent first, and convert the
# counts to relative frequencies (share of all tokens).
austen_counts <- table(austen_words_v)
sorted_austen_t <- sort(austen_counts, decreasing = TRUE)
austen_freq_t <- sorted_austen_t / length(austen_words_v)

# Print and plot the ten most frequent words.
austen_freq_t[1:10]
plot(austen_freq_t[1:10], type = "l")

################################################
# Dispersion Plotting
################################################

# Locate every occurrence of each search word in the Moby Dick token vector.
ahab_positions_v <- which(moby_words_v == "ahab")
whale_positions_v <- which(moby_words_v == "whale")

# Build one 0/1 indicator per word: a zero for every token position, with a
# one wherever the word occurs.
dispersion <- replace(numeric(length(moby_words_v)), whale_positions_v, 1)
dispersion_a <- replace(numeric(length(moby_words_v)), ahab_positions_v, 1)

# Stack the two plots vertically ("whale" on top, "ahab" below). type = "h"
# draws a vertical line at each position, so occurrences show up as spikes.
par(mfrow = c(2, 1))
plot(dispersion, type = "h")
plot(dispersion_a, type = "h")