diff --git a/src/repertoire.r b/src/repertoire.r
index 3be719e..23ab344 100644
--- a/src/repertoire.r
+++ b/src/repertoire.r
@@ -10,18 +10,58 @@ generate_repertoire <- function(number_of_sequences) {
   ))
 }
 
-# TODO save also v_call and j_call
-preprocess_data <- function(repertoire, sequencing_runs) {
-  sequences <- as.character(repertoire$sequence)
-  reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs))
-  names(reads) <- seq_len(length(reads))
-  reverse_complement <- Biostrings::reverseComplement(reads)
-  return(reverse_complement)
+# Repeat one repertoire column `factor` times.
+#
+# data:   values of a single repertoire column.
+# column: name of that column; "sequence" is handled as DNA.
+# factor: how many times the whole column is repeated.
+#
+# Returns a DNAStringSet of reverse-complemented reads for "sequence" and a
+# character vector for every other column.
+amplify_rows <- function(data, column, factor) {
+  if (column != "sequence") {
+    return(as.character(rep(data, factor)))
+  }
+  # Coerce first so a factor column is read by its labels.
+  dna_string <- Biostrings::DNAStringSet(as.character(data))
+  reverse_complement <- Biostrings::reverseComplement(dna_string)
+  reads <- Biostrings::DNAStringSet(rep(reverse_complement, factor))
+  # Number the reads so each FASTA record is written with a non-empty
+  # header; read i lines up with line i of the v_call/j_call files.
+  names(reads) <- seq_along(reads)
+  reads
 }
 
-save_data <- function(repertoire) {
-  file_name <- "data/sequence.fasta"
-  Biostrings::writeXStringSet(repertoire, file_name, format = "fasta")
+# Write one amplified column to a file under data/.
+#
+# data: DNAStringSet when name is "sequence", otherwise a character vector.
+# name: column name; becomes the file stem.
+#
+# "sequence" goes to data/sequence.fasta in FASTA format; any other column
+# goes to data/<name>.txt with one value per line. Returns the path invisibly.
+save_data <- function(data, name) {
+  is_sequence <- name == "sequence"
+  extension <- if (is_sequence) ".fasta" else ".txt"
+  file_name <- file.path("data", paste0(name, extension))
+  # Create the output directory on first use instead of failing on write.
+  dir.create(dirname(file_name), showWarnings = FALSE, recursive = TRUE)
+  if (is_sequence) {
+    Biostrings::writeXStringSet(data, file_name, format = "fasta")
+  } else {
+    writeLines(as.character(data), file_name)
+  }
+  invisible(file_name)
+}
+
+# Amplify the sequence, v_call and j_call columns of `repertoire`
+# `sequencing_runs` times and save each column to its own file.
+process_data <- function(repertoire, sequencing_runs) {
+  columns <- c("sequence", "v_call", "j_call")
+  data <- repertoire[, columns]
+  # Map() never simplifies its result, so this is always a list named by
+  # column whatever mix of classes amplify_rows() returns.
+  amplified_data <- Map(amplify_rows, data, names(data), sequencing_runs)
+  invisible(Map(save_data, amplified_data, names(amplified_data)))
 }
 
 parse_cli_arguments <- function(args) {
@@ -36,5 +76,4 @@ arguments <- parse_cli_arguments(args)
 number_of_sequences <- as.integer(arguments[1])
 sequencing_runs <- as.integer(arguments[2])
 repertoire <- generate_repertoire(number_of_sequences)
-processed_data <- preprocess_data(repertoire, sequencing_runs)
-save_data(processed_data)
\ No newline at end of file
+process_data(repertoire, sequencing_runs)