Save v_call and j_call to text files

This commit is contained in:
coolneng 2021-03-11 21:03:16 +01:00
parent 7b15df7614
commit 762791829a
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
1 changed file with 23 additions and 12 deletions

View File

@ -10,18 +10,30 @@ generate_repertoire <- function(number_of_sequences) {
)) ))
} }
# TODO save also v_call and j_call amplify_rows <- function(data, column, factor) {
preprocess_data <- function(repertoire, sequencing_runs) { if (column == "sequence") {
sequences <- as.character(repertoire$sequence) dna_string <- Biostrings::DNAStringSet(data)
reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs)) reverse_complement <- Biostrings::reverseComplement(dna_string)
names(reads) <- seq_len(length(reads)) return(Biostrings::DNAStringSet(rep(reverse_complement, factor)))
reverse_complement <- Biostrings::reverseComplement(reads) }
return(reverse_complement) return(as.character(rep(data, factor)))
} }
save_data <- function(repertoire) { save_data <- function(data, name) {
file_name <- "data/sequence.fasta" if (name == "sequence") {
Biostrings::writeXStringSet(repertoire, file_name, format = "fasta") file_name <- paste("data/", name, ".fasta", sep = "")
Biostrings::writeXStringSet(data, file_name, format = "fasta")
} else {
file_name <- paste("data/", name, ".txt", sep = "")
cat(data, file = file_name, sep = "\n")
}
}
process_data <- function(repertoire, sequencing_runs) {
columns <- c("sequence", "v_call", "j_call")
data <- repertoire[, columns]
amplified_data <- mapply(data, names(data), sequencing_runs, FUN = amplify_rows)
invisible(mapply(amplified_data, names(amplified_data), FUN = save_data))
} }
parse_cli_arguments <- function(args) { parse_cli_arguments <- function(args) {
@ -36,5 +48,4 @@ arguments <- parse_cli_arguments(args)
number_of_sequences <- as.integer(arguments[1]) number_of_sequences <- as.integer(arguments[1])
sequencing_runs <- as.integer(arguments[2]) sequencing_runs <- as.integer(arguments[2])
repertoire <- generate_repertoire(number_of_sequences) repertoire <- generate_repertoire(number_of_sequences)
processed_data <- preprocess_data(repertoire, sequencing_runs) process_data(repertoire, sequencing_runs)
save_data(processed_data)