From 66b39485a93f130730cf151ec5d731fc0560e1e5 Mon Sep 17 00:00:00 2001
From: coolneng
Date: Tue, 23 Mar 2021 19:33:32 +0100
Subject: [PATCH] Save vdj alignment sequences to a CSV

---
Review notes (text between the "---" separator and the diffstat is not
part of the commit message):
 * .gitignore: the "*.txt" pattern is replaced by "*.csv".
 * save_data() now takes a single argument; it writes data$sequence to
   data/sequence.fasta and everything except the first element of data
   to data/vdj_alignment.csv.
 * process_data() selects the v/d/j_sequence_alignment columns instead of
   v_call and j_call, and calls save_data() once on the mapply() result.
 * The last hunk only drops the newline at the end of src/repertoire.r.
 * NOTE(review): indentation inside the hunks was restored assuming
   2-space R style; confirm against the target file before applying.

 .gitignore       |  2 +-
 src/repertoire.r | 26 ++++++++++++++------------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index 919f14d..11cc81f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,3 @@
-*.txt
+*.csv
 *.fasta
 *.fastq
diff --git a/src/repertoire.r b/src/repertoire.r
index 0cb40b8..dcf2909 100644
--- a/src/repertoire.r
+++ b/src/repertoire.r
@@ -19,21 +19,23 @@ amplify_rows <- function(data, column, factor) {
   return(rep(data, factor))
 }
 
-save_data <- function(data, name) {
-  if (name == "sequence") {
-    file_name <- paste("data/", name, ".fasta", sep = "")
-    Biostrings::writeXStringSet(data, file_name, format = "fasta")
-  } else {
-    file_name <- paste("data/", name, ".txt", sep = "")
-    cat(data, file = file_name, sep = "\n")
-  }
+save_data <- function(data) {
+  Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta")
+  vdj_sequences <- data[-1]
+  write.csv(vdj_sequences, "data/vdj_alignment.csv", row.names = FALSE)
 }
 
 process_data <- function(repertoire, sequencing_runs) {
-  columns <- c("sequence", "v_call", "j_call")
+  columns <- c(
+    "sequence", "v_sequence_alignment",
+    "d_sequence_alignment", "j_sequence_alignment"
+  )
   data <- repertoire[, columns]
-  amplified_data <- mapply(data, names(data), sequencing_runs, FUN = amplify_rows)
-  invisible(mapply(amplified_data, names(amplified_data), FUN = save_data))
+  amplified_data <- mapply(data, names(data),
+    sequencing_runs,
+    FUN = amplify_rows
+  )
+  save_data(amplified_data)
 }
 
 parse_cli_arguments <- function(args) {
@@ -48,4 +50,4 @@ arguments <- parse_cli_arguments(args)
 number_of_sequences <- as.integer(arguments[1])
 sequencing_runs <- as.integer(arguments[2])
 repertoire <- generate_repertoire(number_of_sequences)
-process_data(repertoire, sequencing_runs)
+process_data(repertoire, sequencing_runs)
\ No newline at end of file