From 576597cb0464c4580963c20f70fa80aa1ee66754 Mon Sep 17 00:00:00 2001 From: coolneng Date: Mon, 29 Mar 2021 20:30:16 +0200 Subject: [PATCH] Remove redundant sequencing runs argument --- generation.sh | 3 +-- src/repertoire.r | 37 ++++++++++++------------------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/generation.sh b/generation.sh index 9853ac2..fa6ae17 100755 --- a/generation.sh +++ b/generation.sh @@ -19,6 +19,5 @@ fastq=".fastq" filename="sequence" prefix="curesim_" -Rscript src/repertoire.r "$sequences" "$sequencing_runs" -java -jar tools/CuReSim.jar -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq" +Rscript src/repertoire.r "$sequences" && java -jar tools/CuReSim.jar -n "$sequencing_runs" -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq" rm "$data_directory/log.txt" diff --git a/src/repertoire.r b/src/repertoire.r index dcf2909..60c03c3 100644 --- a/src/repertoire.r +++ b/src/repertoire.r @@ -10,44 +10,31 @@ generate_repertoire <- function(number_of_sequences) { )) } -amplify_rows <- function(data, column, factor) { - if (column == "sequence") { - dna_string <- Biostrings::DNAStringSet(data) - reverse_complement <- Biostrings::reverseComplement(dna_string) - return(rep(reverse_complement, factor)) - } - return(rep(data, factor)) -} - save_data <- function(data) { Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta") vdj_sequences <- data[-1] write.csv(vdj_sequences, "data/vdj_alignment.csv", row.names = FALSE) } -process_data <- function(repertoire, sequencing_runs) { +process_data <- function(repertoire) { columns <- c( "sequence", "v_sequence_alignment", "d_sequence_alignment", "j_sequence_alignment" ) data <- repertoire[, columns] - amplified_data <- mapply(data, names(data), - sequencing_runs, - FUN = amplify_rows - ) - save_data(amplified_data) + dna_sequence <- Biostrings::DNAStringSet(data$sequence) + data$sequence <- Biostrings::reverseComplement(dna_sequence) + save_data(data) } -parse_cli_arguments <- function(args) { - if (length(args) != 2) { - stop("usage: repertoire.r ") +parse_cli_arguments <- function() { + args <- commandArgs(trailingOnly = TRUE) + if (length(args) != 1) { + stop("usage: repertoire.r ") } - return(c(args[1], args[2])) + return(args[1]) } -args <- commandArgs(trailingOnly = TRUE) -arguments <- parse_cli_arguments(args) -number_of_sequences <- as.integer(arguments[1]) -sequencing_runs <- as.integer(arguments[2]) -repertoire <- generate_repertoire(number_of_sequences) -process_data(repertoire, sequencing_runs) \ No newline at end of file +args <- parse_cli_arguments() +repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1])) +process_data(repertoire) \ No newline at end of file