diff --git a/generation.sh b/generation.sh index fa6ae17..7586615 100755 --- a/generation.sh +++ b/generation.sh @@ -19,5 +19,5 @@ fastq=".fastq" filename="sequence" prefix="curesim_" -Rscript src/repertoire.r "$sequences" && java -jar tools/CuReSim.jar -n "$sequencing_runs" -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq" +Rscript src/repertoire.r "$sequences" "$sequencing_runs" && java -jar tools/CuReSim.jar -n "$sequencing_runs" -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq" rm "$data_directory/log.txt" diff --git a/src/repertoire.r b/src/repertoire.r index 60c03c3..c8107e6 100644 --- a/src/repertoire.r +++ b/src/repertoire.r @@ -10,13 +10,14 @@ generate_repertoire <- function(number_of_sequences) { )) } -save_data <- function(data) { +save_data <- function(data, reads) { Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta") vdj_sequences <- data[-1] - write.csv(vdj_sequences, "data/vdj_alignment.csv", row.names = FALSE) + amplified_vdj <- vdj_sequences[rep(seq_len(nrow(vdj_sequences)), reads), ] + write.csv(amplified_vdj, "data/vdj_alignment.csv", row.names = FALSE) } -process_data <- function(repertoire) { +process_data <- function(repertoire, reads) { columns <- c( "sequence", "v_sequence_alignment", "d_sequence_alignment", "j_sequence_alignment" @@ -24,17 +25,17 @@ process_data <- function(repertoire) { data <- repertoire[, columns] dna_sequence <- Biostrings::DNAStringSet(data$sequence) data$sequence <- Biostrings::reverseComplement(dna_sequence) - save_data(data) + save_data(data, reads) } parse_cli_arguments <- function() { args <- commandArgs(trailingOnly = TRUE) - if (length(args) != 1) { - stop("usage: repertoire.r ") + if (length(args) != 2) { + stop("usage: repertoire.r ") } - return(args[1]) + return(c(args[1], args[2])) } args <- parse_cli_arguments() repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1])) -process_data(repertoire) \ No newline at end of file +process_data(repertoire = repertoire, reads = as.integer(args[2])) \ No newline at end of file