From 42aadb1e28eb0791cddfbec05cedf48271dcf640 Mon Sep 17 00:00:00 2001 From: coolneng Date: Sun, 28 Feb 2021 02:23:58 +0100 Subject: [PATCH] Add sequencing runs CLI argument --- generation.sh | 14 ++++++-------- src/repertoire.r | 35 +++++++++++++++-------------------- 2 files changed, 21 insertions(+), 28 deletions(-) diff --git a/generation.sh b/generation.sh index 10a47bc..af20f37 100755 --- a/generation.sh +++ b/generation.sh @@ -1,21 +1,19 @@ #!/bin/sh usage() { - echo "usage: generation.sh " + echo "usage: generation.sh " exit 1 } -if [ $# != 1 ]; then +if [ $# != 2 ]; then usage fi sequences=$1 +sequencing_runs=$2 data_directory="data/" +file="sequence.fastq" prefix="curesim_" -Rscript src/repertoire.r "$sequences" - -for file in "$data_directory"*.fastq; do - file_name=$(echo "$file" | cut -d / -f 2) - java -jar tools/CuReSim.jar -f "$file" -o "$data_directory$prefix$file_name" -done +Rscript src/repertoire.r "$sequences" "$sequencing_runs" +java -jar tools/CuReSim.jar -f "$data_directory$file" -o "$data_directory$prefix$file" diff --git a/src/repertoire.r b/src/repertoire.r index fda052a..0650f87 100644 --- a/src/repertoire.r +++ b/src/repertoire.r @@ -12,37 +12,32 @@ generate_repertoires <- function(number_of_sequences) { return(b_chain) } -process_chain <- function(repertoire) { +# TODO save also v_call and j_call +preprocess_data <- function(repertoire, sequencing_runs) { sequences <- as.character(repertoire$sequence) - counts <- as.integer(repertoire$counts) - reads <- Biostrings::DNAStringSet(rep(sequences, counts)) + reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs)) names(reads) <- seq_len(length(reads)) reverse_complement <- Biostrings::reverseComplement(reads) return(reverse_complement) } -preprocess_data <- function(repertoires) { - filtered_repertoires <- lapply(repertoires, process_chain) - names(filtered_repertoires) <- names(repertoires) - return(filtered_repertoires) -} - -save_data <- function(repertoires) { - for (chain in names(repertoires)) { - file_name <- paste("data/", chain, ".fastq", sep = "") - Biostrings::writeXStringSet(repertoires[[chain]], file_name, format = "fastq") - } +save_data <- function(repertoire) { + file_name <- "data/sequence.fastq" + # TODO Change format to fasta + Biostrings::writeXStringSet(repertoire, file_name, format = "fastq") } parse_cli_arguments <- function(args) { - if (length(args) != 1) { - stop("usage: repertoire.r ") + if (length(args) != 2) { + stop("usage: repertoire.r ") } - return(as.integer(args[1])) + return(c(args[1], args[2])) } args <- commandArgs(trailingOnly = TRUE) -number_of_sequences <- parse_cli_arguments(args) -sim_repertoire <- generate_repertoires(number_of_sequences) -processed_data <- preprocess_data(sim_repertoire) +parameters <- parse_cli_arguments(args) +number_of_sequences <- as.integer(parameters[1]) +sequencing_runs <- as.integer(parameters[2]) +repertoire <- generate_repertoires(number_of_sequences) +processed_data <- preprocess_data(repertoire, sequencing_runs) save_data(processed_data) \ No newline at end of file