Generate FASTQ files from the simulated repertoire

This commit is contained in:
coolneng 2021-04-22 13:36:43 +02:00
parent 4adb92e901
commit ca62737884
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
3 changed files with 18 additions and 16 deletions

View File

@ -11,14 +11,11 @@ fi
sequences=$1 sequences=$1
number_of_reads=$2 number_of_reads=$2
read_mean_size=350
read_variance_size=0.0
data_directory="data/" data_directory="data/"
fasta=".fasta"
fastq=".fastq" fastq=".fastq"
filename="sequence" filename="sequence"
prefix="curesim_" prefix="curesim_"
Rscript src/repertoire.r "$sequences" && Rscript src/repertoire.r "$sequences" "$number_of_reads" &&
java -jar tools/CuReSim.jar -n $((number_of_reads * sequences)) -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq" java -jar tools/CuReSim.jar -f "$data_directory$filename$fastq" -o "$data_directory$prefix$filename$fastq"
rm "$data_directory/log.txt" rm "$data_directory/log.txt"

View File

@ -55,7 +55,6 @@ get_hvr_sequences <- function(sequences, vdj_segments) {
df <- fetch_vj_sequences(sequences, vdj_segments) df <- fetch_vj_sequences(sequences, vdj_segments)
v_alignment <- parallel::mcmapply(sequences, df$v_seq, FUN = align_sequence) v_alignment <- parallel::mcmapply(sequences, df$v_seq, FUN = align_sequence)
j_alignment <- parallel::mcmapply(sequences, df$j_seq, FUN = align_sequence) j_alignment <- parallel::mcmapply(sequences, df$j_seq, FUN = align_sequence)
print(v_alignment)
} }
data <- parse_data(file = "data/curesim_sequence.fastq") data <- parse_data(file = "data/curesim_sequence.fastq")

View File

@ -11,26 +11,32 @@ generate_repertoire <- function(number_of_sequences) {
} }
save_data <- function(data) { save_data <- function(data) {
Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta") Biostrings::writeXStringSet(data$sequence,
Biostrings::writeXStringSet(data$junction, "data/HVR.fasta") "data/sequence.fastq",
format = "fastq"
)
Biostrings::writeXStringSet(data$junction, "data/HVR.fastq", format = "fastq")
} }
process_data <- function(data) { process_data <- function(data, reads) {
dna_sequence <- Biostrings::DNAStringSet(data$sequence) dna_sequence <- Biostrings::DNAStringSet(data$sequence)
data$sequence <- Biostrings::reverseComplement(dna_sequence) data$sequence <- Biostrings::reverseComplement(dna_sequence)
names(data$sequence) <- paste(rownames(data), data$v_call, data$j_call, " ") names(data$sequence) <- paste(rownames(data), data$v_call, data$j_call, " ")
return(data) data$junction <- Biostrings::DNAStringSet(data$junction)
names(data$junction) <- rownames(data)
amplified_data <- data[rep(seq_len(nrow(data)), reads), ]
return(amplified_data)
} }
parse_cli_arguments <- function() { parse_cli_arguments <- function() {
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 1) { if (length(args) != 2) {
stop("usage: repertoire.r <number of sequences>") stop("usage: repertoire.r <number of sequences> <number of reads>")
} }
return(args[1]) return(args)
} }
argument <- parse_cli_arguments() args <- parse_cli_arguments()
repertoire <- generate_repertoire(number_of_sequences = as.integer(argument)) repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
data <- process_data(data = repertoire) data <- process_data(data = repertoire, reads = args[2])
save_data(data) save_data(data)