Amplify VDJ sequences to simplify parsing

This commit is contained in:
coolneng 2021-03-29 22:57:36 +02:00
parent 576597cb04
commit f81e4af94e
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
2 changed files with 10 additions and 9 deletions

View File

@ -19,5 +19,5 @@ fastq=".fastq"
filename="sequence"
prefix="curesim_"
Rscript src/repertoire.r "$sequences" && java -jar tools/CuReSim.jar -n "$sequencing_runs" -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq"
Rscript src/repertoire.r "$sequences" "$sequencing_runs" && java -jar tools/CuReSim.jar -n "$sequencing_runs" -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq"
rm "$data_directory/log.txt"

View File

@ -10,13 +10,14 @@ generate_repertoire <- function(number_of_sequences) {
))
}
save_data <- function(data) {
save_data <- function(data, reads) {
Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta")
vdj_sequences <- data[-1]
write.csv(vdj_sequences, "data/vdj_alignment.csv", row.names = FALSE)
amplified_vdj <- vdj_sequences[rep(seq_len(nrow(vdj_sequences)), reads), ]
write.csv(amplified_vdj, "data/vdj_alignment.csv", row.names = FALSE)
}
process_data <- function(repertoire) {
process_data <- function(repertoire, reads) {
columns <- c(
"sequence", "v_sequence_alignment",
"d_sequence_alignment", "j_sequence_alignment"
@ -24,17 +25,17 @@ process_data <- function(repertoire) {
data <- repertoire[, columns]
dna_sequence <- Biostrings::DNAStringSet(data$sequence)
data$sequence <- Biostrings::reverseComplement(dna_sequence)
save_data(data)
save_data(data, reads)
}
parse_cli_arguments <- function() {
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 1) {
stop("usage: repertoire.r <number of sequences>")
if (length(args) != 2) {
stop("usage: repertoire.r <number of sequences> <sequencing runs>")
}
return(args[1])
return(c(args[1], args[2]))
}
args <- parse_cli_arguments()
repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
process_data(repertoire)
process_data(repertoire = repertoire, reads = as.integer(args[2]))