Generate FASTQ files from the simulated repertoire
This commit is contained in:
parent
4adb92e901
commit
ca62737884
|
@ -11,14 +11,11 @@ fi
|
||||||
|
|
||||||
sequences=$1
|
sequences=$1
|
||||||
number_of_reads=$2
|
number_of_reads=$2
|
||||||
read_mean_size=350
|
|
||||||
read_variance_size=0.0
|
|
||||||
data_directory="data/"
|
data_directory="data/"
|
||||||
fasta=".fasta"
|
|
||||||
fastq=".fastq"
|
fastq=".fastq"
|
||||||
filename="sequence"
|
filename="sequence"
|
||||||
prefix="curesim_"
|
prefix="curesim_"
|
||||||
|
|
||||||
Rscript src/repertoire.r "$sequences" &&
|
Rscript src/repertoire.r "$sequences" "$number_of_reads" &&
|
||||||
java -jar tools/CuReSim.jar -n $((number_of_reads * sequences)) -m "$read_mean_size" -sd "$read_variance_size" -f "$data_directory$filename$fasta" -o "$data_directory$prefix$filename$fastq"
|
java -jar tools/CuReSim.jar -f "$data_directory$filename$fastq" -o "$data_directory$prefix$filename$fastq"
|
||||||
rm "$data_directory/log.txt"
|
rm "$data_directory/log.txt"
|
||||||
|
|
|
@ -55,7 +55,6 @@ get_hvr_sequences <- function(sequences, vdj_segments) {
|
||||||
df <- fetch_vj_sequences(sequences, vdj_segments)
|
df <- fetch_vj_sequences(sequences, vdj_segments)
|
||||||
v_alignment <- parallel::mcmapply(sequences, df$v_seq, FUN = align_sequence)
|
v_alignment <- parallel::mcmapply(sequences, df$v_seq, FUN = align_sequence)
|
||||||
j_alignment <- parallel::mcmapply(sequences, df$j_seq, FUN = align_sequence)
|
j_alignment <- parallel::mcmapply(sequences, df$j_seq, FUN = align_sequence)
|
||||||
print(v_alignment)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
data <- parse_data(file = "data/curesim_sequence.fastq")
|
data <- parse_data(file = "data/curesim_sequence.fastq")
|
||||||
|
|
|
@ -11,26 +11,32 @@ generate_repertoire <- function(number_of_sequences) {
|
||||||
}
|
}
|
||||||
|
|
||||||
save_data <- function(data) {
|
save_data <- function(data) {
|
||||||
Biostrings::writeXStringSet(data$sequence, "data/sequence.fasta")
|
Biostrings::writeXStringSet(data$sequence,
|
||||||
Biostrings::writeXStringSet(data$junction, "data/HVR.fasta")
|
"data/sequence.fastq",
|
||||||
|
format = "fastq"
|
||||||
|
)
|
||||||
|
Biostrings::writeXStringSet(data$junction, "data/HVR.fastq", format = "fastq")
|
||||||
}
|
}
|
||||||
|
|
||||||
process_data <- function(data) {
|
process_data <- function(data, reads) {
|
||||||
dna_sequence <- Biostrings::DNAStringSet(data$sequence)
|
dna_sequence <- Biostrings::DNAStringSet(data$sequence)
|
||||||
data$sequence <- Biostrings::reverseComplement(dna_sequence)
|
data$sequence <- Biostrings::reverseComplement(dna_sequence)
|
||||||
names(data$sequence) <- paste(rownames(data), data$v_call, data$j_call, " ")
|
names(data$sequence) <- paste(rownames(data), data$v_call, data$j_call, " ")
|
||||||
return(data)
|
data$junction <- Biostrings::DNAStringSet(data$junction)
|
||||||
|
names(data$junction) <- rownames(data)
|
||||||
|
amplified_data <- data[rep(seq_len(nrow(data)), reads), ]
|
||||||
|
return(amplified_data)
|
||||||
}
|
}
|
||||||
|
|
||||||
parse_cli_arguments <- function() {
|
parse_cli_arguments <- function() {
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
if (length(args) != 1) {
|
if (length(args) != 2) {
|
||||||
stop("usage: repertoire.r <number of sequences>")
|
stop("usage: repertoire.r <number of sequences> <number of reads>")
|
||||||
}
|
}
|
||||||
return(args[1])
|
return(args)
|
||||||
}
|
}
|
||||||
|
|
||||||
argument <- parse_cli_arguments()
|
args <- parse_cli_arguments()
|
||||||
repertoire <- generate_repertoire(number_of_sequences = as.integer(argument))
|
repertoire <- generate_repertoire(number_of_sequences = as.integer(args[1]))
|
||||||
data <- process_data(data = repertoire)
|
data <- process_data(data = repertoire, reads = args[2])
|
||||||
save_data(data)
|
save_data(data)
|
Loading…
Reference in New Issue