# File: locigenesis/src/repertoire.r (43 lines, 1.3 KiB, R) -- header copied from the repository web view

library(immuneSIM)
library(Biostrings)
# (commit timestamp from history view: 2021-02-25 20:00:35 +01:00)
#' Simulate an immune repertoire with immuneSIM
#'
#' Calls `immuneSIM()` with `species = "hs"`, `receptor = "tr"` and
#' `chain = "b"` (human T-cell receptor beta chain according to the immuneSIM
#' documentation) and with verbose output switched on.
#'
#' @param number_of_sequences Number of sequences to simulate; forwarded to
#'   `immuneSIM()` as `number_of_seqs`.
#' @return Whatever `immuneSIM()` returns, unmodified. `preprocess_data()` in
#'   this file reads its `sequence` column.
generate_repertoires <- function(number_of_sequences) {
  immuneSIM(
    number_of_seqs = number_of_sequences,
    species = "hs",
    receptor = "tr",
    chain = "b",
    verbose = TRUE
  )
}
# (commit timestamp from history view: 2021-02-28 02:23:58 +01:00)
# TODO save also v_call and j_call
#' Turn a repertoire into a set of named, reverse-complemented reads
#'
#' @param repertoire Object with a `sequence` element/column holding DNA
#'   sequences (anything `as.character()` can convert).
#' @param sequencing_runs How many times the full set of sequences is
#'   repeated; passed to `rep()` as the repeat count.
#' @return The value of `Biostrings::reverseComplement()` applied to a
#'   `DNAStringSet` of the repeated sequences, named "1", "2", ... in order.
preprocess_data <- function(repertoire, sequencing_runs) {
  # `[[` matches the name exactly; `$` would silently fall back to a
  # partially matching column if `sequence` were absent.
  sequences <- as.character(repertoire[["sequence"]])

  # rep() with a single count repeats the whole vector end to end, so the
  # reads come out as run 1 of every sequence, then run 2, and so on.
  reads <- Biostrings::DNAStringSet(rep(sequences, sequencing_runs))
  names(reads) <- seq_along(reads)

  Biostrings::reverseComplement(reads)
}
# (commit timestamp from history view: 2021-02-28 02:23:58 +01:00)
#' Write reads to `data/sequence.fastq`
#'
#' @param repertoire Sequence set to write; handed straight to
#'   `Biostrings::writeXStringSet()`.
#' @return The value of `Biostrings::writeXStringSet()`; the function is
#'   called for its side effect of writing the file.
save_data <- function(repertoire) {
  file_name <- "data/sequence.fastq"
  # Make sure the target directory exists so the write below does not fail
  # on a fresh checkout; a directory that already exists is left alone.
  dir.create(dirname(file_name), showWarnings = FALSE, recursive = TRUE)
  # TODO Change format to fasta
  Biostrings::writeXStringSet(repertoire, file_name, format = "fastq")
}
#' Validate the command-line arguments of the script
#'
#' @param args Vector of command-line arguments, as returned by
#'   `commandArgs(trailingOnly = TRUE)`.
#' @return A length-two vector holding `args[1]` (number of sequences) and
#'   `args[2]` (sequencing runs), unconverted.
#' @details Stops with the usage message when the number of arguments is not
#'   exactly two, or when either argument does not convert to a positive
#'   integer.
parse_cli_arguments <- function(args) {
  usage <- "usage: repertoire.r <number of sequences> <sequencing_runs>"

  if (length(args) != 2) {
    stop(usage, call. = FALSE)
  }

  # The caller converts both values with as.integer(); check here that the
  # conversion will succeed so bad input fails with the usage text instead of
  # surfacing later as NA. The conversion warning is silenced because the NA
  # result is handled explicitly on the next line.
  counts <- suppressWarnings(as.integer(args))
  if (anyNA(counts) || any(counts < 1L)) {
    stop(usage, "\nboth arguments must be positive integers", call. = FALSE)
  }

  c(args[1], args[2])
}
# Script entry point: read the two command-line arguments, simulate the
# repertoire, turn it into reads and write them to disk.
args <- commandArgs(trailingOnly = TRUE)
parameters <- parse_cli_arguments(args)

# parse_cli_arguments() hands the values back unconverted; make them integers
# before passing them on.
number_of_sequences <- as.integer(parameters[1])
sequencing_runs <- as.integer(parameters[2])

repertoire <- generate_repertoires(number_of_sequences)
processed_data <- preprocess_data(repertoire, sequencing_runs)
save_data(processed_data)