Filter the relevant columns from immuneSIM output
This commit is contained in:
parent
2cece5e53e
commit
b035c496f7
|
@ -6,6 +6,7 @@ mkShell {
|
||||||
buildInputs = [
|
buildInputs = [
|
||||||
R
|
R
|
||||||
rPackages.immuneSIM
|
rPackages.immuneSIM
|
||||||
|
rPackages.Biostrings
|
||||||
jdk
|
jdk
|
||||||
# Development tools
|
# Development tools
|
||||||
rPackages.languageserver
|
rPackages.languageserver
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
library(immuneSIM)
|
library(immuneSIM)
|
||||||
|
library(Biostrings)
|
||||||
|
|
||||||
generate_repertoires <- function(number_of_sequences) {
|
generate_repertoires <- function(number_of_sequences) {
|
||||||
a_chain <- immuneSIM(
|
a_chain <- immuneSIM(
|
||||||
|
@ -18,6 +19,21 @@ generate_repertoires <- function(number_of_sequences) {
|
||||||
return(list("a_chain" = a_chain, "b_chain" = b_chain))
|
return(list("a_chain" = a_chain, "b_chain" = b_chain))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#' Expand one simulated chain into individual, reverse-complemented reads
#'
#' Reads the `sequence` and `counts` columns of `repertoire`, repeats each
#' sequence as many times as its count, and returns the reverse complement
#' of the resulting reads.
#'
#' @param repertoire An object exposing `$sequence` and `$counts`
#'   (presumably one chain of immuneSIM output -- confirm against caller).
#' @return The result of `Biostrings::reverseComplement()` applied to a
#'   `DNAStringSet` whose names are the running read index.
process_chain <- function(repertoire) {
  nucleotides <- as.character(repertoire$sequence)
  copies <- as.integer(repertoire$counts)

  # One entry per read: every sequence is repeated `copies` times.
  expanded <- rep(nucleotides, copies)
  read_set <- Biostrings::DNAStringSet(expanded)

  # Label reads 1..n so each one can be identified downstream.
  names(read_set) <- seq_len(length(read_set))

  Biostrings::reverseComplement(read_set)
}
|
||||||
|
|
||||||
|
#' Apply `process_chain()` to every chain of a repertoire list
#'
#' @param repertoires A list of repertoires; each element is handed to
#'   `process_chain()` unchanged.
#' @return A list of the processed chains carrying the same names as
#'   `repertoires`.
preprocess_data <- function(repertoires) {
  processed <- lapply(repertoires, process_chain)

  # Re-apply the input names explicitly so the output is keyed the same way.
  setNames(processed, names(repertoires))
}
|
||||||
|
|
||||||
|
|
||||||
parse_cli_arguments <- function(args) {
|
parse_cli_arguments <- function(args) {
|
||||||
if (length(args) != 1) {
|
if (length(args) != 1) {
|
||||||
|
@ -29,3 +45,4 @@ parse_cli_arguments <- function(args) {
|
||||||
args <- commandArgs(trailingOnly = TRUE)
|
args <- commandArgs(trailingOnly = TRUE)
|
||||||
number_of_sequences <- parse_cli_arguments(args)
|
number_of_sequences <- parse_cli_arguments(args)
|
||||||
sim_repertoire <- generate_repertoires(number_of_sequences)
|
sim_repertoire <- generate_repertoires(number_of_sequences)
|
||||||
|
processed_data <- preprocess_data(sim_repertoire)
|
||||||
|
|
Loading…
Reference in New Issue