Filter the relevant columns from immuneSIM output

This commit is contained in:
coolneng 2021-02-26 02:19:40 +01:00
parent 2cece5e53e
commit b035c496f7
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
2 changed files with 18 additions and 0 deletions

View File

@ -6,6 +6,7 @@ mkShell {
buildInputs = [ buildInputs = [
R R
rPackages.immuneSIM rPackages.immuneSIM
rPackages.Biostrings
jdk jdk
# Development tools # Development tools
rPackages.languageserver rPackages.languageserver

View File

@ -1,4 +1,5 @@
library(immuneSIM) library(immuneSIM)
library(Biostrings)
generate_repertoires <- function(number_of_sequences) { generate_repertoires <- function(number_of_sequences) {
a_chain <- immuneSIM( a_chain <- immuneSIM(
@ -18,6 +19,21 @@ generate_repertoires <- function(number_of_sequences) {
return(list("a_chain" = a_chain, "b_chain" = b_chain)) return(list("a_chain" = a_chain, "b_chain" = b_chain))
} }
#' Expand one chain into individual reads and reverse-complement them
#'
#' @param repertoire Table for a single chain; its `sequence` and `counts`
#'   columns are read. In this script it is one element of the list returned
#'   by `generate_repertoires()`.
#' @return The result of `Biostrings::reverseComplement()` applied to a
#'   `DNAStringSet` in which every sequence appears `counts` times and the
#'   reads are named by their running index.
process_chain <- function(repertoire) {
  # Repeat every sequence as many times as its count states.
  expanded <- rep(
    as.character(repertoire$sequence),
    as.integer(repertoire$counts)
  )
  read_set <- Biostrings::DNAStringSet(expanded)
  # One name per read: 1, 2, ..., n (the set has one entry per expanded
  # string, so the index can be taken from the character vector).
  names(read_set) <- seq_along(expanded)
  Biostrings::reverseComplement(read_set)
}
#' Run `process_chain()` over every repertoire in a list
#'
#' @param repertoires List of repertoires; in this script, the list returned
#'   by `generate_repertoires()`.
#' @return A list of the same length carrying the names of `repertoires`,
#'   where each element is the `process_chain()` result for that entry.
preprocess_data <- function(repertoires) {
  stats::setNames(lapply(repertoires, process_chain), names(repertoires))
}
parse_cli_arguments <- function(args) { parse_cli_arguments <- function(args) {
if (length(args) != 1) { if (length(args) != 1) {
@ -29,3 +45,4 @@ parse_cli_arguments <- function(args) {
args <- commandArgs(trailingOnly = TRUE) args <- commandArgs(trailingOnly = TRUE)
number_of_sequences <- parse_cli_arguments(args) number_of_sequences <- parse_cli_arguments(args)
sim_repertoire <- generate_repertoires(number_of_sequences) sim_repertoire <- generate_repertoires(number_of_sequences)
processed_data <- preprocess_data(sim_repertoire)