From 3a10380d8c461f4bf38655e2008bf55aa648da1a Mon Sep 17 00:00:00 2001
From: coolneng
Date: Thu, 25 Mar 2021 21:53:49 +0100
Subject: [PATCH] Construct a dataframe containing the HVR region

---
Review notes (ignored by git am):
- Removed a stray added "}" after parse_data; the function's original closing
  brace is already present as context, so the extra one made the file
  unparseable.
- paste(..., sep = "") replaced by the equivalent paste0(...).
- Indentation of the R lines was reconstructed as 2 spaces; confirm against
  the target file (or apply with --ignore-whitespace).

 src/alignment.r | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/alignment.r b/src/alignment.r
index ea6d966..a0a2d57 100644
--- a/src/alignment.r
+++ b/src/alignment.r
@@ -1,12 +1,26 @@
 library(Biostrings)
-library(fastqcr)
+library(parallel)
+
+# Convert every column of `data` with Biostrings::DNAStringSet, collect the
+# result in a data frame and add an `hvr_region` column made of the
+# v_sequence, d_sequence and j_sequence columns pasted together row by row.
+construct_dataframe <- function(data) {
+  vdj_string_set <- lapply(data, FUN = Biostrings::DNAStringSet)
+  vdj_dataframe <- as.data.frame(vdj_string_set)
+  vdj_dataframe$hvr_region <- paste0(
+    vdj_dataframe$v_sequence,
+    vdj_dataframe$d_sequence,
+    vdj_dataframe$j_sequence
+  )
+  return(vdj_dataframe)
+}
 
 parse_data <- function(files) {
   reversed_sequences <- Biostrings::readQualityScaledDNAStringSet(files[1])
   sequences <- Biostrings::reverseComplement(reversed_sequences)
   vdj_alignment <- read.csv(files[2])
-  vdj_string_set <- lapply(vdj_alignment, FUN = Biostrings::DNAStringSet)
-  return(list(sequences, vdj_string_set))
+  vdj_dataframe <- construct_dataframe(vdj_alignment)
+  return(list(sequences, vdj_dataframe))
 }
 
 align_sequences <- function(sequences, vdj_segments) {