diff --git a/src/alignment.r b/src/alignment.r
index ea6d966..a0a2d57 100644
--- a/src/alignment.r
+++ b/src/alignment.r
@@ -1,12 +1,40 @@
 library(Biostrings)
-library(fastqcr)
+library(parallel)
+
+#' Build the VDJ data frame with a concatenated `hvr_region` column.
+#'
+#' Each element of `data` is converted with `Biostrings::DNAStringSet()`, the
+#' result is coerced with `as.data.frame()`, and the `v_sequence`,
+#' `d_sequence` and `j_sequence` columns are pasted together (no separator)
+#' into a new `hvr_region` column.
+#'
+#' @param data A list-like object (e.g. the data frame from `read.csv()`)
+#'   whose elements `Biostrings::DNAStringSet()` accepts.
+#' @return A data frame with the converted columns plus `hvr_region`.
+construct_dataframe <- function(data) {
+  vdj_string_set <- lapply(data, FUN = Biostrings::DNAStringSet)
+  vdj_dataframe <- as.data.frame(vdj_string_set)
+  vdj_dataframe$hvr_region <- paste0(
+    vdj_dataframe$v_sequence,
+    vdj_dataframe$d_sequence,
+    vdj_dataframe$j_sequence
+  )
+  vdj_dataframe
+}
 
+#' Read the reads and the VDJ alignment table, then prepare both.
+#'
+#' @param files Vector of inputs: `files[1]` is passed to
+#'   `Biostrings::readQualityScaledDNAStringSet()` and `files[2]` to
+#'   `read.csv()`.
+#' @return An unnamed list holding the reverse complement of the reads, then
+#'   the data frame built by `construct_dataframe()`.
 parse_data <- function(files) {
   reversed_sequences <- Biostrings::readQualityScaledDNAStringSet(files[1])
   sequences <- Biostrings::reverseComplement(reversed_sequences)
   vdj_alignment <- read.csv(files[2])
-  vdj_string_set <- lapply(vdj_alignment, FUN = Biostrings::DNAStringSet)
-  return(list(sequences, vdj_string_set))
+  vdj_dataframe <- construct_dataframe(vdj_alignment)
+  list(sequences, vdj_dataframe)
 }
 
 align_sequences <- function(sequences, vdj_segments) {