diff --git a/src/alignment.r b/src/alignment.r index a0a2d57..596c06a 100644 --- a/src/alignment.r +++ b/src/alignment.r @@ -5,7 +5,8 @@ construct_dataframe <- function(data) { vdj_string_set <- lapply(data, FUN = Biostrings::DNAStringSet) vdj_dataframe <- as.data.frame(vdj_string_set) vdj_dataframe$hvr_region <- paste(vdj_dataframe$v_sequence, - vdj_dataframe$d_sequence, vdj_dataframe$j_sequence, + vdj_dataframe$d_sequence, + vdj_dataframe$j_sequence, sep = "" ) return(vdj_dataframe) @@ -18,11 +19,26 @@ parse_data <- function(files) { vdj_dataframe <- construct_dataframe(vdj_alignment) return(list(sequences, vdj_dataframe)) } + +align_sequence <- function(sequence, vdj_segment) { + return(Biostrings::pairwiseAlignment( + pattern = sequence, + subject = vdj_segment, + type = "global-local", + gapOpening = 1 + )) } -align_sequences <- function(sequences, vdj_segments) { - +perform_alignment <- function(sequences, vdj_segments) { + sequence_alignment <- mcmapply(sequences, + vdj_segments$hvr_region, + FUN = align_sequence, + mc.cores = 4 + ) + return(sequence_alignment) } input_files <- c("data/curesim_sequence.fastq", "data/vdj_alignment.csv") -data <- parse_data(input_files) \ No newline at end of file +data <- parse_data(input_files) +alignment <- perform_alignment(sequences = data[[1]], vdj_segments = data[[2]]) +print(alignment) \ No newline at end of file