From 81a57657fecfa3adc55bdcf183f8914225a1614e Mon Sep 17 00:00:00 2001 From: coolneng Date: Mon, 3 May 2021 21:51:32 +0200 Subject: [PATCH] Fix HVR end position computation --- src/alignment.r | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/alignment.r b/src/alignment.r index 83d8eeb..5d9adcc 100644 --- a/src/alignment.r +++ b/src/alignment.r @@ -70,31 +70,32 @@ get_cys_coordinates <- function(alignment) { return(list("start" = cys_start, "end" = cys_end)) } -# TODO Refactor this mess -get_hvr_sequences <- function(sequences, vdj_segments) { +get_hvr_sequences <- function(sequences, vdj_segments, cores = detectCores()) { df <- fetch_vj_sequences(sequences, vdj_segments) - v_alignment <- parallel::mcmapply(sequences, df$v_seq, FUN = align_sequence) - j_alignment <- parallel::mcmapply(sequences, df$j_seq, FUN = align_sequence) + v_alignment <- parallel::mcmapply(sequences, + df$v_seq, + FUN = align_sequence, + mc.cores = cores + ) cys_coordinates <- parallel::mclapply(v_alignment, FUN = get_cys_coordinates) cys_df <- as.data.frame(do.call(rbind, cys_coordinates)) + remaining <- Biostrings::subseq(sequences, start = unlist(cys_df$end)) + j_alignment <- parallel::mcmapply(remaining, + df$j_seq, + FUN = align_sequence, + mc.cores = cores + ) j_start <- parallel::mclapply( j_alignment, - function(x) start(Biostrings::Views(x)) - ) - hvr <- Biostrings::subseq(sequences, - start = unlist(cys_df$start), - end = unlist(j_start) + 2 + function(x) start(Biostrings::Views(x)), + mc.cores = cores ) + hvr_start <- unlist(cys_df$start) + hvr_end <- unlist(cys_df$start) + unlist(j_start) + 2 + hvr <- Biostrings::subseq(sequences, start = hvr_start, end = hvr_end) return(hvr) } -save_data <- function(data) { - Biostrings::writeXStringSet(data, "data/CuReSim-HVR.fastq", format = "fastq") -} - data <- parse_data(file = "data/curesim_sequence.fastq") -hvr <- get_hvr_sequences( - sequences = data[[1]], - vdj_segments = data[[2]] -) +hvr <- get_hvr_sequences(sequences = data[[1]], vdj_segments = data[[2]]) Biostrings::writeXStringSet(hvr, "data/CuReSim-HVR.fastq", format = "fastq") \ No newline at end of file