From c250c139dde4b9bac2c7ed3b772794d6b169cb50 Mon Sep 17 00:00:00 2001
From: coolneng
Date: Tue, 27 Apr 2021 19:34:01 +0200
Subject: [PATCH] Implement cysteine location in v_alignment

---
 src/alignment.r | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/alignment.r b/src/alignment.r
index 0920b0d..11f4808 100644
--- a/src/alignment.r
+++ b/src/alignment.r
@@ -50,15 +50,36 @@ align_sequence <- function(sequence, vdj_segment) {
   ))
 }
 
+# Find cysteine codons ("TGT" and "TGC") in v_segment.
+#
+# sequence is unused here; it is only accepted because get_hvr_sequences()
+# maps this function over two vectors with mcmapply().
+# v_segment is flattened with toString() and parsed as a DNAString.
+# Returns a data frame built from the intersection of both match sets.
+# NOTE(review): intersect() keeps only what the "TGT" and "TGC" matches
+# share; if either codon should count, a union may be intended -- confirm.
+locate_cysteine <- function(sequence, v_segment) {
+  codons <- Biostrings::DNAStringSet(c("TGT", "TGC"))
+  matches <- Biostrings::matchPDict(
+    pdict = codons,
+    subject = Biostrings::DNAString(toString(v_segment))
+  )
+  position <- as.data.frame(intersect(matches[[1]], matches[[2]]))
+  return(position)
+}
+
 # TODO Extract CDR3
 get_hvr_sequences <- function(sequences, vdj_segments) {
   df <- fetch_vj_sequences(sequences, vdj_segments)
   v_alignment <- parallel::mcmapply(sequences, df$v_seq, FUN = align_sequence)
-  j_alignment <- parallel::mcmapply(sequences, df$j_seq, FUN = align_sequence)
+  hvr_start <- parallel::mcmapply(sequences, v_alignment, FUN = locate_cysteine)
+  hvr_start_df <- as.data.frame(t(hvr_start))
+  # Return visibly; a trailing assignment would only return invisibly
+  return(hvr_start_df)
 }
 
 data <- parse_data(file = "data/curesim_sequence.fastq")
 hvr_sequences <- get_hvr_sequences(
   sequences = data[[1]],
   vdj_segments = data[[2]]
-)
\ No newline at end of file
+)