Implement HVR sequence alignment

This commit is contained in:
coolneng 2021-03-27 09:36:59 +01:00
parent 3a10380d8c
commit 13f453718d
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
1 changed files with 20 additions and 4 deletions

View File

@ -5,7 +5,8 @@ construct_dataframe <- function(data) {
vdj_string_set <- lapply(data, FUN = Biostrings::DNAStringSet) vdj_string_set <- lapply(data, FUN = Biostrings::DNAStringSet)
vdj_dataframe <- as.data.frame(vdj_string_set) vdj_dataframe <- as.data.frame(vdj_string_set)
vdj_dataframe$hvr_region <- paste(vdj_dataframe$v_sequence, vdj_dataframe$hvr_region <- paste(vdj_dataframe$v_sequence,
vdj_dataframe$d_sequence, vdj_dataframe$j_sequence, vdj_dataframe$d_sequence,
vdj_dataframe$j_sequence,
sep = "" sep = ""
) )
return(vdj_dataframe) return(vdj_dataframe)
@ -18,11 +19,26 @@ parse_data <- function(files) {
vdj_dataframe <- construct_dataframe(vdj_alignment) vdj_dataframe <- construct_dataframe(vdj_alignment)
return(list(sequences, vdj_dataframe)) return(list(sequences, vdj_dataframe))
} }
align_sequence <- function(sequence, vdj_segment) {
return(Biostrings::pairwiseAlignment(
pattern = sequence,
subject = vdj_segment,
type = "global-local",
gapOpening = 1
))
} }
align_sequences <- function(sequences, vdj_segments) { perform_alignment <- function(sequences, vdj_segments) {
sequence_alignment <- mcmapply(sequences,
vdj_segments$hvr_region,
FUN = align_sequence,
mc.cores = 4
)
return(sequence_alignment)
} }
input_files <- c("data/curesim_sequence.fastq", "data/vdj_alignment.csv") input_files <- c("data/curesim_sequence.fastq", "data/vdj_alignment.csv")
data <- parse_data(input_files) data <- parse_data(input_files)
alignment <- perform_alignment(sequences = data[[1]], vdj_segments = data[[2]])
print(alignment)