Construct a dataframe containing the HVR region

This commit is contained in:
coolneng 2021-03-25 21:53:49 +01:00
parent 8f5b9ee698
commit 3a10380d8c
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
1 changed file with 14 additions and 3 deletions

View File

@@ -1,12 +1,23 @@
library(Biostrings) library(Biostrings)
library(fastqcr) library(parallel)
# Build a data frame from the VDJ alignment data and append the HVR region.
#
# Every element of `data` is converted with Biostrings::DNAStringSet, the
# resulting list is coerced to a data frame, and an `hvr_region` column is
# added that holds the `v_sequence`, `d_sequence` and `j_sequence` columns
# concatenated element-wise with no separator.
#
# Returns the data frame including the new `hvr_region` column.
construct_dataframe <- function(data) {
  string_sets <- lapply(data, FUN = Biostrings::DNAStringSet)
  result <- as.data.frame(string_sets)
  v_part <- result$v_sequence
  d_part <- result$d_sequence
  j_part <- result$j_sequence
  # paste0() joins the three segments element-wise without a separator
  result$hvr_region <- paste0(v_part, d_part, j_part)
  result
}
parse_data <- function(files) { parse_data <- function(files) {
reversed_sequences <- Biostrings::readQualityScaledDNAStringSet(files[1]) reversed_sequences <- Biostrings::readQualityScaledDNAStringSet(files[1])
sequences <- Biostrings::reverseComplement(reversed_sequences) sequences <- Biostrings::reverseComplement(reversed_sequences)
vdj_alignment <- read.csv(files[2]) vdj_alignment <- read.csv(files[2])
vdj_string_set <- lapply(vdj_alignment, FUN = Biostrings::DNAStringSet) vdj_dataframe <- construct_dataframe(vdj_alignment)
return(list(sequences, vdj_string_set)) return(list(sequences, vdj_dataframe))
}
} }
align_sequences <- function(sequences, vdj_segments) { align_sequences <- function(sequences, vdj_segments) {