Compare commits

...

2 Commits

Author SHA1 Message Date
coolneng 83819b296b
Save vj_sequences in a dataframe 2021-04-22 01:18:25 +02:00
coolneng a7c1df5ce2
Refactor get_vj_sequence function 2021-04-22 01:17:35 +02:00
1 changed file with 19 additions and 19 deletions

View File

@@ -11,6 +11,13 @@ parse_data <- function(file) {
return(list(sequences, vj_segments)) return(list(sequences, vj_segments))
} }
parse_metadata <- function(metadata) {
id_elements <- unlist(strsplit(metadata, split = " "))
v_identifier <- id_elements[2]
j_identifier <- id_elements[3]
return(list(v_id = v_identifier, j_id = j_identifier))
}
match_id_sequence <- function(names, vdj_segments, id) { match_id_sequence <- function(names, vdj_segments, id) {
matches <- grep(names, pattern = id) matches <- grep(names, pattern = id)
row <- matches[1] row <- matches[1]
@@ -18,22 +25,20 @@ match_id_sequence <- function(names, vdj_segments, id) {
} }
get_vj_sequence <- function(metadata, names, vdj_segments) { get_vj_sequence <- function(metadata, names, vdj_segments) {
id_elements <- unlist(strsplit(metadata, split = " ")) identifiers <- parse_metadata(metadata)
v_identifier <- id_elements[2] v_sequence <- match_id_sequence(names, vdj_segments, id = identifiers["v_id"])
j_identifier <- id_elements[3] j_sequence <- match_id_sequence(names, vdj_segments, id = identifiers["j_id"])
v_sequence <- match_id_sequence(names, vdj_segments, id = v_identifier) return(list(v_seq = v_sequence, j_seq = j_sequence))
j_sequence <- match_id_sequence(names, vdj_segments, id = j_identifier)
return(c(v_sequence, j_sequence))
} }
fetch_vj_sequences <- function(sequences, vdj_segments) { fetch_vj_sequences <- function(sequences, vdj_segments) {
vj_sequences <- mclapply(names(sequences), vj_sequences <- sapply(names(sequences),
names(vdj_segments), names(vdj_segments),
vdj_segments, vdj_segments,
FUN = get_vj_sequence, FUN = get_vj_sequence
mc.cores = detectCores()
) )
return(c(vj_sequences[1], vj_sequences[2])) results <- data.frame(t(vj_sequences))
return(results)
} }
align_sequence <- function(sequence, vdj_segment) { align_sequence <- function(sequence, vdj_segment) {
@@ -47,15 +52,10 @@ align_sequence <- function(sequence, vdj_segment) {
# TODO Extract CDR3 # TODO Extract CDR3
get_hvr_sequences <- function(sequences, vdj_segments) { get_hvr_sequences <- function(sequences, vdj_segments) {
vj_sequences <- fetch_vj_sequences(sequences, vdj_segments) df <- fetch_vj_sequences(sequences, vdj_segments)
v_alignment <- parallel::mcmapply(sequences, v_alignment <- parallel::mcmapply(sequences, df$v_seq, FUN = align_sequence)
vj_sequences[1], j_alignment <- parallel::mcmapply(sequences, df$j_seq, FUN = align_sequence)
FUN = align_sequence print(v_alignment)
)
j_alignment <- parallel::mcmapply(sequences,
vj_sequences[2],
FUN = align_sequence
)
} }
data <- parse_data(file = "data/curesim_sequence.fastq") data <- parse_data(file = "data/curesim_sequence.fastq")