From 7ed975c7ce5688901a60667f34e42d4b60dde738 Mon Sep 17 00:00:00 2001
From: coolneng
Date: Wed, 22 Dec 2021 18:36:32 +0100
Subject: [PATCH] Refactor regex parenthesis removal into a function

---
 parser.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/parser.py b/parser.py
index ea46e90..447e43a 100644
--- a/parser.py
+++ b/parser.py
@@ -6,6 +6,9 @@ from re import sub
 
 from pandas import DataFrame, read_html, Series
 
+def remove_parenthesis(identifier):
+    sequence_str = "".join(map(str, identifier))
+    return sub(r"[()]", "", sequence_str)
 
 
 def extract_adapters(files) -> Tuple[Series, List]:
@@ -33,8 +36,7 @@ def preprocess_dataframe(adapters) -> Series:
 def save_to_file(filename, adapters) -> None:
     with open(filename, "w") as f:
         for index, value in adapters.iteritems():
-            sequence_str = "".join(map(str, index))
-            sequence_id = sub(r"[()]", "", sequence_str)
+            sequence_id = remove_parenthesis(index)
             fasta_entry = f">{sequence_id}\n{value}\n"
             f.write(fasta_entry)
 