from re import sub

from pandas import DataFrame, read_html, Series

# Translation table that deletes "(" and ")" in a single pass.
_PARENS_TABLE = str.maketrans("", "", "()")


def remove_parenthesis(identifier) -> str:
    """Return *identifier* flattened to a string with "(" and ")" removed.

    An iterable identifier (for example a tuple label) has its elements
    converted with ``str`` and concatenated without a separator. A plain
    string is used as-is. Any other non-iterable value (for example an
    integer label) is converted with ``str`` instead of raising
    ``TypeError``.
    """
    if isinstance(identifier, str):
        joined = identifier
    else:
        try:
            parts = iter(identifier)
        except TypeError:
            # Scalar label: there is nothing to join, just stringify it.
            joined = str(identifier)
        else:
            joined = "".join(map(str, parts))
    return joined.translate(_PARENS_TABLE)


def save_to_file(filename, adapters) -> None:
    """Write every ``(index, value)`` pair of *adapters* to *filename*.

    Each pair becomes two lines: ``>`` followed by the index with its
    parentheses stripped, then the value. The file is created or
    truncated and written as UTF-8.
    """
    with open(filename, "w", encoding="utf-8") as f:
        # ``Series.iteritems`` was removed in pandas 2.0; ``items`` yields
        # the same (index, value) pairs on old and new versions alike.
        f.writelines(
            f">{remove_parenthesis(index)}\n{value}\n"
            for index, value in adapters.items()
        )