Compare commits

...

2 Commits

Author SHA1 Message Date
coolneng 7ed975c7ce
Refactor regex parenthesis removal into a function 2021-12-22 18:36:32 +01:00
coolneng fa23b1a950
Remove find_html_files function 2021-12-22 18:36:13 +01:00
1 changed file with 5 additions and 6 deletions

View File

@@ -6,9 +6,9 @@ from re import sub
from pandas import DataFrame, read_html, Series from pandas import DataFrame, read_html, Series
def find_html_files(path) -> List: def remove_parenthesis(identifier):
file_list = glob(path + "/*fastqc.html") sequence_str = "".join(map(str, identifier))
return file_list return sub(r"[()]", "", sequence_str)
def extract_adapters(files) -> Tuple[Series, List]: def extract_adapters(files) -> Tuple[Series, List]:
@@ -36,8 +36,7 @@ def preprocess_dataframe(adapters) -> Series:
def save_to_file(filename, adapters) -> None: def save_to_file(filename, adapters) -> None:
with open(filename, "w") as f: with open(filename, "w") as f:
for index, value in adapters.iteritems(): for index, value in adapters.iteritems():
sequence_str = "".join(map(str, index)) sequence_id = remove_parenthesis(index)
sequence_id = sub(r"[()]", "", sequence_str)
fasta_entry = f">{sequence_id}\n{value}\n" fasta_entry = f">{sequence_id}\n{value}\n"
f.write(fasta_entry) f.write(fasta_entry)
@@ -51,7 +50,7 @@ def parse_arguments():
def main(): def main():
args = parse_arguments() args = parse_arguments()
file_list = find_html_files(args.input) file_list = glob(args.input + "/*fastqc.html")
adapters, stats = extract_adapters(file_list) adapters, stats = extract_adapters(file_list)
save_to_file(args.output, adapters) save_to_file(args.output, adapters)
print( print(