"""Collect the ``Sequence`` column from FastQC HTML reports into a text file."""

import os
from glob import glob
from typing import List

from pandas import DataFrame, concat, read_html


def find_html_files(path: str) -> List[str]:
    """Return the ``*fastqc.html`` files located directly inside *path*.

    Args:
        path: Directory to search (not searched recursively).

    Returns:
        Matching file paths, sorted so that downstream output is
        deterministic regardless of the order the filesystem reports them.
    """
    return sorted(glob(os.path.join(path, "*fastqc.html")))


def extract_adapters(files) -> DataFrame:
    """Gather the ``Sequence`` column of the second table of every report.

    Args:
        files: Iterable of HTML file paths (or anything ``read_html`` accepts).

    Returns:
        A DataFrame with a single ``Sequence`` column holding every
        non-missing sequence, in the order the reports were given. The frame
        is empty when no report contributes a sequence.

    Errors raised by ``read_html`` itself (unreadable file, no tables at all)
    are not caught and propagate to the caller.
    """
    collected = []
    for entry in files:
        tables = read_html(entry)
        # NOTE(review): the sequence table is assumed to be the second table
        # of the report, as in the original code. Reports that lack it
        # (presumably ones with nothing to list) are skipped rather than
        # aborting the whole run with an IndexError/AttributeError.
        if len(tables) < 2 or "Sequence" not in tables[1].columns:
            continue
        collected.append(tables[1]["Sequence"])

    # concat() refuses an empty list, so handle "nothing found" explicitly.
    if not collected:
        return DataFrame(columns=["Sequence"])

    # DataFrame.append was removed in pandas 2.0. Concatenating the columns
    # end to end also means dropna() removes individual missing values
    # instead of discarding an entire report.
    sequences = concat(collected, ignore_index=True).dropna()
    return sequences.reset_index(drop=True).to_frame(name="Sequence")


def save_to_file(filename, adapters) -> None:
    """Write every value of *adapters* to *filename*, one value per line.

    Args:
        filename: Path of the text file to create or overwrite.
        adapters: DataFrame of sequences; values are written in row-major
            order without header or index. Callers are expected to have
            removed missing values already (``extract_adapters`` does).
    """
    values = adapters.to_numpy().ravel()
    with open(filename, "w", encoding="utf-8") as handle:
        handle.writelines(f"{value}\n" for value in values)


def main():
    """Extract sequences from the reports in ``data`` into ``placeholder.txt``."""
    file_list = find_html_files("data")
    adapters = extract_adapters(file_list)
    save_to_file("placeholder.txt", adapters)


if __name__ == "__main__":
    main()