Save output as a FASTA file

This commit is contained in:
coolneng 2021-11-13 17:44:20 +01:00
parent 02a561b4f6
commit 0ed071c69c
Signed by: coolneng
GPG Key ID: 9893DA236405AF57
2 changed files with 16 additions and 5 deletions

View File

View File

@ -1,8 +1,8 @@
from argparse import ArgumentParser
from glob import glob
from typing import List, Union
from typing import List
from pandas import DataFrame, read_html
from pandas import DataFrame, read_html, Series
def find_html_files(path) -> List:
@ -10,17 +10,28 @@ def find_html_files(path) -> List:
return file_list
def extract_adapters(files) -> Series:
    """Collect the ``Sequence`` column of every HTML report and clean it up.

    Args:
        files: Iterable of paths (or anything ``pandas.read_html`` accepts),
            one per HTML report.

    Returns:
        The Series produced by ``preprocess_dataframe`` from the collected
        sequences.

    Raises:
        IndexError: If a report contains fewer than two HTML tables.
        AttributeError: If the second table has no ``Sequence`` column.
    """
    sequence_rows = []
    for entry in files:
        tables = read_html(entry)
        # The sequences are read from the second table of each report.
        # NOTE(review): presumably the report layout guarantees this
        # position -- confirm against the tool that generates the HTML.
        sequence_rows.append(tables[1].Sequence)
    # Build the frame in one go (one row per report). ``DataFrame.append``
    # was removed in pandas 2.0, and appending inside the loop re-copied
    # the whole frame on every iteration.
    all_adapters = DataFrame(sequence_rows)
    return preprocess_dataframe(all_adapters)
def preprocess_dataframe(adapters) -> Series:
    """Flatten a frame of adapter sequences into a Series of unique values.

    Columns containing any missing value are dropped first, the remaining
    cells are stacked into a single Series (indexed by row label and column
    label), and repeated values are removed, keeping the first occurrence.

    Args:
        adapters: DataFrame whose cells hold the sequences.

    Returns:
        Series of the distinct remaining values.
    """
    return adapters.dropna(axis=1).stack().drop_duplicates()
def save_to_file(filename, adapters) -> None:
    """Write the adapters to ``filename`` in FASTA format.

    Each entry becomes a two-line record: a ``>`` header line carrying the
    entry's index label, followed by a line with the sequence itself.

    Args:
        filename: Path of the file to create or overwrite.
        adapters: Series mapping index labels to sequences.
    """
    with open(filename, "w") as f:
        # ``Series.iteritems`` was removed in pandas 2.0; ``items`` is the
        # equivalent available in every pandas version.
        f.writelines(
            f">{index}\n{value}\n" for index, value in adapters.items()
        )
def parse_arguments():