Save output as a FASTA file
This commit is contained in:
parent
02a561b4f6
commit
0ed071c69c
|
@ -1,8 +1,8 @@
|
|||
from argparse import ArgumentParser
|
||||
from glob import glob
|
||||
from typing import List, Union
|
||||
from typing import List
|
||||
|
||||
from pandas import DataFrame, read_html
|
||||
from pandas import DataFrame, read_html, Series
|
||||
|
||||
|
||||
def find_html_files(path) -> List:
|
||||
|
@ -10,17 +10,28 @@ def find_html_files(path) -> List:
|
|||
return file_list
|
||||
|
||||
|
||||
def extract_adapters(files) -> Series:
    """Collect the adapter sequences found in the given HTML files.

    For every entry in ``files`` the HTML is parsed with ``read_html`` and
    the ``Sequence`` column of the second parsed table is taken. Each such
    column becomes one row of an intermediate DataFrame, which is then
    handed to ``preprocess_dataframe``.

    Args:
        files: Iterable of HTML sources accepted by ``pandas.read_html``.

    Returns:
        Whatever ``preprocess_dataframe`` produces for the collected table.
    """
    # Build the frame in one step from the per-file Series: each Series
    # becomes a row labelled with its name, exactly as the former
    # ``DataFrame.append`` loop did. ``DataFrame.append`` no longer exists
    # in pandas >= 2.0 and re-copied the whole frame on every iteration.
    sequence_rows = [read_html(entry)[1].Sequence for entry in files]
    all_adapters = DataFrame(sequence_rows)
    return preprocess_dataframe(all_adapters)
|
||||
|
||||
|
||||
def preprocess_dataframe(adapters) -> Series:
    """Flatten a table of adapter sequences into a Series of unique values.

    Columns that contain any missing value are discarded, the remaining
    cells are stacked into a single Series indexed by the original row and
    column labels, and repeated values are removed (first occurrence kept).

    Args:
        adapters: DataFrame holding the adapter sequences.

    Returns:
        The de-duplicated, stacked Series.
    """
    return adapters.dropna(axis=1).stack().drop_duplicates()
|
||||
|
||||
|
||||
def save_to_file(filename, adapters) -> None:
    """Write the adapters to ``filename`` in FASTA format.

    Every ``(index, value)`` pair of ``adapters`` becomes one record: a
    header line ``>index`` followed by a line holding the value. An existing
    file is overwritten.

    Args:
        filename: Path of the file to create.
        adapters: pandas Series (or any object with a compatible ``items()``)
            mapping record labels to sequences.
    """
    # ``Series.iteritems`` was removed in pandas 2.0; ``items`` is the
    # equivalent spelling available in both old and new releases.
    with open(filename, "w", encoding="utf-8") as fasta_file:
        fasta_file.writelines(
            f">{label}\n{sequence}\n" for label, sequence in adapters.items()
        )
|
||||
|
||||
|
||||
def parse_arguments():
|
||||
|
|
Loading…
Reference in New Issue