Document the functions
This commit is contained in:
parent
7ed975c7ce
commit
93a403182b
18
parser.py
18
parser.py
|
@ -7,11 +7,17 @@ from pandas import DataFrame, read_html, Series
|
|||
|
||||
|
||||
def remove_parenthesis(identifier):
    """
    Strip round brackets from a sequence identifier.

    Each item of ``identifier`` is converted with ``str`` and the pieces
    are concatenated without a separator; every "(" and ")" character is
    then removed from that string and the result is returned.
    """
    joined = "".join(str(part) for part in identifier)
    return sub(r"[()]", "", joined)
|
||||
|
||||
|
||||
def extract_adapters(files) -> Tuple[Series, List]:
|
||||
"""
|
||||
Extract the adapters sequences and statistics from the files
|
||||
"""
|
||||
all_adapters = DataFrame()
|
||||
for entry in files:
|
||||
tables = read_html(entry)
|
||||
|
@ -27,6 +33,9 @@ def extract_adapters(files) -> Tuple[Series, List]:
|
|||
|
||||
|
||||
def preprocess_dataframe(adapters) -> Series:
|
||||
"""
|
||||
Remove empty sequences and duplicates
|
||||
"""
|
||||
na_free_adapters = adapters.dropna(axis=1)
|
||||
stacked_adapters = na_free_adapters.stack()
|
||||
duplicate_free_adapters = stacked_adapters.drop_duplicates()
|
||||
|
@ -34,6 +43,9 @@ def preprocess_dataframe(adapters) -> Series:
|
|||
|
||||
|
||||
def save_to_file(filename, adapters) -> None:
|
||||
"""
|
||||
Save the adapter sequences as a FASTA file
|
||||
"""
|
||||
with open(filename, "w") as f:
|
||||
for index, value in adapters.iteritems():
|
||||
sequence_id = remove_parenthesis(index)
|
||||
|
@ -42,6 +54,9 @@ def save_to_file(filename, adapters) -> None:
|
|||
|
||||
|
||||
def parse_arguments():
|
||||
"""
|
||||
Parse the command-line arguments
|
||||
"""
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("input", help="directory containing the fastqc reports")
|
||||
parser.add_argument("output", help="file where to export the sequences")
|
||||
|
@ -49,6 +64,9 @@ def parse_arguments():
|
|||
|
||||
|
||||
def main():
|
||||
"""
|
||||
Extract the adapters from FASTQC reports to a FASTA file and show sequence length statistics
|
||||
"""
|
||||
args = parse_arguments()
|
||||
file_list = glob(args.input + "/*fastqc.html")
|
||||
adapters, stats = extract_adapters(file_list)
|
||||
|
|
Loading…
Reference in New Issue