Module monodikit.analysis.synopsis
Expand source code
import subprocess
class Synopsis:
@staticmethod
def run_mafft(input_data):
"""
Runs the MAFFT multiple sequence alignment tool on the given input data.
Args:
input_data (dict): Dictionary with sequence names as keys and sequences as values.
Returns:
list: List of dictionaries containing sequence names and aligned sequences.
"""
input_content = Synopsis.parse_mafft_input(input_data)
with open("temp.fasta", "w") as f:
f.write(input_content)
command = f"mafft --auto --anysymbol temp.fasta"
out = subprocess.run(command, shell=True, capture_output=True)
return Synopsis.parse_mafft_output(str(out.stdout))
@staticmethod
def parse_mafft_input(input_data):
"""
Parses the input data into the format expected by MAFFT.
Args:
input_data (dict): Dictionary with sequence names as keys and sequences as values.
Returns:
str: MAFFT-compatible formatted input.
"""
output = ""
for key in input_data:
output += f"> {key}\n"
output += f"{input_data[key]}\n"
return output
@staticmethod
def parse_mafft_output(mafft_output):
"""
Parses the MAFFT output into a list of aligned sequences with their names.
Args:
mafft_output (str): MAFFT output as a string.
Returns:
list: List of dictionaries containing sequence names and aligned sequences.
"""
sequences = []
blocks = mafft_output.strip().split('>')[1:]
print("BLOCKS", blocks)
for block in blocks:
lines = block.strip().split('\\n')
name = lines[0].strip().split()[0]
sequence = ''.join(line.strip() for line in lines[1:])
sequences.append({'name': name, 'sequence': sequence})
return sequences
@staticmethod
def visualize_alignments(alignments, highlight=None, offset=0):
"""
Generates an HTML table to visualize sequence alignments.
Args:
alignments (list): List of dictionaries containing sequence names and aligned sequences.
highlight (list): List of lists specifying positions to highlight for each sequence. The result object of a search can be used.
offset (int): Offset for highlighted positions.
Returns:
str: HTML formatted table for visualization.
"""
html_output = "<table border='0'>"
html_output += "<tr><th>Name</th><th>Sequence</th></tr>"
for i, alignment in enumerate(alignments):
name = alignment['name']
sequence = ""
if highlight:
highlight_offset = [n + offset for n in highlight[i]]
char_count = 0
for char in alignment['sequence']:
if char == "-":
sequence += "-"
else:
if char_count in highlight_offset:
sequence += f"<span style='color: red'>{char}</span>"
else:
sequence += char
char_count += 1
else:
sequence = ",".join(list(alignment['sequence']))
html_output += f'<tr><td>{name}</td><td style="text-align: left; font-family: Volpiano; font-size:3em; white-space: nowrap;">{sequence}</td></tr>'
html_output += "</table>"
return html_output
Classes
class Synopsis
-
Expand source code
class Synopsis: @staticmethod def run_mafft(input_data): """ Runs the MAFFT multiple sequence alignment tool on the given input data. Args: input_data (dict): Dictionary with sequence names as keys and sequences as values. Returns: list: List of dictionaries containing sequence names and aligned sequences. """ input_content = Synopsis.parse_mafft_input(input_data) with open("temp.fasta", "w") as f: f.write(input_content) command = f"mafft --auto --anysymbol temp.fasta" out = subprocess.run(command, shell=True, capture_output=True) return Synopsis.parse_mafft_output(str(out.stdout)) @staticmethod def parse_mafft_input(input_data): """ Parses the input data into the format expected by MAFFT. Args: input_data (dict): Dictionary with sequence names as keys and sequences as values. Returns: str: MAFFT-compatible formatted input. """ output = "" for key in input_data: output += f"> {key}\n" output += f"{input_data[key]}\n" return output @staticmethod def parse_mafft_output(mafft_output): """ Parses the MAFFT output into a list of aligned sequences with their names. Args: mafft_output (str): MAFFT output as a string. Returns: list: List of dictionaries containing sequence names and aligned sequences. """ sequences = [] blocks = mafft_output.strip().split('>')[1:] print("BLOCKS", blocks) for block in blocks: lines = block.strip().split('\\n') name = lines[0].strip().split()[0] sequence = ''.join(line.strip() for line in lines[1:]) sequences.append({'name': name, 'sequence': sequence}) return sequences @staticmethod def visualize_alignments(alignments, highlight=None, offset=0): """ Generates an HTML table to visualize sequence alignments. Args: alignments (list): List of dictionaries containing sequence names and aligned sequences. highlight (list): List of lists specifying positions to highlight for each sequence. The result object of a search can be used. offset (int): Offset for highlighted positions. Returns: str: HTML formatted table for visualization. """ html_output = "<table border='0'>" html_output += "<tr><th>Name</th><th>Sequence</th></tr>" for i, alignment in enumerate(alignments): name = alignment['name'] sequence = "" if highlight: highlight_offset = [n + offset for n in highlight[i]] char_count = 0 for char in alignment['sequence']: if char == "-": sequence += "-" else: if char_count in highlight_offset: sequence += f"<span style='color: red'>{char}</span>" else: sequence += char char_count += 1 else: sequence = ",".join(list(alignment['sequence'])) html_output += f'<tr><td>{name}</td><td style="text-align: left; font-family: Volpiano; font-size:3em; white-space: nowrap;">{sequence}</td></tr>' html_output += "</table>" return html_output
Static methods
def parse_mafft_input(input_data)
-
Parses the input data into the format expected by MAFFT.
Args
input_data
:dict
- Dictionary with sequence names as keys and sequences as values.
Returns
str
- MAFFT-compatible formatted input.
Expand source code
@staticmethod def parse_mafft_input(input_data): """ Parses the input data into the format expected by MAFFT. Args: input_data (dict): Dictionary with sequence names as keys and sequences as values. Returns: str: MAFFT-compatible formatted input. """ output = "" for key in input_data: output += f"> {key}\n" output += f"{input_data[key]}\n" return output
def parse_mafft_output(mafft_output)
-
Parses the MAFFT output into a list of aligned sequences with their names.
Args
mafft_output
:str
- MAFFT output as a string.
Returns
list
- List of dictionaries containing sequence names and aligned sequences.
Expand source code
@staticmethod def parse_mafft_output(mafft_output): """ Parses the MAFFT output into a list of aligned sequences with their names. Args: mafft_output (str): MAFFT output as a string. Returns: list: List of dictionaries containing sequence names and aligned sequences. """ sequences = [] blocks = mafft_output.strip().split('>')[1:] print("BLOCKS", blocks) for block in blocks: lines = block.strip().split('\\n') name = lines[0].strip().split()[0] sequence = ''.join(line.strip() for line in lines[1:]) sequences.append({'name': name, 'sequence': sequence}) return sequences
def run_mafft(input_data)
-
Runs the MAFFT multiple sequence alignment tool on the given input data.
Args
input_data
:dict
- Dictionary with sequence names as keys and sequences as values.
Returns
list
- List of dictionaries containing sequence names and aligned sequences.
Expand source code
@staticmethod def run_mafft(input_data): """ Runs the MAFFT multiple sequence alignment tool on the given input data. Args: input_data (dict): Dictionary with sequence names as keys and sequences as values. Returns: list: List of dictionaries containing sequence names and aligned sequences. """ input_content = Synopsis.parse_mafft_input(input_data) with open("temp.fasta", "w") as f: f.write(input_content) command = f"mafft --auto --anysymbol temp.fasta" out = subprocess.run(command, shell=True, capture_output=True) return Synopsis.parse_mafft_output(str(out.stdout))
def visualize_alignments(alignments, highlight=None, offset=0)
-
Generates an HTML table to visualize sequence alignments.
Args
alignments
:list
- List of dictionaries containing sequence names and aligned sequences.
highlight
:list
- List of lists specifying positions to highlight for each sequence. The result object of a search can be used.
offset
:int
- Offset for highlighted positions.
Returns
str
- HTML formatted table for visualization.
Expand source code
@staticmethod def visualize_alignments(alignments, highlight=None, offset=0): """ Generates an HTML table to visualize sequence alignments. Args: alignments (list): List of dictionaries containing sequence names and aligned sequences. highlight (list): List of lists specifying positions to highlight for each sequence. The result object of a search can be used. offset (int): Offset for highlighted positions. Returns: str: HTML formatted table for visualization. """ html_output = "<table border='0'>" html_output += "<tr><th>Name</th><th>Sequence</th></tr>" for i, alignment in enumerate(alignments): name = alignment['name'] sequence = "" if highlight: highlight_offset = [n + offset for n in highlight[i]] char_count = 0 for char in alignment['sequence']: if char == "-": sequence += "-" else: if char_count in highlight_offset: sequence += f"<span style='color: red'>{char}</span>" else: sequence += char char_count += 1 else: sequence = ",".join(list(alignment['sequence'])) html_output += f'<tr><td>{name}</td><td style="text-align: left; font-family: Volpiano; font-size:3em; white-space: nowrap;">{sequence}</td></tr>' html_output += "</table>" return html_output