Module monodikit.analysis.search

Expand source code
from .utility import Utility
class Segment:
    """One matching window produced by a search.

    Attributes:
        indices: Positions, relative to the start of the window, at which the
            pattern elements were matched.
        window: The slice of the searched sequence that contains the match.
        window_offset: Position of the window's first element within the
            searched sequence.
    """
    def __init__(self, indices, window, window_offset):
        # Single tuple assignment; attributes are still set in the order
        # indices, window, window_offset.
        self.indices, self.window, self.window_offset = indices, window, window_offset

class SearchResult:
    """Pairs a chant with the segment(s) that were found in it.

    Attributes:
        chant: The chant (document) in which the segments were found.
        segments: The segments found in that chant.
    """
    def __init__(self, chant, segments):
        # Single tuple assignment; attributes are still set in the order
        # chant, segments.
        self.chant, self.segments = chant, segments

class Search:
    """Windowed subsequence search over the documents of a corpus.

    All functionality is exposed through static methods; instances hold no state.
    """

    def __init__(self):
        pass

    @staticmethod
    def search_in_window(corpus, pattern, window, preprocess=None):
        """
        Searches for a pattern within a window in a given corpus.

        Each document in ``corpus.documents`` is converted to a list of string
        tokens, which is then scanned with `search_pattern_in_window`.

        Args:
            corpus (Corpus): The corpus containing the documents to search.
            pattern (sequence): The tokens to search for, in order. They are
                compared for equality with the string tokens built here
                (presumably a list of strings such as note names or intervals).
            window (int): The window length for the search.
            preprocess (str): Token representation to search. ``"intervals"``
                searches the stringified values returned by
                ``Utility.compute_intervals``; any other value (the default)
                searches ``f"{note.base}{note.octave}"`` tokens.

        Returns:
            list: One SearchResult per document that has at least one matching window.
        """
        search_result = []
        for document in corpus.documents:
            components = document.flat_neume_components
            if preprocess == "intervals":
                tokens = [f"{intv}" for intv in Utility.compute_intervals(components)]
            else:
                tokens = [f"{note.base}{note.octave}" for note in components]
            segments = Search.search_pattern_in_window(tokens, pattern, window)
            if segments:
                search_result.append(SearchResult(document, segments))
        return search_result

    @staticmethod
    def search_pattern_in_window(text, pattern, window_length):
        """
        Searches for a pattern within every window of a token sequence.

        A window of ``window_length`` elements is slid over ``text`` one
        position at a time; each window that contains the pattern yields a Segment.

        Args:
            text (sequence): The tokens to search within.
            pattern (sequence): The tokens to search for, in order.
            window_length (int): The window length for the search.

        Returns:
            list: Segment objects holding the matched indices, the window and
                the window offset. Empty if ``text`` is shorter than the window.
        """
        segments = []
        for offset in range(len(text) - window_length + 1):
            window = text[offset:offset + window_length]
            window_indices = Search.is_pattern_in_window(window, pattern)
            # None (no match) and [] (empty pattern) are both falsy and are skipped.
            if window_indices:
                segments.append(Segment(window_indices, window, offset))
        return segments

    @staticmethod
    def is_pattern_in_window(window, pattern):
        """
        Checks if a pattern occurs in the given window as an ordered subsequence.

        The pattern elements must appear in order but need not be adjacent;
        each one is matched at its earliest possible position.

        Args:
            window (sequence): The window to search within.
            pattern (sequence): The pattern to search for.

        Returns:
            list or None: Window-relative indices of the matched elements, or
                None if the pattern is not fully contained. An empty pattern
                yields an empty list.
        """
        indices = []
        pattern_length = len(pattern)
        for window_index, element in enumerate(window):
            if len(indices) == pattern_length:
                break
            if element == pattern[len(indices)]:
                indices.append(window_index)
        return indices if len(indices) == pattern_length else None

    @staticmethod
    def visualize_html(results):
        """
        Generates an HTML representation of search results.

        Each result becomes one table row: a cell with the chant metadata
        followed by the chant's volpiano, where a new cell is started at the
        first and at the last position of every segment window and every
        matched position is coloured red.

        Segment indices are applied directly to positions of
        ``result.chant.volpiano`` (assumes both use the same indexing).

        Args:
            results (list): List of search results.

        Returns:
            str: HTML formatted representation of the search results.
        """
        cell_break = '</td><td class="notation">'
        parts = [
            "<html><head><style>.notation {font-family: Volpiano; font-size:2em;white-space:nowrap}</style></head><body>",
            "<h2>Results</h2>",
            "<table>",
        ]

        for result in results:
            segments = result.segments
            # Collect highlighted positions up front so that overlapping
            # segments cannot cancel each other's highlight.
            highlighted = {
                position + segment.window_offset
                for segment in segments
                for position in segment.indices
            }
            notation = ['<td class="notation">']
            for index, pitch in enumerate(result.chant.volpiano):
                for segment in segments:
                    if segment.window_offset + len(segment.window) - 1 == index:
                        notation.append(cell_break)
                    if segment.window_offset == index:
                        notation.append(cell_break)
                if index in highlighted:
                    # Open and close the span around this pitch only, so it is
                    # always balanced and never crosses a cell boundary.
                    notation.append('<span style="color: red">')
                    notation.append(pitch)
                    notation.append("-")
                    notation.append("</span>")
                else:
                    notation.append(pitch)
                    notation.append("-")
            notation.append("</td>")
            parts.append(
                f'<tr><td>{result.chant.meta.initial_text} – {result.chant.meta.document_id} - {result.chant.meta.genre} </td>'
                f'{"".join(notation)}</tr>')
        parts.append("</table></body></html>")
        return "".join(parts)

    @staticmethod
    def to_mafft_input(results, context=0):
        """
        Converts search results to input format for MAFFT.

        Each segment contributes one entry whose key is the chant's document id
        (spaces removed) followed by ``-<number>``. Numbering restarts for
        every result and begins at 2, which is kept for backward compatibility.

        Args:
            results (list): List of search results.
            context (int): Number of additional volpiano positions to include
                before and after each window. The excerpt is clipped at the
                start and end of the chant.

        Returns:
            dict: Maps entry names to the joined volpiano excerpt.
        """
        mafft_input = {}
        for result in results:
            volp = result.chant.volpiano
            prefix = result.chant.meta.document_id.replace(" ", "")
            for number, segment in enumerate(result.segments, start=2):
                # Clamp at 0: a negative start would make the slice count from
                # the end of the chant and return the wrong excerpt.
                start = max(0, segment.window_offset - context)
                stop = segment.window_offset + len(segment.window) + context
                mafft_input[f"{prefix}-{number}"] = "".join(volp[start:stop])
        return mafft_input

Classes

class Search
Expand source code
class Search:
    """Windowed subsequence search over the documents of a corpus.

    All functionality is exposed through static methods; instances hold no state.
    """

    def __init__(self):
        pass

    @staticmethod
    def search_in_window(corpus, pattern, window, preprocess=None):
        """
        Searches for a pattern within a window in a given corpus.

        Each document in ``corpus.documents`` is converted to a list of string
        tokens, which is then scanned with `search_pattern_in_window`.

        Args:
            corpus (Corpus): The corpus containing the documents to search.
            pattern (sequence): The tokens to search for, in order. They are
                compared for equality with the string tokens built here
                (presumably a list of strings such as note names or intervals).
            window (int): The window length for the search.
            preprocess (str): Token representation to search. ``"intervals"``
                searches the stringified values returned by
                ``Utility.compute_intervals``; any other value (the default)
                searches ``f"{note.base}{note.octave}"`` tokens.

        Returns:
            list: One SearchResult per document that has at least one matching window.
        """
        search_result = []
        for document in corpus.documents:
            components = document.flat_neume_components
            if preprocess == "intervals":
                tokens = [f"{intv}" for intv in Utility.compute_intervals(components)]
            else:
                tokens = [f"{note.base}{note.octave}" for note in components]
            segments = Search.search_pattern_in_window(tokens, pattern, window)
            if segments:
                search_result.append(SearchResult(document, segments))
        return search_result

    @staticmethod
    def search_pattern_in_window(text, pattern, window_length):
        """
        Searches for a pattern within every window of a token sequence.

        A window of ``window_length`` elements is slid over ``text`` one
        position at a time; each window that contains the pattern yields a Segment.

        Args:
            text (sequence): The tokens to search within.
            pattern (sequence): The tokens to search for, in order.
            window_length (int): The window length for the search.

        Returns:
            list: Segment objects holding the matched indices, the window and
                the window offset. Empty if ``text`` is shorter than the window.
        """
        segments = []
        for offset in range(len(text) - window_length + 1):
            window = text[offset:offset + window_length]
            window_indices = Search.is_pattern_in_window(window, pattern)
            # None (no match) and [] (empty pattern) are both falsy and are skipped.
            if window_indices:
                segments.append(Segment(window_indices, window, offset))
        return segments

    @staticmethod
    def is_pattern_in_window(window, pattern):
        """
        Checks if a pattern occurs in the given window as an ordered subsequence.

        The pattern elements must appear in order but need not be adjacent;
        each one is matched at its earliest possible position.

        Args:
            window (sequence): The window to search within.
            pattern (sequence): The pattern to search for.

        Returns:
            list or None: Window-relative indices of the matched elements, or
                None if the pattern is not fully contained. An empty pattern
                yields an empty list.
        """
        indices = []
        pattern_length = len(pattern)
        for window_index, element in enumerate(window):
            if len(indices) == pattern_length:
                break
            if element == pattern[len(indices)]:
                indices.append(window_index)
        return indices if len(indices) == pattern_length else None

    @staticmethod
    def visualize_html(results):
        """
        Generates an HTML representation of search results.

        Each result becomes one table row: a cell with the chant metadata
        followed by the chant's volpiano, where a new cell is started at the
        first and at the last position of every segment window and every
        matched position is coloured red.

        Segment indices are applied directly to positions of
        ``result.chant.volpiano`` (assumes both use the same indexing).

        Args:
            results (list): List of search results.

        Returns:
            str: HTML formatted representation of the search results.
        """
        cell_break = '</td><td class="notation">'
        parts = [
            "<html><head><style>.notation {font-family: Volpiano; font-size:2em;white-space:nowrap}</style></head><body>",
            "<h2>Results</h2>",
            "<table>",
        ]

        for result in results:
            segments = result.segments
            # Collect highlighted positions up front so that overlapping
            # segments cannot cancel each other's highlight.
            highlighted = {
                position + segment.window_offset
                for segment in segments
                for position in segment.indices
            }
            notation = ['<td class="notation">']
            for index, pitch in enumerate(result.chant.volpiano):
                for segment in segments:
                    if segment.window_offset + len(segment.window) - 1 == index:
                        notation.append(cell_break)
                    if segment.window_offset == index:
                        notation.append(cell_break)
                if index in highlighted:
                    # Open and close the span around this pitch only, so it is
                    # always balanced and never crosses a cell boundary.
                    notation.append('<span style="color: red">')
                    notation.append(pitch)
                    notation.append("-")
                    notation.append("</span>")
                else:
                    notation.append(pitch)
                    notation.append("-")
            notation.append("</td>")
            parts.append(
                f'<tr><td>{result.chant.meta.initial_text} – {result.chant.meta.document_id} - {result.chant.meta.genre} </td>'
                f'{"".join(notation)}</tr>')
        parts.append("</table></body></html>")
        return "".join(parts)

    @staticmethod
    def to_mafft_input(results, context=0):
        """
        Converts search results to input format for MAFFT.

        Each segment contributes one entry whose key is the chant's document id
        (spaces removed) followed by ``-<number>``. Numbering restarts for
        every result and begins at 2, which is kept for backward compatibility.

        Args:
            results (list): List of search results.
            context (int): Number of additional volpiano positions to include
                before and after each window. The excerpt is clipped at the
                start and end of the chant.

        Returns:
            dict: Maps entry names to the joined volpiano excerpt.
        """
        mafft_input = {}
        for result in results:
            volp = result.chant.volpiano
            prefix = result.chant.meta.document_id.replace(" ", "")
            for number, segment in enumerate(result.segments, start=2):
                # Clamp at 0: a negative start would make the slice count from
                # the end of the chant and return the wrong excerpt.
                start = max(0, segment.window_offset - context)
                stop = segment.window_offset + len(segment.window) + context
                mafft_input[f"{prefix}-{number}"] = "".join(volp[start:stop])
        return mafft_input

Static methods

def is_pattern_in_window(window, pattern)

Checks if a pattern is present in the given window.

Args

window : str
The window to search within.
pattern : str
The pattern to search for.

Returns

list
List of indices where the pattern is found in the window.
Expand source code
@staticmethod
def is_pattern_in_window(window, pattern):
    """
    Tests whether the pattern occurs in the window as an ordered subsequence.

    The window is scanned once from left to right; each pattern element is
    matched at the first position where it appears after the previous match.

    Args:
        window (sequence): The window to search within.
        pattern (sequence): The pattern to search for.

    Returns:
        list or None: Window-relative positions of the matched elements, or
            None when the pattern is not completely contained in the window.
    """
    matched = []
    needed = len(pattern)
    for position, element in enumerate(window):
        # Stop scanning as soon as every pattern element has been found.
        if len(matched) == needed:
            break
        if element == pattern[len(matched)]:
            matched.append(position)
    return matched if len(matched) == needed else None
def search_in_window(corpus, pattern, window, preprocess=None)

Searches for a pattern within a window in a given corpus.

Args

corpus : Corpus
The corpus containing the documents to search.
pattern : str
The pattern to search for.
window : int
The window length for the search.
preprocess : str
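Token representation to search: "intervals" searches the intervals computed by Utility.compute_intervals; any other value (the default) searches note names built from each note's base and octave.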

Returns

list
List of search results containing segments and corresponding chant information.
Expand source code
@staticmethod
def search_in_window(corpus, pattern, window, preprocess=None):
    """
    Searches for a pattern within a window in a given corpus.

    Each document in ``corpus.documents`` is converted to a list of string
    tokens, which is then scanned with ``Search.search_pattern_in_window``.

    Args:
        corpus (Corpus): The corpus containing the documents to search.
        pattern (sequence): The tokens to search for, in order. They are
            compared for equality with the string tokens built here
            (presumably a list of strings such as note names or intervals).
        window (int): The window length for the search.
        preprocess (str): Token representation to search. ``"intervals"``
            searches the stringified values returned by
            ``Utility.compute_intervals``; any other value (the default)
            searches ``f"{note.base}{note.octave}"`` tokens.

    Returns:
        list: One SearchResult per document that has at least one matching window.
    """
    search_result = []
    for document in corpus.documents:
        components = document.flat_neume_components
        # Only the tokenisation differs between the two modes; the search
        # itself is shared.
        if preprocess == "intervals":
            tokens = [f"{intv}" for intv in Utility.compute_intervals(components)]
        else:
            tokens = [f"{note.base}{note.octave}" for note in components]
        segments = Search.search_pattern_in_window(tokens, pattern, window)
        if segments:
            search_result.append(SearchResult(document, segments))
    return search_result
def search_pattern_in_window(text, pattern, window_length)

Searches for a pattern within a window of text.

Args

text : str
The text to search within.
pattern : str
The pattern to search for.
window_length : int
The window length for the search.

Returns

list
List of segments containing window offset, indices, and the window.
Expand source code
@staticmethod
def search_pattern_in_window(text, pattern, window_length):
    """
    Slides a fixed-length window over a sequence and collects every match.

    Args:
        text (sequence): The sequence to search within.
        pattern (sequence): The pattern to search for.
        window_length (int): The window length for the search.

    Returns:
        list: One Segment per window containing the pattern, built from the
            matched indices, the window itself and the window's offset.
    """
    hits = []
    for offset in range(len(text) - window_length + 1):
        chunk = text[offset:offset + window_length]
        positions = Search.is_pattern_in_window(chunk, pattern)
        # Skip windows without a match (None) or with an empty index list.
        if not positions:
            continue
        hits.append(Segment(positions, chunk, offset))
    return hits
def to_mafft_input(results, context=0)

Converts search results to input format for MAFFT.

Args

results : list
List of search results.
context : int
Additional context to include in the input.

Returns

dict
Dictionary with formatted input for MAFFT.
Expand source code
@staticmethod
def to_mafft_input(results, context=0):
    """
    Converts search results to input format for MAFFT.

    Each segment contributes one entry whose key is the chant's document id
    (spaces removed) followed by ``-<number>``. Numbering restarts for every
    result and begins at 2, which is kept for backward compatibility.

    Args:
        results (list): List of search results.
        context (int): Number of additional volpiano positions to include
            before and after each window. The excerpt is clipped at the start
            and end of the chant.

    Returns:
        dict: Maps entry names to the joined volpiano excerpt.
    """
    mafft_input = {}
    for result in results:
        volp = result.chant.volpiano
        prefix = result.chant.meta.document_id.replace(" ", "")
        for number, segment in enumerate(result.segments, start=2):
            # Clamp at 0: a negative start would make the slice count from the
            # end of the chant and return the wrong excerpt.
            start = max(0, segment.window_offset - context)
            stop = segment.window_offset + len(segment.window) + context
            mafft_input[f"{prefix}-{number}"] = "".join(volp[start:stop])
    return mafft_input
def visualize_html(results)

Generates an HTML representation of search results.

Args

results : list
List of search results.

Returns

str
HTML formatted representation of the search results.
Expand source code
@staticmethod
def visualize_html(results):
    """
    Generates an HTML representation of search results.

    Each result becomes one table row: a cell with the chant metadata followed
    by the chant's volpiano, where a new cell is started at the first and at
    the last position of every segment window and every matched position is
    coloured red.

    Segment indices are applied directly to positions of
    ``result.chant.volpiano`` (assumes both use the same indexing).

    Args:
        results (list): List of search results.

    Returns:
        str: HTML formatted representation of the search results.
    """
    cell_break = '</td><td class="notation">'
    parts = [
        "<html><head><style>.notation {font-family: Volpiano; font-size:2em;white-space:nowrap}</style></head><body>",
        "<h2>Results</h2>",
        "<table>",
    ]

    for result in results:
        segments = result.segments
        # Collect highlighted positions up front so that overlapping segments
        # cannot cancel each other's highlight.
        highlighted = {
            position + segment.window_offset
            for segment in segments
            for position in segment.indices
        }
        notation = ['<td class="notation">']
        for index, pitch in enumerate(result.chant.volpiano):
            for segment in segments:
                if segment.window_offset + len(segment.window) - 1 == index:
                    notation.append(cell_break)
                if segment.window_offset == index:
                    notation.append(cell_break)
            if index in highlighted:
                # Open and close the span around this pitch only, so it is
                # always balanced and never crosses a cell boundary.
                notation.append('<span style="color: red">')
                notation.append(pitch)
                notation.append("-")
                notation.append("</span>")
            else:
                notation.append(pitch)
                notation.append("-")
        notation.append("</td>")
        parts.append(
            f'<tr><td>{result.chant.meta.initial_text} – {result.chant.meta.document_id} - {result.chant.meta.genre} </td>'
            f'{"".join(notation)}</tr>')
    parts.append("</table></body></html>")
    return "".join(parts)
class SearchResult (chant, segments)

Represents a search result with information of chant and one or multiple found segments.

Expand source code
class SearchResult:
    """Pairs a chant with the segment(s) that were found in it.

    Attributes:
        chant: The chant (document) in which the segments were found.
        segments: The segments found in that chant.
    """
    def __init__(self, chant, segments):
        # Single tuple assignment; attributes are still set in the order
        # chant, segments.
        self.chant, self.segments = chant, segments
class Segment (indices, window, window_offset)

Represents a segment found in a Search.

Expand source code
class Segment:
    """One matching window produced by a search.

    Attributes:
        indices: Positions, relative to the start of the window, at which the
            pattern elements were matched.
        window: The slice of the searched sequence that contains the match.
        window_offset: Position of the window's first element within the
            searched sequence.
    """
    def __init__(self, indices, window, window_offset):
        # Single tuple assignment; attributes are still set in the order
        # indices, window, window_offset.
        self.indices, self.window, self.window_offset = indices, window, window_offset