# Source code for qumin.representations.contexts

# -*- coding: utf-8 -*-
# !/usr/bin/env python3
"""author: Sacha Beniamine.

This module implements patterns' contexts, which are series of phonological restrictions."""

import logging

from copy import deepcopy
from .alignment import align_right, align_left, align_multi
from .quantity import one, optional, some, kleenestar, Quantity, quantity_largest, quantity_sum

log = logging.getLogger("Qumin")


def _align_edges(*args, **kwargs):
    """Align sequences at both of their edges.

    When every sequence has the same length, they are simply zipped position
    by position. Otherwise the same number of items (half the shortest
    length, rounded down) is aligned on the left and on the right, and
    whatever remains in each sequence is pooled into one central tuple.
    A ("", Quantity(min_center_len, max_center_len)) item is added to that
    central pool to ensure the correctness of future merges.

    Keyword arguments are accepted but not used by this function.
    """
    sizes = [len(seq) for seq in args]
    shortest = min(sizes)
    if len(set(sizes)) == 1:
        # Nothing to pool: a plain column-wise alignment is enough.
        return list(zip(*args))

    edge = shortest // 2
    prefixes = []
    suffixes = []
    middle = set()
    middle_sizes = []
    for seq, size in zip(args, sizes):
        prefixes.append(seq[:edge])
        suffixes.append(seq[size - edge:])
        middle.update(seq[edge:size - edge])
        middle_sizes.append(size - 2 * edge)

    if middle:
        # Record how many items the center may span across all sequences.
        middle.add(("", Quantity(min(middle_sizes), max(middle_sizes))))

    aligned = list(zip(*prefixes))
    aligned.append(tuple(middle))
    aligned.extend(zip(*suffixes))
    return aligned


class _ContextMember(object):
    """One part of a Context, between the edges of the word and/or a blank.

    String forms are cached at construction time (they need an inventory to
    be computed) and refreshed whenever :meth:`simplify` rewrites the
    restrictions. Equality compares the cached regex-like representation.
    """

    def __init__(self, elements, inv, opt=False, blank=False):
        """Build a member from a list of restrictions.

        Arguments:
            elements: list of ``(segment, quantifier)`` pairs, optionally
                terminated by the blank marker ``"{}"``.
            inv: inventory object providing the segment formatters used by
                :meth:`to_str`.
            opt: accepted for interface compatibility, but currently
                ignored: a new member always starts non-optional.
            blank: whether a blank follows this member.
        """
        self.blank = blank
        if elements == [] or elements[-1] == "{}":
            # An empty member, or one ending with the marker, is always
            # followed by a blank; the marker itself is not a restriction.
            self.blank = True
            elements = elements[:-1]
        self.restrictions = elements
        self.opt = False  # opt
        self.empty = self.restrictions == []
        self._refresh_strings(inv)

    def _refresh_strings(self, inv):
        """Recompute the cached repr (mode 0) and str (mode 2) forms."""
        self._repr = self.to_str(inv, mode=0)
        self._str = self.to_str(inv, mode=2)

    def __deepcopy__(self, memo):
        """Deep copy of this context member.

        The cached string forms are copied too, so that the copy supports
        ``repr``, ``str`` and ``==`` like the original does.
        """
        cls = self.__class__
        clone = cls.__new__(cls)
        clone.blank = self.blank
        clone.restrictions = deepcopy(self.restrictions, memo)
        clone.opt = self.opt
        clone.empty = self.empty
        clone._repr = self._repr
        clone._str = self._str
        return clone

    def __len__(self):
        return len(self.restrictions)

    def __getitem__(self, i):
        return self.restrictions[i]

    def __str__(self):
        return self._str

    def __eq__(self, other):
        return repr(self) == repr(other)

    def __repr__(self):
        return self._repr

    def to_str(self, inv, mode=2):
        """Format this member as a string.

        Arguments:
            inv: inventory object; its ``regex``, ``pretty_str``,
                ``shortest`` and ``features_str`` attributes are used as
                segment formatters.
            mode: index of the format to use (see the table below).

        Returns:
            the formatted string. Restrictions whose segment is the empty
            string are skipped. In mode 0 the restrictions are always
            wrapped in parentheses; an optional, non-empty member is wrapped
            in parentheses followed by the optional quantifier in every mode.
        """
        def to_features(s):
            if isinstance(s, str):
                return s
            return inv.features_str(s)

        # Format modes:
        format_modes = [inv.regex,  # str (0):      ([Ei]...) - with parenthesis, regex
                        inv.pretty_str,  # pretty  (1):  {e, i}
                        inv.shortest,  # display (2): shortest possible string
                        to_features]  # features: [+syll, -open]
        format_blanks = ["{}", "_", "_", "_"]
        format_segment = format_modes[mode]
        blankchar = format_blanks[mode]
        grouped_optional = self.opt and not self.empty

        parts = []
        if mode == 0 or grouped_optional:
            parts.append("(")
        for segment, quantifier in self.restrictions:
            if segment != "":
                parts.append(format_segment(segment))
                parts.append(str(quantifier))
        if grouped_optional:
            parts.append(")" + str(optional))
        elif mode == 0:
            parts.append(")")
        if self.blank:
            parts.append(blankchar)
        return "".join(parts)

    def simplify(self, inv):
        """ Reduce this context member in place, simplifying its regex.

        Adjacent restrictions are examined pairwise (indexes i-1 and i).

        A sequence of two identical segments with quantifiers $Q_{1}$ and $Q_{2}$
        is simplified as follows : $aQ_{1}aQ_{2} = aQ_{3}$
        for example  [aei]?[aei]+ is simplified to [aei]+

        Two different segments are also collapsed into the second one when
        their quantifiers allow it and ``inv.inf(s1, s2)`` holds
        (presumably an ordering test between segments; confirm against the
        inventory class).

        If the result is one segment long, then if the context member is optional,
        the quantifier is reduced too.
        for example ([aei]+)? is simplified to [aei]*

        The cached string forms are refreshed afterwards, so ``repr``,
        ``str`` and ``==`` reflect the simplified restrictions.

        ================= === === === ===
         Q_1 / Q_2             ?   +   *
        ================= === === === ===
           (one)           +   +   +   +
         ? (optional)      +   *   +   *
         + (some)          +   +   +   +
         * (kleenestar)    +   *   +   *
        ================= === === === ===

        Arguments:
            inv: inventory object, used for ``inv.inf`` and for formatting.
        """
        # Resulting quantifier when both segments are identical.
        equal_reduction = {
            (one, one): some,
            (one, optional): some,
            (one, some): some,
            (one, kleenestar): some,
            (optional, optional): kleenestar,
            (optional, some): some,
            (optional, kleenestar): kleenestar,
            (some, some): some,
            (some, kleenestar): some,
            (optional, one): some,
            (some, one): some,
            (kleenestar, one): some,
            (some, optional): some,
            (kleenestar, optional): kleenestar,
            (kleenestar, some): some,
            (kleenestar, kleenestar): kleenestar,
        }
        # Resulting quantifier when the segments differ but inv.inf holds.
        reduction = {(optional, some): some,
                     (optional, kleenestar): kleenestar,
                     (kleenestar, some): some,
                     (some, some): some,
                     (kleenestar, kleenestar): kleenestar}
        i = 1  # starts at 1, because at each step, we simplify from i-1 to i
        while i < len(self.restrictions):
            (s1, q1), (s2, q2) = self.restrictions[i - 1:i + 1]

            if s1 == s2 and (q1, q2) in equal_reduction:
                merged = (s2, equal_reduction[(q1, q2)])
            elif (q1, q2) in reduction and inv.inf(s1, s2):
                merged = (s2, reduction[(q1, q2)])
            else:
                i += 1
                continue

            self.restrictions[i - 1:i + 1] = [merged]
            # Step back so the merged item is compared with its new
            # left neighbour on the next iteration.
            if i > 1:
                i -= 1

        if len(self.restrictions) == 1 and self.opt:
            # A single optional restriction: fold the optionality into
            # its quantifier instead of keeping a group.
            self.opt = False
            s, q = self.restrictions[0]
            self.restrictions[0] = (s, q & optional)

        # The restrictions may have changed: keep the cached strings in sync.
        self._refresh_strings(inv)


class Context(object):
    """Context for an alternation pattern.

    A context is a sequence of ``_ContextMember`` objects, stored in
    ``self.elements``.
    """

    def __init__(self, segments, inv):
        """Build a context.

        Arguments:
            segments: either a sequence of ``_ContextMember`` objects (used
                as-is), or a flat sequence of restrictions in which each
                ``"{}"`` item marks a blank and closes the current member.
            inv: Inventory instance, passed on to the created members.
        """
        if not segments:
            # NOTE(review): this stores a single _ContextMember rather than
            # a list of members, so len() of an empty context is 0 and
            # iterating it yields nothing. Kept as-is; confirm it is intended.
            self.elements = _ContextMember([], inv, blank=True)
        elif isinstance(segments[0], _ContextMember):
            self.elements = segments
        else:
            start = 0
            self.elements = []
            for i, item in enumerate(segments):
                if item == "{}":
                    # The marker is included in the slice: the member
                    # constructor uses it to flag the following blank.
                    self.elements.append(_ContextMember(segments[start:i + 1], inv))
                    start = i + 1
            if start < len(segments):
                # Trailing restrictions after the last blank.
                self.elements.append(_ContextMember(segments[start:], inv))

    def __len__(self):
        return len(self.elements)

    def __iter__(self):
        return iter(self.elements)

    def __getitem__(self, i):
        return self.elements[i]

    def feat_str(self, inv):
        """Return the concatenation of every member formatted with mode 1."""
        return "".join([e.to_str(inv, mode=1) for e in self.elements])

    def __repr__(self):
        return "".join(map(repr, self.elements))

    def __str__(self):
        return "".join(map(str, self.elements))

    def to_str(self, inv, mode=2):
        """Return the concatenation of every member formatted with `mode`."""
        return "".join(member.to_str(inv, mode=mode) for member in self.elements)

    @classmethod
    def _align(cls, contexts):
        """ Align contexts segment by segment in order to merge (generator).

        Arguments:
            contexts: iterable of Contexts (may be a one-shot iterator).

        Yields:
            for each member position, a tuple ``(aligned, opt, blank)``:
            the aligned restrictions, whether the position is optional,
            and whether a blank follows it.

        Raises:
            ValueError: if `contexts` is empty.
        """
        # The contexts are traversed once per position: materialize them so
        # that one-shot iterables are supported too.
        contexts = list(contexts)
        longest = max(len(c) for c in contexts)
        leftblank = False
        # Alignment strategy, chosen by (blank on the left, blank on the right).
        align_funcs = {(False, True): align_right,
                       (True, False): align_left,
                       (False, False): align_multi,  # This is an identity pattern, alignment is arbitrary.
                       (True, True): _align_edges}
        for i in range(longest):
            restrictions = []
            opt = False
            rightblank = False
            for c in contexts:
                if i >= len(c) or c[i].empty:
                    # This context has nothing at this position.
                    opt = True
                    restrictions.append([("", optional)])
                else:
                    rightblank = rightblank or c[i].blank  # shouldn't even need to accumulate it
                    opt = opt or c[i].opt
                    if c[i].restrictions not in restrictions:  # don't put in several times the same context member !
                        restrictions.append(c[i].restrictions)

            align_func = align_funcs[(leftblank, rightblank)]
            yield align_func(*restrictions, fillvalue=("", optional)), opt, rightblank
            leftblank = rightblank

    @staticmethod
    def _flush_buffer(inv, segments, quantities, members):
        """Append the buffered segments to `members` as one restriction, then empty the buffer."""
        if segments:
            members.append((inv.meet(*segments), quantity_sum(quantities)))
            segments.clear()
            quantities.clear()

    @classmethod
    def merge(cls, contexts, inv):
        """ Merge contexts to generalize them.

        Merge contexts and combine their restrictions into a new context.

        Arguments:
            contexts: iterable of Contexts.
            inv: Inventory instance

        Returns:
            a merged context

        Raises:
            ValueError: if `contexts` is empty.
        """
        new_context = []
        for group, opt, blank in cls._align(contexts):
            context_members = []
            buffer_segments = []
            buffer_quantities = []
            for aligned_segments in group:
                segments, quantities = zip(*aligned_segments)
                if ('', optional) in aligned_segments:
                    # At least one context has nothing here: accumulate, so
                    # that consecutive such columns become one restriction.
                    quantity = quantity_largest(quantities)
                    buffer_segments.extend([s for s in segments if s])
                    buffer_quantities.append(quantity)
                else:
                    # A fully attested column closes the pending buffer.
                    cls._flush_buffer(inv, buffer_segments, buffer_quantities, context_members)
                    segment = inv.meet(*[s for s in segments if s])
                    quantity = quantity_largest(quantities)
                    context_members.append((segment, quantity))
            cls._flush_buffer(inv, buffer_segments, buffer_quantities, context_members)

            new_context_member = _ContextMember(context_members, inv, blank=blank, opt=opt)
            new_context_member.simplify(inv)
            new_context.append(new_context_member)
        return Context(new_context, inv)