Source code for qumin.lattice.lattice

# !usr/bin/python3
# -*- coding: utf-8 -*-

import matplotlib
from matplotlib import pyplot as plt
from collections import defaultdict
from os.path import join, dirname

from ..clustering.node import Node
from ..clustering import find_microclasses

import logging

# Select the "agg" matplotlib backend; force=True is passed so the switch is
# requested even though pyplot has already been imported above.
matplotlib.use("agg", force=True)
# getLogger() is called without a name, so this is the root logger.
log = logging.getLogger()

# mpld3 is an optional dependency: when it cannot be imported the module still
# loads, `mpld3` is set to None and HTML export is disabled further down.
try:
    import mpld3
except Exception:
    # `Exception` (rather than a bare `except`) keeps the best-effort fallback
    # for any import-time failure, without swallowing KeyboardInterrupt or
    # SystemExit.
    mpld3 = None
    log.warning("Warning: mpld3 could not be imported. No html export possible.")

from concepts import Context
import pandas as pd
from tqdm import tqdm

# Module-wide matplotlib rc settings, applied at import time.
# Axes: no face colour, no edge colour, zero-width frame.
axes = {'facecolor': 'None', 'edgecolor': 'None', 'linewidth': 0}
# Grid: fully transparent, zero-width lines.
grid = {'alpha': 0, 'linewidth': 0}
# Lines: thin strokes.
lines = {'linewidth': 0.5}

matplotlib.rc('lines', **lines)
matplotlib.rc('axes', **axes)
matplotlib.rc('grid', **grid)


def _load_external_text(filename):
    """
    Read a UTF-8 text file located next to this module.

    Arguments:
        filename (str): name of the file, joined to this module's directory.

    Returns:
        str: the lines of the file joined with a newline. Each line keeps its
        own terminator, so line breaks come out doubled; this is preserved
        for backward compatibility.
    """
    path = join(dirname(__file__), filename)
    # Context manager so the handle is closed as soon as the text is read.
    with open(path, "r", encoding="utf-8") as handle:
        return "\n".join(handle.readlines())


def _node_to_label_IC(node, comp=None):
    """
    Default function to render node labels on inflection class lattice.

    Arguments:
        node (qumin.clustering.node.Node): the node whose label we render.
        comp (str): optional attribute-name prefix. Attributes starting with
            it are rendered without the prefix, in rows of class 'alternate'.

    Returns:
        str: an HTML table describing the node, or an empty string when the
        node has no "common" attribute.
    """
    # Nothing to render without shared properties.
    if "common" not in node.attributes:
        return ""

    # Fall back on the node labels when "objects" is missing or empty.
    objs = node.attributes.get("objects", node.labels)
    if not objs:
        objs = node.labels
    size = str(node.attributes.get("size", "unknown nb of")) + " lexèmes"
    header = "<table><thead><th colspan=2> Ex:" + objs[0] + ", " + size + " </th></thead>"

    line = "<tr><th>{}</th><td>{}</td></tr>"
    line2 = "<tr class='alternate'><th>{}</th><td>{}</td></tr>"
    line_no_head = "<tr><td colspan=2>{}</td></tr>"

    rows = []
    for properties in node.attributes["common"]:
        for prop in properties.split(";"):
            if "=" not in prop:
                rows.append(line_no_head.format(prop))
                continue
            # Split on the first "=" only: the value may itself contain "=".
            attr, val = prop.split("=", 1)
            val = val.strip("<>")
            # A value whose first alternative is "⇌" is shown as "syncretic".
            if val.split("/")[0].strip() == "⇌":
                val = "syncretic"
            if comp and attr.startswith(comp):
                rows.append(line2.format(attr[len(comp):], val))
            else:
                rows.append(line.format(attr, val))

    common = "".join(rows) or "<tr><td colspan=2>Empty</td></tr>"
    return header + common + "</table>"


def context_from_pandas(df):
    """
    Build a :class:`concepts.Context` from a onehot-encoded :class:`pandas.DataFrame`.

    Follows the recipe given at:
    https://concepts.readthedocs.io/en/stable/advanced.html#context-from-pandas-dataframe

    Arguments:
        df (pandas.DataFrame): onehot encoding of objects (rows)
            and attributes (columns). Filled with 1/0, True/False or "X"/"".

    Returns:
        concepts.Context: a Context representing the input data.
    """
    log.debug('Creating a Context object from the onehot encoding...')
    object_labels = df.index.tolist()
    property_labels = list(df)
    # Missing cells count as False; every cell is then cast to bool.
    truth_table = df.fillna(False).astype(bool)
    rows = list(truth_table.itertuples(index=False, name=None))
    return Context(object_labels, property_labels, rows)
class ICLattice(object):
    """
    Inflection class Lattice representation.

    This is a wrapper around (:class:`concepts.Context`). It builds on
    concepts' lattice class, but adopts a custom representation of lattices,
    using Qumin's :class:`qumin.clustering.node.Node` class.

    Attributes:
        context (concepts.Context): stores the underlying Context.
        lattice (concepts.lattices.Lattice): shortcut to the underlying lattice.
        nodes (qumin.clustering.node.Node): Qumin's lattice is represented
            as a root node. Each node contains its children.
            This is the lattice we will draw.
        comp: value of the `comp_prefix` constructor argument. When set,
            properties whose name starts with this prefix are counted
            and drawn separately from the others.
        leaves: dictionary of object properties (typically microclasses).
            The format is: {<object label>: {'size': <size>, 'other': <prop>}}.
            If the size argument is provided, it will be used to render the nodes.
    """

    def __init__(self, onehot_encoding, leaves, annotate=None, comp_prefix=None,
                 aoc=False, keep_names=False):
        """
        Arguments:
            onehot_encoding (pandas.DataFrame): onehot encoding of objects (rows)
                and attributes (columns). Filled with 1/0, True/False, "X"/"".
            leaves (dict): Dictionary of microclasses.
            annotate (dict): Extra annotations to add on lattice.
                Of the form: {<object label>: <annotation>}. Each annotation
                is unpacked as keyword arguments when the node is created.
            comp_prefix (str): prefix of the property names belonging to a
                second set of properties, if any. Stored as `self.comp`.
            aoc (bool): Whether to limit ourselves to Attribute or Object Concepts.
            keep_names: accepted for compatibility; not used by this constructor.
        """
        self.comp = comp_prefix  # whether there are two sets of properties.
        log.info("Converting the onehot encoding to a Context object...")
        self.context = context_from_pandas(onehot_encoding)
        log.info("Converting the Context object to a Lattice object... "
                 "If this step lasts too long, consider using a sample of lexemes/cells.")
        self.lattice = self.context.lattice
        self.leaves = leaves

        if annotate:
            for label in annotate:
                # Only annotate labels which are objects of the lattice.
                if label in self.lattice.supremum.extent:
                    self.lattice[[label]]._extra_qumin_annotation = annotate[label]

        log.info("Converting the Lattice object to a Qumin node representation...")
        if aoc:
            self.nodes = self._lattice_to_nodeAOC()
        else:
            self.nodes = self._lattice_to_node()

        # Set display
        font = {'family': 'DejaVu Sans', 'weight': 'normal', 'size': 9}
        matplotlib.rc('font', **font)

    def _pat_range(self):
        """
        Compute the range of the number of properties attached to concepts.

        Returns:
            tuple: (minimum, maximum). Both bounds start at 1, so the minimum
            is never above 1 and the maximum never below 1.
        """
        mini = maxi = 1
        for extent, intent in self.lattice:
            nb_props = len(self.lattice[intent].properties)
            if nb_props < mini:
                mini = nb_props
            elif nb_props > maxi:
                maxi = nb_props
        return mini, maxi

    def _make_nodes(self, concepts, prb):
        """
        Create nodes from concepts.

        Arguments:
            concepts: iterable of concepts of the lattice.
            prb: tqdm progress bar status. Updated once per concept.

        Returns:
            dict: A dictionary of nodes, where keys are concept extents.
        """
        nodes = {}
        for concept in concepts:
            extent = concept.extent
            # Sum the sizes of the leaves covered by this concept; labels
            # without a known size are skipped.
            size = sum(self.leaves[label]['size']
                       for label in extent
                       if label in self.leaves and 'size' in self.leaves[label])
            annotations = getattr(concept, '_extra_qumin_annotation', {})
            nodes[extent] = Node(extent,
                                 intent=concept.intent,
                                 size=size,
                                 common=concept.properties,
                                 objects=concept.objects,
                                 macroclass=False,
                                 **annotations)
            prb.update(1)
        return nodes

    def _lattice_to_node(self, keep_infimum=False):
        """
        Converts the lattice to a :class:`qumin.clustering.node.Node`.
        Stores only the root node.

        Arguments:
            keep_infimum (bool): Whether to keep concepts with an empty
                extent (the lower node). Defaults to False.

        Returns:
            qumin.clustering.node.Node: The root node (contains the full hierarchy).
        """
        concepts = sorted([v for v in self.lattice
                           if keep_infimum or v.extent != ()],
                          key=lambda x: len(x.extent), reverse=True)

        # Two progress steps per concept: node creation, then arc creation.
        with tqdm(total=len(concepts) * 2) as prb:
            # Creating nodes
            nodes = self._make_nodes(concepts, prb)

            # Creating arcs
            for vertice in concepts:
                for daughter in vertice.lower_neighbors:
                    if keep_infimum or daughter.extent != ():
                        nodes[vertice.extent].children.append(nodes[daughter.extent])
                prb.update(1)

        return nodes[self.lattice.supremum.extent]

    def _lattice_to_nodeAOC(self):
        """
        Converts the lattice to a :class:`qumin.clustering.node.Node`.
        Stores only the root node.
        Do not keep concepts which are neither objects nor attributes.

        Returns:
            qumin.clustering.node.Node: The root node (contains the full hierarchy).
        """
        supremum = self.lattice.supremum
        infimum = self.lattice.infimum

        def concept_sorter(concept):
            return (len(concept.extent), -len(list(concept.upset())))

        # select concept in AOC
        aoc = {c for c in self.lattice
               if (c == supremum or c.properties or c.objects) and c != infimum}

        # Make links (long way)
        concepts = sorted(aoc, key=concept_sorter, reverse=True)
        downsets = {c: set(c.downset()) for c in self.lattice}
        children = defaultdict(set)

        # Three progress steps per concept: descendants, nodes, arcs.
        with tqdm(total=len(concepts) * 3) as prb:
            # Compute descendants in aoc poset
            for concept in concepts:
                span = set()
                candidates = sorted((downsets[concept] & aoc) - {concept},
                                    key=concept_sorter, reverse=True)
                for candidate in candidates:
                    # A candidate already below a selected child is not
                    # a direct child.
                    if candidate not in span:
                        children[concept.extent].add(candidate)
                        span.update(downsets[candidate])
                prb.update(1)

            # Make and link Node objects
            # Creating nodes
            nodes = self._make_nodes(concepts, prb)

            # Creating arcs
            for vertice in concepts:
                for daughter in children[vertice.extent]:
                    nodes[vertice.extent].children.append(nodes[daughter.extent])
                prb.update(1)

        return nodes[supremum.extent]

    @classmethod
    def from_patterns(cls, patterns, paradigms, **kwargs):
        """
        Creates a Lattice from a patterns object.

        Arguments:
            patterns (representations.patternstore.PatternStore):
                the patterns computed for the paradigms.
            paradigms (qumin.representations.paradigms.Paradigms): the paradigms.
            **kwargs: passed to the class constructor.
        """
        log.info("Building the lattice...")
        microclasses = find_microclasses(paradigms, patterns)
        # Only the size of each microclass is kept as a leaf property.
        microclasses = {lex: {'size': len(microclass)}
                        for lex, microclass in microclasses.items()}
        onehot_encoding = patterns.incidence_table(lexemes=list(microclasses))
        return cls(onehot_encoding, microclasses, **kwargs)

    def parents(self, identifier):
        """Return all direct parents of a node which corresponds to the identifier."""
        return list(self.lattice[identifier].upper_neighbors)

    def ancestors(self, identifier):
        """Return all ancestors of a node which corresponds to the identifier."""
        concept = self.lattice[identifier]
        return [c for c in concept.upset() if c != concept]

    def stats(self):
        """
        Returns some stats about the classification size and shape.
        Based on self.nodes, not self.lattice:
        stats are different depending on AOC/not AOC.

        Returns:
            pandas.Series: with entries "Microclasses", "Base", "Height",
            "Degree" and "Nodes". "Degree" is NaN when its denominator
            (number of nodes minus 2) is zero.
        """

        def height(node):
            # Heights are cached in the node attributes.
            if not node.children:
                node.attributes["height"] = 1
                return 1
            if "height" in node.attributes:
                return node.attributes["height"]
            h = max(height(child) for child in node.children) + 1
            node.attributes["height"] = h
            return h

        all_nodes = list(self.nodes)
        nb_arcs = sum(len(x.children) for x in all_nodes)
        nb_noeuds = len(all_nodes)

        # -2 because supremum and infimum are ignored
        inner = nb_noeuds - 2
        degree = nb_arcs / inner if inner else float("nan")

        stats_lattice = {"Microclasses": len(self.leaves),
                         "Base": len(self.lattice.atoms),
                         "Height": height(self.nodes),
                         "Degree": degree,
                         "Nodes": nb_noeuds - 1  # -1 because infimum is ignored
                         }

        if self.comp:
            left = 0
            right = 0
            both = 0
            for node in all_nodes:
                common = node.attributes["common"]
                nb_comp = sum(att.startswith(self.comp) for att in common)
                if nb_comp > 0:
                    if nb_comp < len(common):
                        both += 1
                    else:
                        left += 1
                else:
                    right += 1
            log.info("Concepts définissant des propriétés "
                     "de la classification de gauche (-b): %s", left)
            log.info("Concepts définissant des propriétés "
                     "de la classification de droite: %s", right)
            log.info("Concepts définissant des propriétés "
                     "des deux classifications: %s", both)

        return pd.Series(stats_lattice)

    def draw_nodes(self, figsize=(24, 12), scale=False, colormap="Blues",
                   point=None, **kwargs):
        """
        Draw the root node using :class:`qumin.clustering.node.Node`'s drawing function.

        Arguments:
            figsize (tuple): Size of the figure.
            scale (bool): Whether to display a colorbar. Defaults to False.
            colormap (str): name of the matplotlib colormap used for the colorbar.
            point: if truthy, nodes are drawn with the marker settings
                computed by this method; otherwise None is passed as `point`.
            **kwargs: All keyword arguments will be passed to
                `qumin.clustering.node.Node.draw()`. They override the defaults
                set here.

        Returns:
            tuple: the figure, then the two values returned by `Node.draw()`
            (lines and ordered nodes).
        """
        # Shorthand
        node = self.nodes

        # Display defaults
        mini, maxi = self._pat_range()
        # plt.get_cmap replaces matplotlib.cm.get_cmap, removed in Matplotlib 3.9.
        cm = plt.get_cmap(colormap)
        cnorm = matplotlib.colors.Normalize(vmin=mini, vmax=maxi)
        smap = matplotlib.cm.ScalarMappable(norm=cnorm, cmap=cm)
        colors = ['#444444', '#aaaaaa']

        # Set rendering functions that will be passed downstream.
        def custom_zorder(node):
            return len(node.attributes["common"])

        def leaves_label(node):
            n = " ({})".format(str(node.attributes.get("size", 1)))
            return ", ".join(node.labels) + n

        def point_function(node):
            default = {"color": colors[0],
                       "edgecolors": colors[0],
                       "zorder": 3,
                       "marker": matplotlib.markers.MarkerStyle(marker="o")}
            if self.comp:
                common = node.attributes["common"]
                nb_comp = sum(att.startswith(self.comp) for att in common)
                if nb_comp > 0:
                    if nb_comp < len(common):
                        # bicolor marker
                        default["facecolor"] = colors[1]
                        default["linewidth"] = 3
                        del default["color"]
                    else:
                        # marker in color 2
                        default["facecolor"] = colors[1]
                        default["edgecolor"] = colors[1]
                        del default["color"]

            # Marker area grows with the node size relative to the root size.
            default["s"] = 20 + ((node.attributes.get("size", 0) + 1) / (
                    self.nodes.attributes.get("size", 0) + 1)) * 100
            node.attributes["point_settings"] = default
            return default

        def default_edge_attr(node, child):
            return {"color": colors[0], "zorder": custom_zorder(node)}

        # Send settings and draw.
        params = dict(leavesfunc=leaves_label,
                      nodefunc=lambda n: "",
                      edge_attributes=default_edge_attr,
                      point=point_function if point else None,
                      horizontal=False,
                      square=False,
                      layout="qumin",
                      )
        params.update(kwargs)

        fig = plt.figure(figsize=figsize)  # for export: 12,6
        lines, ordered_nodes = node.draw(**params)

        if scale:
            # Separate name: `colors` is still read by the closures above.
            scale_colors = [smap.to_rgba(i) for i in range(mini, maxi)]
            smap.set_array(scale_colors)
            # The mappable is attached to no Axes, so the Axes to steal space
            # from is given explicitly (required by recent Matplotlib).
            plt.colorbar(smap, ax=plt.gca(), norm=cnorm)

        return fig, lines, ordered_nodes

    def draw(self, filename, title="Lattice", **kwargs):
        """
        Wrapper around :method:`ICLattice.draw_nodes`.

        Arguments:
            filename (str): filename of the exported plot.
            title (str): title of the plot of the file. No title if None.
            **kwargs: keyword arguments are passed to `ICLattice.draw_nodes()`.
        """
        fig, lines, ordered_nodes = self.draw_nodes(**kwargs)
        if title is not None:
            fig.suptitle(title)
        log.info("Drawing figure to: %s", filename)

        # Hide the axes and remove margins before saving.
        axis = plt.gca()
        axis.set_axis_off()
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        axis.xaxis.set_major_locator(plt.NullLocator())
        axis.yaxis.set_major_locator(plt.NullLocator())
        plt.savefig(filename, bbox_inches='tight', pad_inches=0)

    def to_html(self, filename, node_formatter=_node_to_label_IC, **kwargs):
        """Draw an interactive lattice using :class:`qumin.clustering.node.Node`'s
        drawing function and mpld3.

        Arguments:
            filename (str): filename of the exported html page.
            node_formatter (Callable): custom function to format nodes.
                Called as `node_formatter(node, comp=self.comp)`.
            **kwargs: keyword arguments are passed to `ICLattice.draw_nodes()`.
        """
        log.info("Exporting HTML figure to: %s", filename)
        css = _load_external_text("table.css")
        fig, lines, ordered_nodes = self.draw_nodes(figsize=(20, 9),
                                                    n=4,
                                                    scale=False,
                                                    point={"s": 50},  # TODO: Something wrong here
                                                    interactive=True,
                                                    **kwargs)

        artists = fig.axes[0].get_children()
        paths = list(filter(
            lambda obj: type(obj) is matplotlib.collections.PathCollection,
            artists))
        # Keep only the lines with more than one point.
        lines = list(filter(
            lambda obj: type(obj) is matplotlib.lines.Line2D
                        and len(obj.get_xdata(orig=True)) > 1,
            artists))

        # Attach one HTML tooltip per point, and index points by coordinates.
        points_ids = []
        corrd_to_points = {}
        for p, v in zip(paths, ordered_nodes):
            x, y = v.attributes["_x"], v.attributes["_y"]
            p_id = mpld3.utils.get_id(p)  # ,"pts")
            corrd_to_points[(x, y)] = p_id
            label = node_formatter(v, comp=self.comp)
            points_ids.append(p_id)
            tooltip = mpld3.plugins.PointHTMLTooltip(p, [label], css=css)
            mpld3.plugins.connect(fig, tooltip)

        # For each point, collect the ids of the artists below it.
        # Lines are processed by increasing lowest y coordinate.
        point_to_artists = defaultdict(set)
        lines = sorted(lines, key=lambda line: min(line.get_ydata()))
        for line in lines:
            parent, child = zip(*line.get_data(orig=True))
            childp = corrd_to_points[tuple(child)]
            parentp = corrd_to_points[tuple(parent)]
            if 0 in child:
                point_to_artists[childp] = set()
            point_to_artists[parentp].add(childp)
            point_to_artists[parentp].add(mpld3.utils.get_id(line))
            point_to_artists[parentp].update(point_to_artists[childp])

        point_to_artists = {p: list(point_to_artists[p]) for p in point_to_artists}
        # root = corrd_to_points[(node.attributes["_x"],node.attributes["_y"])]
        mpld3.plugins.connect(fig, _HighlightSubTrees(points_ids, dict(point_to_artists)))
        mpld3.save_html(fig, str(filename), template_type="simple")
if mpld3 is not None:
    class _HighlightSubTrees(mpld3.plugins.PluginBase):
        """A plugin to highlight lines on hover"""

        # Javascript of the plugin, read from a file next to this module.
        JAVASCRIPT = _load_external_text("HighlightSubTrees.js")

        def __init__(self, points_ids, point_to_artists):
            """
            Arguments:
                points_ids: stored under the "points_ids" key of the plugin settings.
                point_to_artists: stored under the "points_to_artist" key
                    of the plugin settings.
            """
            self.css_ = """ path.unfocus {-webkit-transition: all 0.5s ease; -moz-transition: all 0.5s ease; -o-transition: all 0.5s ease; transition: all 0.5s ease; fill-opacity: 0.1 !important; stroke-opacity:0.1 !important;} .mpld3-ygrid, .mpld3-xgrid, .mpld3-yaxis, .mpld3-xaxis {display: none !important} """

            settings = {"type": "highlightsubtrees"}
            settings["points_ids"] = points_ids
            settings["points_to_artist"] = point_to_artists
            settings["min"] = 0.2
            settings["max"] = 1
            self.dict_ = settings
else:
    def to_html_disabled(*args, **kwargs):
        """Replacement for `ICLattice.to_html` when mpld3 is missing: only logs a warning."""
        log.warning("mpld3 could not be imported. No html export possible.")


    ICLattice.to_html = to_html_disabled