#!/usr/bin/python3
# -*- coding: utf-8 -*-
import matplotlib
from matplotlib import pyplot as plt
from collections import defaultdict
from os.path import join, dirname
from ..clustering.node import Node
from ..clustering import find_microclasses
import logging
# Force the non-interactive "agg" backend, even if another backend was already selected.
matplotlib.use("agg", force=True)
log = logging.getLogger()

# mpld3 is an optional dependency: without it, HTML export is disabled
# (``mpld3`` is set to None and checked at the bottom of this module).
# ``except Exception`` keeps the import best-effort while letting
# KeyboardInterrupt / SystemExit propagate, unlike a bare ``except:``.
try:
    import mpld3
except Exception:
    mpld3 = None
    log.warning("Warning: mpld3 could not be imported. No html export possible.")
from concepts import Context
import pandas as pd
from tqdm import tqdm
# Global matplotlib defaults applied at import time: axes without face or
# edge colour, a fully transparent grid, and thin lines.
axes = dict(facecolor='None', edgecolor='None', linewidth=0)
grid = dict(alpha=0, linewidth=0)
lines = dict(linewidth=0.5)

matplotlib.rc('lines', **lines)
matplotlib.rc('axes', **axes)
matplotlib.rc('grid', **grid)
def _load_external_text(filename):
    """
    Read a text resource stored next to this module.

    Arguments:
        filename (str): name of the file, relative to this module's directory.

    Returns:
        str: the lines of the file joined with a newline.
    """
    path = join(dirname(__file__), filename)
    # Use a context manager so the file handle is closed deterministically.
    with open(path, "r", encoding="utf-8") as handle:
        # NOTE: readlines() keeps each line terminator, so joining with "\n"
        # yields an extra blank line between source lines. This is kept as-is
        # so that the returned text does not change.
        return "\n".join(handle.readlines())
def _node_to_label_IC(node, comp=None):
    """
    Default function to render node labels on inflection class lattice.

    The label is an HTML table: a header row showing an example object and
    the node size, followed by one row per property found in the node's
    "common" attribute.

    Arguments:
        node (qumin.clustering.node.Node): the node whose label we render.
        comp (str): optional attribute prefix. Properties whose attribute
            name starts with this prefix are rendered with the prefix removed,
            in rows of class ``alternate``.

    Returns:
        str: the HTML table, or an empty string if the node
        has no "common" attribute.
    """
    # Nothing to render: return before touching the labels, so that a node
    # without "common" never fails on an empty label list.
    if "common" not in node.attributes:
        return ""

    objs = node.attributes.get("objects", node.labels)
    if not objs:
        objs = node.labels
    size = str(node.attributes.get("size", "unknown nb of")) + " lexèmes"
    header = "<table><thead><th colspan=2> Ex:" + objs[0] + ", " + size + " </th></thead>"

    line = "<tr><th>{}</th><td>{}</td></tr>"
    line2 = "<tr class='alternate'><th>{}</th><td>{}</td></tr>"
    line_no_head = "<tr><td colspan=2>{}</td></tr>"

    rows = []
    for properties in node.attributes["common"]:
        for prop in properties.split(";"):
            if "=" not in prop:
                rows.append(line_no_head.format(prop))
                continue
            # Split on the first "=" only: the value itself may contain "=".
            attr, val = prop.split("=", 1)
            val = val.strip("<>")
            # A value whose first "/"-separated part is "⇌" is shown as "syncretic".
            if val.split("/")[0].strip() == "⇌":
                val = "syncretic"
            if comp and attr.startswith(comp):
                rows.append(line2.format(attr[len(comp):], val))
            else:
                rows.append(line.format(attr, val))

    common = "".join(rows) or "<tr><td colspan=2>Empty</td></tr>"
    return header + common + "</table>"
def context_from_pandas(df):
    """
    Build a :class:`concepts.Context` from a onehot encoding
    stored as a :class:`pandas.DataFrame`.

    Follows this recipe:
    https://concepts.readthedocs.io/en/stable/advanced.html#context-from-pandas-dataframe

    Arguments:
        df (pandas.DataFrame): onehot encoding of objects (rows) and
            attributes (columns). Filled with 1/0, True/False or "X"/"".

    Returns:
        concepts.Context: a Context representing the input data.
    """
    log.debug('Creating a Context object from the onehot encoding...')
    # Row labels are the objects, column labels are the properties.
    objects = df.index.tolist()
    properties = list(df)
    # Missing cells count as False; every other cell is coerced to bool.
    incidence = df.fillna(False).astype(bool)
    bools = list(incidence.itertuples(index=False, name=None))
    return Context(objects, properties, bools)
class ICLattice(object):
    """
    Inflection class Lattice representation.

    This is a wrapper around :class:`concepts.Context`.
    It builds on concepts' lattice class, but adopts a custom representation of lattices,
    using Qumin's :class:`qumin.clustering.node.Node` class.

    Attributes:
        context (concepts.Context): stores the underlying Context.
        lattice (concepts.lattices.Lattice): shortcut to the underlying lattice.
        nodes (qumin.clustering.node.Node): Qumin's lattice is represented as a root node.
            Each node contains its children. This is the lattice we will draw.
        comp (str): the ``comp_prefix`` given at construction (may be None).
            Properties starting with this prefix are counted separately in
            :meth:`stats` and drawn differently in :meth:`draw_nodes`.
        leaves (dict): dictionary of object properties (typically microclasses). The format is:
            {<object label>: {'size': <size>, 'other': <prop>}}. If the size argument is provided,
            it will be used to render the nodes.
    """

    def __init__(self, onehot_encoding, leaves,
                 annotate=None, comp_prefix=None,
                 aoc=False, keep_names=False):
        """
        Arguments:
            onehot_encoding (pandas.DataFrame): onehot encoding of objects (rows) and
                attributes (columns). Filled with 1/0, True/False, "X"/"".
            leaves (dict): Dictionary of microclasses.
            annotate (dict): Extra annotations to add on lattice.
                Of the form: {<object label>: <annotation>}. Each annotation
                is unpacked as keyword arguments when the node is created,
                so it must be a mapping.
            comp_prefix (str): prefix identifying a second set of properties, if any.
            aoc (bool): Whether to limit ourselves to Attribute or Object Concepts.
            keep_names: ? TODO, this option is apparently not implemented
                (it is not used by this constructor).
        """
        self.comp = comp_prefix  # whether there are two sets of properties.

        log.info("Converting the onehot encoding to a Context object...")
        self.context = context_from_pandas(onehot_encoding)

        log.info("Converting the Context object to a Lattice object... "
                 "If this step lasts too long, consider using a sample of lexemes/cells.")
        self.lattice = self.context.lattice
        self.leaves = leaves

        if annotate:
            for label in annotate:
                # Labels which are not objects of the lattice are ignored.
                if label in self.lattice.supremum.extent:
                    self.lattice[[label]]._extra_qumin_annotation = annotate[label]

        log.info("Converting the Lattice object to a Qumin node representation...")
        if aoc:
            self.nodes = self._lattice_to_nodeAOC()
        else:
            self.nodes = self._lattice_to_node()

        # Set display
        font = {'family': 'DejaVu Sans',
                'weight': 'normal',
                'size': 9}
        matplotlib.rc('font', **font)

    def _pat_range(self):
        """
        Compute the range of the number of properties per concept.

        Both bounds start at 1, so the minimum is at most 1 and the maximum at least 1.

        Returns:
            tuple: (mini, maxi) over ``len(concept.properties)`` for all concepts.
        """
        mini = maxi = 1
        for extent, intent in self.lattice:
            nb_props = len(self.lattice[intent].properties)
            if nb_props < mini:
                mini = nb_props
            elif nb_props > maxi:
                maxi = nb_props
        return mini, maxi

    def _make_nodes(self, concepts, prb):
        """
        Create nodes from concepts.

        Arguments:
            concepts: iterable of lattice concepts to convert.
            prb: tqdm progress bar status. Updated once per concept.

        Returns:
            dict: A dictionary of nodes, where keys are concept extents.
        """
        nodes = {}
        for concept in concepts:
            extent = concept.extent
            # The size of a node sums the sizes of the leaves in its extent;
            # labels without a known size are skipped.
            size = sum(
                self.leaves[label]['size'] for label in extent
                if label in self.leaves and 'size' in self.leaves[label])
            # Annotations are attached to concepts in __init__, if any.
            annotations = getattr(concept, '_extra_qumin_annotation', {})
            nodes[extent] = Node(extent, intent=concept.intent,
                                 size=size, common=concept.properties,
                                 objects=concept.objects,
                                 macroclass=False, **annotations)
            prb.update(1)
        return nodes

    def _lattice_to_node(self, keep_infimum=False):
        """
        Converts the lattice to a :class:`qumin.clustering.node.Node`.
        Stores only the root node.

        Arguments:
            keep_infimum (bool): Whether to keep infimum, that is the lower node.
                Defaults to False. Only concepts with an empty extent are dropped.

        Returns:
            qumin.clustering.node.Node: The root node (contains the full hierarchy).
        """
        concepts = sorted([v for v in self.lattice if keep_infimum or v.extent != ()],
                          key=lambda x: len(x.extent), reverse=True)

        # One progress step per concept for node creation, one for linking.
        with tqdm(total=len(concepts) * 2) as prb:
            # Creating nodes
            nodes = self._make_nodes(concepts, prb)

            # Creating arcs
            for vertice in concepts:
                for daughter in vertice.lower_neighbors:
                    if keep_infimum or daughter.extent != ():
                        nodes[vertice.extent].children.append(nodes[daughter.extent])
                prb.update(1)

        return nodes[self.lattice.supremum.extent]

    def _lattice_to_nodeAOC(self):
        """
        Converts the lattice to a :class:`qumin.clustering.node.Node`.
        Stores only the root node. Do not keep concepts which are neither
        objects nor attributes.

        Returns:
            qumin.clustering.node.Node: The root node (contains the full hierarchy).
        """
        supremum = self.lattice.supremum
        infimum = self.lattice.infimum

        def concept_sorter(concept):
            return (len(concept.extent), -len(list(concept.upset())))

        # select concept in AOC
        aoc = {c for c in self.lattice
               if (c == supremum or c.properties or c.objects) and c != infimum}

        # Make links (long way)
        concepts = sorted(aoc, key=concept_sorter, reverse=True)
        downsets = {c: set(c.downset()) for c in self.lattice}
        children = defaultdict(set)

        # Three progress steps per concept: descendants, node creation, linking.
        with tqdm(total=len(concepts) * 3) as prb:
            # Compute descendants in aoc poset
            for concept in concepts:
                span = set()  # concepts already reachable through a chosen child
                candidates = sorted((downsets[concept] & aoc) - {concept},
                                    key=concept_sorter, reverse=True)
                for candidate in candidates:
                    if candidate not in span:
                        children[concept.extent].add(candidate)
                        span.update(downsets[candidate])
                prb.update(1)

            # Make and link Node objects
            # Creating nodes
            nodes = self._make_nodes(concepts, prb)

            # Creating arcs
            for vertice in concepts:
                for daughter in children[vertice.extent]:
                    nodes[vertice.extent].children.append(nodes[daughter.extent])
                prb.update(1)

        return nodes[supremum.extent]

    @classmethod
    def from_patterns(cls, patterns, paradigms, **kwargs):
        """
        Creates a Lattice from a patterns object.

        Arguments:
            patterns (representations.patternstore.PatternStore): the patterns computed for the paradigms.
            paradigms (qumin.representations.paradigms.Paradigms): the paradigms.
            **kwargs: all other keyword arguments are passed to the constructor.

        Returns:
            ICLattice: the lattice built on the microclasses' incidence table.
        """
        log.info("Building the lattice...")
        microclasses = find_microclasses(paradigms, patterns)
        # Keep only the size of each microclass, keyed by its label.
        microclasses = {lex: {'size': len(microclass)}
                        for lex, microclass in microclasses.items()}
        onehot_encoding = patterns.incidence_table(lexemes=list(microclasses))
        return cls(onehot_encoding, microclasses, **kwargs)

    def parents(self, identifier):
        """Return all direct parents of a node which corresponds to the identifier."""
        return list(self.lattice[identifier].upper_neighbors)

    def ancestors(self, identifier):
        """Return all ancestors of a node which corresponds to the identifier."""
        concept = self.lattice[identifier]
        return [c for c in concept.upset() if c != concept]

    def stats(self):
        """
        Returns some stats about the classification size and shape.
        Based on self.nodes, not self.lattice: stats are different depending on AOC/not AOC.

        If ``self.comp`` is set, also logs how many nodes have properties of
        the prefixed classification only, of the other one only, or of both.

        Returns:
            pandas.Series: with the keys "Microclasses", "Base", "Height",
            "Degree" and "Nodes".
        """

        def height(node):
            # Leaves have height 1; other heights are cached on the node.
            if not node.children:
                node.attributes["height"] = 1
                return 1
            if "height" not in node.attributes:
                node.attributes["height"] = max(height(child) for child in node.children) + 1
            return node.attributes["height"]

        all_nodes = list(self.nodes)
        nb_arcs = sum(len(x.children) for x in all_nodes)
        nb_nodes = len(all_nodes)

        # -2 because supremum and infimum are ignored. With exactly two nodes
        # the denominator is zero: report NaN rather than raising.
        degree = nb_arcs / (nb_nodes - 2) if nb_nodes != 2 else float("nan")

        stats_lattice = {"Microclasses": len(self.leaves),
                         "Base": len(self.lattice.atoms),
                         "Height": height(self.nodes),
                         "Degree": degree,
                         "Nodes": nb_nodes - 1  # -1 because infimum is ignored
                         }

        if self.comp:
            left = 0
            right = 0
            both = 0
            for node in all_nodes:
                common = node.attributes["common"]
                cmp = sum(att.startswith(self.comp) for att in common)
                if cmp > 0:
                    if cmp < len(common):
                        both += 1
                    else:
                        left += 1
                else:
                    right += 1
            log.info("Concepts définissant des propriétés "
                     "de la classification de gauche (-b): %s", left)
            log.info("Concepts définissant des propriétés "
                     "de la classification de droite: %s", right)
            log.info("Concepts définissant des propriétés "
                     "des deux classifications: %s", both)

        return pd.Series(stats_lattice)

    def draw_nodes(self, figsize=(24, 12), scale=False,
                   colormap="Blues", point=None,
                   **kwargs):
        """
        Draw the root node using :class:`qumin.clustering.node.Node`'s drawing function.

        Arguments:
            figsize (tuple): Size of the figure.
            scale (bool): Whether to display a colorbar. Defaults to False.
            colormap (str): name of the matplotlib colormap used for the colorbar.
            point: if truthy, nodes are drawn with per-node marker settings
                computed here; otherwise ``point=None`` is passed downstream.
            **kwargs: All keyword arguments will be passed to `qumin.clustering.node.Node.draw()`.
                They override the defaults set here.

        Returns:
            tuple: the figure, followed by the two values returned
            by `Node.draw()` (lines and ordered nodes).
        """
        # Shorthand
        node = self.nodes

        # Display defaults
        mini, maxi = self._pat_range()
        # matplotlib.cm.get_cmap was removed in Matplotlib 3.9;
        # pyplot.get_cmap is available in old and new versions.
        cm = plt.get_cmap(colormap)
        cnorm = matplotlib.colors.Normalize(vmin=mini, vmax=maxi)
        smap = matplotlib.cm.ScalarMappable(norm=cnorm, cmap=cm)
        colors = ['#444444', '#aaaaaa']

        # Set rendering functions that will be passed downstream.
        def custom_zorder(node):
            return len(node.attributes["common"])

        def leaves_label(node):
            n = " ({})".format(str(node.attributes.get("size", 1)))
            return ", ".join(node.labels) + n

        def point_function(node):
            default = {"color": colors[0],
                       "edgecolors": colors[0],
                       "zorder": 3,
                       "marker": matplotlib.markers.MarkerStyle(marker="o")}
            if self.comp:
                common = node.attributes["common"]
                cmp = sum(att.startswith(self.comp) for att in common)
                if cmp > 0:
                    if cmp < len(common):  # bicolor marker
                        default["facecolor"] = colors[1]
                        default["linewidth"] = 3
                        del default["color"]
                    else:  # marker in color 2
                        default["facecolor"] = colors[1]
                        default["edgecolor"] = colors[1]
                        del default["color"]

            # Marker size grows with the node's size relative to the root's.
            default["s"] = 20 + ((node.attributes.get("size", 0) + 1) / (
                    self.nodes.attributes.get("size", 0) + 1)) * 100
            node.attributes["point_settings"] = default
            return default

        def default_edge_attr(node, child):
            return {"color": colors[0], "zorder": custom_zorder(node)}

        # Send settings and draw.
        params = dict(leavesfunc=leaves_label,
                      nodefunc=lambda n: "",
                      edge_attributes=default_edge_attr,
                      point=point_function if point else None,
                      horizontal=False, square=False, layout="qumin",
                      )
        params.update(kwargs)

        fig = plt.figure(figsize=figsize)  # for export: 12,6
        drawn_lines, ordered_nodes = node.draw(**params)

        if scale:
            scale_colors = [smap.to_rgba(i) for i in range(mini, maxi)]
            smap.set_array(scale_colors)
            # The mappable is not attached to any Axes: recent Matplotlib
            # versions require the Axes to be given explicitly.
            plt.colorbar(smap, norm=cnorm, ax=plt.gca())
        return fig, drawn_lines, ordered_nodes

    def draw(self, filename, title="Lattice", **kwargs):
        """
        Wrapper around :meth:`ICLattice.draw_nodes`. Saves the plot to a file.

        Arguments:
            filename (str): filename of the exported plot.
            title (str): title of the plot of the file. No title is set if None.
            **kwargs: keyword arguments are passed to `ICLattice.draw_nodes()`.
        """
        fig, lines, ordered_nodes = self.draw_nodes(**kwargs)

        if title is not None:
            fig.suptitle(title)

        log.info("Drawing figure to: {}".format(filename))

        # Hide the axis and remove all margins before exporting.
        axis = plt.gca()
        axis.set_axis_off()
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0,
                            hspace=0, wspace=0)
        plt.margins(0, 0)
        axis.xaxis.set_major_locator(plt.NullLocator())
        axis.yaxis.set_major_locator(plt.NullLocator())
        plt.savefig(filename, bbox_inches='tight', pad_inches=0)

    def to_html(self, filename, node_formatter=_node_to_label_IC, **kwargs):
        """Draw an interactive lattice using :class:`qumin.clustering.node.Node`'s drawing function and mpld3.

        Arguments:
            filename (str): filename of the exported html page.
            node_formatter (Callable): custom function to format nodes.
                Called as ``node_formatter(node, comp=self.comp)``; its result
                is used as the HTML tooltip of the node.
            **kwargs: keyword arguments are passed to `ICLattice.draw_nodes()`.
        """
        log.info("Exporting HTML figure to: {}".format(filename))
        css = _load_external_text("table.css")
        fig, _, ordered_nodes = self.draw_nodes(figsize=(20, 9),
                                                n=4,
                                                scale=False,
                                                point={"s": 50},
                                                # TODO: Something wrong here
                                                interactive=True, **kwargs)

        # Recover the drawn artists from the axes: point collections,
        # and lines with more than one point.
        artists = fig.axes[0].get_children()
        paths = [obj for obj in artists
                 if type(obj) is matplotlib.collections.PathCollection]
        lines = [obj for obj in artists
                 if type(obj) is matplotlib.lines.Line2D
                 and len(obj.get_xdata(orig=True)) > 1]

        points_ids = []
        corrd_to_points = {}  # (x, y) coordinates -> mpld3 id of the point

        # Collections and nodes are paired by position.
        for p, v in zip(paths, ordered_nodes):
            x, y = v.attributes["_x"], v.attributes["_y"]
            p_id = mpld3.utils.get_id(p)
            corrd_to_points[(x, y)] = p_id
            label = node_formatter(v, comp=self.comp)
            points_ids.append(p_id)
            tooltip = mpld3.plugins.PointHTMLTooltip(p, [label], css=css)
            mpld3.plugins.connect(fig, tooltip)

        # For each point, collect the ids of the points and lines below it.
        # Lines are processed by increasing minimal y, so that a child's
        # artists are complete before being merged into its parent's.
        point_to_artists = defaultdict(set)
        for ln in sorted(lines, key=lambda l: min(l.get_ydata())):
            # The two ends of the line, as (x, y) pairs.
            parent, child = zip(*ln.get_data(orig=True))
            childp = corrd_to_points[tuple(child)]
            parentp = corrd_to_points[tuple(parent)]
            # NOTE(review): a 0 coordinate presumably identifies a leaf,
            # whose set of artists is (re)set to empty -- confirm.
            if 0 in child:
                point_to_artists[childp] = set()
            point_to_artists[parentp].add(childp)
            point_to_artists[parentp].add(mpld3.utils.get_id(ln))
            point_to_artists[parentp].update(point_to_artists[childp])

        point_to_artists = {p: list(point_to_artists[p]) for p in point_to_artists}
        mpld3.plugins.connect(fig, _HighlightSubTrees(points_ids, dict(point_to_artists)))
        mpld3.save_html(fig, str(filename), template_type="simple")
if mpld3:
    class _HighlightSubTrees(mpld3.plugins.PluginBase):
        """mpld3 plugin configured with point ids and, for each point, its related artists."""

        # Javascript source of the plugin, shipped next to this module.
        JAVASCRIPT = _load_external_text("HighlightSubTrees.js")

        def __init__(self, points_ids, point_to_artists):
            """
            Arguments:
                points_ids (list): mpld3 ids of the points of the lattice.
                point_to_artists (dict): for each point id, the ids of its related artists.
            """
            # The content of this string is runtime text: it is kept flush left, unchanged.
            self.css_ = """
path.unfocus
{-webkit-transition: all 0.5s ease;
-moz-transition: all 0.5s ease;
-o-transition: all 0.5s ease;
transition: all 0.5s ease;
fill-opacity: 0.1 !important;
stroke-opacity:0.1 !important;}
.mpld3-ygrid, .mpld3-xgrid, .mpld3-yaxis, .mpld3-xaxis
{display: none !important}
"""
            self.dict_ = {
                "type": "highlightsubtrees",
                "points_ids": points_ids,
                "points_to_artist": point_to_artists,
                "min": 0.2,
                "max": 1,
            }
else:
    def to_html_disabled(*args, **kwargs):
        """Replacement for ``ICLattice.to_html`` when mpld3 is missing: only logs a warning."""
        log.warning("mpld3 could not be imported. No html export possible.")

    ICLattice.to_html = to_html_disabled