# coding: utf-8
# Distributed under the terms of the MIT License.
""" This submodule implements the base PhaseDiagram creator that interfaces
with QueryConvexHull and EnsembleHull.
"""
from traceback import print_exc
import bisect
import scipy.spatial
import numpy as np
from matador.utils.hull_utils import (
barycentric2cart,
vertices2plane,
vertices2line,
FakeHull,
is_point_in_triangle,
)
from matador.utils.chem_utils import get_formula_from_stoich
from matador.utils.cursor_utils import (
get_array_from_cursor,
display_results,
set_cursor_from_array,
)
EPS = 1e-12
[docs]class PhaseDiagram:
"""This class encapsulates the actual phase data, e.g. the actual
energy and compositions found to be stable.
Attributes:
structures (numpy.ndarray): the array passed to init used to
make the hull, with the first (num_species-1) columns
containing normalised concentrations, and the final column
containing formation energy.
convex_hull (scipy.spatial.ConvexHull): the actual convex hull
returned by SciPy.
formation_key (list): index/key specification of formation energy
per atom from top level of each document.
"""
def __init__(self, cursor, formation_key, dimension):
"""Compute the convex hull of data passed, to retrieve hull
distances and thus stable structures.
Parameters:
cursor (list[dict]): list of matador documents to make
phase diagram from.
formation_key (str or list): location of the formation energy
inside each document, either a single key or iterable of
keys to use with `recursive_get`.
"""
self._dimension = dimension
self.cursor = cursor
self.formation_key = formation_key
structures = np.hstack(
(
get_array_from_cursor(cursor, "concentration").reshape(
len(cursor), dimension - 1
),
get_array_from_cursor(cursor, self.formation_key).reshape(
len(cursor), 1
),
)
)
# define self._structure_slice as the filtered array of points actually used to create the convex hull
# which can include/exclude points from the passed structures. This array is the one indexed by
# vertices/simplices in ConvexHull
if self._dimension == 3:
# add a point "above" the hull
# for simple removal of extraneous vertices (e.g. top of 2D hull)
dummy_point = [0.333, 0.333, 1e5]
# if ternary, use all structures, not just those with negative eform for compatibility reasons
self._structure_slice = np.vstack((structures, dummy_point))
else:
# filter out those with positive formation energy, to reduce expense computing hull
self._structure_slice = structures[np.where(structures[:, -1] <= 0 + EPS)]
# filter out "duplicates" in _structure_slice
# this prevents breakages if no structures are on the hull and chempots are duplicated
# but it might be faster to hardcode this case individually
self._structure_slice = np.unique(self._structure_slice, axis=0)
# if we only have the chempots (or worse) with negative formation energy, don't even make the hull
if len(self._structure_slice) <= dimension:
if len(self._structure_slice) < dimension:
raise RuntimeError(
"No chemical potentials on hull... either mysterious use of custom chempots, or worry!"
)
self.convex_hull = FakeHull()
else:
try:
self.convex_hull = scipy.spatial.ConvexHull(self._structure_slice)
except scipy.spatial.qhull.QhullError:
print(self._structure_slice)
print("Error with QHull, plotting formation energies only...")
print_exc()
self.convex_hull = FakeHull()
# remove vertices that have positive formation energy
filtered_vertices = [
vertex
for vertex in self.convex_hull.vertices
if self._structure_slice[vertex, -1] <= 0 + EPS
]
bad_simplices = set()
for ind, simplex in enumerate(self.convex_hull.simplices):
for vertex in simplex:
if vertex not in filtered_vertices:
bad_simplices.add(ind)
filtered_simplices = [
simplex
for ind, simplex in enumerate(self.convex_hull.simplices)
if ind not in bad_simplices
]
self.convex_hull = FakeHull()
self.convex_hull.points = self._structure_slice
self.convex_hull.vertices = list(filtered_vertices)
self.convex_hull.simplices = list(filtered_simplices)
self.hull_dist = self.get_hull_distances(structures, precompute=True)
set_cursor_from_array(self.cursor, self.hull_dist, "hull_distance")
self.structures = structures
self.stable_structures = [
doc for doc in self.cursor if doc["hull_distance"] < EPS
]
def __str__(self):
"""Print underlying phase diagram."""
return display_results(
self.cursor,
hull=True,
colour=False,
energy_key=self.formation_key,
sort=False,
return_str=True,
)
[docs] def get_hull_distances(self, structures, precompute=False, **kwargs):
"""Returns array of distances to pre-computed binary or ternary
hull, from array containing concentrations and energies.
Parameters:
structures (numpy.ndarray): N x n array of concentrations and
enthalpies for N structures, with up to 2 columns of
concentrations and the last column containing the
structure's formation enthalpy.
Keyword arguments:
precompute (bool): whether or not to bootstrap hull
distances from previously computed values at the same
stoichiometry.
Returns:
numpy.ndarray: N-dim array storing distances to
the hull for N structures,
"""
if precompute:
# dict with formula keys, containing tuple of pre-computed enthalpy/atom and hull distance
cached_formula_dists = dict()
cache_hits = 0
cache_misses = 0
if isinstance(structures, list):
structures = np.asarray(structures)
# if only chem pots on hull, dist = energy
if len(self._structure_slice) == self._dimension:
hull_dist = np.ones((len(structures)))
hull_dist = structures[:, -1]
# if binary hull, do binary search
elif self._dimension == 2:
tie_line_comp = self._structure_slice[self.convex_hull.vertices, 0]
tie_line_energy = self._structure_slice[self.convex_hull.vertices, -1]
tie_line_comp = np.asarray(tie_line_comp)
tie_line_energy = tie_line_energy[np.argsort(tie_line_comp)]
tie_line_comp = tie_line_comp[np.argsort(tie_line_comp)]
hull_dist = np.empty((len(structures)))
hull_dist.fill(np.nan)
if precompute:
for ind, _ in enumerate(structures):
formula = get_formula_from_stoich(
self.cursor[ind]["stoichiometry"], sort=True, tex=False
)
if formula in cached_formula_dists:
hull_dist[ind] = (
structures[ind, -1]
- cached_formula_dists[formula][0]
+ cached_formula_dists[formula][1]
)
cache_hits += 1
else:
i = bisect.bisect_left(tie_line_comp, structures[ind, 0])
gradient, intercept = vertices2line(
[
[tie_line_comp[i - 1], tie_line_energy[i - 1]],
[tie_line_comp[i], tie_line_energy[i]],
]
)
# calculate hull_dist
hull_dist[ind] = structures[ind, -1] - (
gradient * structures[ind, 0] + intercept
)
cached_formula_dists[formula] = (
structures[ind, -1],
hull_dist[ind],
)
cache_misses += 1
else:
for ind, _ in enumerate(structures):
i = bisect.bisect_left(tie_line_comp, structures[ind, 0])
gradient, intercept = vertices2line(
[
[tie_line_comp[i - 1], tie_line_energy[i - 1]],
[tie_line_comp[i], tie_line_energy[i]],
]
)
# calculate hull_dist
hull_dist[ind] = structures[ind, -1] - (
gradient * structures[ind, 0] + intercept
)
# if ternary, use barycentric coords
elif self._dimension == 3:
# loop through structures and find which plane they correspond to
# using barycentric coordinates, if a formula has already been
# computed then calculate delta relative to that and skip
self.convex_hull.planes = [
[self._structure_slice[vertex] for vertex in simplex]
for simplex in self.convex_hull.simplices
]
structures_finished = [False] * len(structures)
hull_dist = np.empty(len(structures))
hull_dist.fill(np.nan)
cart_planes_inv = []
planes_height_fn = []
for ind, plane in enumerate(self.convex_hull.planes):
cart_planes = barycentric2cart(plane).T
cart_planes[-1, :] = 1
# if projection of triangle in 2D is a line, do binary search
if np.linalg.det(cart_planes) == 0:
cart_planes_inv.append(None)
planes_height_fn.append(None)
else:
cart_planes_inv.append(np.linalg.inv(cart_planes))
planes_height_fn.append(vertices2plane(plane))
for idx, structure in enumerate(structures):
for ind, plane in enumerate(self.convex_hull.planes):
if cart_planes_inv[ind] is None:
continue
if (
precompute
and get_formula_from_stoich(
self.cursor[idx]["stoichiometry"], sort=True, tex=False
)
in cached_formula_dists
):
formula = get_formula_from_stoich(
self.cursor[idx]["stoichiometry"], sort=True, tex=False
)
if formula in cached_formula_dists:
cache_hits += 1
hull_dist[idx] = (
structures[idx, -1]
- cached_formula_dists[formula][0]
+ cached_formula_dists[formula][1]
)
structures_finished[idx] = True
elif is_point_in_triangle(
structure, cart_planes_inv[ind], preprocessed_triangle=True
):
structures_finished[idx] = True
hull_dist[idx] = planes_height_fn[ind](structure)
if precompute:
cached_formula_dists[
get_formula_from_stoich(
self.cursor[idx]["stoichiometry"],
sort=True,
tex=False,
)
] = (structure[-1], hull_dist[idx])
cache_misses += 1
break
# mask values very close to 0 with 0
hull_dist[np.where(np.abs(hull_dist) < EPS)] = 0
failed_structures = []
for ind, structure in enumerate(structures_finished):
if not structure:
failed_structures.append(ind)
if failed_structures:
raise RuntimeError(
"There were issues calculating the hull distance for {} structures.".format(
len(failed_structures)
)
)
# otherwise, set to zero until proper N-d distance can be implemented
else:
raise NotImplementedError(
"Unable to compute {dimension}-dimensional hull distances (yet) "
"consider breaking your phase diagram into a pseudo-ternary or pseudo-binary system."
)
if np.isnan(hull_dist).any():
raise RuntimeError(
f"Some hull distances failed, found NaNs at {np.isnan(hull_dist, where=True)}"
)
return hull_dist