# coding: utf-8
# Distributed under the terms of the MIT License.
""" This submodule defines classes for computing, combining
and convolving pair distribution functions.
"""
from itertools import combinations_with_replacement
import itertools
import copy
from math import ceil, copysign
import time
import numpy as np
import numba
from matador.utils.cell_utils import frac2cart, cart2volume
from matador.utils.cell_utils import standardize_doc_cell
from matador.utils.chem_utils import get_stoich
from matador.fingerprints.fingerprint import Fingerprint, FingerprintFactory
[docs]class PDF(Fingerprint):
"""This class implements the calculation and comparison of pair
distribution functions.
Attributes:
r_space (ndarray) : 1-D array containing real space grid
gr (ndarray): 1-D array containing total PDF
dr (float): real-space grid spacing in Å
rmax (float): extent of real-space grid in Å
label (str): structure label
elem_gr (dict): dict with pairs of element symbol keys,
containing 1-D arrays of projected PDFs (if calculated)
number_density (float): number density for renormalisation and
comparison with other PDFs
kwargs (dict): arguments used to create PDF
"""
def __init__(self, doc, lazy=False, **kwargs):
"""Initialise parameters and run PDF (unless lazy=True).
Parameters:
doc (dict) : matador document to calculate PDF of
Keyword Arguments:
dr (float) : bin width for PDF (Angstrom) (DEFAULT: 0.01)
gaussian_width (float) : width of Gaussian smearing (Angstrom) (DEFAULT: 0.01)
num_images (int/str) : number of unit cell images include in PDF calculation (DEFAULT: 'auto')
max_num_images (int) : cutoff number of unit cells before crashing (DEFAULT: 50)
rmax (float) : maximum distance cutoff for PDF (Angstrom) (DEFAULT: 15)
projected (bool) : optionally calculate the element-projected PDF
standardize (bool) : standardize cell before calculating PDF
lazy (bool) : if True, calculator is not called when initializing PDF object
timing (bool) : if True, print the total time taken to calculate the PDF
"""
prop_defaults = {
"dr": 0.01,
"gaussian_width": 0.1,
"rmax": 15,
"num_images": "auto",
"style": "smear",
"debug": False,
"timing": False,
"low_mem": False,
"projected": True,
"max_num_images": 50,
"standardize": True,
}
# read and store kwargs
self.kwargs = prop_defaults
self.kwargs.update(
{key: kwargs[key] for key in kwargs if kwargs[key] is not None}
)
# useful data for labelling
structure = copy.deepcopy(doc)
self.spg = structure.get("space_group", "")
if self.kwargs.get("standardize"):
structure = standardize_doc_cell(structure)
self.spg = structure["space_group"]
self.stoichiometry = structure.get(
"stoichiometry", get_stoich(structure["atom_types"])
)
# private variables
self._num_images = self.kwargs.get("num_images")
self._lattice = np.asarray(structure["lattice_cart"])
self._poscart = np.asarray(
frac2cart(structure["lattice_cart"], structure["positions_frac"])
).reshape(-1, 3)
self._types = structure["atom_types"]
self._num_atoms = len(self._poscart)
self._volume = cart2volume(self._lattice)
self._image_vec = None
# public variables
self.rmax = self.kwargs.get("rmax")
self.number_density = self._num_atoms / self._volume
self.dr = self.kwargs.get("dr")
self.r_space = None
self.gr = None
self.elem_gr = None
self.label = None
if self.kwargs.get("label"):
self.label = self.kwargs["label"]
elif "text_id" in structure:
self.label = " ".join(structure["text_id"])
if not lazy:
if self.kwargs.get("timing"):
start = time.time()
self.calc_pdf()
if self.kwargs.get("timing"):
end = time.time()
print("PDF calculated in {:.3f} s".format(end - start))
[docs] def calc_pdf(self):
"""Wrapper to calculate PDF with current settings."""
if self._image_vec is None:
if self.kwargs.get("debug"):
start = time.time()
self._set_image_trans_vectors()
if self.kwargs.get("debug"):
end = time.time()
print("Image vectors length = {}".format(len(self._image_vec)))
print("Set image trans vectors in {} s".format(end - start))
if self.kwargs.get("projected"):
if self.kwargs.get("debug"):
start = time.time()
if self.elem_gr is None:
self._calc_projected_pdf()
if self.kwargs.get("debug"):
end = time.time()
print("Calculated projected PDF {} s".format(end - start))
if self.gr is None:
self._calc_unprojected_pdf()
[docs] def calculate(self):
"""Alias for `self.calc_pdf`."""
self.calc_pdf()
def _calc_distances(self, poscart, poscart_b=None):
"""Calculate PBC distances with cdist.
Parameters:
poscart (numpy.ndarray): array of absolute atomic coordinates.
Keyword Arguments:
poscart_b (numpy.ndarray): absolute positions of a second type of atoms,
where only A-B distances will be calculated.
Returns:
numpy.ndarray: pair d_ij matrix with values > rmax < 1e-12 removed.
"""
from matador.utils.cell_utils import calc_pairwise_distances_pbc
return calc_pairwise_distances_pbc(
poscart,
self._image_vec,
self._lattice,
self.rmax,
filter_zero=True,
compress=True,
poscart_b=poscart_b,
debug=self.kwargs.get("debug"),
)
def _calc_unprojected_pdf(self):
"""Wrapper function to calculate distances and output
a broadened and normalised PDF. Sets self.gr and self.r_space
to G(r) and r respectively.
"""
if self.elem_gr is not None:
self._calc_unprojected_pdf_from_projected()
else:
distances = self._calc_distances(self._poscart)
self.r_space = np.arange(0, self.rmax + self.dr, self.dr)
self.gr = self._get_broadened_normalised_pdf(
distances,
style=self.kwargs.get("style"),
gaussian_width=self.kwargs.get("gaussian_width"),
)
def _calc_projected_pdf(self):
"""Calculate broadened and normalised element-projected PDF of a matador document.
Sets self.elem_gr of e.g. Li2Zn3 to
{
('Li', 'Li'): G_{Li-Li}(r),
('Li', 'Zn'): G_{Li-Zn}(r),
('Zn', 'Zn'): G_{Zn-Zn}(r)
}
"""
# initalise dict of element pairs with correct keys
style = self.kwargs.get("style")
gw = self.kwargs.get("gaussian_width")
self.r_space = np.arange(0, self.rmax + self.dr, self.dr)
elem_gr = dict()
for comb in combinations_with_replacement(set(self._types), 2):
elem_gr[tuple(set(comb))] = np.zeros_like(self.r_space)
for elem_type in elem_gr:
poscart = [
self._poscart[i]
for i in range(len(self._poscart))
if self._types[i] == elem_type[0]
]
poscart_b = (
[
self._poscart[i]
for i in range(len(self._poscart))
if self._types[i] == elem_type[1]
]
if len(elem_type) == 2
else None
)
distances = self._calc_distances(poscart, poscart_b=poscart_b)
elem_gr[elem_type] = len(elem_type) * self._get_broadened_normalised_pdf(
distances, style=style, gaussian_width=gw
)
self.elem_gr = elem_gr
def _calc_unprojected_pdf_from_projected(self):
""" " Reconstruct full PDF from projected."""
self.gr = np.zeros_like(self.r_space)
for key in self.elem_gr:
self.gr += self.elem_gr[key]
@staticmethod
@numba.njit
def _normalize_gr(gr, r_space, dr, num_atoms, number_density):
"""Normalise a broadened PDF, ignoring the Gaussian magnitude."""
norm = 4 * np.pi * (r_space + dr) ** 2 * dr * num_atoms * number_density
return np.divide(gr, norm)
@staticmethod
@numba.njit
def _dist_hist(distances, r_space, dr):
"""Bin the pair-wise distances according to the radial grid.
Parameters:
distances (numpy.ndarray): array of pair-wise distances.
r_space (numpy.ndarray): radial grid
dr (float): bin width.
"""
hist = np.zeros_like(r_space)
for dij in distances:
hist[ceil(dij / dr)] += 1
return hist
def _get_broadened_normalised_pdf(
self, distances, style="smear", gaussian_width=0.1
):
"""Broaden the values provided as distances and return
G(r) and r_space of the normalised PDF.
Parameters:
distances (numpy.ndarray): distances used to calculate PDF
Keyword arguments:
style (str): either 'smear' or 'histogram'
gaussian_width (float): smearing width in Angstrom^1/2
Returns:
gr (np.ndarray): G(r), the PDF of supplied distances
"""
if style == "histogram" or gaussian_width == 0:
gr = self._dist_hist(distances, self.r_space, self.dr)
else:
# otherwise do normal smearing
hist = self._dist_hist(distances, self.r_space, self.dr)
gr = self._broadening_unrolled(hist, self.r_space, gaussian_width)
gr = self._normalize_gr(
gr, self.r_space, self.dr, self._num_atoms, self.number_density
)
return gr
@staticmethod
def _get_image_trans_vectors_auto(lattice, rmax, dr, max_num_images=50):
"""Finds all "images" (integer 3-tuples, supercells) that have
atoms within rmax + dr + longest LV of the parent lattice.
Parameters:
lattice (list): list of lattice vectors.
rmax (float): maximum radial distance.
dr (float): PDF bin width.
Keyword arguments:
max_num_images (int): the greatest integer multiple of LVs to search out to.
Returns:
list: list of int 3-tuples of cells, up to rmax or if max_num_images is exceeded,
just up to 1 cell away.
"""
image_vec = set()
# find longest combination of single LV's
max_trans = 0
_lattice = np.asarray(lattice)
products = list(itertools.product(range(-1, 2), repeat=3))
for prod in products:
trans = prod @ _lattice
length = np.sqrt(np.sum(trans**2))
if length > max_trans:
max_trans = length
unit_vector_lengths = np.sqrt(np.sum(_lattice**2, axis=1))
limits = [
int((dr + rmax + max_trans) / length) for length in unit_vector_lengths
]
for ind, limit in enumerate(limits):
if abs(limit) > max_num_images:
limits[ind] = int(copysign(max_num_images, limit))
products = itertools.product(*(range(-lim, lim + 1) for lim in limits))
for prod in products:
trans = prod @ _lattice
length = np.sqrt(np.sum(trans**2))
if length <= rmax + dr + max_trans:
image_vec.add(prod)
return image_vec
def _set_image_trans_vectors(self):
"""Sets self._image_vec to a list/generator of image translation vectors,
based on self._num_images.
If self._num_images is an integer, create all 3-member integer combinations
up to the value.
If self._num_images is 'auto', create all translation vectors up to length self.rmax.
e.g. self._image_vec = [[1, 0, 1], [0, 1, 1], [1, 1, 1]].
"""
if self._num_images == "auto":
self._image_vec = self._get_image_trans_vectors_auto(
self._lattice,
self.rmax,
self.dr,
max_num_images=self.kwargs.get("max_num_images"),
)
else:
self._image_vec = list(
itertools.product(
range(-self._num_images, self._num_images + 1), repeat=3
)
)
[docs] def get_sim_distance(self, pdf_b, projected=False):
"""Return the similarity between two PDFs."""
return PDFOverlap(self, pdf_b, projected=projected).similarity_distance
[docs] def pdf(self):
"""Return G(r) and the r_space for easy plotting."""
try:
return (self.r_space, self.gr)
except AttributeError:
return (None, None)
[docs] def plot_projected_pdf(self, **kwargs):
"""Plot projected PDFs.
Keyword arguments:
keys (list): plot only a subset of projections, e.g. [('K', )].
other_pdfs (list of PDF): other PDFs to plot.
"""
from matador.plotting.pdf_plotting import plot_projected_pdf
plot_projected_pdf(self, **kwargs)
[docs] def plot(self, **kwargs):
"""Plot PDFs.
Keyword arguments:
other_pdfs (list of PDF): other PDFs to add to the plot.
"""
from matador.plotting.pdf_plotting import plot_pdf
plot_pdf(self, **kwargs)
[docs]class PDFFactory(FingerprintFactory):
"""This class computes PDF objects from a list of structures,
as concurrently as possible. The PDFs are stored under the `pdf`
key inside each structure dict.
Attributes:
nprocs (int): number of concurrent processes.
"""
default_key = "pdf"
fingerprint = PDF
[docs]class PDFOverlap:
"""Calculate the PDFOverlap between two PDF objects,
pdf_a and pdf_b, with number density rescaling.
Attributes:
pdf_a (PDF): first PDF to compare.
pdf_b (PDF): second PDF to compare.
fine_dr (float): fine grid scale on which to compare.
similarity_distance (float): "distance" between PDFs.
overlap_int (float): the value of the overlap integral.
"""
def __init__(self, pdf_a, pdf_b, projected=False):
"""Perform the overlap and similarity distance calculations.
Parameters:
pdf_a (PDF): first PDF to compare.
pdf_b (PDF): second PDF to compare.
Keyword arguments:
projected : if True, attempt to use projected PDFs.
"""
self.pdf_a = pdf_a
self.pdf_b = pdf_b
self.fine_dr = self.pdf_a.dr / 2.0
# initialise with large number
self.similarity_distance = 1e10
self.overlap_int = 0
if projected:
if isinstance(pdf_a.elem_gr, dict) and isinstance(pdf_b.elem_gr, dict):
self.projected_pdf_overlap()
else:
print("Projected PDFs missing, continuing with total.")
self.pdf_overlap()
else:
self.pdf_overlap()
[docs] def pdf_overlap(self):
"""Calculate the overlap of two PDFs via
a simple meshed sum of their difference.
"""
self.overlap_int = 0
self.similarity_distance = 1e10
self.fine_space = np.arange(0, self.pdf_a.rmax, self.fine_dr)
self.fine_gr_a = np.interp(self.fine_space, self.pdf_a.r_space, self.pdf_a.gr)
self.fine_gr_b = np.interp(self.fine_space, self.pdf_b.r_space, self.pdf_b.gr)
# scaling factor here is normalising to number density
density_rescaling_factor = pow(
self.pdf_b.number_density / (self.pdf_a.number_density), 1 / 3
)
rescale_factor = density_rescaling_factor
self.fine_gr_a = np.interp(
self.fine_space, rescale_factor * self.fine_space, self.fine_gr_a
)
self.fine_gr_a = self.fine_gr_a[: int(len(self.fine_space) * 0.75)]
self.fine_gr_b = self.fine_gr_b[: int(len(self.fine_space) * 0.75)]
self.fine_space = self.fine_space[: int(len(self.fine_space) * 0.75)]
overlap_fn = self.fine_gr_a - self.fine_gr_b
worst_case_overlap_int = np.trapz(
np.abs(self.fine_gr_a), dx=self.pdf_a.dr / 2.0
) + np.trapz(np.abs(self.fine_gr_b), dx=self.pdf_b.dr / 2.0)
self.overlap_int = np.trapz(np.abs(overlap_fn), dx=self.pdf_a.dr / 2.0)
self.similarity_distance = self.overlap_int / worst_case_overlap_int
self.overlap_fn = overlap_fn
[docs] def projected_pdf_overlap(self):
"""Calculate the overlap of two projected PDFs via
a simple meshed sum of their difference.
"""
self.fine_space = np.arange(0, self.pdf_a.rmax, self.fine_dr)
self.overlap_int = 0
self.similarity_distance = 1e10
elems = set(key for key in self.pdf_a.elem_gr)
if elems != set(key for key in self.pdf_b.elem_gr):
for key in self.pdf_b.elem_gr:
elems.add(key)
# pad out missing elements with zero PDFs
for key in elems:
if key not in self.pdf_a.elem_gr:
self.pdf_a.elem_gr[key] = np.zeros_like(self.pdf_a.r_space)
if key not in self.pdf_b.elem_gr:
self.pdf_b.elem_gr[key] = np.zeros_like(self.pdf_b.r_space)
self.fine_elem_gr_a, self.fine_elem_gr_b = dict(), dict()
for key in elems:
self.fine_elem_gr_a[key] = np.interp(
self.fine_space, self.pdf_a.r_space, self.pdf_a.elem_gr[key]
)
self.fine_elem_gr_b[key] = np.interp(
self.fine_space, self.pdf_b.r_space, self.pdf_b.elem_gr[key]
)
# scaling factor here is normalising to number density
density_rescaling_factor = pow(
(self.pdf_b.number_density) / (self.pdf_a.number_density), 1 / 3
)
rescale_factor = density_rescaling_factor
for key in elems:
self.fine_elem_gr_a[key] = np.interp(
self.fine_space,
rescale_factor * self.fine_space,
self.fine_elem_gr_a[key],
)
for key in elems:
self.fine_elem_gr_a[key] = self.fine_elem_gr_a[key][
: int(len(self.fine_space) * 0.75)
]
self.fine_elem_gr_b[key] = self.fine_elem_gr_b[key][
: int(len(self.fine_space) * 0.75)
]
self.fine_space = self.fine_space[: int(len(self.fine_space) * 0.75)]
for key in elems:
overlap_fn = self.fine_elem_gr_a[key] - self.fine_elem_gr_b[key]
worst_case_a = np.trapz(
np.abs(self.fine_elem_gr_a[key]), dx=self.pdf_a.dr / 2.0
)
worst_case_b = np.trapz(
np.abs(self.fine_elem_gr_b[key]), dx=self.pdf_b.dr / 2.0
)
worst_case_overlap = worst_case_a + worst_case_b
overlap = np.trapz(np.abs(overlap_fn), dx=self.pdf_a.dr / 2.0)
self.overlap_int += overlap / worst_case_overlap
self.similarity_distance = self.overlap_int / len(elems)
[docs] def plot_diff(self):
"""Simple plot for comparing two PDFs."""
from matador.plotting.pdf_plotting import plot_diff_overlap
plot_diff_overlap(self)
[docs] def plot_projected_diff(self):
"""Simple plot for comparing two projected PDFs."""
from matador.plotting.pdf_plotting import plot_projected_diff_overlap
plot_projected_diff_overlap(self)
[docs]class CombinedProjectedPDF:
"""Take some computed PDFs and add them together."""
def __init__(self, pdf_cursor):
"""Create CombinedPDF object from list of PDFs.
Parameters:
pdf_cursor (:obj:`list` of :obj:`PDF`): list of
PDF objects to combine.
"""
self.dr = min([pdf.dr for pdf in pdf_cursor])
self.rmax = min([pdf.rmax for pdf in pdf_cursor])
self.r_space = np.arange(0, self.rmax + self.dr, self.dr)
self.label = "Combined PDF"
if any([not pdf.elem_gr for pdf in pdf_cursor]):
raise RuntimeError("Projected PDFs not found.")
keys = {key for pdf in pdf_cursor for key in pdf.elem_gr}
self.elem_gr = {key: np.zeros_like(self.r_space) for key in keys}
for pdf in pdf_cursor:
for key in pdf.elem_gr:
self.elem_gr[key] += np.interp(
self.r_space, pdf.r_space, pdf.elem_gr[key]
)
[docs] def plot_projected_pdf(self):
"""Plot the combined PDF."""
from matador.plotting.pdf_plotting import plot_projected_pdf
plot_projected_pdf(self)