Source code for matador.fingerprints.pdf

# coding: utf-8
# Distributed under the terms of the MIT License.

""" This submodule defines classes for computing, combining
and convolving pair distribution functions.

"""


from itertools import combinations_with_replacement
import itertools
import copy
from math import ceil, copysign
import time

import numpy as np
import numba

from matador.utils.cell_utils import frac2cart, cart2volume
from matador.utils.cell_utils import standardize_doc_cell
from matador.utils.chem_utils import get_stoich
from matador.fingerprints.fingerprint import Fingerprint, FingerprintFactory


[docs]class PDF(Fingerprint): """This class implements the calculation and comparison of pair distribution functions. Attributes: r_space (ndarray) : 1-D array containing real space grid gr (ndarray): 1-D array containing total PDF dr (float): real-space grid spacing in Å rmax (float): extent of real-space grid in Å label (str): structure label elem_gr (dict): dict with pairs of element symbol keys, containing 1-D arrays of projected PDFs (if calculated) number_density (float): number density for renormalisation and comparison with other PDFs kwargs (dict): arguments used to create PDF """ def __init__(self, doc, lazy=False, **kwargs): """Initialise parameters and run PDF (unless lazy=True). Parameters: doc (dict) : matador document to calculate PDF of Keyword Arguments: dr (float) : bin width for PDF (Angstrom) (DEFAULT: 0.01) gaussian_width (float) : width of Gaussian smearing (Angstrom) (DEFAULT: 0.01) num_images (int/str) : number of unit cell images include in PDF calculation (DEFAULT: 'auto') max_num_images (int) : cutoff number of unit cells before crashing (DEFAULT: 50) rmax (float) : maximum distance cutoff for PDF (Angstrom) (DEFAULT: 15) projected (bool) : optionally calculate the element-projected PDF standardize (bool) : standardize cell before calculating PDF lazy (bool) : if True, calculator is not called when initializing PDF object timing (bool) : if True, print the total time taken to calculate the PDF """ prop_defaults = { "dr": 0.01, "gaussian_width": 0.1, "rmax": 15, "num_images": "auto", "style": "smear", "debug": False, "timing": False, "low_mem": False, "projected": True, "max_num_images": 50, "standardize": True, } # read and store kwargs self.kwargs = prop_defaults self.kwargs.update( {key: kwargs[key] for key in kwargs if kwargs[key] is not None} ) # useful data for labelling structure = copy.deepcopy(doc) self.spg = structure.get("space_group", "") if self.kwargs.get("standardize"): structure = standardize_doc_cell(structure) self.spg = structure["space_group"] self.stoichiometry = structure.get( "stoichiometry", get_stoich(structure["atom_types"]) ) # private variables self._num_images = self.kwargs.get("num_images") self._lattice = np.asarray(structure["lattice_cart"]) self._poscart = np.asarray( frac2cart(structure["lattice_cart"], structure["positions_frac"]) ).reshape(-1, 3) self._types = structure["atom_types"] self._num_atoms = len(self._poscart) self._volume = cart2volume(self._lattice) self._image_vec = None # public variables self.rmax = self.kwargs.get("rmax") self.number_density = self._num_atoms / self._volume self.dr = self.kwargs.get("dr") self.r_space = None self.gr = None self.elem_gr = None self.label = None if self.kwargs.get("label"): self.label = self.kwargs["label"] elif "text_id" in structure: self.label = " ".join(structure["text_id"]) if not lazy: if self.kwargs.get("timing"): start = time.time() self.calc_pdf() if self.kwargs.get("timing"): end = time.time() print("PDF calculated in {:.3f} s".format(end - start))
[docs] def calc_pdf(self): """Wrapper to calculate PDF with current settings.""" if self._image_vec is None: if self.kwargs.get("debug"): start = time.time() self._set_image_trans_vectors() if self.kwargs.get("debug"): end = time.time() print("Image vectors length = {}".format(len(self._image_vec))) print("Set image trans vectors in {} s".format(end - start)) if self.kwargs.get("projected"): if self.kwargs.get("debug"): start = time.time() if self.elem_gr is None: self._calc_projected_pdf() if self.kwargs.get("debug"): end = time.time() print("Calculated projected PDF {} s".format(end - start)) if self.gr is None: self._calc_unprojected_pdf()
[docs] def calculate(self): """Alias for `self.calc_pdf`.""" self.calc_pdf()
def _calc_distances(self, poscart, poscart_b=None): """Calculate PBC distances with cdist. Parameters: poscart (numpy.ndarray): array of absolute atomic coordinates. Keyword Arguments: poscart_b (numpy.ndarray): absolute positions of a second type of atoms, where only A-B distances will be calculated. Returns: numpy.ndarray: pair d_ij matrix with values > rmax < 1e-12 removed. """ from matador.utils.cell_utils import calc_pairwise_distances_pbc return calc_pairwise_distances_pbc( poscart, self._image_vec, self._lattice, self.rmax, filter_zero=True, compress=True, poscart_b=poscart_b, debug=self.kwargs.get("debug"), ) def _calc_unprojected_pdf(self): """Wrapper function to calculate distances and output a broadened and normalised PDF. Sets self.gr and self.r_space to G(r) and r respectively. """ if self.elem_gr is not None: self._calc_unprojected_pdf_from_projected() else: distances = self._calc_distances(self._poscart) self.r_space = np.arange(0, self.rmax + self.dr, self.dr) self.gr = self._get_broadened_normalised_pdf( distances, style=self.kwargs.get("style"), gaussian_width=self.kwargs.get("gaussian_width"), ) def _calc_projected_pdf(self): """Calculate broadened and normalised element-projected PDF of a matador document. Sets self.elem_gr of e.g. Li2Zn3 to { ('Li', 'Li'): G_{Li-Li}(r), ('Li', 'Zn'): G_{Li-Zn}(r), ('Zn', 'Zn'): G_{Zn-Zn}(r) } """ # initalise dict of element pairs with correct keys style = self.kwargs.get("style") gw = self.kwargs.get("gaussian_width") self.r_space = np.arange(0, self.rmax + self.dr, self.dr) elem_gr = dict() for comb in combinations_with_replacement(set(self._types), 2): elem_gr[tuple(set(comb))] = np.zeros_like(self.r_space) for elem_type in elem_gr: poscart = [ self._poscart[i] for i in range(len(self._poscart)) if self._types[i] == elem_type[0] ] poscart_b = ( [ self._poscart[i] for i in range(len(self._poscart)) if self._types[i] == elem_type[1] ] if len(elem_type) == 2 else None ) distances = self._calc_distances(poscart, poscart_b=poscart_b) elem_gr[elem_type] = len(elem_type) * self._get_broadened_normalised_pdf( distances, style=style, gaussian_width=gw ) self.elem_gr = elem_gr def _calc_unprojected_pdf_from_projected(self): """ " Reconstruct full PDF from projected.""" self.gr = np.zeros_like(self.r_space) for key in self.elem_gr: self.gr += self.elem_gr[key] @staticmethod @numba.njit def _normalize_gr(gr, r_space, dr, num_atoms, number_density): """Normalise a broadened PDF, ignoring the Gaussian magnitude.""" norm = 4 * np.pi * (r_space + dr) ** 2 * dr * num_atoms * number_density return np.divide(gr, norm) @staticmethod @numba.njit def _dist_hist(distances, r_space, dr): """Bin the pair-wise distances according to the radial grid. Parameters: distances (numpy.ndarray): array of pair-wise distances. r_space (numpy.ndarray): radial grid dr (float): bin width. """ hist = np.zeros_like(r_space) for dij in distances: hist[ceil(dij / dr)] += 1 return hist def _get_broadened_normalised_pdf( self, distances, style="smear", gaussian_width=0.1 ): """Broaden the values provided as distances and return G(r) and r_space of the normalised PDF. Parameters: distances (numpy.ndarray): distances used to calculate PDF Keyword arguments: style (str): either 'smear' or 'histogram' gaussian_width (float): smearing width in Angstrom^1/2 Returns: gr (np.ndarray): G(r), the PDF of supplied distances """ if style == "histogram" or gaussian_width == 0: gr = self._dist_hist(distances, self.r_space, self.dr) else: # otherwise do normal smearing hist = self._dist_hist(distances, self.r_space, self.dr) gr = self._broadening_unrolled(hist, self.r_space, gaussian_width) gr = self._normalize_gr( gr, self.r_space, self.dr, self._num_atoms, self.number_density ) return gr @staticmethod def _get_image_trans_vectors_auto(lattice, rmax, dr, max_num_images=50): """Finds all "images" (integer 3-tuples, supercells) that have atoms within rmax + dr + longest LV of the parent lattice. Parameters: lattice (list): list of lattice vectors. rmax (float): maximum radial distance. dr (float): PDF bin width. Keyword arguments: max_num_images (int): the greatest integer multiple of LVs to search out to. Returns: list: list of int 3-tuples of cells, up to rmax or if max_num_images is exceeded, just up to 1 cell away. """ image_vec = set() # find longest combination of single LV's max_trans = 0 _lattice = np.asarray(lattice) products = list(itertools.product(range(-1, 2), repeat=3)) for prod in products: trans = prod @ _lattice length = np.sqrt(np.sum(trans**2)) if length > max_trans: max_trans = length unit_vector_lengths = np.sqrt(np.sum(_lattice**2, axis=1)) limits = [ int((dr + rmax + max_trans) / length) for length in unit_vector_lengths ] for ind, limit in enumerate(limits): if abs(limit) > max_num_images: limits[ind] = int(copysign(max_num_images, limit)) products = itertools.product(*(range(-lim, lim + 1) for lim in limits)) for prod in products: trans = prod @ _lattice length = np.sqrt(np.sum(trans**2)) if length <= rmax + dr + max_trans: image_vec.add(prod) return image_vec def _set_image_trans_vectors(self): """Sets self._image_vec to a list/generator of image translation vectors, based on self._num_images. If self._num_images is an integer, create all 3-member integer combinations up to the value. If self._num_images is 'auto', create all translation vectors up to length self.rmax. e.g. self._image_vec = [[1, 0, 1], [0, 1, 1], [1, 1, 1]]. """ if self._num_images == "auto": self._image_vec = self._get_image_trans_vectors_auto( self._lattice, self.rmax, self.dr, max_num_images=self.kwargs.get("max_num_images"), ) else: self._image_vec = list( itertools.product( range(-self._num_images, self._num_images + 1), repeat=3 ) )
[docs] def get_sim_distance(self, pdf_b, projected=False): """Return the similarity between two PDFs.""" return PDFOverlap(self, pdf_b, projected=projected).similarity_distance
[docs] def pdf(self): """Return G(r) and the r_space for easy plotting.""" try: return (self.r_space, self.gr) except AttributeError: return (None, None)
[docs] def plot_projected_pdf(self, **kwargs): """Plot projected PDFs. Keyword arguments: keys (list): plot only a subset of projections, e.g. [('K', )]. other_pdfs (list of PDF): other PDFs to plot. """ from matador.plotting.pdf_plotting import plot_projected_pdf plot_projected_pdf(self, **kwargs)
[docs] def plot(self, **kwargs): """Plot PDFs. Keyword arguments: other_pdfs (list of PDF): other PDFs to add to the plot. """ from matador.plotting.pdf_plotting import plot_pdf plot_pdf(self, **kwargs)
[docs]class PDFFactory(FingerprintFactory): """This class computes PDF objects from a list of structures, as concurrently as possible. The PDFs are stored under the `pdf` key inside each structure dict. Attributes: nprocs (int): number of concurrent processes. """ default_key = "pdf" fingerprint = PDF
[docs]class PDFOverlap: """Calculate the PDFOverlap between two PDF objects, pdf_a and pdf_b, with number density rescaling. Attributes: pdf_a (PDF): first PDF to compare. pdf_b (PDF): second PDF to compare. fine_dr (float): fine grid scale on which to compare. similarity_distance (float): "distance" between PDFs. overlap_int (float): the value of the overlap integral. """ def __init__(self, pdf_a, pdf_b, projected=False): """Perform the overlap and similarity distance calculations. Parameters: pdf_a (PDF): first PDF to compare. pdf_b (PDF): second PDF to compare. Keyword arguments: projected : if True, attempt to use projected PDFs. """ self.pdf_a = pdf_a self.pdf_b = pdf_b self.fine_dr = self.pdf_a.dr / 2.0 # initialise with large number self.similarity_distance = 1e10 self.overlap_int = 0 if projected: if isinstance(pdf_a.elem_gr, dict) and isinstance(pdf_b.elem_gr, dict): self.projected_pdf_overlap() else: print("Projected PDFs missing, continuing with total.") self.pdf_overlap() else: self.pdf_overlap()
[docs] def pdf_overlap(self): """Calculate the overlap of two PDFs via a simple meshed sum of their difference. """ self.overlap_int = 0 self.similarity_distance = 1e10 self.fine_space = np.arange(0, self.pdf_a.rmax, self.fine_dr) self.fine_gr_a = np.interp(self.fine_space, self.pdf_a.r_space, self.pdf_a.gr) self.fine_gr_b = np.interp(self.fine_space, self.pdf_b.r_space, self.pdf_b.gr) # scaling factor here is normalising to number density density_rescaling_factor = pow( self.pdf_b.number_density / (self.pdf_a.number_density), 1 / 3 ) rescale_factor = density_rescaling_factor self.fine_gr_a = np.interp( self.fine_space, rescale_factor * self.fine_space, self.fine_gr_a ) self.fine_gr_a = self.fine_gr_a[: int(len(self.fine_space) * 0.75)] self.fine_gr_b = self.fine_gr_b[: int(len(self.fine_space) * 0.75)] self.fine_space = self.fine_space[: int(len(self.fine_space) * 0.75)] overlap_fn = self.fine_gr_a - self.fine_gr_b worst_case_overlap_int = np.trapz( np.abs(self.fine_gr_a), dx=self.pdf_a.dr / 2.0 ) + np.trapz(np.abs(self.fine_gr_b), dx=self.pdf_b.dr / 2.0) self.overlap_int = np.trapz(np.abs(overlap_fn), dx=self.pdf_a.dr / 2.0) self.similarity_distance = self.overlap_int / worst_case_overlap_int self.overlap_fn = overlap_fn
[docs] def projected_pdf_overlap(self): """Calculate the overlap of two projected PDFs via a simple meshed sum of their difference. """ self.fine_space = np.arange(0, self.pdf_a.rmax, self.fine_dr) self.overlap_int = 0 self.similarity_distance = 1e10 elems = set(key for key in self.pdf_a.elem_gr) if elems != set(key for key in self.pdf_b.elem_gr): for key in self.pdf_b.elem_gr: elems.add(key) # pad out missing elements with zero PDFs for key in elems: if key not in self.pdf_a.elem_gr: self.pdf_a.elem_gr[key] = np.zeros_like(self.pdf_a.r_space) if key not in self.pdf_b.elem_gr: self.pdf_b.elem_gr[key] = np.zeros_like(self.pdf_b.r_space) self.fine_elem_gr_a, self.fine_elem_gr_b = dict(), dict() for key in elems: self.fine_elem_gr_a[key] = np.interp( self.fine_space, self.pdf_a.r_space, self.pdf_a.elem_gr[key] ) self.fine_elem_gr_b[key] = np.interp( self.fine_space, self.pdf_b.r_space, self.pdf_b.elem_gr[key] ) # scaling factor here is normalising to number density density_rescaling_factor = pow( (self.pdf_b.number_density) / (self.pdf_a.number_density), 1 / 3 ) rescale_factor = density_rescaling_factor for key in elems: self.fine_elem_gr_a[key] = np.interp( self.fine_space, rescale_factor * self.fine_space, self.fine_elem_gr_a[key], ) for key in elems: self.fine_elem_gr_a[key] = self.fine_elem_gr_a[key][ : int(len(self.fine_space) * 0.75) ] self.fine_elem_gr_b[key] = self.fine_elem_gr_b[key][ : int(len(self.fine_space) * 0.75) ] self.fine_space = self.fine_space[: int(len(self.fine_space) * 0.75)] for key in elems: overlap_fn = self.fine_elem_gr_a[key] - self.fine_elem_gr_b[key] worst_case_a = np.trapz( np.abs(self.fine_elem_gr_a[key]), dx=self.pdf_a.dr / 2.0 ) worst_case_b = np.trapz( np.abs(self.fine_elem_gr_b[key]), dx=self.pdf_b.dr / 2.0 ) worst_case_overlap = worst_case_a + worst_case_b overlap = np.trapz(np.abs(overlap_fn), dx=self.pdf_a.dr / 2.0) self.overlap_int += overlap / worst_case_overlap self.similarity_distance = self.overlap_int / len(elems)
[docs] def plot_diff(self): """Simple plot for comparing two PDFs.""" from matador.plotting.pdf_plotting import plot_diff_overlap plot_diff_overlap(self)
[docs] def plot_projected_diff(self): """Simple plot for comparing two projected PDFs.""" from matador.plotting.pdf_plotting import plot_projected_diff_overlap plot_projected_diff_overlap(self)
[docs]class CombinedProjectedPDF: """Take some computed PDFs and add them together.""" def __init__(self, pdf_cursor): """Create CombinedPDF object from list of PDFs. Parameters: pdf_cursor (:obj:`list` of :obj:`PDF`): list of PDF objects to combine. """ self.dr = min([pdf.dr for pdf in pdf_cursor]) self.rmax = min([pdf.rmax for pdf in pdf_cursor]) self.r_space = np.arange(0, self.rmax + self.dr, self.dr) self.label = "Combined PDF" if any([not pdf.elem_gr for pdf in pdf_cursor]): raise RuntimeError("Projected PDFs not found.") keys = {key for pdf in pdf_cursor for key in pdf.elem_gr} self.elem_gr = {key: np.zeros_like(self.r_space) for key in keys} for pdf in pdf_cursor: for key in pdf.elem_gr: self.elem_gr[key] += np.interp( self.r_space, pdf.r_space, pdf.elem_gr[key] )
[docs] def plot_projected_pdf(self): """Plot the combined PDF.""" from matador.plotting.pdf_plotting import plot_projected_pdf plot_projected_pdf(self)