Source code for matador.utils.chem_utils

# coding: utf-8
# Distributed under the terms of the MIT License.

""" This submodule defines some useful chemical functions and
constants, with a focus on battery materials.

"""


import copy
import warnings
from typing import Dict, Tuple, Any, List

import numpy as np
from matador.data.constants import *  # noqa


EPS = 1e-8


def get_iupac_ordered_elements(elements: List[str]) -> List[str]:
    """Sort element symbols into IUPAC order.

    The ordering is the one specified in Table VI of the IUPAC Red Book:
    Nomenclature of Inorganic Chemistry, IUPAC Recommendations 2005,
    RSC Publishing, 2005 [ISBN 0 85404 438 8].

    Parameters:
        elements: the element symbols to order.

    Returns:
        A new list containing the same symbols in IUPAC order.

    """
    from matador.data.periodic_table import PERIODIC_TABLE

    def _iupac_rank(symbol):
        # each periodic table entry carries its position in the IUPAC table
        return PERIODIC_TABLE[symbol].iupac_order

    ordered = list(elements)
    ordered.sort(key=_iupac_rank)
    return ordered
def get_periodic_table():
    """Return a dictionary of periodic table macros.

    Returns:
        dict: maps each macro label ("I"-"VII", "Tran", "Lan", "Act") to
            its list of element symbols, plus "X", which concatenates
            every other list in the order the macros are defined.

    """
    periodic_table = {
        "I": ["Li", "Na", "K", "Rb", "Cs", "Fr"],
        "II": ["Be", "Mg", "Ca", "Sr", "Ba", "Ra"],
        "III": ["B", "Al", "Ga", "In", "Tl"],
        "IV": ["C", "Si", "Ge", "Sn", "Pb"],
        "V": ["N", "P", "As", "Sb", "Bi"],
        "VI": ["O", "S", "Se", "Te", "Po"],
        "VII": ["F", "Cl", "Br", "I", "At"],
        "Tran": [
            "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
            "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
            "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",
        ],
        "Lan": [
            "La", "Ce", "Pr", "Nd", "Pm", "Sm", "Eu", "Gd",
            "Tb", "Dy", "Ho", "Er", "Tm", "Yb", "Lu",
        ],
        "Act": [
            "Ac", "Th", "Pa", "U", "Np", "Pu", "Am", "Cm",
            "Bk", "Cf", "Es", "Fm", "Md", "No", "Lr",
        ],
    }

    # the wildcard macro is built before it is inserted, so it never
    # contains itself
    every_element = []
    for members in periodic_table.values():
        every_element.extend(members)
    periodic_table["X"] = every_element

    return periodic_table
def get_molar_mass(elem):
    """Look up the molar mass of an element.

    Parameters:
        elem (str): element symbol.

    Returns:
        The `mass` attribute of the matching periodic table entry.

    Raises:
        KeyError: if `elem` is not a key of the periodic table.

    """
    from matador.data.periodic_table import PERIODIC_TABLE

    try:
        mass = PERIODIC_TABLE[elem].mass
    except KeyError as exc:
        raise KeyError(f"{elem} is not a valid element symbol.") from exc
    return mass
def get_atomic_number(elem):
    """Look up the atomic number of an element.

    Parameters:
        elem (str): element symbol.

    Returns:
        The `number` attribute of the matching periodic table entry.

    Raises:
        KeyError: if `elem` is not a key of the periodic table.

    """
    from matador.data.periodic_table import PERIODIC_TABLE

    try:
        number = PERIODIC_TABLE[elem].number
    except KeyError as exc:
        raise KeyError(f"{elem} is not a valid element symbol.") from exc
    return number
def get_atomic_symbol(atomic_number):
    """Returns elemental symbol from atomic number.

    The symbol is found by position in the periodic table mapping.
    NOTE(review): this assumes the iteration order of `PERIODIC_TABLE`
    places each symbol at the index equal to its atomic number - confirm
    against `matador.data.periodic_table`.

    Parameters:
        atomic_number (int): atomic number to look up.

    Returns:
        str: the symbol stored at that position of the periodic table.

    Raises:
        KeyError: if the atomic number is negative or beyond the end of
            the periodic table shipped with this package.

    """
    from matador.data.periodic_table import PERIODIC_TABLE

    message = f"{atomic_number} not contained within periodic table from this package!"

    # a negative index would otherwise silently wrap around to the end
    # of the table and return an unrelated element
    if atomic_number < 0:
        raise KeyError(message)

    try:
        return list(PERIODIC_TABLE)[atomic_number]
    except (IndexError, ValueError) as exc:
        # out-of-range list indexing raises IndexError, not ValueError
        raise KeyError(message) from exc
def get_concentration(doc, elements, include_end=False):
    """Returns x for A_x B_{1-x} or x,y for A_x B_y C_z, (x+y+z=1).

    Parameters:
        doc (list/dict): structure to evaluate OR matador-style stoichiometry.
        elements (list): list of element symbols to enforce ordering.

    Keyword arguments:
        include_end (bool): whether or not to return the final value, i.e.
            [x, y, z] rather than [x, y] in the above.

    Returns:
        list of float: concentrations of elements in given order; elements
            missing from the stoichiometry are left at 0.0.

    Raises:
        RuntimeError: if `doc` is a dict with no stoichiometry.

    """
    if isinstance(doc, dict):
        stoich = doc.get("stoichiometry")
        if stoich is None:
            raise RuntimeError("No stoichiometry found.")
    else:
        stoich = doc

    if include_end:
        tracked = elements
        num_concs = len(elements)
    else:
        # the last element is implied by the others, so it is dropped
        tracked = elements[:-1]
        num_concs = len(elements) - 1

    concs = [0.0] * num_concs
    for species in stoich:
        symbol = species[0]
        if symbol in tracked:
            concs[elements.index(symbol)] = species[1] / float(get_atoms_per_fu(doc))

    return concs
def get_subscripted_formula(formula: str) -> str:
    """Replace every ASCII digit in a chemical formula with the matching
    unicode subscript digit.

    Parameters:
        formula: the formula string, e.g. "Li2TiP4".

    Returns:
        The formula with subscripted digits; all other characters are
        left untouched.

    """
    subscripts = dict(zip("0123456789", "₀₁₂₃₄₅₆₇₈₉"))
    return "".join(subscripts.get(char, char) for char in formula)
def get_subscripted_formula_tex(formula: str) -> str:
    """Convert a chemical formula into a string with LaTeX math subscripts.

    The formula is first parsed into a stoichiometry and then rendered
    again with `tex=True`.

    Parameters:
        formula: the formula string, e.g. "Li2TiP4".

    Returns:
        The formula as rendered by `get_formula_from_stoich`.

    """
    stoichiometry = get_stoich_from_formula(formula)
    return get_formula_from_stoich(stoichiometry, tex=True)
def get_num_intercalated(cursor):
    """Return array of the number of intercalated atoms per host atom
    from a list of structures, of type defined by the first entry in the
    structures' concentration vectors.

    Parameters:
        cursor (list of dict): structures to evaluate.

    Returns:
        ndarray: number of intercalated ions in each structure, i.e.
            x / (1 - x) where x is the first concentration entry;
            NaN where x is exactly 1.

    """
    from .cursor_utils import get_array_from_cursor

    x = np.zeros((len(cursor)))
    comps = get_array_from_cursor(cursor, "concentration")
    for idx, comp in enumerate(comps):
        # always reduce the concentration vector to its first entry, so
        # that a length-1 vector is not assigned into a scalar slot
        if len(comp) >= 1:
            comp = comp[0]
        if 1 - comp == 0:
            # `np.NaN` was removed in NumPy 2.0; `np.nan` works everywhere
            x[idx] = np.nan
        else:
            x[idx] = comp / (1 - comp)

    return x
def get_binary_grav_capacities(x, m_B):
    """Returns capacity in mAh/g from x/y in A_x B_y and m_B in a.m.u.

    Parameters:
        x (float/list/ndarray): ratio(s) x/y; converted to an array.
        m_B: mass of B, used as the divisor.

    Returns:
        ndarray of capacities, or a float NaN if `m_B` is zero.

    """
    x = np.array(x)
    if m_B == 0:
        # no host mass to normalise by
        return float("NaN")
    return x * FARADAY_CONSTANT_Cpermol * Cperg_to_mAhperg / m_B
def get_generic_grav_capacity(concs, elements):
    """Returns gravimetric capacity of <elements[0]> in mAh/g of matador doc.

    Parameters:
        concs (list): concentration of each element, in the same order
            as `elements`.
        elements (list): element symbols; the first one is the active ion.

    Returns:
        0.0 if the concentration of the first element is zero, otherwise
        the result of `get_binary_grav_capacities` for the first
        concentration and the concentration-weighted mass of all other
        elements.

    """
    tmp_concs = np.array(concs, copy=True)

    # with none of the active ion present the capacity is zero
    ion_conc = tmp_concs[0]
    if ion_conc == 0:
        return 0.0

    # masses are looked up for every element, the active ion included
    masses = {elem: get_molar_mass(elem) for elem in elements}

    # accumulate the weighted mass of everything except the active ion
    m_B = sum(
        masses[elem] * tmp_concs[ind]
        for ind, elem in enumerate(elements)
        if ind != 0
    )

    return get_binary_grav_capacities(ion_conc, m_B)
def get_binary_volumetric_capacity(initial_doc, final_doc):
    """For initial (delithiated/sodiated) (single element) structure
    and final (maximally charged) binary structure, calculate the
    volumetric capacity.

    Parameters:
        initial_doc (dict): matador doc of delithiated phase
        final_doc (dict): matador doc of maximally lithiated phase

    Returns:
        volumetric_capacity (float): capacity in mAh/cm^3.

    Raises:
        AssertionError: if the initial stoichiometry does not have exactly
            one entry or the final stoichiometry exactly two.

    """
    host_stoich = initial_doc["stoichiometry"]
    assert len(host_stoich) == 1
    final_stoich = final_doc["stoichiometry"]
    assert len(final_stoich) == 2

    # the ion is the species of the final phase that is not the host
    host_symbol = host_stoich[0][0]
    for entry in final_stoich:
        if entry[0] != host_symbol:
            ion = entry[0]

    for entry in final_stoich:
        if entry[0] == ion:
            num_ion = entry[1]
        else:
            num_B = entry[1]

    ions_per_host_fu = num_ion / num_B
    host_volume_per_fu_cm3 = (
        initial_doc["cell_volume"]
        * ANGSTROM_CUBED_TO_CENTIMETRE_CUBED
        / initial_doc["num_fu"]
    )
    charge_factor = ELECTRON_CHARGE * Cperg_to_mAhperg

    return (ions_per_host_fu / host_volume_per_fu_cm3) * charge_factor
def get_atoms_per_fu(doc):
    """Calculate and return the number of atoms per formula unit.

    Parameters:
        doc (list/dict): structure to evaluate OR matador-style stoichiometry.

    Returns:
        The sum of the amounts (second entry) of every species.

    """
    stoichiometry = doc["stoichiometry"] if "stoichiometry" in doc else doc
    return sum(species[1] for species in stoichiometry)
def get_formation_energy(chempots, doc, energy_key="enthalpy_per_atom"):
    """From given chemical potentials, calculate the simplest
    formation energy per atom of the desired document.

    Note:
        recursive_get(doc, energy_key) MUST return an energy per atom for
        the target doc and the chemical potentials.

    Parameters:
        chempots (list of dict): list of chempot structures, must be unique.
        doc (dict): structure to evaluate.

    Keyword arguments:
        energy_key (str or list): name of energy field to use to calculate
            formation energy. Can use a list of keys/subkeys/indices to
            query nested dicts with `matador.utils.cursor_utils.recursive_get`.

    Returns:
        float: formation energy per atom.

    Raises:
        KeyError: if the energy key is missing from the document or from
            any of the chemical potentials.

    """
    from matador.utils.cursor_utils import recursive_get

    # warn user if per_atom energy is not found
    if isinstance(energy_key, (list, tuple)):
        maybe_not_per_atom = not any("per_atom" in str(key) for key in energy_key)
    elif isinstance(energy_key, str):
        maybe_not_per_atom = "per_atom" not in energy_key
    else:
        maybe_not_per_atom = False

    if maybe_not_per_atom:
        warnings.warn(
            "Requested energy key {} in get_formation_energy may"
            " not be per atom, if so results will be incorrect.".format(energy_key)
        )

    try:
        formation = recursive_get(doc, energy_key)
    except KeyError:
        print("Doc {} missing key {}".format(doc.get("source"), energy_key))
        raise

    # see if num chempots has been set and try to reuse it
    if "num_chempots" in doc:
        num_chempots = doc["num_chempots"]
    else:
        num_chempots = get_number_of_chempots(doc, chempots)

    num_atoms_per_fu = get_atoms_per_fu(doc)
    for ind, mu in enumerate(chempots):
        num_atoms_per_mu = get_atoms_per_fu(mu)
        try:
            mu_energy = recursive_get(mu, energy_key)
        except KeyError as exc:
            # build a new exception: calling the caught instance, as in
            # `exc(...)`, raises a TypeError instead of the KeyError
            raise KeyError(
                "Chemical potential {} missing key {}".format(
                    mu.get("source"), energy_key
                )
            ) from exc
        formation -= mu_energy * num_chempots[ind] * num_atoms_per_mu / num_atoms_per_fu

    return formation
def get_number_of_chempots(stoich, chempot_stoichs, precision=5):
    """Return the required number of each (arbitrary) chemical potentials
    to construct one formula unit of the input stoichiometry. Uses
    least-squares as implemented by `numpy.linalg.lstsq` and rounds the
    output precision based on the `precision` kwarg.

    Parameters:
        stoich (list/dict): matador-style stoichiometry,
            e.g. [['Li', 3], ['P', 1]], or the full document.
        chempot_stoichs (list/dict): list of stoichiometries of the input
            chemical potentials, or the full documents.

    Keyword arguments:
        precision (int/None): number of decimal places to round answer to. None
            maintains the precision from `numpy.linalg.lstsq`.

    Returns:
        list: number of each chemical potential required to create 1 formula unit.

    Raises:
        RuntimeError: if the stoichiometry provided cannot be created with
            the given chemical potentials.

    """
    # need to support dict, list and Crystal inputs
    try:
        stoich = stoich["stoichiometry"]
    except (TypeError, ValueError):
        pass

    try:
        chempot_stoichs = [mu["stoichiometry"] for mu in chempot_stoichs]
    except (TypeError, ValueError):
        pass

    # find all elements present in the chemical potentials
    elements = sorted({elem for mu in chempot_stoichs for elem, _ in mu})

    chempot_matrix = np.asarray(
        [get_padded_composition(mu, elements) for mu in chempot_stoichs]
    )

    try:
        solution = np.asarray(get_padded_composition(stoich, elements))
    except RuntimeError as exc:
        raise RuntimeError(
            "Stoichiometry {} could not be created from chemical potentials {}: missing chempot".format(
                stoich, chempot_stoichs
            )
        ) from exc

    try:
        num_chempots, _, _, _ = np.linalg.lstsq(
            chempot_matrix.T, solution, rcond=None
        )
    except np.linalg.LinAlgError as exc:
        raise RuntimeError(
            "Stoichiometry {} could not be created from chemical potentials {}: numpy LinAlg error".format(
                stoich, chempot_stoichs
            )
        ) from exc

    # check if lstsq actually found a "solution": the residuals returned
    # by lstsq are an empty array for rank-deficient or non-overdetermined
    # systems, so the squared misfit is recomputed explicitly instead
    misfit = np.dot(chempot_matrix.T, num_chempots) - solution
    if np.sum(misfit ** 2) > EPS:
        raise RuntimeError(
            "Stoichiometry {} could not be created from chemical potentials {}".format(
                stoich, chempot_stoichs
            )
        )

    # round output array based on user-specified precision
    if precision is not None:
        num_chempots[np.where(np.abs(num_chempots) < EPS)] = 0.0
        for i, val in enumerate(num_chempots):
            # only snap values that are already within EPS of the rounded value
            if np.abs(val - round(val, precision)) < EPS:
                num_chempots[i] = round(val, precision)

    # check for sensible numbers in output
    if np.min(np.sign(num_chempots)) == -1:
        raise RuntimeError(
            "Stoichiometry {} could not be created from chemical potentials {}".format(
                stoich, chempot_stoichs
            )
        )

    return num_chempots.tolist()
def get_stoich(atom_types):
    """Return the reduced stoichiometry of a list of atoms.

    Each element's count is divided by the greatest common divisor of
    all counts.

    Parameters:
        atom_types (list): list of element symbols of each atom.

    Returns:
        list: matador-style stoichiometry sorted by symbol, with the
            reduced amounts stored as floats,
            e.g. [['Li', 1.0], ['P', 2.0]].

    """
    from collections import Counter
    from functools import reduce
    from math import gcd

    counts = Counter(atom_types)

    # gcd(0, n) == n, so 0 is a neutral starting value for the fold
    divisor = reduce(gcd, counts.values(), 0)

    return sorted([symbol, count / divisor] for symbol, count in counts.items())
def get_padded_composition(stoichiometry, elements):
    """Return a list that contains how many of each species in
    elements exists in the given stoichiometry. e.g. for [['Li', 2], ['O', 1]]
    with elements ['O', 'Li', 'Ba'], this function will return [1, 2, 0].

    Parameters:
        stoichiometry (list): matador-style stoichiometry, as above.
        elements (list): order of element labels to pick out.

    Returns:
        list: the amount of each entry of `elements` found in the
            stoichiometry, with 0 for elements that are absent.

    Raises:
        RuntimeError: if an element label is not a string, if an entry of
            the stoichiometry is not a list, or if the stoichiometry
            contains a species that is not in `elements`.

    """
    for element in elements:
        if not isinstance(element, str):
            raise RuntimeError("Found invalid element symbol {}".format(element))

    # validate every species up front: checking only while searching for
    # a match lets an extra species slip through whenever all requested
    # elements are matched before the extra one is reached
    for species in stoichiometry:
        if not isinstance(species, list):
            raise RuntimeError(
                "Found invalid stoichiometry {}".format(stoichiometry)
            )
        if species[0] not in elements:
            raise RuntimeError(
                "Extra element {} in stoichiometry".format(species[0])
            )

    composition = []
    for element in elements:
        for species in stoichiometry:
            if species[0] == element:
                # the first matching entry wins
                composition.append(species[1])
                break
        else:
            composition.append(0)

    return composition
def get_ratios_from_stoichiometry(stoichiometry):
    """Get a dictionary of pairwise atomic ratios.

    Parameters:
        stoichiometry (list): matador-style stoichiometry.

    Returns:
        dict: dictionary of pairwise ratios rounded to 3 decimal places,
            keyed by the two concatenated symbols, e.g. for K8SnP4,
            ratio_dict = {'KSn': 8, 'KP': 2, 'SnP': 0.25,
                          'SnK': 0.125, 'PSn': 4, 'PK': 0.5}.

    """
    return {
        top[0] + bottom[0]: round(float(top[1]) / bottom[1], 3)
        for top in stoichiometry
        for bottom in stoichiometry
        if bottom[0] != top[0]
    }
def get_stoich_from_formula(formula: str, sort=True):
    """Convert formula string, e.g. Li2TiP4 into a matador-style
    stoichiometry, e.g. [['Li', 2], ['Ti', 1], ['P', 4]].

    Parameters:
        formula (str): chemical formula of compound

    Keyword arguments:
        sort (bool): whether to sort the stoichiometry before returning it.

    Returns:
        list: matador-style stoichiometry, with the amounts divided by
            their greatest common divisor; sorted if `sort` is set.

    """
    from math import gcd
    import re

    tokens = parse_element_string(formula, stoich=True)

    symbols = []
    amounts = []
    for idx, token in enumerate(tokens):
        # tokens containing a digit are amounts, not symbols
        if re.search(r"\d", token):
            continue
        symbols.append(token)
        # a symbol that is not followed by a number counts once
        try:
            amounts.append(float(tokens[idx + 1]))
        except (ValueError, IndexError):
            amounts.append(1.0)

    common_divisor = 0
    for amount in amounts:
        if common_divisor == 0:
            common_divisor = amount
        else:
            common_divisor = gcd(int(amount), int(common_divisor))

    reduced = np.asarray(amounts)
    reduced /= common_divisor

    stoich = [[symbol, amount] for symbol, amount in zip(symbols, reduced)]

    return sorted(stoich) if sort else stoich
def parse_element_string(elements_str, stoich=False):
    """Parse element query string with macros. Has to parse braces
    too, and throw an error if brackets are unmatched.

    e.g.
        Parameters: '[VII][Fe,Ru,Os][I]'
        Returns: ['[VII]', '[Fe,Ru,Os]', '[I]']

    e.g.2
        Parameters: '[VII]2[Fe,Ru,Os][I]'
        Returns: ['[VII]2', '[Fe,Ru,Os]', '[I]']

    Parameters:
        elements_str: str, chemical formula, including macros.

    Keyword arguments:
        stoich: bool, parse as a stoichiometry, i.e. check for numbers

    Raises:
        RuntimeError: if the string contains a character that is neither
            alphanumeric nor one of ``[ ] { } , :``, if it contains no
            upper case character, or if a parsed token still contains
            "}[" or "]{" (unmatched brackets).

    Returns:
        list: split list of elements contained in input

    """
    import re

    valid = False
    # only alphanumerics and the bracket/separator characters are allowed
    for char in elements_str:
        if char not in ["[", "]", "{", "}", ",", ":"] and not char.isalnum():
            raise RuntimeError("Illegal character {} detected in query.".format(char))
    valid = False
    # at least one upper case character is required
    for char in elements_str:
        if char.isupper():
            valid = True
            break
    if not valid:
        raise RuntimeError(
            "Composition must contain at least one upper case character."
        )
    # split around capitalised tokens (one upper case letter followed by
    # lower case letters); the capture group keeps the tokens themselves
    # and the filter drops empty strings
    elements = [elem for elem in re.split(r"([A-Z][a-z]*)", elements_str) if elem]
    if stoich:
        # NOTE: tmp_stoich aliases elements, so each token is replaced in
        # place by a list of its pieces before the list is flattened again
        tmp_stoich = elements
        for ind, strng in enumerate(elements):
            if not any(char.isdigit() for char in strng):
                tmp_stoich[ind] = [strng]
            else:
                # separate runs of digits from the rest of the token
                tmp_stoich[ind] = [
                    elem for elem in re.split(r"([0-9]+)", strng) if elem
                ]
        elements = [item for sublist in tmp_stoich for item in sublist]
    # split macros
    while (
        "[" in elements
        or "{" in elements
        or "][" in elements
        or "}{" in elements
        or "]{" in elements
        or "}[" in elements
    ):
        tmp_stoich = list(elements)
        cleaned = True
        # break fused closing/opening pairs (e.g. "][") into two tokens;
        # the list is edited while it is enumerated, and the pass repeats
        # until the last index is reached by a token that is not a fused pair
        while cleaned:
            for ind, tmp in enumerate(tmp_stoich):
                if tmp == "][":
                    del tmp_stoich[ind]
                    tmp_stoich.insert(ind, "[")
                    tmp_stoich.insert(ind, "]")
                    cleaned = True
                elif tmp == "}{":
                    del tmp_stoich[ind]
                    tmp_stoich.insert(ind, "{")
                    tmp_stoich.insert(ind, "}")
                    cleaned = True
                elif tmp == "]{":
                    del tmp_stoich[ind]
                    tmp_stoich.insert(ind, "{")
                    tmp_stoich.insert(ind, "]")
                    cleaned = True
                elif tmp == "}[":
                    del tmp_stoich[ind]
                    tmp_stoich.insert(ind, "[")
                    tmp_stoich.insert(ind, "}")
                    cleaned = True
                elif ind == len(tmp_stoich) - 1:
                    cleaned = False
        # glue every token following a lone "[" onto it, up to and
        # including the next "]" token
        for ind, tmp in enumerate(tmp_stoich):
            if tmp == "[":
                end_bracket = False
                while not end_bracket:
                    if tmp_stoich[ind + 1] == "]":
                        end_bracket = True
                    tmp_stoich[ind] += tmp_stoich[ind + 1]
                    del tmp_stoich[ind + 1]
        # same again for curly braces
        for ind, tmp in enumerate(tmp_stoich):
            if tmp == "{":
                end_bracket = False
                while not end_bracket:
                    if tmp_stoich[ind + 1] == "}":
                        end_bracket = True
                    tmp_stoich[ind] += tmp_stoich[ind + 1]
                    del tmp_stoich[ind + 1]
        # drop one leftover closing bracket/brace and one empty token, if any
        if "]" in tmp_stoich:
            tmp_stoich.remove("]")
        if "}" in tmp_stoich:
            tmp_stoich.remove("}")
        if "" in tmp_stoich:
            tmp_stoich.remove("")
        elements = tmp_stoich

    # a token that mixes bracket types across a boundary was never closed
    for elem in elements:
        if "}[" in elem or "]{" in elem:
            raise RuntimeError("Unmatched brackets in query string")

    return elements
def get_root_source(source):
    """Get the main file source from a doc's source list.

    Parameters:
        source (str/list/dict): contents of doc['source'] or the doc itself.

    Returns:
        str: "root" filename, e.g. if source = ['KP.cell', 'KP.param',
            'KP_specific_structure.res'] then root = 'KP_specific_structure'.
            A string source is returned unchanged, whether it was passed
            directly or stored under the doc's 'source' key.

    Raises:
        RuntimeError: if no root source, or more than one distinct root
            source, can be identified.

    """
    try:
        sources = source["source"]
    except (KeyError, TypeError):
        sources = source

    # a single string must not be iterated character by character
    if isinstance(sources, str):
        return sources

    root_extensions = (
        ".res",
        ".castep",
        ".history",
        ".history.gz",
        ".phonon",
        ".phonon_dos",
        ".bands",
        ".cif",
        ".magres",
    )

    src_list = set()
    for src in sources:
        if src.endswith(root_extensions):
            # strip any directories and the final extension only
            # NOTE(review): a ".history.gz" file therefore keeps ".history"
            # in its root name - confirm that this is intended
            src_list.add(".".join(src.split("/")[-1].split(".")[0:-1]))
        elif "OQMD" in src.upper() or "MP-" in src.upper() or len(sources) == 1:
            # database identifiers and lone sources are used verbatim
            src_list.add(src)
        elif src == "command_line":
            src_list.add("command line")

    if len(src_list) > 1:
        raise RuntimeError("Ambiguous root source {}".format(sources))
    if len(src_list) < 1:
        raise RuntimeError("Unable to find root source from {}".format(sources))

    return next(iter(src_list))
def get_formula_from_stoich(
    stoich, elements=None, tex=False, sort=True, unicode_sub=False, latex_sub_style=""
):
    """Get the chemical formula of a structure from
    its matador stoichiometry.

    Parameters:
        stoich (list): matador-style stoichiometry; it is not modified.

    Keyword arguments:
        elements (list): list of element symbols to enforce order. If not
            given, the symbols of the stoichiometry are put in IUPAC order.
        tex (bool): whether to print a LaTeX-compatibile string.
        sort (bool): whether to sort the stoichiometry before building
            the formula.
        unicode_sub (bool): use unicode subscripts
        latex_sub_style (str): a string to wrap subscripts in, e.g.
            r"\\mathrm" or r"\\text" (default is blank).

    Returns:
        str: the string representation of the chemical formula.

    Raises:
        RuntimeError: if both `tex` and `unicode_sub` are requested.
        AssertionError: if the resulting formula is empty.

    """
    if tex and unicode_sub:
        raise RuntimeError(
            "Cannot produce a formula with both LaTeX and unicode subscripts."
        )

    if not isinstance(stoich, list):
        stoich = stoich.tolist()
    if sort:
        stoich = sorted(stoich)
    if elements is None:
        elements = get_iupac_ordered_elements([s[0] for s in stoich])

    parts = []
    for targ_elem in elements:
        for elem in stoich:
            if elem[0] != targ_elem:
                continue
            # work on a local copy of the amount: sorted() only copies the
            # outer list, so writing to elem[1] would alter the caller's data
            amount = elem[1]
            if amount == 1:
                parts.append(elem[0])
            elif int(amount) != 0:
                if tex:
                    # print whole numbers without a trailing ".0"
                    if amount % 1 == 0:
                        amount = int(amount)
                    parts.append(
                        r"{}$_{}{{{}}}$".format(elem[0], latex_sub_style, amount)
                    )
                else:
                    parts.append(elem[0] + str(int(amount)))

    form = "".join(parts)
    assert form != ""

    if unicode_sub:
        form = get_subscripted_formula(form)

    return form
def magres_reference_shifts(
    magres: Dict[str, Any],
    reference: Dict[str, Tuple[float, float]],
):
    """Set chemical shifts inside a matador document from shieldings and
    a given reference.

    Each shift is computed as `shielding * gradient + constant` for every
    atom whose species has an entry in `reference`; other atoms keep their
    existing shift, or None if no shifts were set before.

    Parameters:
        magres: A matador document containing the structure and magres
            shielding data (`atom_types` and `chemical_shielding_isos`).
        reference: Reference values in the form `{element: [gradient, constant]}`.

    Returns:
        The input dictionary with the `chemical_shift_isos` key set to the
        referenced shifts. The dictionary is modified in place.

    """
    atom_types = magres["atom_types"]
    chemical_shifts = magres.get("chemical_shift_isos", len(atom_types) * [None])

    for ind, species in enumerate(atom_types):
        if species in reference:
            gradient, constant = reference[species][0], reference[species][1]
            chemical_shifts[ind] = (
                magres["chemical_shielding_isos"][ind] * gradient
            ) + constant

    magres["chemical_shift_isos"] = chemical_shifts

    # hand the document back, as promised by the documented contract
    return magres