Source code for matador.cli.cli

# coding: utf-8
# Distributed under the terms of the MIT License.

""" This file implements the "matador" command line. """


from matador.query import DBQuery
from matador.hull import QueryConvexHull
from matador.utils.print_utils import print_failure, print_warning, print_notify
from matador.db import make_connection_to_collection
from matador.config import load_custom_settings


class MatadorCommandLine:
    """Class that implements the command-line interface
    to a MongoDB structure repository.

    Instantiating the class runs the requested subcommand immediately:
    a database connection is opened (for everything except ``import``),
    the subcommand is dispatched, results are optionally exported and/or
    visualised, and the connection is closed again.

    """

    # Flags that request that the resulting cursor is written to file.
    # "json" is included so that `--json` on its own triggers an export, as
    # its help text advertises.
    # NOTE(review): assumes `query2files` acts on the `json` keyword -- confirm.
    _EXPORT_FLAGS = ("cell", "res", "pdb", "json", "markdown", "latex", "param", "xsf")

    def __init__(self, *args, **kwargs):
        """Initialise the query with command line arguments and return
        results.

        Parameters:
            *args: the first positional argument must be the parsed
                ``argparse.Namespace`` (``vars()`` is applied to it).

        Keyword arguments:
            argstr (list): the raw command-line arguments, passed through
                to the exporter.
            no_quickstart (bool): passed through to the settings loader and
                used to skip interactive confirmation prompts.

        Raises:
            RuntimeError, SystemExit, KeyboardInterrupt: re-raised after
                being reported; the database client is closed first.

        """
        self.kwargs = kwargs
        # work on a copy so that the caller's Namespace is left untouched by
        # the assignment and pop below
        self.args = dict(vars(args[0]))
        self.args["no_quickstart"] = self.kwargs.get("no_quickstart")
        self.argstr = kwargs.get("argstr")

        self.export = any(self.args.get(ext) for ext in self._EXPORT_FLAGS)
        self.subcommand = self.args.pop("subcmd")

        # not every subcommand produces a cursor (e.g. stats, import, changes,
        # hulldiff), so make sure the attribute always exists
        self.cursor = None

        if self.subcommand != "import":
            self.settings = load_custom_settings(
                config_fname=self.args.get("config"),
                debug=self.args.get("debug"),
                no_quickstart=self.args.get("no_quickstart"),
            )
            result = make_connection_to_collection(
                self.args.get("db"),
                check_collection=(self.subcommand != "stats"),
                mongo_settings=self.settings,
            )
            self.client, self.db, self.collections = result

        try:
            # stats() sits outside the reporting handler below, so its errors
            # propagate without the extra print_warning/print_failure report
            if self.subcommand == "stats":
                self.stats()

            try:
                self._run_subcommand()

                if self.export and self.cursor:
                    self._export_cursor()

                if self.args.get("view") and self.cursor is not None:
                    self._view_cursor()

            except (RuntimeError, SystemExit, KeyboardInterrupt) as oops:
                if isinstance(oops, RuntimeError):
                    print_failure(oops)
                elif isinstance(oops, SystemExit):
                    print_warning(oops)
                raise
        finally:
            # close the connection whatever happened above
            self._close_client()

    def _run_subcommand(self):
        """Dispatch to the action for `self.subcommand`, setting
        `self.cursor` for those subcommands that yield structures.

        """
        if self.subcommand == "import":
            from matador.db import Spatula

            self.importer = Spatula(self.args)

        elif self.subcommand == "query":
            self.query = DBQuery(self.client, self.collections, **self.args)
            self.cursor = self.query.cursor

        elif self.subcommand == "swaps":
            from matador.swaps import AtomicSwapper

            self.query = DBQuery(self.client, self.collections, **self.args)
            if self.args.get("hull_cutoff") is not None:
                self.hull = QueryConvexHull(query=self.query, **self.args)
                self.swapper = AtomicSwapper(self.hull.hull_cursor, **self.args)
            else:
                self.swapper = AtomicSwapper(self.query.cursor, **self.args)
            self.cursor = self.swapper.cursor

        elif self.subcommand == "refine":
            from matador.db import Refiner

            self.query = DBQuery(self.client, self.collections, **self.args)
            if self.args.get("hull_cutoff") is not None:
                self.hull = QueryConvexHull(self.query, **self.args)
                # NOTE(review): this reads `hull.cursor` whereas the swaps
                # branch reads `hull.hull_cursor` -- confirm this is intended.
                self.refiner = Refiner(self.hull.cursor, self.query.repo, **self.args)
            else:
                self.refiner = Refiner(self.query.cursor, self.query.repo, **self.args)
            self.cursor = self.refiner.cursor

        elif self.subcommand in ("hull", "voltage"):
            self.hull = QueryConvexHull(
                **self.args,
                voltage=self.subcommand == "voltage",
                client=self.client,
                collections=self.collections
            )
            self.cursor = self.hull.hull_cursor

        elif self.subcommand == "changes":
            from matador.db import DatabaseChanges

            if len(self.collections) != 1:
                raise SystemExit(
                    "Cannot view changes of more than one collection at once."
                )
            action = "undo" if self.args.get("undo") else "view"
            changeset = self.args.get("changeset")
            if changeset is None:
                changeset = 0
            DatabaseChanges(
                next(iter(self.collections)),
                changeset_ind=changeset,
                action=action,
                mongo_settings=self.settings,
                override=self.kwargs.get("no_quickstart"),
            )

        elif self.subcommand == "hulldiff":
            from matador.hull.hull_diff import diff_hulls

            if self.args.get("compare") is None:
                raise SystemExit(
                    "Please specify which hulls to query with --compare."
                )
            diff_hulls(self.client, self.collections, **self.args)

    def _export_cursor(self):
        """Write the structures in `self.cursor` to file, after applying
        the optional `--write_n` filter on the number of species.

        """
        from matador.export import query2files

        write_n = self.args.get("write_n")
        if write_n is not None:
            self.cursor = [
                doc for doc in self.cursor if len(doc["stoichiometry"]) == write_n
            ]

        if not self.cursor:
            # the filter removed everything, so there is nothing to write
            print_failure("No structures left to export.")
            return

        query2files(
            self.cursor,
            **self.args,
            argstr=self.argstr,
            subcmd=self.subcommand,
            hash_dupe=True
        )

    def _view_cursor(self):
        """Open the first `--top` structures of `self.cursor` in the
        viewer, giving the user time to abort if many windows would open.

        """
        from time import sleep
        from matador.utils.viz_utils import viz

        if self.args.get("top") is None:
            self.top = len(self.cursor)
        else:
            self.top = self.args.get("top")

        to_view = self.cursor[: self.top]

        if len(to_view) > 10:
            # report the number of windows that will actually be opened
            print_warning(
                "WARNING: opening {} files with ase-gui...".format(len(to_view))
            )
            print_warning("Please kill script within 3 seconds if undesired...")
            sleep(3)

        if len(to_view) > 20:
            print_failure(
                "You will literally be opening that many windows, "
                + "I'll give you another 5 seconds to reconsider..."
            )
            sleep(5)
            print_notify("It's your funeral...")
            sleep(1)

        for doc in to_view:
            viz(doc)

    def _close_client(self):
        """Close the database client if one was ever opened (the import
        subcommand never opens one here).

        """
        client = getattr(self, "client", None)
        if client is not None:
            client.close()

    def print_report(self):
        """Print spatula report on current database."""
        # NOTE(review): `self.report` is not assigned anywhere in this class;
        # unless it is set externally the AttributeError is swallowed below
        # and only the warning is printed.
        try:
            report = self.report.find_one()
            print(
                "Database last modified on",
                report["last_modified"],
                "with matador",
                report["version"] + ".",
            )
        except Exception:
            print_warning(
                "Failed to print database report: spatula is probably running!"
            )

    @staticmethod
    def _bin_compositions(comp_counts, cutoff=100):
        """Rank compositions by number of structures, merging all those
        with fewer than `cutoff` structures into a single entry.

        Parameters:
            comp_counts (dict): map from composition label to the number
                of structures with that composition.

        Keyword arguments:
            cutoff (int): compositions with fewer structures than this
                are merged into the "others" entry.

        Returns:
            list: `(label, count)` tuples sorted by descending count, always
                including the merged `"others < cutoff"` entry.

        """
        ranked = sorted(comp_counts.items(), key=lambda item: item[1], reverse=True)
        binned = [item for item in ranked if item[1] >= cutoff]
        small_count = sum(count for _, count in ranked if count < cutoff)
        binned.append(("others < " + str(cutoff), small_count))
        binned.sort(key=lambda item: item[1], reverse=True)
        return binned

    def stats(self):
        """Print some useful stats about the database.

        Depending on the parsed arguments this either lists all collections
        (`--list`), deletes a single collection after confirmation
        (`--delete`), or prints size statistics and a composition histogram
        for the connected collections.

        Raises:
            SystemExit: if a deletion is refused or aborted.

        """
        if self.args.get("list"):
            self._list_collections()
        elif self.args.get("delete"):
            self._delete_collection()
        else:
            self._summarise_collections()

    def _list_collections(self):
        """Print every non-hidden collection with its structure count."""
        names = self.db.list_collection_names()
        print_notify(str(len(names)) + " collections found in database:\n")

        collstats_list = []
        for name in names:
            collstats = self.db.command("collstats", name)
            collstats["name"] = name
            collstats_list.append(collstats)
        collstats_list.sort(key=lambda k: k["count"], reverse=True)

        print("\t{:^20}\t{:^20}".format("Name", "Number of structures"))
        for collection in collstats_list:
            # collections prefixed with "__" (e.g. changelogs) are not shown
            if not collection["name"].startswith("__"):
                print(
                    "\t{:<20}\t{:>20d}".format(collection["name"], collection["count"])
                )
        print("\n")

    def _delete_collection(self):
        """Drop the single collection given by `--db`, plus its changelog,
        after checking ownership and asking for confirmation.

        """
        from getpass import getuser

        target = self.args.get("db")
        # check for a missing --db first, otherwise it is reported as an
        # attempt to delete several collections
        if target is None:
            raise SystemExit("Please specify a collection to delete.")
        if isinstance(target, list) and len(target) == 1:
            target = target[0]
        else:
            raise SystemExit("I will only delete one collection at a time...")

        if target not in self.db.list_collection_names():
            raise SystemExit("No collection named {} was found".format(target))

        user = getuser()
        # NOTE(review): this is a substring test, although the message below
        # talks about the name *starting* with the username -- confirm intent.
        if user not in target:
            raise SystemExit(
                "I cannot delete a collection that's name does not start with "
                "your username, {}".format(user)
            )

        stats = self.db.command("collstats", target)

        if self.args.get("no_quickstart"):
            answer = "y"
        else:
            answer = input(
                "Are you sure you want to delete collection {} containing {} "
                "structures? [y/n]\n".format(target, stats["count"])
            )

        if answer.lower() != "y":
            raise SystemExit("Nevermind then!")

        if target == "repo":
            raise SystemExit("I'm sorry Dave, I'm afraid I can't do that...")

        print("Deleting collection {}...".format(target))
        self.db[target].drop()
        print("and its changelog...")
        self.db["__changelog_{}".format(target)].drop()

    def _summarise_collections(self):
        """Print size statistics and a histogram of compositions for the
        connected collections.

        """
        total_count = 0
        total_obj_bytes = 0
        storage_size = 0
        index_size = 0
        for collection in self.collections:
            coll_stats = self.db.command("collstats", collection)
            count = coll_stats["count"]
            total_count += count
            # weight each collection's average by its count so the overall
            # figure is a true per-structure average; tolerate a missing
            # field, treating it as zero
            total_obj_bytes += coll_stats.get("avgObjSize", 0) * count
            storage_size += coll_stats["storageSize"]
            index_size += coll_stats["totalIndexSize"]

        avg_obj_size = total_obj_bytes / total_count if total_count else 0

        print(
            (
                "The collection(s) queried in {} contain {} structures at {:.1f} kB each "
                "totalling {:.1f} MB with a further {:.1f} MB of indexes."
            ).format(
                self.db.name,
                total_count,
                avg_obj_size / (1024),
                storage_size / (1024**2),
                index_size / (1024**2),
            )
        )

        # count how many structures exist for each set of elements
        comp_counts = dict()
        for collname in self.collections:
            for doc in self.collections[collname].find():
                label = "+".join(str(elem[0]) for elem in sorted(doc["stoichiometry"]))
                comp_counts[label] = comp_counts.get(label, 0) + 1

        comp_list = self._bin_compositions(comp_counts, cutoff=100)

        try:
            from ascii_graph import Pyasciigraph
            from ascii_graph.colors import Gre, Blu, Red
            from ascii_graph.colordata import hcolor
        except ImportError:
            print("ascii_graph dependency not found, not creating histogram.")
        else:
            graph = Pyasciigraph(line_length=80, multivalue=False)
            thresholds = {
                int(total_count / 40): Gre,
                int(total_count / 10): Blu,
                int(total_count / 4): Red,
            }
            data = hcolor(comp_list, thresholds)
            for line in graph.graph(label=None, data=data):
                print(line)
            print("\n")

        # the plain listing is printed either way, so the numbers are still
        # available when the histogram dependency is missing
        for comp in comp_list:
            print(comp)
def _build_parser(version, epilog):
    """Construct the argument parser for the `matador` command line,
    with one sub-parser per subcommand.

    Parameters:
        version (str): version string reported by `--version`.
        epilog (str): text shown at the end of the top-level help.

    Returns:
        argparse.ArgumentParser: the fully configured parser.

    """
    import argparse

    parser = argparse.ArgumentParser(
        prog="matador",
        description="MATerial and Atomistic Database Of Refined structures.",
        epilog=epilog,
    )
    parser.add_argument(
        "--version", action="version", version="matador version " + version + "."
    )

    # define subparsers for subcommands
    subparsers = parser.add_subparsers(
        title="self.subcommands", description="valid sub-commands", dest="subcmd"
    )

    # define parent parser for global arguments, common to all subcommands
    global_flags = argparse.ArgumentParser(add_help=False)
    global_flags.add_argument(
        "--db", nargs="+", help="choose which collection to query"
    )
    global_flags.add_argument(
        "--debug", action="store_true", help="enable debug printing throughout code."
    )
    global_flags.add_argument(
        "-conf",
        "--config",
        type=str,
        help="specify custom location of matador config file."
        "(DEFAULT: $MATADOR_ROOT/config/matador_conf.json)",
    )
    global_flags.add_argument("--devel", action="store_true", help="test devel code.")
    global_flags.add_argument(
        "--profile", action="store_true", help="run code profiler."
    )
    global_flags.add_argument(
        "-q", "--quiet", action="store_true", help="redirect most output to /dev/null."
    )

    # define all other flags by group
    structure_flags = argparse.ArgumentParser(add_help=False)
    structure_flags.add_argument(
        "-c",
        "--composition",
        type=str,
        nargs="+",
        help="find all structures containing exclusively the given "
        "elements, e.g. LiSi. Macros defined for groups [I]-[VII] "
        "[Tran] [Lan] and [Act], used with square brackets.",
    )
    structure_flags.add_argument(
        "-int",
        "--intersection",
        action="store_true",
        help="query the intersection of compositions instead of the union "
        "e.g. -c LiSnS -int queries Li, Sn, S, LiSn, LiS & LiSnS.",
    )
    structure_flags.add_argument(
        "-n",
        "--num_species",
        type=int,
        help="find all structures containing a certain number of species.",
    )
    structure_flags.add_argument(
        "-f",
        "--formula",
        type=str,
        nargs="+",
        help="query a particular chemical formula, e.g. GeTeSi3",
    )
    structure_flags.add_argument(
        "-i",
        "--id",
        type=str,
        nargs="+",
        help="specify a particular structure by its text_id",
    )
    structure_flags.add_argument(
        "-ac",
        "--calc_match",
        action="store_true",
        help="display calculations of the same accuracy as specified id",
    )
    structure_flags.add_argument(
        "-kpttol",
        "--kpoint_tolerance",
        type=float,
        help="kpoint tolerance for calculation matches (DEFAULT: +/- 0.01 1/Å)",
    )
    structure_flags.add_argument(
        "-presstol",
        "--pressure_tolerance",
        type=float,
        help="pressure tolerance for calculation matches (DEFAULT: +/- 0.5 GPa)",
    )
    structure_flags.add_argument(
        "-z",
        "--num_fu",
        type=int,
        help="query a calculations with more than n formula units",
    )
    structure_flags.add_argument(
        "-sg", "--space_group", help="query a particular space group"
    )
    structure_flags.add_argument(
        "-u",
        "--uniq",
        type=float,
        nargs="?",
        const=0.1,
        help="float, return only unique structures (filtered by PDF "
        "overlap), to this tolerance (DEFAULT: 0.1)",
    )
    structure_flags.add_argument(
        "-p",
        "--pressure",
        type=float,
        help="specify an isotropic external pressure to search for, e.g. 10 (GPa)",
    )
    structure_flags.add_argument(
        "-pf",
        "--partial-formula",
        action="store_true",
        help="stoichiometry/composition queries will include other unspecified species, e.g. "
        "-pf search for Li will query any structure containing Li, not just pure Li.",
    )
    structure_flags.add_argument(
        "--tags", nargs="+", type=str, help=("search for manual tags")
    )
    structure_flags.add_argument(
        "--doi", type=str, help=("search for DOI in format xxxx/xxxx")
    )
    structure_flags.add_argument(
        "-icsd",
        "--icsd",
        type=int,
        const=0,
        nargs="?",
        help=("search for an ICSD CollCode"),
    )
    structure_flags.add_argument(
        "-ss",
        "--src_str",
        type=str,
        help=("search for a string inside the structure sources"),
    )
    structure_flags.add_argument(
        "-root",
        "--root_src",
        type=str,
        help=("search for a root_source string of the structure"),
    )
    structure_flags.add_argument(
        "-encap",
        "--encapsulated",
        action="store_true",
        help="query only structures encapsulated in a carbon nanotube.",
    )
    structure_flags.add_argument(
        "-cntr",
        "--cnt_radius",
        type=float,
        help="specify the radius of the encapsulating nanotube to within 0.01 Å",
    )
    structure_flags.add_argument(
        "-cntv",
        "--cnt_vector",
        type=int,
        nargs="+",
        help="specify the chiral vector of the encapsulating nanotube",
    )
    structure_flags.add_argument(
        "-ecut",
        "--cutoff",
        type=float,
        nargs="+",
        help="specify the min. and optionally max. planewave cutoff.",
    )
    structure_flags.add_argument(
        "-geom",
        "--geom_force_tol",
        type=float,
        nargs="+",
        help="force tolerance in eV/Å to query for calc matches.",
    )
    structure_flags.add_argument(
        "-grid",
        "--grid_scale",
        type=float,
        nargs="+",
        help="grid scale to query for calc matches.",
    )
    structure_flags.add_argument(
        "-finegrid",
        "--fine_grid_scale",
        type=float,
        nargs="+",
        help="fine grid scale to query for calc matches.",
    )
    structure_flags.add_argument(
        "--sedc",
        type=str,
        help="specify the dispersion correction scheme, e.g. TS or null.",
    )
    structure_flags.add_argument(
        "-xc",
        "--xc_functional",
        type=str,
        help="specify an xc-functional to query (case-insensitive).",
    )
    structure_flags.add_argument(
        "-kpts",
        "--mp_spacing",
        type=float,
        help="specify an MP grid spacing in 2π/Å units, e.g. 0.05, will return all values "
        "structures with value within --kpt_tol",
    )
    structure_flags.add_argument(
        "--spin",
        type=str,
        help="specifiy whether to query non-spin-polarized (0) calcs or spin polarized calcs "
        "(!=1), or lump them both together with `any`",
    )
    structure_flags.add_argument(
        "--loose",
        action="store_true",
        help="loosely matches with calc_match, i.e. only matches pspot and xc_functional",
    )
    structure_flags.add_argument(
        "--ignore_warnings",
        action="store_true",
        help="includes possibly bad structures",
    )
    structure_flags.add_argument(
        "--field", type=str, action="append", help="name of arbitrary field to query"
    )
    structure_flags.add_argument(
        "--filter",
        nargs="+",
        action="append",
        help="specify either float [min, max] or a string/float value.",
    )

    material_flags = argparse.ArgumentParser(add_help=False)
    material_flags.add_argument(
        "-hc",
        "--hull_cutoff",
        type=float,
        help="return only structures within a certain distance from hull in eV/atom",
    )
    material_flags.add_argument(
        "-lc",
        "--label_cutoff",
        nargs="+",
        type=float,
        help="label only structures within a certain distance from hull in eV/atom",
    )
    material_flags.add_argument(
        "--biggest",
        action="store_true",
        help="use the largest subset of structures to create a hull",
    )
    material_flags.add_argument(
        "--volume",
        action="store_true",
        help="plot a volume curve from convex hull (currently limited to binaries)",
    )
    material_flags.add_argument(
        "--chempots",
        type=float,
        nargs="+",
        help="manually specify chem pots as enthalpy per atom for a rough hull.",
    )

    plot_flags = argparse.ArgumentParser(add_help=False)
    plot_flags.add_argument(
        "--pdf", action="store_true", help="save pdf rather than showing plot in X"
    )
    plot_flags.add_argument(
        "--png", action="store_true", help="save png rather than showing plot in X"
    )
    plot_flags.add_argument(
        "--csv", action="store_true", help="save plotting data to separate csv files"
    )
    plot_flags.add_argument("--labels", action="store_true", help="label hull plots")
    plot_flags.add_argument(
        "--svg", action="store_true", help="save svg rather than showing plot in X"
    )
    plot_flags.add_argument(
        "--subplot", action="store_true", help="plot combined hull and voltage graph"
    )
    plot_flags.add_argument("--no_plot", action="store_true", help="suppress plotting")
    plot_flags.add_argument(
        "--capmap", action="store_true", help="plot heat map of gravimetric capacity"
    )
    plot_flags.add_argument(
        "--sampmap", action="store_true", help="plot heat map of concentration sampling"
    )
    plot_flags.add_argument(
        "--efmap", action="store_true", help="plot heat map of formation energy"
    )
    plot_flags.add_argument(
        "--pathways",
        action="store_true",
        help="plot line from stable B_x C_y to pure A in ABC ternary.",
    )
    plot_flags.add_argument(
        "--expt",
        type=str,
        help="enter experimental voltage curve .csv file for plotting.",
    )
    plot_flags.add_argument(
        "--expt_label", type=str, help="label for experimental data on voltage curve."
    )

    import_flags = argparse.ArgumentParser(add_help=False)
    import_flags.add_argument(
        "-d",
        "--dryrun",
        action="store_true",
        help="run the importer without connecting to the database",
    )
    import_flags.add_argument(
        "-v", "--verbosity", type=int, help="enable verbose output", default=0
    )
    import_flags.add_argument(
        "-f", "--force", action="store_true", help="override main database protection"
    )
    import_flags.add_argument(
        "-t",
        "--tags",
        nargs="+",
        type=str,
        help="set user tags, e.g. nanotube, project name",
    )
    import_flags.add_argument(
        "--recent_only",
        action="store_true",
        help="sort files by creation date (st_ctime) and "
        "stop importing after a duplicate is found in the database.",
    )
    import_flags.add_argument(
        "-s",
        "--scan",
        action="store_true",
        help="only scan the database for new structures, do not import new structures",
    )
    import_flags.add_argument(
        "-p",
        "--prototype",
        action="store_true",
        help="create a database of prototype structures that contain no DFT calculations",
    )

    changes_flags = argparse.ArgumentParser(add_help=False)
    changes_flags.add_argument(
        "-c", "--changeset", type=int, help="changeset number to query"
    )
    changes_flags.add_argument(
        "-r", "--revert", type=int, help="revert database to specified changeset"
    )
    changes_flags.add_argument(
        "-u", "--undo", action="store_true", help="undo changeset"
    )

    collection_flags = argparse.ArgumentParser(add_help=False)
    collection_flags.add_argument(
        "--to", type=str, help="the text_id of a structure with the desired parameters"
    )
    collection_flags.add_argument(
        "--with",
        type=str,
        help=(
            "the seedname (must be within pwd) of cell and param "
            + "files to use for swaps"
        ),
    )
    collection_flags.add_argument(
        "--prefix",
        type=str,
        help="add a prefix to all file names to write out (auto-appended with an underscore",
    )

    query_flags = argparse.ArgumentParser(add_help=False)
    query_flags.add_argument(
        "-s",
        "--summary",
        action="store_true",
        help="show only the ground state for each stoichiometry.",
    )
    query_flags.add_argument(
        "-t", "--top", type=int, help="number of structures to show/write (DEFAULT: 10)"
    )
    query_flags.add_argument(
        "-dE",
        "--delta_E",
        type=float,
        help="maximum distance from ground state structure to show/write in eV/atom",
    )
    query_flags.add_argument(
        "-d",
        "--details",
        action="store_true",
        help="show as much detail about calculation as possible",
    )
    query_flags.add_argument(
        "-pa",
        "--per_atom",
        action="store_true",
        help="show quantities per atom not per fu.",
    )
    query_flags.add_argument(
        "-ef",
        "--eform",
        action="store_true",
        help="print formation energy not hull distance.",
    )
    query_flags.add_argument(
        "-dt",
        "--time",
        type=int,
        help="query only structures added before this time in days",
    )
    query_flags.add_argument(
        "-avail",
        "--available_values",
        type=str,
        help="list all values of field in query results",
    )
    query_flags.add_argument(
        "--use_source",
        default=False,
        action="store_true",
        help="show the source rather than database ID",
    )
    query_flags.add_argument(
        "--since",
        action="store_true",
        help="query only structures added after time specified by --time in days",
    )
    query_flags.add_argument(
        "--source",
        action="store_true",
        help="print filenames from which structures were wrangled",
    )
    query_flags.add_argument(
        "-v",
        "--view",
        action="store_true",
        help="quickly view a structure/structures with ase-gui",
    )
    query_flags.add_argument(
        "--cell",
        action="store_true",
        help="export query to .cell files in folder name from query string",
    )
    query_flags.add_argument(
        "--param",
        action="store_true",
        help="export query to .param files in folder name from query string",
    )
    query_flags.add_argument(
        "--res",
        action="store_true",
        help="export query to .res files in folder name from query string",
    )
    query_flags.add_argument(
        "--json",
        action="store_true",
        help="export query to raw json files in folder name from query string",
    )
    query_flags.add_argument(
        "--pdb",
        action="store_true",
        help="export query to .pdb files in folder name from query string",
    )
    query_flags.add_argument(
        "--xsf",
        action="store_true",
        help="export query to .xsf files in folder name from query string",
    )
    query_flags.add_argument(
        "--markdown",
        action="store_true",
        help="export query summary to a markdown file",
    )
    query_flags.add_argument(
        "--latex", action="store_true", help="export query summary to a LaTeX table"
    )
    query_flags.add_argument(
        "--write_n", type=int, help="export only those structures with n species"
    )

    swap_flags = argparse.ArgumentParser(add_help=False)
    swap_flags.add_argument(
        "-sw",
        "--swap",
        type=str,
        nargs="+",
        help="swap all atoms in structures from a query from the first n-1 species to the nth, "
        "e.g. -sw NAs will swap all N to As, -sw NAs:LiNa will swap all N to As, and all Li "
        "to Na, and -sw [V]As:[Li,K,Rb]Na will swap all group V elements to As and all of Li,"
        "K and Rb to Na.",
    )

    diff_flags = argparse.ArgumentParser(add_help=False)
    diff_flags.add_argument(
        "-cmp",
        "--compare",
        type=str,
        nargs="+",
        help="diff phase diagrams between two different times, in standard time format, "
        "e.g. `--compare 1y2m5d3h` will compare the present hull with that of 1 year, 2 "
        "months, 5 days and 3 hours ago, and `--compare 3d 2d` will compare three days ago "
        "to two days ago.",
    )

    refine_flags = argparse.ArgumentParser(add_help=False)
    refine_flags.add_argument(
        "-task",
        "--task",
        type=str,
        help=(
            "refine subtask to perform: options are spg, elem_set, tag, doi, source, "
            "pspot or raw or sub"
        ),
    )
    refine_flags.add_argument(
        "-mode",
        "--mode",
        type=str,
        help="mode of refinement: options are display, set and overwrite",
    )
    refine_flags.add_argument(
        "-symprec",
        "--symprec",
        type=float,
        help="spglib symmetry precision for refinement",
    )
    refine_flags.add_argument(
        "--new_tag", type=str, help="new tag to add to structures in query"
    )
    refine_flags.add_argument(
        "--new_doi", type=str, help="new doi to add to structures in query"
    )

    stats_flags = argparse.ArgumentParser(add_help=False)
    stats_flags.add_argument(
        "-l",
        "--list",
        action="store_true",
        help="list all collections, their sizes, and owners",
    )
    stats_flags.add_argument(
        "--delete",
        action="store_true",
        help="try to delete collection specified by --db",
    )

    # define subcommand parsers and their arguments

    # matador stats
    subparsers.add_parser(
        "stats",
        help="print some stats about the database.",
        parents=[global_flags, stats_flags],
    )

    # matador query
    subparsers.add_parser(
        "query",
        help="query and extract structures from the database",
        parents=[global_flags, query_flags, structure_flags],
    )

    # matador import
    subparsers.add_parser(
        "import",
        help="import new structures in folder into database",
        parents=[global_flags, import_flags],
    )

    # matador hull
    subparsers.add_parser(
        "hull",
        help="create a convex hull from query results (currently limited to binaries and ternaries)",
        parents=[
            global_flags,
            structure_flags,
            material_flags,
            plot_flags,
            query_flags,
        ],
    )

    # matador voltage
    subparsers.add_parser(
        "voltage",
        help="plot a voltage curve from query results (currently limited to binaries and ternaries)",
        parents=[
            global_flags,
            structure_flags,
            material_flags,
            plot_flags,
            query_flags,
        ],
    )

    # matador changes
    subparsers.add_parser(
        "changes",
        help="view database changelog or undo additions to database (NB: not deletions!)",
        parents=[global_flags, changes_flags],
    )

    # matador hulldiff
    subparsers.add_parser(
        "hulldiff",
        help="diff two convex hulls with the --compare flag.",
        parents=[
            global_flags,
            structure_flags,
            material_flags,
            plot_flags,
            query_flags,
            diff_flags,
        ],
    )

    # matador swaps
    subparsers.add_parser(
        "swaps",
        help="perform atomic swaps on query results",
        parents=[
            global_flags,
            collection_flags,
            query_flags,
            structure_flags,
            material_flags,
            swap_flags,
        ],
    )

    # matador refine
    subparsers.add_parser(
        "refine",
        help="update structures in the database according to specified --task",
        parents=[
            global_flags,
            query_flags,
            structure_flags,
            refine_flags,
            material_flags,
        ],
    )

    return parser


def _check_argument_combinations(vars_args):
    """Exit with a message if the parsed arguments are inconsistent.

    Parameters:
        vars_args (dict): the parsed arguments, as a dictionary.

    Raises:
        SystemExit: if an argument is used without the one it depends on,
            or two mutually exclusive arguments are given.

    """
    if vars_args.get("intersection") and vars_args.get("composition") is None:
        raise SystemExit("--intersection requires --composition.")
    if (
        vars_args.get("subcmd") == "stats"
        and vars_args.get("list")
        and vars_args.get("delete")
    ):
        raise SystemExit("Cannot use -l/--list and --delete")
    if vars_args.get("field") and vars_args.get("filter") is None:
        raise SystemExit("--field requires --filter.")
    if vars_args.get("subcmd") == "hull" and vars_args.get("composition") is None:
        raise SystemExit("hull requires --composition")
    if vars_args.get("calc_match") and vars_args.get("id") is None:
        raise SystemExit(
            "calc_match requires specification of a text_id with -i, exiting..."
        )


def main(no_quickstart=False):
    """Parse all user args and construct a MatadorCommandLine object.

    Keyword arguments:
        no_quickstart (bool): passed through to MatadorCommandLine, where it
            is used to answer interactive prompts automatically.

    Raises:
        SystemExit: if no sub-command is given, or the arguments are
            inconsistent.

    """
    from sys import argv
    from matador import __version__, script_epilog

    parser = _build_parser(__version__, script_epilog)
    parsed_args = parser.parse_args()
    vars_args = vars(parsed_args)

    # without a sub-command there is nothing to run; print usage and exit
    # rather than opening a database connection for no reason
    if vars_args.get("subcmd") is None:
        parser.error("no sub-command specified, see `matador --help`.")

    # check for inconsistent argument combinations
    _check_argument_combinations(vars_args)

    profiling = bool(vars_args.get("profile"))
    if profiling:
        import cProfile
        import pstats
        from sys import version_info

        profiler = cProfile.Profile()
        profiler.enable()

    try:
        MatadorCommandLine(parsed_args, argstr=argv[1:], no_quickstart=no_quickstart)
    finally:
        # write the profile even if the command exited early or failed
        if profiling:
            profiler.disable()
            fname = "matador-{}-{}.{}.{}".format(
                __version__, version_info.major, version_info.minor, version_info.micro
            )
            profiler.dump_stats(fname + ".prof")
            with open(fname + ".pstats", "w") as fp:
                stats = pstats.Stats(profiler, stream=fp).sort_stats("cumulative")
                stats.print_stats()


if __name__ == "__main__":
    main()