Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor VASP and QChem Enums #1098

Closed
232 changes: 232 additions & 0 deletions emmet-core/dev_scripts/generate_enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,232 @@
"""Module to define various calculation types as Enums for VASP."""
from __future__ import annotations
from importlib.resources import files as import_resource_files
from itertools import product

from ruamel.yaml import YAML

# Package resource roots for each supported code; the loop below points
# every entry at that package's "calc_types" sub-directory.
_BASE_ENUM_PATH = {
    "vasp": import_resource_files("emmet.core.vasp"),
    "qchem": import_resource_files("emmet.core.qchem"),
}

for code_base in tuple(_BASE_ENUM_PATH):
    _BASE_ENUM_PATH[code_base] = _BASE_ENUM_PATH[code_base] / "calc_types"


def get_enum_source(
    enum_name: str, doc: str, members: dict, enum_class: str = "ValueEnum"
) -> str:
    """Render the Python source of an enum class from its members.

    Parameters
    -----------
    enum_name : str
        Name of the enum class to emit.
    doc : str
        Text placed in the docstring of the generated class.
    members : dict
        Mapping of member name to member value. Each pair becomes one
        ``NAME = "value"`` line, in the iteration order of the dict.
    enum_class : str = "ValueEnum"
        Name of the base class the generated enum inherits from.

    Returns
    --------
    str
        The class definition, without a trailing newline.
    """

    def _quote(value: object) -> str:
        # Escape the characters that would otherwise terminate or corrupt
        # the emitted double-quoted literal. The backslash must be handled
        # first so the escapes added afterwards are not doubled.
        escaped = (
            str(value).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
        )
        return f'"{escaped}"'

    # The "\n" after the docstring leaves a blank line before the members.
    lines = [f"class {enum_name}({enum_class}):", f'    """ {doc} """\n']
    lines.extend(f"    {const} = {_quote(val)}" for const, val in members.items())
    return "\n".join(lines)


def string_bulk_replace(string: str, rules: dict[str, str]) -> str:
    """Apply a series of substring replacements to a string.

    The rules are applied one after another, in the iteration order of
    ``rules``, each on the result of the previous one.

    Parameters
    -----------
    string : str
        The text to transform.
    rules : dict[str,str]
        Replacement table: each key is a substring to search for and its
        value is the text substituted for every occurrence.

    Returns
    --------
    str
        The text after all replacements have been applied.
    """
    result = string
    for target in rules:
        result = result.replace(target, rules[target])
    return result


def generate_vasp_enums_file(enum_file_name: str | None = None) -> None:
    """
    Generate the VASP enum module from reference yaml data.

    Reads ``calc_types.yaml`` from ``_BASE_ENUM_PATH["vasp"]`` and writes a
    Python module defining the ``RunType``, ``TaskType`` and ``CalcType``
    enums, with the members of each enum sorted by name.

    Parameters
    -----------
    enum_file_name : str or None
        Name of the file to write the enums to.
        Defaults to ``_BASE_ENUM_PATH["vasp"] / "enums.py"``.
    """

    def _run_type_name(run_type: str) -> str:
        # Member names must be identifiers: "+" and "-" become "_" and
        # whitespace-separated words are joined with "_".
        return "_".join(string_bulk_replace(run_type, {"+": "_", "-": "_"}).split())

    def _task_type_name(task_type: str) -> str:
        # Task types only have their words joined with "_".
        return "_".join(task_type.split())

    with open(
        _BASE_ENUM_PATH["vasp"] / "calc_types.yaml", "r", encoding="utf-8"
    ) as config:
        calc_type_data = YAML().load(config)

    task_types = calc_type_data.get("TASK_TYPES")

    run_types = {
        rt
        for functionals in calc_type_data.get("RUN_TYPES", {}).values()
        for rt in functionals
    }
    # Every run type listed in the yaml also gets a "+U" variant.
    run_types |= {f"{rt}+U" for rt in run_types}
    # Iterate in a fixed order so the result never depends on set ordering.
    ordered_run_types = sorted(run_types)

    enums = {
        "RunType": {_run_type_name(rt): rt for rt in ordered_run_types},
        "TaskType": {_task_type_name(tt): tt for tt in task_types},
        "CalcType": {
            f"{_run_type_name(rt)}_{_task_type_name(tt)}": f"{rt} {tt}"
            for rt, tt in product(ordered_run_types, task_types)
        },
    }

    docstr = {
        "RunType": "VASP calculation run types.",
        "TaskType": "VASP calculation task types.",
        "CalcType": "VASP calculation types.",
    }

    # Only RunType uses the case-insensitive base class.
    base_class = {
        "RunType": "IgnoreCaseEnum",
        "TaskType": "ValueEnum",
        "CalcType": "ValueEnum",
    }

    header = """\"\"\"
Autogenerated Enums for VASP RunType, TaskType, and CalcType.

Do not edit this by hand to add or remove enums.
Instead, edit
dev_scripts/generate_enums.py
and/or
emmet/core/vasp/calc_types/calc_types.yaml
\"\"\"
from emmet.core.utils import ValueEnum, IgnoreCaseEnum

"""

    enum_order = ("RunType", "TaskType", "CalcType")
    class_sources = [
        get_enum_source(
            enum_name,
            docstr[enum_name],
            {k: enums[enum_name][k] for k in sorted(enums[enum_name])},
            enum_class=base_class[enum_name],
        )
        for enum_name in enum_order
    ]

    enum_file_name = enum_file_name or str(_BASE_ENUM_PATH["vasp"] / "enums.py")
    # Explicit UTF-8 keeps the generated module independent of the locale.
    with open(enum_file_name, "w", encoding="utf-8") as f:
        f.write(header)
        # Classes are separated by "\n\n"; the file ends with one newline.
        f.write("\n\n".join(class_sources) + "\n")


def generate_qchem_enum_file(enum_file_name: str | None = None) -> None:
    """
    Generate the Q-Chem enum module from reference yaml data.

    Reads ``calc_types.yaml`` from ``_BASE_ENUM_PATH["qchem"]`` and writes a
    Python module defining the ``LevelOfTheory``, ``TaskType`` and
    ``CalcType`` enums, with the members of each enum sorted by name.

    Original author: Evan Spotte-Smith

    Parameters
    -----------
    enum_file_name : str or None
        Name of the file to write the enums to.
        Defaults to ``_BASE_ENUM_PATH["qchem"] / "enums.py"``.
    """

    with open(
        _BASE_ENUM_PATH["qchem"] / "calc_types.yaml", "r", encoding="utf-8"
    ) as config:
        calc_type_meta = YAML().load(config)

    functionals = [
        rt
        for functional_class in calc_type_meta["FUNCTIONAL_CLASSES"].values()
        for rt in functional_class
    ]
    task_types = calc_type_meta["TASK_TYPES"]

    # A level of theory is every functional/basis/solvent-model combination.
    levels_of_theory = [
        f"{funct}/{basis}/{solv_model}"
        for funct, basis, solv_model in product(
            functionals,
            calc_type_meta["BASIS_SETS"],
            calc_type_meta["SOLVENT_MODELS"],
        )
    ]

    lot_str_replacements = {
        "+": "_",
        "-": "_",
        "(": "_",
        ")": "_",
        "/": "_",
        "*": "_d",
    }

    def _lot_name(lot: str) -> str:
        # Member names must be identifiers: substitute the characters
        # listed above and join whitespace-separated words with "_".
        return "_".join(string_bulk_replace(lot, lot_str_replacements).split())

    def _task_type_name(task_type: str) -> str:
        return "_".join(task_type.split()).replace("-", "_")

    enums = {
        "LevelOfTheory": {_lot_name(lot): lot for lot in levels_of_theory},
        "TaskType": {_task_type_name(tt): tt for tt in task_types},
        "CalcType": {
            f"{_lot_name(lot)}_{_task_type_name(tt)}": f"{lot} {tt}"
            for lot, tt in product(levels_of_theory, task_types)
        },
    }

    docstr = {
        "LevelOfTheory": "Levels of theory for calculations in Q-Chem.",
        "TaskType": "Calculation task types for Q-Chem.",
        "CalcType": "Calculation types (LOT + task type) for Q-Chem.",
    }

    header = """\"\"\"
Autogenerated Enums for Q-Chem LevelOfTheory, TaskType, and CalcType.

Do not edit this by hand to add or remove enums.
Instead, edit
dev_scripts/generate_enums.py
and/or
emmet/core/qchem/calc_types/calc_types.yaml
\"\"\"
from emmet.core.utils import ValueEnum

"""

    enum_order = ("LevelOfTheory", "TaskType", "CalcType")
    class_sources = [
        get_enum_source(
            enum_name,
            docstr[enum_name],
            {k: enums[enum_name][k] for k in sorted(enums[enum_name])},
        )
        for enum_name in enum_order
    ]

    enum_file_name = enum_file_name or str(_BASE_ENUM_PATH["qchem"] / "enums.py")

    # Explicit UTF-8 keeps the generated module independent of the locale.
    with open(enum_file_name, "w", encoding="utf-8") as f:
        f.write(header)
        # Classes are separated by "\n\n"; the file ends with one newline.
        f.write("\n\n".join(class_sources) + "\n")


if __name__ == "__main__":
    # Regenerate both enum modules at their default locations, VASP first.
    for _write_enums in (generate_vasp_enums_file, generate_qchem_enum_file):
        _write_enums()
7 changes: 1 addition & 6 deletions emmet-core/emmet/core/qchem/calc_types/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
import importlib

try:
importlib.import_module("emmet.core.qchem.calc_types.enums")
except ImportError:
import emmet.core.qchem.calc_types.generate
"""Module defining Q-Chem calculation types."""

from emmet.core.qchem.calc_types.enums import CalcType, LevelOfTheory, TaskType
from emmet.core.qchem.calc_types.utils import (
Expand Down
97 changes: 11 additions & 86 deletions emmet-core/emmet/core/qchem/calc_types/calc_types.py
Original file line number Diff line number Diff line change
@@ -1,93 +1,18 @@
"""Task types and level of theory components for Q-Chem calculations"""

from importlib.resources import files as import_resource_files
from monty.serialization import loadfn

__author__ = "Evan Spotte-Smith <[email protected]>"

_calc_type_config = loadfn(
str(import_resource_files("emmet.core.qchem.calc_types") / "calc_types.yaml")
)

TASK_TYPES = [
"Single Point",
"Force",
"Geometry Optimization",
"Frequency Analysis",
"Frequency Flattening Geometry Optimization",
"Transition State Geometry Optimization",
"Frequency Flattening Transition State Geometry Optimization",
"Unknown",
]
# NB: this would be easier with setattr but the following is less opaque
FUNCTIONAL_CLASSES = _calc_type_config.get("FUNCTIONAL_CLASSES")
TASK_TYPES = _calc_type_config.get("TASK_TYPES")
BASIS_SETS = _calc_type_config.get("BASIS_SETS")
SOLVENT_MODELS = _calc_type_config.get("SOLVENT_MODELS")

FUNCTIONAL_CLASSES = {
"gga": [
"PBE",
# "PBE-D3(BJ)",
# "BLYP",
# "BLYP-D3(BJ)",
"B97-D",
"B97-D3",
# "mPW91",
# "mPW91-D3(BJ)",
# "VV10",
# "rVV10"
],
"meta-gga": [
# "M06-L",
# "M06-L-D3(0)",
# "SCAN",
# "SCAN-D3(BJ)",
# "TPSS",
# "TPSS-D3(BJ)",
"MN12-L",
# "MN12-L-D3(BJ)",
"B97M-V",
"B97M-rV",
],
"hybrid-gga": [
# "PBE0",
# "PBE0-D3(BJ)",
"B3LYP",
# "B3LYP-D3(BJ)",
# "CAM-B3LYP",
# "CAM-B3LYP-D3(0)",
# "mPW1PW91",
# "mPW1PW91-D3(BJ)",
# "wB97X",
"wB97X-D",
"wB97X-D3",
"wB97X-V",
],
"hybrid-meta-gga": [
# "M06-2X",
# "M06-2X-D3(0)",
# "M06-HF",
# "M08-SO",
# "M11",
# "MN15",
# "BMK",
# "BMK-D3(BJ)",
# "TPSSh",
# "TPSSh-D3(BJ)",
# "SCAN0",
# "mPWB1K",
# "mPWB1K-D3(BJ)",
"wB97M-V"
],
}

FUNCTIONALS = [
rt
for functional_class in FUNCTIONAL_CLASSES
for rt in FUNCTIONAL_CLASSES[functional_class]
]

BASIS_SETS = [
"6-31g*",
"def2-SVPD",
"def2-TZVP",
"def2-TZVPD",
"def2-TZVPP",
"def2-TZVPPD",
"def2-QZVPD",
"def2-QZVPPD",
]

# TODO: add ISOSVP and CMIRS once these are implemented in pymatgen and atomate/atomate2
SOLVENT_MODELS = ["VACUUM", "PCM", "SMD"]
FUNCTIONALS = [rt for functionals in FUNCTIONAL_CLASSES.values() for rt in functionals]
Loading
Loading