From 9ea326d2715b18aa42cf7d896b6a85911a77a212 Mon Sep 17 00:00:00 2001 From: nwinner Date: Wed, 26 Oct 2022 12:45:35 -0700 Subject: [PATCH 01/50] Re-add defects --- src/atomate2/cp2k/builders/defect.py | 0 src/atomate2/cp2k/flows/defect.py | 245 +++++++++++++++++++ src/atomate2/cp2k/jobs/defect.py | 119 +++++++++ src/atomate2/cp2k/schemas/defect.py | 345 +++++++++++++++++++++++++++ src/atomate2/cp2k/sets/defect.py | 55 +++++ 5 files changed, 764 insertions(+) create mode 100644 src/atomate2/cp2k/builders/defect.py create mode 100644 src/atomate2/cp2k/flows/defect.py create mode 100644 src/atomate2/cp2k/jobs/defect.py create mode 100644 src/atomate2/cp2k/schemas/defect.py create mode 100644 src/atomate2/cp2k/sets/defect.py diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py new file mode 100644 index 0000000000..7f48d318a2 --- /dev/null +++ b/src/atomate2/cp2k/flows/defect.py @@ -0,0 +1,245 @@ + +"""Flows used in the calculation of defect properties.""" + +from __future__ import annotations +from copy import deepcopy + +import logging +from dataclasses import dataclass, field +from typing import Iterable, Literal, Mapping +from pathlib import Path +from numpy.typing import NDArray +import itertools + +from jobflow import Flow, Job, Maker, OutputReference, job +from pymatgen.core.structure import Structure +from pymatgen.io.common import VolumetricData +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.analysis.defects.core import Defect +from pymatgen.analysis.defects.thermo import DefectEntry +from pymatgen.analysis.defects.supercells import get_sc_fromstruct + +from atomate2.cp2k.jobs.base import BaseCp2kMaker +from atomate2.cp2k.jobs.core import StaticMaker, HybridStaticMaker, RelaxMaker, HybridRelaxMaker, CellOptMaker, HybridCellOptMaker + +from 
atomate2.cp2k.schemas.defect import DefectDoc +from atomate2.cp2k.sets.core import ( + StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator +) + +from atomate2.cp2k.sets.defect import ( + DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, + DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator +) +from atomate2.cp2k.jobs.defect import ( + BaseDefectMaker, DefectStaticMaker, DefectRelaxMaker, DefectCellOptMaker, + DefectHybridStaticMaker, DefectHybridRelaxMaker, DefectHybridCellOptMaker +) + +from atomate2.cp2k.flows.core import HybridStaticFlowMaker, HybridRelaxFlowMaker, HybridCellOptFlowMaker + +logger = logging.getLogger(__name__) + +# TODO close to being able to put this in common. Just need a switch that decides which core flow/job to use based on software +@dataclass +class FormationEnergyMaker(Maker): + """ + Run a collection of defect jobs and (possibly) the bulk supercell + for determination of defect formation energies. + + Parameters + ---------- + name: This flow's name. i.e. "defect formation energy" + run_bulk: whether to run the bulk supercell as a static ("static") + calculation, a full relaxation ("relax"), or to skip it (False) + hybrid_functional: If provided, this activates hybrid version of the + workflow. Provide functional as a parameter that the input set + can recognize. e.g. "PBE0" or "HSE06" + initialize_with_pbe: If hybrid functional is provided, this enables + the use of a static PBE run before the hybrid calc to provide a + starting guess for CP2K HF module. + supercell_matrix: If provided, the defect supercell wil lbe created + by this 3x3 matrix. Else other parameters will be used. + max_atoms: Maximum number of atoms allowed in the supercell. + min_atoms: Minimum number of atoms allowed in the supercell. + min_length: Minimum length of the smallest supercell lattice vector. 
+ force_diagonal: If True, return a transformation with a diagonal transformation matrix. + """ + + name: str = "defect formation energy" + run_bulk: Literal["static", "relax"] | bool = field(default="static") + hybrid_functional: str | None = field(default=None) + initialize_with_pbe: bool = field(default=True) + + supercell_matrix: NDArray = field(default=None) + min_atoms: int = field(default=80) + max_atoms: int = field(default=240) + min_length: int = field(default=10) + force_diagonal: bool = field(default=False) + + def __post_init__(self): + if self.run_bulk: + if self.run_bulk == 'relax': + if self.hybrid_functional: + self.bulk_maker = HybridCellOptFlowMaker( + initialize_with_pbe=self.initialize_with_pbe, + hybrid_functional=self.hybrid_functional, + hybrid_maker=HybridCellOptMaker( + input_set_generator=DefectHybridCellOptSetGenerator() + ) + ) + else: + self.bulk_maker = CellOptMaker( + input_set_generator=DefectCellOptSetGenerator() + ) + elif self.run_bulk == "static": + if self.hybrid_functional: + self.bulk_maker = HybridStaticFlowMaker( + hybrid_functional=self.hybrid_functional, + hybrid_maker=HybridStaticMaker( + input_set_generator=DefectHybridStaticSetGenerator() + ) + ) + else: + self.bulk_maker = StaticMaker( + input_set_generator=DefectStaticSetGenerator() + ) + + if self.hybrid_functional: + self.def_maker = HybridRelaxFlowMaker( + hybrid_functional=self.hybrid_functional, + initialize_with_pbe=self.initialize_with_pbe, + initialize_maker=DefectStaticMaker(), + hybrid_maker=HybridRelaxMaker() + ) + else: + self.def_maker = DefectRelaxMaker() + + + self.def_maker.supercell_matrix = self.supercell_matrix + self.def_maker.max_atoms = self.max_atoms + self.def_maker.min_atoms = self.min_atoms + self.def_maker.min_length = self.min_length + self.def_maker.force_diagonal = self.force_diagonal + + def make( + self, defects: Iterable[Defect], + run_all_charges: bool = False, + dielectric: NDArray | int | float | None = None, + prev_cp2k_dir: str | 
Path | None = None): + """Make a flow to run multiple defects in order to calculate their formation + energy diagram. + + Parameters + ---------- + defects: list[Defect] + List of defects objects to calculate the formation energy diagram for. + prev_cp2k_dir: str | Path | None + If provided, this acts as prev_dir for the bulk calculation only + Returns + ------- + flow: Flow + The workflow to calculate the formation energy diagram. + """ + jobs, defect_outputs = [], {} + defect_outputs = {defect.name: {} for defect in defects} # TODO DEFECT NAMES ARE NOT UNIQUE HASHES + bulk_structure = ensure_defects_same_structure(defects) + + sc_mat = self.supercell_matrix if self.supercell_matrix else \ + get_sc_fromstruct( + bulk_structure, self.min_atoms, + self.max_atoms, self.min_length, + self.force_diagonal,) + + if self.run_bulk: + bulk_job = self.bulk_maker.make(bulk_structure * sc_mat, prev_cp2k_dir=prev_cp2k_dir) + jobs.append(bulk_job) + + for defect in defects: + chgs = defect.get_charge_states() if run_all_charges else [0] + for charge in chgs: + # write some provenances data in info.json file + info = {"defect": deepcopy(defect), "supercell_matrix": sc_mat} + defect_job = self.def_maker.make(defect=deepcopy(defect), charge=charge) + defect_job.update_maker_kwargs( + {"_set": {"write_additional_data->info:json": info}}, dict_mod=True + ) + jobs.append(defect_job) + defect_outputs[defect.name][int(charge)] = (defect, defect_job.output) + + jobs.append(collect_defect_outputs( + defect_outputs=defect_outputs, + bulk_output=bulk_job.output, + dielectric=dielectric + ) + ) + + return Flow( + jobs=jobs, + name=self.name, + output=jobs[-1].output, + ) + +# TODO this is totally code agnostic and should be in common +@job +def collect_defect_outputs( + defect_outputs: Mapping[str, Mapping[int, OutputReference]], bulk_output: OutputReference, dielectric: NDArray | int | float | None +) -> dict: + """Collect all the outputs from the defect calculations. 
+ This job will combine the structure and entry fields to create a + ComputerStructureEntry object. + Parameters + ---------- + defects_output: + The output from the defect calculations. + bulk_sc_dir: + The directory containing the bulk supercell calculation. + dielectric: + The dielectric constant used to construct the formation energy diagram. + """ + outputs = {"results": {}} + if not dielectric: + logger.warn("Dielectric constant not provided. Defect formation energies will be uncorrected.") + for defect_name, defects_with_charges in defect_outputs.items(): + defect_entries = [] + fnv_plots = {} + for charge, defect_and_output in defects_with_charges.items(): + defect, output_with_charge = defect_and_output + logger.info(f"Processing {defect.name} with charge state={charge}") + defect_entry = DefectEntry( + defect=defect, + charge_state=charge, + sc_entry=ComputedStructureEntry(structure=bulk_output.structure, energy=bulk_output.output.energy) + ) + defect_entries.append(defect_entry) + plot_data = defect_entry.get_freysoldt_correction( + defect_locpot=VolumetricData.from_dict(output_with_charge.cp2k_objects['v_hartree']), + bulk_locpot=VolumetricData.from_dict(output_with_charge.cp2k_objects['v_hartree']), + dielectric=dielectric + ) + fnv_plots[int(charge)] = plot_data + outputs["results"][defect.name] = dict( + defect=defect, defect_entries=defect_entries, fnv_plots=fnv_plots + ) + return outputs + +#TODO should be in common +def ensure_defects_same_structure(defects: Iterable[Defect]): + """Ensure that the defects are valid. + Parameters + ---------- + defects + The defects to check. + Raises + ------ + ValueError + If any defect is invalid. 
+ """ + struct = None + for defect in defects: + if struct is None: + struct = defect.structure + elif struct != defect.structure: + raise ValueError("All defects must have the same host structure.") + return struct + diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py new file mode 100644 index 0000000000..aa0fc56b3f --- /dev/null +++ b/src/atomate2/cp2k/jobs/defect.py @@ -0,0 +1,119 @@ +"""Jobs for defect calculations.""" + +from __future__ import annotations + +import logging +from pathlib import Path +from dataclasses import dataclass, field +from copy import deepcopy +from tkinter import W +from numpy.typing import NDArray + +from pymatgen.analysis.defects.core import Defect, Vacancy +from atomate2.cp2k.sets.base import Cp2kInputGenerator +from atomate2.cp2k.sets.defect import ( + DefectSetGenerator, DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, + DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator +) +from atomate2.cp2k.jobs.base import BaseCp2kMaker, cp2k_job +from atomate2.cp2k.jobs.core import HybridStaticMaker, HybridRelaxMaker, HybridCellOptMaker + +logger = logging.getLogger(__name__) + +DEFECT_TASK_DOC = { + "average_v_hartree": True, + "store_volumetric_data": ("v_hartree",) +} + +@dataclass +class BaseDefectMaker(BaseCp2kMaker): + + task_document_kwargs: dict = field(default_factory=lambda: DEFECT_TASK_DOC) + supercell_matrix: NDArray = field(default=None) + min_atoms: int = field(default=80) + max_atoms: int = field(default=240) + min_length: int = field(default=10) + force_diagonal: bool = field(default=False) + + @cp2k_job + def make(self, defect: Defect, charge: int = 0, prev_cp2k_dir: str | Path | None = None): + if isinstance(defect, Vacancy): + defect = GhostVacancy( + structure=defect.structure, site=defect.site, + multiplicity=defect.multiplicity, oxi_state=defect.oxi_state, + symprec=defect.symprec, angle_tolerance=defect.angle_tolerance 
+ ) + structure = defect.get_supercell_structure( + sc_mat=self.supercell_matrix, + dummy_species=None, + min_atoms=self.min_atoms, + max_atoms=self.max_atoms, + min_length=self.min_length, + force_diagonal=self.force_diagonal, + ) + structure.set_charge(charge) + return super().make.original(self, structure=structure, prev_cp2k_dir=prev_cp2k_dir) + +@dataclass +class DefectStaticMaker(BaseDefectMaker): + + name: str = "defect static" + input_set_generator: DefectSetGenerator = field( + default_factory=DefectStaticSetGenerator + ) + +@dataclass +class DefectRelaxMaker(BaseDefectMaker): + """ + Maker to create a relaxation job for point defects. + + Adds an initial random perturbation and ensures that the output contains + the hartree potential for finite size corrections. + """ + + name: str = "defect relax" + input_set_generator: Cp2kInputGenerator = field(default_factory=DefectRelaxSetGenerator) + transformations: tuple[str, ...] = field(default=("PerturbStructureTransformation",)) + transformation_params: tuple[dict, ...] | None = field(default=({"distance": 0.01},)) + +@dataclass +class DefectCellOptMaker(BaseDefectMaker): + """ + Maker to create a cell for point defects. + + Adds an initial random perturbation and ensures that the output contains + the hartree potential for finite size corrections. + """ + + name: str = "defect relax" + input_set_generator: Cp2kInputGenerator = field(default_factory=DefectCellOptSetGenerator) + transformations: tuple[str, ...] = field(default=("PerturbStructureTransformation",)) + transformation_params: tuple[dict, ...] 
| None = field(default=({"distance": 0.01},)) + +@dataclass +class DefectHybridStaticMaker(DefectStaticMaker, HybridStaticMaker): + + name: str = "defect hybrid static" + input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridStaticSetGenerator) + +@dataclass +class DefectHybridRelaxMaker(DefectRelaxMaker, HybridRelaxMaker): + + name: str = "defect hybrid relax" + input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridRelaxSetGenerator) + +@dataclass +class DefectHybridCellOptMaker(DefectCellOptMaker, HybridCellOptMaker): + + name: str = "defect hybrid cell opt" + input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridCellOptSetGenerator) + +class GhostVacancy(Vacancy): + """Custom override of vacancy to deal with basis set superposition error.""" + + @property + def defect_structure(self): + """Returns the defect structure with the proper oxidation state""" + struct = self.structure.copy() + struct.add_site_property("ghost", [i == self.defect_site_index for i in range(len(struct))]) + return struct \ No newline at end of file diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py new file mode 100644 index 0000000000..9f58878733 --- /dev/null +++ b/src/atomate2/cp2k/schemas/defect.py @@ -0,0 +1,345 @@ +from datetime import datetime +from tokenize import group +from typing import ClassVar, Dict, Tuple, Mapping, List +from pydantic import BaseModel, Field +from pydantic import validator +from itertools import groupby + +from monty.json import MontyDecoder + +from pymatgen.core import Structure +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.analysis.defects.core import Defect +from pymatgen.analysis.defects.corrections import get_correction +from pymatgen.analysis.defects.thermo import DefectEntry, DefectSiteFinder +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer +from atomate2 import SETTINGS + +from 
atomate2.common.schemas.structure import StructureMetadata +from atomate2.cp2k.schemas.calc_types.utils import run_type, task_type +from atomate2.cp2k.schemas.calc_types.enums import CalcType, TaskType, RunType +from atomate2.cp2k.schemas.task import TaskDocument + +class DefectDoc(StructureMetadata): + """ + A document used to represent a single defect. e.g. a O vacancy with a -2 charge. + This document can contain an arbitrary number of defect entries, originating from + pairs (defect and bulk) of calculations. This document provides access to the "best" + calculation of each run_type. + """ + + class Config: + arbitrary_types_allowed = True + + property_name: ClassVar[str] = "defect" + + defect: Defect = Field(None, description="Pymatgen defect object for this defect doc") + + name: str = Field(None, description="Name of this defect as generated by the defect object") + + material_id: int = Field(None, description="Unique material ID for the bulk material") #TODO Change to MPID + + task_ids: List[int] = Field( + None, description="All task ids used in creating this defect doc." 
+ ) + + calc_types: Mapping[int, CalcType] = Field( # type: ignore + None, + description="Calculation types for all the calculations that make up this material", + ) + task_types: Mapping[int, TaskType] = Field( + None, + description="Task types for all the calculations that make up this material", + ) + run_types: Mapping[int, RunType] = Field( + None, + description="Run types for all the calculations that make up this material", + ) + + tasks: Mapping[RunType, Tuple[TaskDocument, TaskDocument]] = Field( + None, description="Task documents (defect task, bulk task) for the defect entry of RunType" + ) + + entries: Mapping[RunType, DefectEntry] = Field( + None, description="Dictionary for tracking entries for CP2K calculations" + ) + + last_updated: datetime = Field( + description="Timestamp for when this document was last updated", + default_factory=datetime.utcnow, + ) + + created_at: datetime = Field( + description="Timestamp for when this material document was first created", + default_factory=datetime.utcnow, + ) + + metadata: Dict = Field(description="Metadata for this defect") + + # TODO How can monty serialization incorporate into pydantic? 
It seems like VASP MatDocs dont need this + @validator("entries", pre=True) + def decode(cls, entries): + for e in entries: + if isinstance(entries[e], dict): + entries[e] = MontyDecoder().process_decoded({k: v for k, v in entries[e].items()}) + return entries + + def update(self, defect_task, bulk_task, dielectric, query='defect'): + + defect_task_doc = TaskDocument(**defect_task) + bulk_task_doc = TaskDocument(**bulk_task) + + rt = defect_task_doc.run_type + tt = defect_task_doc.task_type + ct = defect_task_doc.calc_type + + # Metadata + last_updated = max(dtsk.last_updated for dtsk, btsk in self.tasks.values()) if self.tasks else datetime.now() + created_at = min(dtsk.last_updated for dtsk, btsk in self.tasks.values()) if self.tasks else datetime.now() + + if defect_task_doc.task_id in self.task_ids: + return + else: + self.last_updated = last_updated + self.created_at = created_at + self.task_ids.append(defect_task_doc.task_id) + + def _run_type(x): + return run_type(x[0]['input']['dft']).value + + def _compare(new, old): + # TODO return kpoint density + return new['nsites'] > old.nsites + + if defect_task_doc.run_type not in self.tasks or _compare(defect_task, self.tasks[rt][0]): + self.run_types.update({defect_task_doc.task_id: rt}) + self.task_types.update({defect_task_doc.task_id: tt}) + self.calc_types.update({defect_task_doc.task_id: ct}) + entry = self.__class__.get_defect_entry_from_tasks( + defect_task=defect_task, + bulk_task=bulk_task, + dielectric=dielectric, + query=query + ) + self.entries[rt] = entry + self.tasks[rt] = (defect_task_doc, bulk_task_doc) + + def update_all(self, tasks, query='defect'): + for defect_task, bulk_task, dielectric in tasks: + self.update(defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, query=query) + + @classmethod + def from_tasks(cls, tasks: List, query='defect', material_id=None): + """ + The standard way to create this document. 
+ Args: + tasks: A list of defect,bulk task pairs which will be used to construct a + series of DefectEntry objects. + query: How to retrieve the defect object stored in the task. + """ + task_group = [TaskDocument(**defect_task) for defect_task, bulk_task, dielectric in tasks] + + # Metadata + last_updated = datetime.now() or max(task.last_updated for task in task_group) + created_at = datetime.now() or min(task.completed_at for task in task_group) + task_ids = {task.task_id for task in task_group} + + deprecated_tasks = list( + {task.task_id for task in task_group if not task.is_valid} + ) + + run_types = {task.task_id: task.run_type for task in task_group} + task_types = {task.task_id: task.task_type for task in task_group} + calc_types = {task.task_id: task.calc_type for task in task_group} + + def _run_type(x): + return run_type(x[0]['input']['dft']).value + + def _task_type(x): + return task_type(x[0]['input']['dft']).value + + def _sort(x): + # TODO return kpoint density, currently just does supercell size + return -x[0]['nsites'], x[0]['output']['energy'] + + entries = {} + final_tasks = {} + metadata = {} + for key, tasks_for_runtype in groupby(sorted(tasks, key=_run_type), key=_run_type): + sorted_tasks = sorted(tasks_for_runtype, key=_sort) + ents = [cls.get_defect_entry_from_tasks(t[0], t[1], t[2], query) for t in sorted_tasks] + best_entry = ents[0] + best_defect_task, best_bulk_task, dielectric = sorted_tasks[0] + metadata[key] = {'convergence': [(sorted_tasks[i][0]['nsites'], ents[i].energy) for i in range(len(ents))]} + best_defect_task, best_bulk_task = TaskDocument(**best_defect_task), TaskDocument(**best_bulk_task) + entries[best_defect_task.run_type] = best_entry + final_tasks[best_defect_task.run_type] = (best_defect_task, best_bulk_task) + + data = { + 'entries': entries, + 'run_types': run_types, + 'task_types': task_types, + 'calc_types': calc_types, + 'last_updated': last_updated, + 'created_at': created_at, + 'task_ids': task_ids, + 
'deprecated_tasks': deprecated_tasks, + 'tasks': final_tasks, + 'material_id': material_id if material_id else best_entry.parameters['material_id'], + 'entry_ids': {rt: entries[rt].entry_id for rt in entries}, + 'defect': best_entry.defect, + 'name': best_entry.defect.name, + 'metadata': metadata, + } + prim = SpacegroupAnalyzer(best_entry.defect.bulk_structure).get_primitive_standard_structure() + data.update(StructureMetadata.from_structure(prim).dict()) + return cls(**data) + + @classmethod + def get_defect_entry_from_tasks(cls, defect_task, bulk_task, dielectric=None, query='transformations.history.0.defect'): + """ + Extract a defect entry from a single pair (defect and bulk) of tasks. + + Args: + defect_task: task dict for the defect calculation + bulk_task: task dict for the bulk calculation + dielectric: Dielectric doc if the defect is charged. If not present, no dielectric + corrections will be performed, even if the defect is charged. + query: Mongo-style query to retrieve the defect object from the defect task + """ + parameters = cls.get_parameters_from_tasks(defect_task=defect_task, bulk_task=bulk_task) + if dielectric: + parameters['dielectric'] = dielectric + + if parameters['charge_state'] + corrections, plt_data = get_correction( + q=defect_entry.charge_state, dielectric=parameters['dielectric'], + defect_locpot=parameters['defect_v_hartree'], + bulk_locpot=parameters['bulk_v_hartree'], + defect_frac_coords=parameters['defect_frac_sc_coords'], + ) + else: + corrections = {} + + sc_entry = ComputedStructureEntry( + structure=parameters['final_defect_structure'], + energy=parameters['defect_energy'] - parameters['bulk_energy'] + ) + + defect_entry = DefectEntry( + defect=cls.get_defect_from_task(query=query, task=defect_task), + charge_state=, + sc_entry=sc_entry, + sc_defect_frac_coords=parameters['defect_frac_sc_coords'], + corrections=corrections, + ) + + return defect_entry.as_dict() + + @classmethod + def get_defect_from_task(cls, query, task): 
+ """ + Unpack a Mongo-style query and retrieve a defect object from a task. + """ + defect = unpack(query.split('.'), task) + needed_keys = ['@module', '@class', 'structure', 'defect_site', 'charge', 'site_name'] + return MontyDecoder().process_decoded({k: v for k, v in defect.items() if k in needed_keys}) + + @classmethod + def get_parameters_from_tasks(cls, defect_task, bulk_task): + """ + Get parameters necessary to create a defect entry from defect and bulk task dicts + Args: + defect_task: task dict for the defect calculation + bulk_task: task dict for the bulk calculation + """ + + defect_task = TaskDocument(**defect_task) + bulk_task = TaskDocument(**bulk_task) + + final_defect_structure = defect_task.structure + final_bulk_structure = bulk_task.structure + + ghost = [index for index, prop in enumerate(final_defect_structure.site_properties.get("ghost")) if prop] + if ghost: + defect_frac_sc_coords = final_defect_structure[ghost[0]] + else: + defect_frac_sc_coords = DefectSiteFinder(SETTINGS.SYMPREC).get_defect_fpos(defect_structure=final_defect_structure, base_structure=final_bulk_structure) + + parameters = { + 'defect_energy': defect_task['output']['energy'], + 'bulk_energy': bulk_task['output']['energy'], + 'final_defect_structure': final_defect_structure, + 'vbm': bulk_task['output']['vbm'], + 'cbm': bulk_task['output']['cbm'], + 'defect_frac_sc_coords': defect_frac_sc_coords, + 'defect_v_hartree': defect_task.cp2k_objects['v_hartree'], # TODO CP2K spec name + 'bulk_v_hartree': bulk_task.cp2k_objects['v_hartree'], # TODO CP2K spec name + } + + return parameters + + +# TODO Some of this should be done by DefectCompatibility, +# but it's not clear how to do that since 2d materials +# are not tagged in any particular way to allow defect compatibility +# to decide which correction to apply +class DefectDoc2d(DefectDoc): + """ + DefectDoc subclass for 2D defects + """ + + @classmethod + def get_defect_entry_from_tasks(cls, defect_task, bulk_task, 
dielectric=None, query='transformations.history.0.defect'): + """ + Get defect entry from defect and bulk tasks. + Args: + defect_task: task dict for the defect calculation + bulk_task: task dict for the bulk calculation + dielectric: dielectric tensor for the defect calculation + query: query string for defect entry + """ + parameters = cls.get_parameters_from_tasks(defect_task=defect_task, bulk_task=bulk_task) + if dielectric: + eps_parallel = (dielectric[0][0] + dielectric[1][1]) / 2 + eps_perp = dielectric[2][2] + parameters['dielectric'] = (eps_parallel - 1) / (1 - 1/eps_perp) + + defect_entry = DefectEntry( + cls.get_defect_from_task(query=query, task=defect_task), + uncorrected_energy=parameters.pop('defect_energy') - parameters.pop('bulk_energy'), + parameters=parameters, + entry_id=parameters.pop('entry_id') + ) + + DefectCompatibility().process_entry(defect_entry, perform_corrections=False) + with ScratchDir('.'): + fc = FreysoldtCorrection2d( + defect_entry.parameters.get('dielectric'), + "LOCPOT.ref", "LOCPOT.def", encut=520, buffer=2 + ) + lref = VolumetricData( + structure=Structure.from_dict(bulk_task['input']['structure']), + data={'total': MontyDecoder().process_decoded(bulk_task['v_hartree'])} + ) + ldef = VolumetricData( + structure=Structure.from_dict(defect_task['input']['structure']), + data={'total': MontyDecoder().process_decoded(defect_task['v_hartree'])} + ) + lref.write_file("LOCPOT.ref") + ldef.write_file("LOCPOT.def") + ecorr = fc.get_correction(defect_entry) + defect_entry.corrections.update(ecorr) + defect_entry.parameters['freysoldt2d_meta'] = fc.metadata + + defect_entry_as_dict = defect_entry.as_dict() + defect_entry_as_dict['task_id'] = defect_entry_as_dict['entry_id'] # this seemed necessary for legacy db + + return defect_entry + +def unpack(query, d): + if not query: + return d + if isinstance(d, List): + return unpack(query[1:], d.__getitem__(int(query.pop(0)))) + return unpack(query[1:], d.__getitem__(query.pop(0))) \ No 
newline at end of file diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py new file mode 100644 index 0000000000..df3a2cbe4c --- /dev/null +++ b/src/atomate2/cp2k/sets/defect.py @@ -0,0 +1,55 @@ +"""Module defining defect input set generators.""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass + +from pymatgen.core import Structure + +from atomate2.cp2k.sets.base import Cp2kInputGenerator, multiple_input_updators +from atomate2.cp2k.sets.core import ( + StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator, + HybridStaticSetGenerator, HybridRelaxSetGenerator, HybridCellOptSetGenerator +) +logger = logging.getLogger(__name__) + +@dataclass +class DefectSetGenerator(Cp2kInputGenerator): + """ + """ + + def get_input_updates(self, structure: Structure, *args, **kwargs) -> dict: + """ + """ + return {'print_v_hartree': True} + +@dataclass +@multiple_input_updators() +class DefectStaticSetGenerator(DefectSetGenerator, StaticSetGenerator): + pass + +@dataclass +@multiple_input_updators() +class DefectRelaxSetGenerator(DefectSetGenerator, RelaxSetGenerator): + pass + +@dataclass +@multiple_input_updators() +class DefectCellOptSetGenerator(DefectSetGenerator, CellOptSetGenerator): + pass + +@dataclass +@multiple_input_updators() +class DefectHybridStaticSetGenerator(DefectSetGenerator, HybridStaticSetGenerator): + pass + +@dataclass +@multiple_input_updators() +class DefectHybridRelaxSetGenerator(DefectSetGenerator, HybridRelaxSetGenerator): + pass + +@dataclass +@multiple_input_updators() +class DefectHybridCellOptSetGenerator(DefectSetGenerator, HybridCellOptSetGenerator): + pass \ No newline at end of file From 5be4ac3b34b8acc25d0575be621922c7138c21ef Mon Sep 17 00:00:00 2001 From: nwinner Date: Thu, 27 Oct 2022 20:39:17 -0700 Subject: [PATCH 02/50] Defect updates --- src/atomate2/cp2k/drones.py | 2 +- src/atomate2/cp2k/flows/defect.py | 55 +++++---- src/atomate2/cp2k/jobs/defect.py | 9 ++ 
src/atomate2/cp2k/schemas/defect.py | 118 ++++++++------------ src/atomate2/cp2k/sets/BaseCp2kSet.yaml | 142 ++++++++++++------------ 5 files changed, 156 insertions(+), 170 deletions(-) diff --git a/src/atomate2/cp2k/drones.py b/src/atomate2/cp2k/drones.py index 662565d4ac..cce5c096c0 100644 --- a/src/atomate2/cp2k/drones.py +++ b/src/atomate2/cp2k/drones.py @@ -1,4 +1,4 @@ -"""Drones for parsing VASP calculations and related outputs.""" +"""Drones for parsing CP2K calculations and related outputs.""" from __future__ import annotations diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index 7f48d318a2..df11cc2bb0 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -78,32 +78,34 @@ class FormationEnergyMaker(Maker): force_diagonal: bool = field(default=False) def __post_init__(self): - if self.run_bulk: - if self.run_bulk == 'relax': - if self.hybrid_functional: - self.bulk_maker = HybridCellOptFlowMaker( - initialize_with_pbe=self.initialize_with_pbe, - hybrid_functional=self.hybrid_functional, - hybrid_maker=HybridCellOptMaker( - input_set_generator=DefectHybridCellOptSetGenerator() - ) - ) - else: - self.bulk_maker = CellOptMaker( - input_set_generator=DefectCellOptSetGenerator() - ) - elif self.run_bulk == "static": - if self.hybrid_functional: - self.bulk_maker = HybridStaticFlowMaker( - hybrid_functional=self.hybrid_functional, - hybrid_maker=HybridStaticMaker( - input_set_generator=DefectHybridStaticSetGenerator() + if self.run_bulk == 'relax': + if self.hybrid_functional: + self.bulk_maker = HybridCellOptFlowMaker( + initialize_with_pbe=self.initialize_with_pbe, + hybrid_functional=self.hybrid_functional, + hybrid_maker=HybridCellOptMaker( + input_set_generator=DefectHybridCellOptSetGenerator() ) + ) + else: + self.bulk_maker = CellOptMaker( + input_set_generator=DefectCellOptSetGenerator() + ) + elif self.run_bulk == "static": + if self.hybrid_functional: + self.bulk_maker = 
HybridStaticFlowMaker( + hybrid_functional=self.hybrid_functional, + hybrid_maker=HybridStaticMaker( + input_set_generator=DefectHybridStaticSetGenerator() ) - else: - self.bulk_maker = StaticMaker( - input_set_generator=DefectStaticSetGenerator() - ) + ) + else: + self.bulk_maker = StaticMaker( + input_set_generator=DefectStaticSetGenerator() + ) + + # TODO Can probably put this somewhere else? + self.bulk_maker.task_document_kwargs.update({"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)}) if self.hybrid_functional: self.def_maker = HybridRelaxFlowMaker( @@ -158,12 +160,7 @@ def make( for defect in defects: chgs = defect.get_charge_states() if run_all_charges else [0] for charge in chgs: - # write some provenances data in info.json file - info = {"defect": deepcopy(defect), "supercell_matrix": sc_mat} defect_job = self.def_maker.make(defect=deepcopy(defect), charge=charge) - defect_job.update_maker_kwargs( - {"_set": {"write_additional_data->info:json": info}}, dict_mod=True - ) jobs.append(defect_job) defect_outputs[defect.name][int(charge)] = (defect, defect_job.output) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index aa0fc56b3f..df34dddd07 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -52,6 +52,15 @@ def make(self, defect: Defect, charge: int = 0, prev_cp2k_dir: str | Path | None force_diagonal=self.force_diagonal, ) structure.set_charge(charge) + # provenance stuff + self.write_additional_data.update( + { + "info.json": { + "defect": deepcopy(defect), + "defect_charge": charge, + "sc_mat": self.supercell_matrix} + } + ) return super().make.original(self, structure=structure, prev_cp2k_dir=prev_cp2k_dir) @dataclass diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 9f58878733..08917df314 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -6,11 +6,12 @@ from itertools import 
groupby from monty.json import MontyDecoder +from monty.tempfile import ScratchDir from pymatgen.core import Structure from pymatgen.entries.computed_entries import ComputedStructureEntry from pymatgen.analysis.defects.core import Defect -from pymatgen.analysis.defects.corrections import get_correction +from pymatgen.analysis.defects.corrections import get_freysoldt_correction, get_freysoldt2d_correction from pymatgen.analysis.defects.thermo import DefectEntry, DefectSiteFinder from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from atomate2 import SETTINGS @@ -211,15 +212,7 @@ def get_defect_entry_from_tasks(cls, defect_task, bulk_task, dielectric=None, qu if dielectric: parameters['dielectric'] = dielectric - if parameters['charge_state'] - corrections, plt_data = get_correction( - q=defect_entry.charge_state, dielectric=parameters['dielectric'], - defect_locpot=parameters['defect_v_hartree'], - bulk_locpot=parameters['bulk_v_hartree'], - defect_frac_coords=parameters['defect_frac_sc_coords'], - ) - else: - corrections = {} + corrections, metadata = cls.get_correction_from_parameters(parameters) sc_entry = ComputedStructureEntry( structure=parameters['final_defect_structure'], @@ -228,7 +221,7 @@ def get_defect_entry_from_tasks(cls, defect_task, bulk_task, dielectric=None, qu defect_entry = DefectEntry( defect=cls.get_defect_from_task(query=query, task=defect_task), - charge_state=, + charge_state=parameters['charge_state'], sc_entry=sc_entry, sc_defect_frac_coords=parameters['defect_frac_sc_coords'], corrections=corrections, @@ -236,6 +229,50 @@ def get_defect_entry_from_tasks(cls, defect_task, bulk_task, dielectric=None, qu return defect_entry.as_dict() + @classmethod + def get_correction_from_parameters(cls, parameters) -> Tuple[Dict, Dict]: + corrections = {} + metadata = {} + for correction in ["get_freysoldt_correction", "get_freysoldt2d_correction"]: + c, m = getattr(cls, correction)(parameters) + corrections.update(c) + metadata.update(m) + return 
corrections, metadata + + @classmethod + def get_freysoldt_correction(cls, parameters) -> Tuple[Dict, Dict]: + if parameters['charge_state'] and not parameters.get("2d"): + return get_freysoldt_correction( + q=parameters['charge_state'], dielectric=parameters['dielectric'], + defect_locpot=parameters['defect_v_hartree'], + bulk_locpot=parameters['bulk_v_hartree'], + defect_frac_coords=parameters['defect_frac_sc_coords'], + ) + return {}, {} + + @classmethod + def get_freysoldt2d_correction(cls, parameters): + + from pymatgen.io.vasp.outputs import VolumetricData as VaspVolumetricData + + if parameters['charge_state'] and parameters.get("2d"): + eps_parallel = (parameters['dielectric'][0][0] + parameters['dielectric'][1][1]) / 2 + eps_perp = parameters['dielectric'][2][2] + dielectric = (eps_parallel - 1) / (1 - 1/eps_perp) + with ScratchDir('.'): + + lref = VaspVolumetricData(structure=parameters['bulk_locpot'].structure, data=parameters['bulk_locpot'].data) + ldef = VaspVolumetricData(structure=parameters['defect_locpot'].structure, data=parameters['defect_locpot'].data) + lref.write_file("LOCPOT.ref") + ldef.write_file("LOCPOT.def") + + return get_freysoldt2d_correction( + q=parameters['charge_state'], dielectric=dielectric, defect_locpot="LOCPOT.def", + bulk_locpot="LOCPOT.ref", defect_frac_coords=parameters['defect_frac_sc_coords'], + energy_cutoff=250, slab_buffer=2 + ) + return {}, {} + @classmethod def get_defect_from_task(cls, query, task): """ @@ -272,6 +309,7 @@ def get_parameters_from_tasks(cls, defect_task, bulk_task): 'final_defect_structure': final_defect_structure, 'vbm': bulk_task['output']['vbm'], 'cbm': bulk_task['output']['cbm'], + 'charge_state': defect_task.output.structure.charge, 'defect_frac_sc_coords': defect_frac_sc_coords, 'defect_v_hartree': defect_task.cp2k_objects['v_hartree'], # TODO CP2K spec name 'bulk_v_hartree': bulk_task.cp2k_objects['v_hartree'], # TODO CP2K spec name @@ -279,64 +317,6 @@ def get_parameters_from_tasks(cls,
defect_task, bulk_task): return parameters - -# TODO Some of this should be done by DefectCompatibility, -# but it's not clear how to do that since 2d materials -# are not tagged in any particular way to allow defect compatibility -# to decide which correction to apply -class DefectDoc2d(DefectDoc): - """ - DefectDoc subclass for 2D defects - """ - - @classmethod - def get_defect_entry_from_tasks(cls, defect_task, bulk_task, dielectric=None, query='transformations.history.0.defect'): - """ - Get defect entry from defect and bulk tasks. - Args: - defect_task: task dict for the defect calculation - bulk_task: task dict for the bulk calculation - dielectric: dielectric tensor for the defect calculation - query: query string for defect entry - """ - parameters = cls.get_parameters_from_tasks(defect_task=defect_task, bulk_task=bulk_task) - if dielectric: - eps_parallel = (dielectric[0][0] + dielectric[1][1]) / 2 - eps_perp = dielectric[2][2] - parameters['dielectric'] = (eps_parallel - 1) / (1 - 1/eps_perp) - - defect_entry = DefectEntry( - cls.get_defect_from_task(query=query, task=defect_task), - uncorrected_energy=parameters.pop('defect_energy') - parameters.pop('bulk_energy'), - parameters=parameters, - entry_id=parameters.pop('entry_id') - ) - - DefectCompatibility().process_entry(defect_entry, perform_corrections=False) - with ScratchDir('.'): - fc = FreysoldtCorrection2d( - defect_entry.parameters.get('dielectric'), - "LOCPOT.ref", "LOCPOT.def", encut=520, buffer=2 - ) - lref = VolumetricData( - structure=Structure.from_dict(bulk_task['input']['structure']), - data={'total': MontyDecoder().process_decoded(bulk_task['v_hartree'])} - ) - ldef = VolumetricData( - structure=Structure.from_dict(defect_task['input']['structure']), - data={'total': MontyDecoder().process_decoded(defect_task['v_hartree'])} - ) - lref.write_file("LOCPOT.ref") - ldef.write_file("LOCPOT.def") - ecorr = fc.get_correction(defect_entry) - defect_entry.corrections.update(ecorr) - 
defect_entry.parameters['freysoldt2d_meta'] = fc.metadata - - defect_entry_as_dict = defect_entry.as_dict() - defect_entry_as_dict['task_id'] = defect_entry_as_dict['entry_id'] # this seemed necessary for legacy db - - return defect_entry - def unpack(query, d): if not query: return d diff --git a/src/atomate2/cp2k/sets/BaseCp2kSet.yaml b/src/atomate2/cp2k/sets/BaseCp2kSet.yaml index 14a24c5f5a..edab0e1f93 100644 --- a/src/atomate2/cp2k/sets/BaseCp2kSet.yaml +++ b/src/atomate2/cp2k/sets/BaseCp2kSet.yaml @@ -8,11 +8,11 @@ cp2k_input: basis: null potential: null Ag: - aux_basis: admm-dz-q11 + aux_basis: admm-tzp-q11 basis: TZVP-MOLOPT-PBE-GTH-q11 potential: GTH-PBE-q19 Al: - aux_basis: admm-dz-q3 + aux_basis: admm-tzp-q3 basis: TZVP-MOLOPT-PBE-GTH-q3 potential: GTH-PBE-q3 Am: @@ -20,31 +20,31 @@ cp2k_input: basis: null potential: null Ar: - aux_basis: admm-dz-q8 + aux_basis: admm-tzp-q8 basis: TZVP-MOLOPT-PBE-GTH-q8 potential: GTH-PBE-q8 As: - aux_basis: admm-dz-q5 + aux_basis: admm-tzp-q5 basis: TZVP-MOLOPT-PBE-GTH-q5 potential: GTH-PBE-q5 At: - aux_basis: admm-dz-q7 + aux_basis: admm-tzp-q7 basis: TZVP-MOLOPT-PBE-GTH-q7 potential: GTH-PBE-q7 Au: - aux_basis: admm-dz-q11 + aux_basis: admm-tzp-q11 basis: TZVP-MOLOPT-PBE-GTH-q11 potential: GTH-PBE-q19 B: - aux_basis: admm-dz-q3 + aux_basis: admm-tzp-q3 basis: TZVP-MOLOPT-PBE-GTH-q3 potential: GTH-PBE-q3 Ba: - aux_basis: admm-dz-q10 + aux_basis: admm-tzp-q10 basis: TZVP-MOLOPT-PBE-GTH-q10 potential: GTH-PBE-q10 Be: - aux_basis: admm-dz-q4 + aux_basis: admm-tzp-q4 basis: TZVP-MOLOPT-PBE-GTH-q2 potential: GTH-PBE-q4 Bh: @@ -52,7 +52,7 @@ cp2k_input: basis: null potential: null Bi: - aux_basis: admm-dz-q5 + aux_basis: admm-tzp-q5 basis: TZVP-MOLOPT-PBE-GTH-q5 potential: GTH-PBE-q5 Bk: @@ -60,19 +60,19 @@ cp2k_input: basis: null potential: null Br: - aux_basis: admm-dz-q7 + aux_basis: admm-tzp-q7 basis: TZVP-MOLOPT-PBE-GTH-q7 potential: GTH-PBE-q7 C: - aux_basis: admm-dz-q4 + aux_basis: admm-tzp-q4 basis: 
TZVP-MOLOPT-PBE-GTH-q4 potential: GTH-PBE-q4 Ca: - aux_basis: admm-dz-q10 + aux_basis: admm-tzp-q10 basis: TZVP-MOLOPT-PBE-GTH-q10 potential: GTH-PBE-q10 Cd: - aux_basis: admm-dz-q12 + aux_basis: admm-tzp-q12 basis: TZVP-MOLOPT-PBE-GTH-q12 potential: GTH-PBE-q12 Ce: @@ -84,7 +84,7 @@ cp2k_input: basis: null potential: null Cl: - aux_basis: admm-dz-q7 + aux_basis: admm-tzp-q7 basis: TZVP-MOLOPT-PBE-GTH-q7 potential: GTH-PBE-q7 Cm: @@ -96,19 +96,19 @@ cp2k_input: basis: null potential: null Co: - aux_basis: admm-dz-q17 + aux_basis: admm-tzp-q17 basis: TZVP-MOLOPT-PBE-GTH-q17 potential: GTH-PBE-q17 Cr: - aux_basis: admm-dz-q14 + aux_basis: admm-tzp-q14 basis: TZVP-MOLOPT-PBE-GTH-q14 potential: GTH-PBE-q14 Cs: - aux_basis: admm-dz-q9 + aux_basis: admm-tzp-q9 basis: TZVP-MOLOPT-PBE-GTH-q9 potential: GTH-PBE-q9 Cu: - aux_basis: admm-dz-q11 + aux_basis: admm-tzp-q11 basis: TZVP-MOLOPT-PBE-GTH-q11 potential: GTH-PBE-q19 Db: @@ -136,11 +136,11 @@ cp2k_input: basis: null potential: GTH-PBE-q17 F: - aux_basis: admm-dz-q7 + aux_basis: admm-tzp-q7 basis: TZVP-MOLOPT-PBE-GTH-q7 potential: GTH-PBE-q7 Fe: - aux_basis: admm-dz-q16 + aux_basis: admm-tzp-q16 basis: TZVP-MOLOPT-PBE-GTH-q16 potential: GTH-PBE-q16 Fl: @@ -156,7 +156,7 @@ cp2k_input: basis: null potential: null Ga: - aux_basis: admm-dz-q13 + aux_basis: admm-tzp-q13 basis: TZVP-MOLOPT-PBE-GTH-q13 potential: GTH-PBE-q3 Gd: @@ -164,23 +164,23 @@ cp2k_input: basis: null potential: GTH-PBE-q18 Ge: - aux_basis: admm-dz-q4 + aux_basis: admm-tzp-q4 basis: TZVP-MOLOPT-PBE-GTH-q4 potential: GTH-PBE-q4 H: - aux_basis: admm-dz-q1 + aux_basis: admm-tzp-q1 basis: TZVP-MOLOPT-PBE-GTH-q1 potential: GTH-PBE-q1 He: - aux_basis: admm-dz-q2 + aux_basis: admm-tzp-q2 basis: TZVP-MOLOPT-PBE-GTH-q2 potential: GTH-PBE-q2 Hf: - aux_basis: admm-dz-q12 + aux_basis: admm-tzp-q12 basis: TZVP-MOLOPT-PBE-GTH-q12 potential: GTH-PBE-q12 Hg: - aux_basis: admm-dz-q12 + aux_basis: admm-tzp-q12 basis: TZVP-MOLOPT-PBE-GTH-q12 potential: GTH-PBE-q12 Ho: @@ 
-192,23 +192,23 @@ cp2k_input: basis: null potential: null I: - aux_basis: admm-dz-q7 + aux_basis: admm-tzp-q7 basis: TZVP-MOLOPT-PBE-GTH-q7 potential: GTH-PBE-q7 In: - aux_basis: admm-dz-q13 + aux_basis: admm-tzp-q13 basis: TZVP-MOLOPT-PBE-GTH-q13 potential: GTH-PBE-q3 Ir: - aux_basis: admm-dz-q17 + aux_basis: admm-tzp-q17 basis: TZVP-MOLOPT-PBE-GTH-q17 potential: GTH-PBE-q9 K: - aux_basis: admm-dz-q9 + aux_basis: admm-tzp-q9 basis: TZVP-MOLOPT-PBE-GTH-q9 potential: GTH-PBE-q9 Kr: - aux_basis: admm-dz-q8 + aux_basis: admm-tzp-q8 basis: TZVP-MOLOPT-PBE-GTH-q8 potential: GTH-PBE-q8 La: @@ -216,7 +216,7 @@ cp2k_input: basis: null potential: GTH-PBE-q11 Li: - aux_basis: admm-dz-q3 + aux_basis: admm-tzp-q3 basis: TZVP-MOLOPT-PBE-GTH-q1 potential: GTH-PBE-q3 Lr: @@ -240,15 +240,15 @@ cp2k_input: basis: null potential: null Mg: - aux_basis: admm-dz-q10 + aux_basis: admm-tzp-q10 basis: TZVP-MOLOPT-PBE-GTH-q10 potential: GTH-PBE-q2 Mn: - aux_basis: admm-dz-q15 + aux_basis: admm-tzp-q15 basis: TZVP-MOLOPT-PBE-GTH-q15 potential: GTH-PBE-q15 Mo: - aux_basis: admm-dz-q14 + aux_basis: admm-tzp-q14 basis: TZVP-MOLOPT-PBE-GTH-q14 potential: GTH-PBE-q14 Mt: @@ -256,15 +256,15 @@ cp2k_input: basis: null potential: null N: - aux_basis: admm-dz-q5 + aux_basis: admm-tzp-q5 basis: TZVP-MOLOPT-PBE-GTH-q5 potential: GTH-PBE-q5 Na: - aux_basis: admm-dz-q9 + aux_basis: admm-tzp-q9 basis: TZVP-MOLOPT-PBE-GTH-q1 potential: GTH-PBE-q9 Nb: - aux_basis: admm-dz-q13 + aux_basis: admm-tzp-q13 basis: TZVP-MOLOPT-PBE-GTH-q13 potential: GTH-PBE-q13 Nd: @@ -272,7 +272,7 @@ cp2k_input: basis: null potential: GTH-PBE-q14 Ne: - aux_basis: admm-dz-q8 + aux_basis: admm-tzp-q8 basis: TZVP-MOLOPT-PBE-GTH-q8 potential: GTH-PBE-q8 Nh: @@ -280,7 +280,7 @@ cp2k_input: basis: null potential: null Ni: - aux_basis: admm-dz-q18 + aux_basis: admm-tzp-q18 basis: TZVP-MOLOPT-PBE-GTH-q18 potential: GTH-PBE-q18 'No': @@ -292,7 +292,7 @@ cp2k_input: basis: null potential: null O: - aux_basis: admm-dz-q6 + aux_basis: 
admm-tzp-q6 basis: TZVP-MOLOPT-PBE-GTH-q6 potential: GTH-PBE-q6 Og: @@ -300,11 +300,11 @@ cp2k_input: basis: null potential: null Os: - aux_basis: admm-dz-q16 + aux_basis: admm-tzp-q16 basis: TZVP-MOLOPT-PBE-GTH-q16 potential: GTH-PBE-q8 P: - aux_basis: admm-dz-q5 + aux_basis: admm-tzp-q5 basis: TZVP-MOLOPT-PBE-GTH-q5 potential: GTH-PBE-q5 Pa: @@ -312,11 +312,11 @@ cp2k_input: basis: null potential: null Pb: - aux_basis: admm-dz-q4 + aux_basis: admm-tzp-q4 basis: TZVP-MOLOPT-PBE-GTH-q4 potential: GTH-PBE-q4 Pd: - aux_basis: admm-dz-q18 + aux_basis: admm-tzp-q18 basis: TZVP-MOLOPT-PBE-GTH-q18 potential: GTH-PBE-q18 Pm: @@ -324,7 +324,7 @@ cp2k_input: basis: null potential: GTH-PBE-q15 Po: - aux_basis: admm-dz-q6 + aux_basis: admm-tzp-q6 basis: TZVP-MOLOPT-PBE-GTH-q6 potential: GTH-PBE-q6 Pr: @@ -332,7 +332,7 @@ cp2k_input: basis: null potential: GTH-PBE-q13 Pt: - aux_basis: admm-dz-q18 + aux_basis: admm-tzp-q18 basis: TZVP-MOLOPT-PBE-GTH-q18 potential: GTH-PBE-q18 Pu: @@ -344,11 +344,11 @@ cp2k_input: basis: null potential: null Rb: - aux_basis: admm-dz-q9 + aux_basis: admm-tzp-q9 basis: TZVP-MOLOPT-PBE-GTH-q9 potential: GTH-PBE-q9 Re: - aux_basis: admm-dz-q15 + aux_basis: admm-tzp-q15 basis: TZVP-MOLOPT-PBE-GTH-q15 potential: GTH-PBE-q7 Rf: @@ -360,31 +360,31 @@ cp2k_input: basis: null potential: null Rh: - aux_basis: admm-dz-q17 + aux_basis: admm-tzp-q17 basis: TZVP-MOLOPT-PBE-GTH-q17 potential: GTH-PBE-q9 Rn: - aux_basis: admm-dz-q8 + aux_basis: admm-tzp-q8 basis: TZVP-MOLOPT-PBE-GTH-q8 potential: GTH-PBE-q8 Ru: - aux_basis: admm-dz-q16 + aux_basis: admm-tzp-q16 basis: TZVP-MOLOPT-PBE-GTH-q16 potential: GTH-PBE-q8 S: - aux_basis: admm-dz-q6 + aux_basis: admm-tzp-q6 basis: TZVP-MOLOPT-PBE-GTH-q6 potential: GTH-PBE-q6 Sb: - aux_basis: admm-dz-q5 + aux_basis: admm-tzp-q5 basis: TZVP-MOLOPT-PBE-GTH-q5 potential: GTH-PBE-q5 Sc: - aux_basis: admm-dz-q11 + aux_basis: admm-tzp-q11 basis: TZVP-MOLOPT-PBE-GTH-q11 potential: GTH-PBE-q11 Se: - aux_basis: admm-dz-q6 + 
aux_basis: admm-tzp-q6 basis: TZVP-MOLOPT-PBE-GTH-q6 potential: GTH-PBE-q6 Sg: @@ -392,7 +392,7 @@ cp2k_input: basis: null potential: null Si: - aux_basis: admm-dz-q4 + aux_basis: admm-tzp-q4 basis: TZVP-MOLOPT-PBE-GTH-q4 potential: GTH-PBE-q4 Sm: @@ -400,15 +400,15 @@ cp2k_input: basis: null potential: GTH-PBE-q16 Sn: - aux_basis: admm-dz-q4 + aux_basis: admm-tzp-q4 basis: TZVP-MOLOPT-PBE-GTH-q4 potential: GTH-PBE-q4 Sr: - aux_basis: admm-dz-q10 + aux_basis: admm-tzp-q10 basis: TZVP-MOLOPT-PBE-GTH-q10 potential: GTH-PBE-q10 Ta: - aux_basis: admm-dz-q13 + aux_basis: admm-tzp-q13 basis: TZVP-MOLOPT-PBE-GTH-q13 potential: GTH-PBE-q5 Tb: @@ -416,11 +416,11 @@ cp2k_input: basis: null potential: GTH-PBE-q19 Tc: - aux_basis: admm-dz-q15 + aux_basis: admm-tzp-q15 basis: TZVP-MOLOPT-PBE-GTH-q15 potential: GTH-PBE-q15 Te: - aux_basis: admm-dz-q6 + aux_basis: admm-tzp-q6 basis: TZVP-MOLOPT-PBE-GTH-q6 potential: GTH-PBE-q6 Th: @@ -428,11 +428,11 @@ cp2k_input: basis: null potential: null Ti: - aux_basis: admm-dz-q12 + aux_basis: admm-tzp-q12 basis: TZVP-MOLOPT-PBE-GTH-q12 potential: GTH-PBE-q12 Tl: - aux_basis: admm-dz-q13 + aux_basis: admm-tzp-q13 basis: TZVP-MOLOPT-PBE-GTH-q13 potential: GTH-PBE-q3 Tm: @@ -448,19 +448,19 @@ cp2k_input: basis: null potential: GTH-PBE-q14 V: - aux_basis: admm-dz-q13 + aux_basis: admm-tzp-q13 basis: TZVP-MOLOPT-PBE-GTH-q13 potential: GTH-PBE-q13 W: - aux_basis: admm-dz-q14 + aux_basis: admm-tzp-q14 basis: TZVP-MOLOPT-PBE-GTH-q14 potential: GTH-PBE-q6 Xe: - aux_basis: admm-dz-q8 + aux_basis: admm-tzp-q8 basis: TZVP-MOLOPT-PBE-GTH-q8 potential: GTH-PBE-q8 Y: - aux_basis: admm-dz-q11 + aux_basis: admm-tzp-q11 basis: TZVP-MOLOPT-PBE-GTH-q11 potential: GTH-PBE-q11 Yb: @@ -468,11 +468,11 @@ cp2k_input: basis: null potential: GTH-PBE-q24 Zn: - aux_basis: admm-dz-q12 + aux_basis: admm-tzp-q12 basis: TZVP-MOLOPT-PBE-GTH-q12 potential: GTH-PBE-q20 Zr: - aux_basis: admm-dz-q12 + aux_basis: admm-tzp-q12 basis: TZVP-MOLOPT-PBE-GTH-q12 potential: 
GTH-PBE-q12 basis_filenames: From 6b39a445f0119cbdf4f9fcedc99440522c4d825b Mon Sep 17 00:00:00 2001 From: nwinner Date: Sat, 29 Oct 2022 09:53:03 -0700 Subject: [PATCH 03/50] Defect updates --- src/atomate2/cp2k/flows/core.py | 11 ++++---- src/atomate2/cp2k/flows/defect.py | 37 +++++++++++++++------------ src/atomate2/cp2k/jobs/defect.py | 42 ++++++++++++++++++++----------- 3 files changed, 53 insertions(+), 37 deletions(-) diff --git a/src/atomate2/cp2k/flows/core.py b/src/atomate2/cp2k/flows/core.py index a153cfdc9b..7e531da2c6 100644 --- a/src/atomate2/cp2k/flows/core.py +++ b/src/atomate2/cp2k/flows/core.py @@ -217,15 +217,14 @@ class HybridFlowMaker(Maker): def __post_init__(self): self.hybrid_maker.hybrid_functional = self.hybrid_functional - def make(self, structure: Structure, prev_cp2k_dir: str | Path | None = None) -> Job: + def make(self, *args, **kwargs) -> Flow: jobs = [] if self.initialize_with_pbe: - initialization = self.initialize_maker.make(structure, prev_cp2k_dir) + initialization = self.initialize_maker.make(*args, **kwargs) jobs.append(initialization) - hyb = self.hybrid_maker.make( - initialization.output.structure if self.initialize_with_pbe else structure, - prev_cp2k_dir=initialization.output.dir_name if self.initialize_with_pbe else prev_cp2k_dir - ) + hyb = self.hybrid_maker.make(initialization.output.structure, prev_cp2k_dir=initialization.output.dir_name) + else: + hyb = self.hybrid_maker.make(*args, **kwargs) jobs.append(hyb) return Flow(jobs, output=hyb.output, name=self.name) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index df11cc2bb0..b67aa6da03 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -40,6 +40,7 @@ logger = logging.getLogger(__name__) + # TODO close to being able to put this in common. 
Just need a switch that decides which core flow/job to use based on software @dataclass class FormationEnergyMaker(Maker): @@ -84,35 +85,36 @@ def __post_init__(self): initialize_with_pbe=self.initialize_with_pbe, hybrid_functional=self.hybrid_functional, hybrid_maker=HybridCellOptMaker( - input_set_generator=DefectHybridCellOptSetGenerator() + input_set_generator=DefectHybridCellOptSetGenerator(), + task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} ) ) else: self.bulk_maker = CellOptMaker( - input_set_generator=DefectCellOptSetGenerator() + input_set_generator=DefectCellOptSetGenerator(), + task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} ) elif self.run_bulk == "static": if self.hybrid_functional: self.bulk_maker = HybridStaticFlowMaker( hybrid_functional=self.hybrid_functional, hybrid_maker=HybridStaticMaker( - input_set_generator=DefectHybridStaticSetGenerator() + input_set_generator=DefectHybridStaticSetGenerator(), + task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} ) ) else: self.bulk_maker = StaticMaker( - input_set_generator=DefectStaticSetGenerator() + input_set_generator=DefectStaticSetGenerator(), + task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} ) - # TODO Can probably put this somewhere else? 
- self.bulk_maker.task_document_kwargs.update({"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)}) - if self.hybrid_functional: self.def_maker = HybridRelaxFlowMaker( hybrid_functional=self.hybrid_functional, initialize_with_pbe=self.initialize_with_pbe, initialize_maker=DefectStaticMaker(), - hybrid_maker=HybridRelaxMaker() + hybrid_maker=DefectHybridRelaxMaker(), ) else: self.def_maker = DefectRelaxMaker() @@ -160,21 +162,24 @@ def make( for defect in defects: chgs = defect.get_charge_states() if run_all_charges else [0] for charge in chgs: - defect_job = self.def_maker.make(defect=deepcopy(defect), charge=charge) + defect_job = self.def_maker.make(deepcopy(defect), charge) jobs.append(defect_job) defect_outputs[defect.name][int(charge)] = (defect, defect_job.output) - jobs.append(collect_defect_outputs( - defect_outputs=defect_outputs, - bulk_output=bulk_job.output, - dielectric=dielectric - ) - ) + if self.run_bulk and defects: + collect_job = collect_defect_outputs( + defect_outputs=defect_outputs, + bulk_output=bulk_job.output if self.run_bulk else None, + dielectric=dielectric + ) + jobs.append(collect_job) + else: + collect_job = None return Flow( jobs=jobs, name=self.name, - output=jobs[-1].output, + output=jobs[-1].output if collect_job else None, ) # TODO this is totally code agnostic and should be in common diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index df34dddd07..7a31173310 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -9,6 +9,7 @@ from tkinter import W from numpy.typing import NDArray +from pymatgen.core import Structure from pymatgen.analysis.defects.core import Defect, Vacancy from atomate2.cp2k.sets.base import Cp2kInputGenerator from atomate2.cp2k.sets.defect import ( @@ -36,21 +37,32 @@ class BaseDefectMaker(BaseCp2kMaker): force_diagonal: bool = field(default=False) @cp2k_job - def make(self, defect: Defect, charge: int = 0, prev_cp2k_dir: str | 
Path | None = None): - if isinstance(defect, Vacancy): - defect = GhostVacancy( - structure=defect.structure, site=defect.site, - multiplicity=defect.multiplicity, oxi_state=defect.oxi_state, - symprec=defect.symprec, angle_tolerance=defect.angle_tolerance - ) - structure = defect.get_supercell_structure( - sc_mat=self.supercell_matrix, - dummy_species=None, - min_atoms=self.min_atoms, - max_atoms=self.max_atoms, - min_length=self.min_length, - force_diagonal=self.force_diagonal, - ) + def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | Path | None = None): + if isinstance(defect, Defect): + if isinstance(defect, Vacancy): + defect = GhostVacancy( + structure=defect.structure, site=defect.site, + multiplicity=defect.multiplicity, oxi_state=defect.oxi_state, + symprec=defect.symprec, angle_tolerance=defect.angle_tolerance + ) + structure = defect.get_supercell_structure( + sc_mat=self.supercell_matrix, + dummy_species=None, + min_atoms=self.min_atoms, + max_atoms=self.max_atoms, + min_length=self.min_length, + force_diagonal=self.force_diagonal, + ) + self.write_additional_data.update( + { + "info.json": { + "defect": deepcopy(defect), + "defect_charge": charge, + "sc_mat": self.supercell_matrix} + } + ) + else: + structure = deepcopy(defect) structure.set_charge(charge) # provenance stuff self.write_additional_data.update( From 2951a966d205c45424c38ffb318f282375b4cec6 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 31 Oct 2022 10:22:47 -0700 Subject: [PATCH 04/50] Working on defects with builder --- src/atomate2/cp2k/builders/defect.py | 897 +++++++++++++++++++++++++++ src/atomate2/cp2k/flows/defect.py | 58 +- src/atomate2/cp2k/jobs/defect.py | 11 +- src/atomate2/cp2k/schemas/defect.py | 94 ++- 4 files changed, 976 insertions(+), 84 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index e69de29bb2..354502650d 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ 
b/src/atomate2/cp2k/builders/defect.py @@ -0,0 +1,897 @@ +from datetime import datetime +from itertools import chain, groupby, combinations +from re import A +from tkinter import W +from typing import Dict, Iterator, List, Literal, Optional +from copy import deepcopy +from math import ceil +from monty.json import MontyDecoder, jsanitize + +from maggma.builders import Builder +from maggma.stores import Store +from maggma.utils import grouper + +from pymatgen.core import Structure +from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer + +from atomate.utils.utils import load_class + +from emmet.core.thermo import ThermoDoc +from emmet.core.material import MaterialsDoc + +from emmet.builders.settings import EmmetBuildSettings + +from atomate2.settings import Atomate2Settings +from atomate2.cp2k.schemas.task import TaskDocument +from atomate2.cp2k.schemas.defect import DefectDoc +from atomate2.cp2k.schemas.calc_types import TaskType +from atomate2.cp2k.schemas.calc_types.utils import run_type + +from emmet.core.electronic_structure import ElectronicStructureDoc + +__author__ = "Nicholas Winner " + + +# TODO this builder is very close to being code agnostic. We need only resolve the standard key names and +# how they are fed to the DefectDoc class. e.g. VASP calcs store "locpot", but CP2K store "v_hartree" +class DefectBuilder(Builder): + """ + The DefectBuilder collects task documents performed on structures containing a single point defect. + The builder is intended to group tasks corresponding to the same defect (species including charge state), + find the best ones, and perform finite-size defect corrections to create a defect document. These + defect documents can then be assembled into defect phase diagrams using the DefectThermoBuilder. + + In order to make the build process easier, an entry must exist inside of the task doc that identifies it + as a point defect calculation. 
Currently this is the Pymatgen defect object keyed by "defect". In the future, + this may be changed to having a defect transformation in the transformation history. + + The process is as follows: + + 1.) Find all documents containing the defect query. + 2.) Find all documents that do not contain the defect query, and which have DOS and dielectric data already + calculated. These are the candidate bulk tasks. + 3.) For each candidate defect task, attempt to match to a candidate bulk task of the same number of sites + (+/- 1) with the required properties for analysis. Reject defects that do not have a corresponding + bulk calculation. + 4.) Convert (defect, bulk task) doc pairs to DefectDocs + 5.) Post-process and validate defect document + 6.) Update the defect store + """ + + #TODO how to incorporate into settings? + DEFAULT_ALLOWED_TASKS = [ + TaskType.Structure_Optimization.value, + TaskType.Static.value + ] + + def __init__( + self, + tasks: Store, + defects: Store, + dielectric: Store, + electronic_structure: Store, + materials: Store, + electrostatic_potentials: Store, + task_validation: Optional[Store] = None, + query: Optional[Dict] = None, + bulk_query: Optional[Dict] = None, + allowed_task_types: Optional[List[str]] = DEFAULT_ALLOWED_TASKS, + task_schema: Literal["cp2k"] = "cp2k", # TODO cp2k specific right now, but this will go in common eventually + settings: Dict | None = None, + **kwargs, + ): + """ + Args: + tasks: Store of task documents + defects: Store of defect documents to generate + dielectric: Store of dielectric data + electronic_structure: Store of electronic structure data + materials: Store of materials documents + electrostatic_potentials: Store of electrostatic potential data. These + are generally stored separately from the tasks on GridFS due to their size. + task_validation: Store of task validation documents. + query: dictionary to limit tasks to be analyzed.
NOT the same as the defect_query property + allowed_task_types: list of task_types that can be processed + settings: EmmetBuildSettings object + """ + + self.tasks = tasks + self.defects = defects + self.materials = materials + self.dielectric = dielectric + self.electronic_structure = electronic_structure + self.electrostatic_potentials = electrostatic_potentials + self.task_validation = task_validation + self.allowed_task_types = allowed_task_types #TODO How to incorporate into getitems? + + self._allowed_task_types = {TaskType(t) for t in self.allowed_task_types} + settings = settings if settings else {} + self.settings = Atomate2Settings(**settings) # TODO don't think this is right + self.query = query if query else {} + self.bulk_query = bulk_query if bulk_query else {} + self.timestamp = None + self._mpid_map = {} + self.task_schema = task_schema + self.kwargs = kwargs + + # TODO Long term, schemas should be part of the matching and grouping process so that a builder can be run on a mixture + self.query.update({'output.@module': f"atomate2.{self.task_schema}.schemas.task", "output.@class": "TaskDocument"}) + self.bulk_query.update({'output.@module': f"atomate2.{self.task_schema}.schemas.task", "output.@class": "TaskDocument"}) + self._defect_query = 'output.additional_json.info.defect' + + self._required_defect_properties = [ + self._defect_query, + self.tasks.key, + 'output.output.energy', + 'output.output.structure', + 'output.input', + 'output.nsites', + 'output.cp2k_objects.v_hartree' + ] + + self._required_bulk_properties = [ + self.tasks.key, + 'output.output.energy', + 'output.output.structure', + 'output.input', + 'output.cp2k_objects.v_hartree' + ] + + self._optional_defect_properties = [] + self._optional_bulk_properties = [] + + sources = [tasks, dielectric, electronic_structure, materials, electrostatic_potentials] + if self.task_validation: + sources.append(self.task_validation) + super().__init__(sources=sources, targets=[defects], **kwargs) + + 
@property + def defect_query(self) -> str: + """ + The standard query for defect tasks. + """ + return self._defect_query + + #TODO Hartree pot should be required but only for charged defects + @property + def required_defect_properties(self) -> List: + """ + Properties essential to processing a defect task. + """ + return self._required_defect_properties + + @property + def required_bulk_properties(self) -> List: + """ + Properties essential to processing a bulk task. + """ + return self._required_bulk_properties + + @property + def optional_defect_properties(self) -> List: + """ + Properties that are optional for processing a defect task. + """ + return self._optional_defect_properties + + @property + def optional_bulk_properties(self) -> List: + """ + Properties that are optional for bulk tasks. + """ + return self._optional_bulk_properties + + @property + def mpid_map(self) -> Dict: + return self._mpid_map + + def ensure_indexes(self): + """ + Ensures indicies on the tasks and materials collections + """ + + # Basic search index for tasks + self.tasks.ensure_index(self.tasks.key) + self.tasks.ensure_index("output.last_updated") + self.tasks.ensure_index("output.state") + self.tasks.ensure_index("output.formula_pretty") # TODO is necessary? 
+ + # Search index for materials + self.materials.ensure_index("material_id") + self.materials.ensure_index("last_updated") + self.materials.ensure_index("task_ids") + + # Search index for defects + self.defects.ensure_index("material_id") + self.defects.ensure_index("last_updated") + self.defects.ensure_index("task_ids") + + if self.task_validation: + self.task_validation.ensure_index("task_id") + self.task_validation.ensure_index("valid") + + def prechunk(self, number_splits: int) -> Iterator[Dict]: + + tag_query = {} + if len(self.settings.BUILD_TAGS) > 0 and len(self.settings.EXCLUDED_TAGS) > 0: + tag_query["$and"] = [ + {"tags": {"$in": self.settings.BUILD_TAGS}}, + {"tags": {"$nin": self.settings.EXCLUDED_TAGS}}, + ] + elif len(self.settings.BUILD_TAGS) > 0: + tag_query["tags"] = {"$in": self.settings.BUILD_TAGS} + + # Get defect tasks + temp_query = self.query.copy() + temp_query.update(tag_query) + temp_query.update({d: {'$exists': True, "$ne": None} for d in self.required_defect_properties}) + temp_query.update({self.defect_query: {'$exists': True}, "state": "successful"}) + defect_tasks = { + doc[self.tasks.key] + for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + } + + # Get bulk tasks + temp_query = self.bulk_query.copy() + temp_query.update(tag_query) + temp_query.update({d: {'$exists': True} for d in self.required_bulk_properties}) + temp_query.update({self.defect_query: {'$exists': False}, "state": "successful"}) + bulk_tasks = { + doc[self.tasks.key] + for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + } + + N = ceil(len(defect_tasks) / number_splits) + for task_chunk in grouper(defect_tasks, N): + yield {"query": {"task_id": {"$in": task_chunk + list(bulk_tasks)}}} + + def get_items(self) -> Iterator[List[Dict]]: + """ + Gets all items to process into defect documents. + This does no datetime checking; relying on on whether + task_ids are included in the Defect Collection. 
+ + The procedure is as follows: + + 1. Get all tasks with standard "defect" query tag + 2. Filter all tasks by skipping tasks which are already in the Defect Store + 3. Get all tasks that could be used as bulk + 4. Filter all bulks which do not have corresponding Dielectric and + ElectronicStructure data (if a band gap exists for that task). + 5. Group defect tasks by defect matching + 6. Given defect object in a group, bundle them with bulk tasks + identified with structure matching + 7. Yield the item bundles + + Returns: + Iterator of (defect documents, task bundles) + + The defect document is an existing defect doc to be updated with new data, or None + + task bundles bundle are all the tasks that correspond to the same defect and all possible + bulk tasks that could be matched to them. +d """ + + self.logger.info("Defect builder started") + self.logger.info( + f"Allowed task types: {[task_type.value for task_type in self._allowed_task_types]}" + ) + + self.logger.info("Setting indexes") + self.ensure_indexes() + + # Save timestamp to mark buildtime for material documents + self.timestamp = datetime.utcnow() + + self.logger.info("Finding tasks to process") + + # Get defect tasks + temp_query = self.query.copy() + temp_query.update({d: {'$exists': True, "$ne": None} for d in self.required_defect_properties}) + temp_query.update({self.defect_query: {'$exists': True}, "output.state": "successful"}) + defect_tasks = { + doc[self.tasks.key] + for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + } + + # Get bulk tasks + temp_query = self.bulk_query.copy() + temp_query.update({d: {'$exists': True} for d in self.required_bulk_properties}) + temp_query.update({self.defect_query: {'$exists': False}, "output.state": "successful"}) + bulk_tasks = { + doc[self.tasks.key] + for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + } + + # TODO Not the same validation behavior as material builders? 
+ # If validation store exists, find tasks that are invalid and remove them + if self.task_validation: + validated = { + doc[self.tasks.key] + for doc in self.task_validation.query( + {}, [self.task_validation.key] + ) + } + + defect_tasks = defect_tasks.intersection(validated) + bulk_tasks = bulk_tasks.intersection(validated) + + invalid_ids = { + doc[self.tasks.key] + for doc in self.task_validation.query( + {"is_valid": False}, [self.task_validation.key] + ) + } + self.logger.info("Removing {} invalid tasks".format(len(invalid_ids))) + defect_tasks = defect_tasks - invalid_ids + bulk_tasks = bulk_tasks - invalid_ids + + processed_defect_tasks = { + t_id + for d in self.defects.query({}, ["task_ids"]) + for t_id in d.get("task_ids", []) + } + + all_tasks = defect_tasks | bulk_tasks + + self.logger.debug("All tasks: {}".format(len(all_tasks))) + self.logger.debug("Bulk tasks before filter: {}".format(len(bulk_tasks))) + bulk_tasks = set(filter(self.__preprocess_bulk, bulk_tasks)) + self.logger.debug("Bulk tasks after filter: {}".format(len(bulk_tasks))) + self.logger.debug("All defect tasks: {}".format(len(defect_tasks))) + unprocessed_defect_tasks = defect_tasks - processed_defect_tasks + + if not unprocessed_defect_tasks: + self.logger.info("No unprocessed defect tasks. Exiting") + return + elif not bulk_tasks: + self.logger.info("No compatible bulk calculations. 
Exiting.") + return + + self.logger.info(f"Found {len(unprocessed_defect_tasks)} unprocessed defect tasks") + self.logger.info(f"Found {len(bulk_tasks)} bulk tasks with dielectric properties") + + # Set total for builder bars to have a total + self.total = len(unprocessed_defect_tasks) + + # yield list of defects that are of the same type, matched to an appropriate bulk calc + self.logger.info(f"Starting defect matching.") + + for defect, defect_task_group in self.__filter_and_group_tasks(unprocessed_defect_tasks): + task_ids = self.__match_defects_to_bulks(bulk_tasks, defect_task_group) + if not task_ids: + continue + doc = self.__get_defect_doc(defect) + item_bundle = self.__get_item_bundle(task_ids) + material_id = self.mpid_map[item_bundle[0][1][self.tasks.key]] + yield doc, item_bundle, material_id + + def process_item(self, items): + """ + Process a group of defect tasks that correspond to the same defect into a single defect + document. If the DefectDoc already exists, then update it and return it. If it does not, + create a new DefectDoc + + Args: + items: (DefectDoc or None, [(defect task dict, bulk task dict, dielectric dict), ... 
] + + returns: the defect document as a dictionary + """ + defect_doc, item_bundle, material_id = items + self.logger.info(f"Processing group of {len(item_bundle)} defects into DefectDoc") + if item_bundle: + defect_tasks, bulk_tasks, dielectrics = list(zip(*item_bundle)) + if defect_doc: + defect_doc.update_all( + defect_tasks=defect_tasks, bulk_tasks=bulk_tasks, + dielectrics=dielectrics, query=self.defect_query + ) + else: + defect_doc = DefectDoc.from_tasks( + defect_tasks=defect_tasks, bulk_tasks=bulk_tasks, dielectrics=dielectrics, + query=self.defect_query, key=self.tasks.key, material_id=material_id + ) + return defect_doc.dict() + return {} + + def update_targets(self, items): + """ + Inserts the new task_types into the task_types collection + """ + + items = [item for item in items if item] + + if len(items) > 0: + self.logger.info(f"Updating {len(items)} defects") + for item in items: + item.update({"_bt": self.timestamp}) + self.defects.remove_docs( + { + "task_ids": item['task_ids'], + } + ) + self.defects.update( + docs=jsanitize(items, allow_bson=True), + key='task_ids', + ) + else: + self.logger.info("No items to update") + + def __filter_and_group_tasks(self, tasks): + """ + Groups defect tasks. Tasks are grouped according to the reduced representation + of the defect, and so tasks with different settings (e.g. supercell size, functional) + will be grouped together. + + Args: + tasks: task_ids for unprocessed defects + + returns: + [ (defect, [task_ids] ), ...] 
where task_ids correspond to the same defect + """ + + props = [ + self.defect_query, + self.tasks.key, + 'output.structure' + ] + + self.logger.debug(f"Finding equivalent tasks for {len(tasks)} defects") + + sm = StructureMatcher(allow_subset=False) #TODO build settings + defects = [ + { + self.tasks.key: t[self.tasks.key], 'defect': self.__get_defect_from_task(t), + 'structure': Structure.from_dict(t['output']['structure']) + } + for t in self.tasks.query(criteria={self.tasks.key: {'$in': list(tasks)}}, properties=props) + ] + for d in defects: + # TODO remove oxidation state because spins/oxidation cause errors in comparison. + # but they shouldnt if those props are close in value + d['structure'].remove_oxidation_states() + + def key(x): + s = x['defect'].structure + return get_sg(s), s.composition.reduced_composition + + def are_equal(x, y): + """ + To decide if defects are equal. Either the defect objects are + equal, OR two different defect objects relaxed to the same final structure + (common with interstitials). +:w + + TODO Need a way to do the output structure comparison for a X atom defect cell + TODO which can be embedded in a Y atom defect cell up to tolerance. + """ + if x['defect'] == y['defect']: + return True + + # TODO This is needed for ghost vacancy unfortunately, since sm.fit can't distinguish ghosts + if x['defect'].defect_composition == y['defect'].defect_composition and \ + x['defect'].charge == y['defect'].charge and \ + sm.fit(x['structure'], y['structure']): + return True + return False + + sorted_s_list = sorted(enumerate(defects), key=lambda x: key(x[1])) + all_groups = [] + + # For each pre-grouped list of structures, perform actual matching. 
+ for k, g in groupby(sorted_s_list, key=lambda x: key(x[1])): + unmatched = list(g) + while len(unmatched) > 0: + i, refs = unmatched.pop(0) + matches = [i] + inds = list(filter(lambda j: are_equal(refs, unmatched[j][1]), list(range(len(unmatched))))) + matches.extend([unmatched[i][0] for i in inds]) + unmatched = [unmatched[i] for i in range(len(unmatched)) if i not in inds] + all_groups.append( + (defects[i]['defect'], [defects[i][self.tasks.key] for i in matches]) + ) + + self.logger.debug(f"All groups {all_groups}") + return all_groups + + def __get_defect_from_task(self, task): + """ + Using the defect_query property, retrieve a pymatgen defect object from the task document + """ + defect = unpack(self.defect_query, task) + return MontyDecoder().process_decoded(defect) + + def __get_defect_doc(self, defect): + """ + Given a defect, find the DefectDoc corresponding to it in the defects store if it exists + + returns: DefectDoc or None + """ + material_id = self._get_mpid(defect.structure) + docs = [ + DefectDoc(**doc) + for doc in self.defects.query(criteria={'material_id': material_id}, properties=None) + ] + for doc in docs: + if defect == doc.defect: + return doc + return None + + # TODO should move to returning dielectric doc or continue returning the total diel tensor? + def __get_dielectric(self, key): + """ + Given a bulk task's task_id, find the material_id, and then use it to query the dielectric store + and retrieve the total dielectric tensor for defect analysis. If no dielectric exists, as would + be the case for metallic systems, return None. + """ + for diel in self.dielectric.query(criteria={"material_id": key}, properties=['total']): + return diel['total'] + return None + + #TODO retrieving the electrostatic potential is by far the most expesive part of the builder. Any way to reduce? + def __get_item_bundle(self, task_ids): + """ + Gets a group of items that can be processed together into a defect document. 
+ + Args: + bulk_tasks: possible bulk tasks to match to defects + defect_task_group: group of equivalent defects (defined by PointDefectComparator) + + returns: [(defect task dict, bulk_task_dict, dielectric dict), ...] + """ + return [ + ( + self.tasks.query_one(criteria={self.tasks.key: defect_tasks_id}), + self.tasks.query_one(criteria={self.tasks.key: bulk_tasks_id}), + self.__get_dielectric(self._mpid_map[bulk_tasks_id]), + ) + for defect_tasks_id, bulk_tasks_id in task_ids + ] + + def _get_mpid(self, structure): + """ + Given a structure, determine if an equivalent structure exists, with a material_id, + in the materials store. + + Args: + structure: Candidate structure + + returns: material_id, if one exists, else None + """ + sga = SpacegroupAnalyzer(structure, symprec=self.settings.SYMPREC) # TODO Add angle tolerance + mats = self.materials.query( + criteria={ + 'chemsys': structure.composition.chemical_system, + }, properties=['structure', 'material_id'] + ) + # TODO coudl more than one material match true? 
+ sm = StructureMatcher() # TODO add tolerances + for m in mats: + if sm.fit(structure, Structure.from_dict(m['structure'])): + return m['material_id'] + return None + + def __match_defects_to_bulks(self, bulk_ids, defect_ids): + """ + Given task_ids of bulk and defect tasks, match the defects to a bulk task that has + commensurate: + + - Composition + - Number of sites + - Symmetry + + """ + + self.logger.debug(f"Finding bulk/defect task combinations.") + self.logger.debug(f"Bulk tasks: {bulk_ids}") + self.logger.debug(f"Defect tasks: {defect_ids}") + + # TODO mongo projection on array doesn't work (see above) + props = [ + self.tasks.key, + self.defect_query, + 'output.input', + 'output.nsites', + 'output.output.structure', + "output.additional_json.info.sc_mat" + ] + defects = list(self.tasks.query(criteria={self.tasks.key: {'$in': list(defect_ids)}}, properties=props)) + ps = self.__get_pristine_supercell(defects[0]) + ps.remove_oxidation_states() # TODO might cause problems + bulks = list( + self.tasks.query( + criteria={ + self.tasks.key: {'$in': list(bulk_ids)}, + 'output.composition_reduced': jsanitize(ps.composition.to_reduced_dict), + }, + properties=props + ) + ) + + # TODO add settings + sm = StructureMatcher( + primitive_cell=False, + scale=True, + attempt_supercell=False, + allow_subset=False, + comparator=ElementComparator(), + ) + + def _compare(b, d): + rtb = b.get('output').get('input').get('xc').split("+U")[0] + rtd = d.get('output').get('input').get('xc').split("+U")[0] + if rtb == rtd: + if sm.fit(self.__get_pristine_supercell(d), self.__get_pristine_supercell(b)): + return True + return False + + pairs = [ + (defect[self.tasks.key], bulk[self.tasks.key]) + for bulk in bulks + for defect in defects + if _compare(bulk, defect) + ] + + self.logger.debug(f"Found {len(pairs)} commensurate bulk/defect pairs") + return pairs + + def __preprocess_bulk(self, task): + """ + Given a TaskDoc that could be a bulk for defect analysis, check to see if it can 
be used. Bulk + tasks must have: + + (1) Correspond to an existing material_id in the materials store + (2) If the bulk is not a metal, then the dielectric tensor must exist in the dielectric store + (3) If bulk is not a metal, electronic structure document must exist in the store + + """ + self.logger.debug("Preprocessing bulk task {}".format(task)) + t = next(self.tasks.query(criteria={self.tasks.key: task}, properties=['output.output.structure', 'mpid'])) + + struc = Structure.from_dict(t.get('output').get('output').get('structure')) # TODO specific to atomate2 + mpid = self._get_mpid(struc) + if not mpid: + self.logger.debug(f"No material id found for bulk task {task}") + return False + self._mpid_map[task] = mpid + self.logger.debug(f"Material ID: {mpid}") + + elec = self.electronic_structure.query_one( + properties=['band_gap'], criteria={self.electronic_structure.key: mpid} + ) + if not elec: + self.logger.debug(f"Electronic structure data not found for {mpid}") + return False + + # TODO right now pulling dos from electronic structure, should just pull summary document + if elec['band_gap'] > 0: + diel = self.__get_dielectric(mpid) + if not diel: + self.logger.info(f"Task {task} for {mpid} ({struc.composition.reduced_formula}) requires " + f"dielectric properties, but none found in dielectric store") + return False + + return True + + def __get_pristine_supercell(self, task): + """ + Given a task document for a defect calculation, retrieve the un-defective, pristine supercell. + - If defect transform exists, the following transform's input will be returned + - If no follow up transform exists, the calculation input will be returned + + If defect cannot be found in task, return the input structure. 
+ """ + d = unpack(query=self.defect_query, d=task) + if d: + defect = MontyDecoder().process_decoded(d) + sc_mat = task.get('output', {}).get('additional_json', {}).get("info", {}).get('sc_mat') + s = defect.structure.copy() + s.make_supercell(sc_mat) + return s + else: + return MontyDecoder().process_decoded(task['output']['output']['structure']) + +#TODO Major problem with this builder. materials store is used to sync the diel, elec, and pd with a single material id +#TODO This is a problem because the material id in vasp store is not synced to cp2k store +#TODO Also the chempots needed to adjust entries must come from cp2k, but you need to give vasp to sync the others +class DefectThermoBuilder(Builder): + + """ + This builder creates collections of the DefectThermoDoc object. + + (1) Find all DefectDocs that correspond to the same bulk material + given by material_id + (2) Create a new DefectThermoDoc for all of those documents + (3) Insert/Update the defect_thermos store with the new documents + """ + + def __init__( + self, + defects: Store, + defect_thermos: Store, + materials: Store, + thermo: Store, + electronic_structures: Store, + dos: Store, + query: Optional[Dict] = None, + **kwargs, + ): + """ + Args: + defects: Store of defect documents (generated by DefectBuilder) + defect_thermos: Store of DefectThermoDocs to generate. 
    def ensure_indexes(self):
        """
        Ensure indices on the source defects store and the target defect_thermos store.
        """

        # Indexes on the defects store (source of DefectDocs)
        self.defects.ensure_index("material_id")
        self.defects.ensure_index("defect_id")

        # Index on the defect_thermos store (target of this builder)
        self.defect_thermos.ensure_index("material_id")
+ + returns: + iterator yielding tuples containing: + - group of DefectDocs belonging to the same bulk material as indexed by material_id, + - materials in the chemsys of the bulk material for constructing phase diagram + - Dos of the bulk material for constructing phase diagrams/getting doping + + """ + + self.logger.info("Defect thermo builder started") + self.logger.info("Setting indexes") + self.ensure_indexes() + + # Save timestamp to mark build time for defect thermo documents + self.timestamp = datetime.utcnow() + + # Get all tasks + self.logger.info("Finding tasks to process") + temp_query = dict(self.query) + temp_query["state"] = "successful" + + #unprocessed_defect_tasks = all_tasks - processed_defect_tasks + + all_docs = [doc for doc in self.defects.query(self.query)] + + self.logger.debug(f"Found {len(all_docs)} defect docs to process") + + def filterfunc(x): + # material for defect x exists + if not list(self.materials.query(criteria={'material_id': x['material_id']}, properties=None)): + self.logger.debug(f"No material with MPID={x['material_id']} in the material store") + return False + + for el in load_class(x['defect']['@module'], x['defect']['@class']).from_dict(x['defect']).defect_composition: + if not list(self.thermo.query(criteria={'chemsys': str(el)}, properties=None)): + self.logger.debug(f"No entry for {el} in Thermo Store") + return False + + return True + + for key, group in groupby( + filter( + filterfunc, + sorted(all_docs, key=lambda x: x['material_id']) + ), key=lambda x: x['material_id'] + ): + group = [g for g in group] + try: + mat = self.__get_materials(key) + thermo = self.__get_thermos(mat.composition) + elec = self.__get_electronic_structure(group[0]['material_id']) + yield (group, mat, thermo, elec) + except LookupError as exception: + raise exception + + def process_item(self, docs): + """ + Process a group of defects belonging to the same material into a defect thermo doc + """ + self.logger.info(f"Processing defects") + 
defects, material, thermos, elec_struc = docs + defects = [DefectDoc(**d) for d in defects] + thermos = [ThermoDoc(**t) for t in thermos] + defect_thermo_doc = DefectThermoDoc.from_docs(defects, thermos=thermos, electronic_structure=elec_struc) + return defect_thermo_doc.dict() + + def update_targets(self, items): + """ + Inserts the new DefectThermoDocs into the defect_thermos store + """ + items = [item for item in items if item] + for item in items: + item.update({"_bt": self.timestamp}) + + if len(items) > 0: + self.logger.info(f"Updating {len(items)} defect thermo docs") + self.defect_thermos.update( + docs=jsanitize(items, allow_bson=True), + key=self.defect_thermos.key, + ) + else: + self.logger.info("No items to update") + + def __get_electronic_structure(self, material_id): + """ + Gets the electronic structure of the bulk material + """ + self.logger.info(f"Getting electronic structure for {material_id}") + + # TODO This is updated to return the whole query because a.t.m. the + # DOS part of the electronic builder isn't working, so I'm using + # this to pull direct from the store of dos objects with no processing. + dosdoc = self.electronic_structures.query_one( + criteria={self.electronic_structures.key: material_id}, + properties=None, + ) + t_id = ElectronicStructureDoc(**dosdoc).dos.total['1'].task_id + dos = self.dos.query_one(criteria={'task_id': int(t_id)}, properties=None) #TODO MPID str/int issues + return dos + + def __get_materials(self, key) -> List: + """ + Given a group of DefectDocs, use the bulk material_id to get materials in the chemsys from the + materials store. 
def unpack(query, d):
    """
    Retrieve a nested value from ``d`` by following a Mongo-style path.

    Args:
        query: path to follow. Either a string whose keys are joined by
            ".", ":" or "->" (e.g. "output.additional_json.info.defect"),
            a single key without separators, or a sequence of keys.
            A key applied to a list is converted to an integer index.
        d: dictionary (or list) to search.

    Returns:
        The value found at the end of the path. ``d`` itself is returned for
        an empty query. ``None`` is returned when ``d`` is empty, when the
        path cannot be followed (missing key, index out of range, indexing
        into a non-container), or when the value found is falsy.
    """
    if not d:
        return None
    if not query:
        return d

    if isinstance(query, str):
        # A query without any separator is a single key.
        keys = [query]
        for separator in (".", ":", "->"):
            if separator in query:
                # Split on the separator that was actually detected.
                keys = query.split(separator)
                break
    else:
        # Work on a copy so the caller's key list is never mutated.
        keys = list(query)

    current = d
    for key in keys:
        try:
            if isinstance(current, list):
                current = current[int(key)]
            else:
                current = current[key]
        except (KeyError, IndexError, TypeError, ValueError):
            # Path does not exist in this document.
            return None

    # Falsy leaves are reported as None, as callers test the result with `if`.
    return current if current else None
field(default_factory=DefectHybridCellOptMaker) # TODO close to being able to put this in common. Just need a switch that decides which core flow/job to use based on software @dataclass @@ -81,40 +98,28 @@ class FormationEnergyMaker(Maker): def __post_init__(self): if self.run_bulk == 'relax': if self.hybrid_functional: - self.bulk_maker = HybridCellOptFlowMaker( - initialize_with_pbe=self.initialize_with_pbe, - hybrid_functional=self.hybrid_functional, - hybrid_maker=HybridCellOptMaker( - input_set_generator=DefectHybridCellOptSetGenerator(), - task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} - ) + self.bulk_maker = DefectHybridCellOptMaker( + name="bulk hybrid relax", + initialize_with_pbe=self.initialize_with_pbe, + hybrid_functional=self.hybrid_functional ) else: - self.bulk_maker = CellOptMaker( - input_set_generator=DefectCellOptSetGenerator(), - task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} - ) + self.bulk_maker = DefectCellOptMaker(name="bulk relax") + elif self.run_bulk == "static": if self.hybrid_functional: - self.bulk_maker = HybridStaticFlowMaker( + self.bulk_maker = DefectHybridStaticFlowMaker( + name='bulk hybrid static', + initialize_with_pbe=self.initialize_with_pbe, hybrid_functional=self.hybrid_functional, - hybrid_maker=HybridStaticMaker( - input_set_generator=DefectHybridStaticSetGenerator(), - task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} - ) ) else: - self.bulk_maker = StaticMaker( - input_set_generator=DefectStaticSetGenerator(), - task_document_kwargs={"average_v_hartree": True, "store_volumetric_data": ("v_hartree",)} - ) + self.bulk_maker = DefectStaticMaker(name="bulk static") if self.hybrid_functional: - self.def_maker = HybridRelaxFlowMaker( + self.def_maker = DefectHybridRelaxFlowMaker( hybrid_functional=self.hybrid_functional, initialize_with_pbe=self.initialize_with_pbe, - 
initialize_maker=DefectStaticMaker(), - hybrid_maker=DefectHybridRelaxMaker(), ) else: self.def_maker = DefectRelaxMaker() @@ -128,7 +133,7 @@ def __post_init__(self): def make( self, defects: Iterable[Defect], - run_all_charges: bool = False, + charges: bool | Iterable[int] = False, dielectric: NDArray | int | float | None = None, prev_cp2k_dir: str | Path | None = None): """Make a flow to run multiple defects in order to calculate their formation @@ -160,7 +165,10 @@ def make( jobs.append(bulk_job) for defect in defects: - chgs = defect.get_charge_states() if run_all_charges else [0] + if charges == True: + chgs = defect.get_charge_states() if charges else [0] + else: + chgs = charges if charges else [0] for charge in chgs: defect_job = self.def_maker.make(deepcopy(defect), charge) jobs.append(defect_job) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 7a31173310..1d5db8f432 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -53,6 +53,8 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | min_length=self.min_length, force_diagonal=self.force_diagonal, ) + + # provenance stuff self.write_additional_data.update( { "info.json": { @@ -64,15 +66,6 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | else: structure = deepcopy(defect) structure.set_charge(charge) - # provenance stuff - self.write_additional_data.update( - { - "info.json": { - "defect": deepcopy(defect), - "defect_charge": charge, - "sc_mat": self.supercell_matrix} - } - ) return super().make.original(self, structure=structure, prev_cp2k_dir=prev_cp2k_dir) @dataclass diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 08917df314..736e592f64 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -17,7 +17,7 @@ from atomate2 import SETTINGS from atomate2.common.schemas.structure import 
StructureMetadata -from atomate2.cp2k.schemas.calc_types.utils import run_type, task_type +from atomate2.cp2k.schemas.calc_types.utils import run_type, task_type, calc_type from atomate2.cp2k.schemas.calc_types.enums import CalcType, TaskType, RunType from atomate2.cp2k.schemas.task import TaskDocument @@ -38,21 +38,21 @@ class Config: name: str = Field(None, description="Name of this defect as generated by the defect object") - material_id: int = Field(None, description="Unique material ID for the bulk material") #TODO Change to MPID + material_id: str = Field(None, description="Unique material ID for the bulk material") #TODO Change to MPID - task_ids: List[int] = Field( + task_ids: List[str] = Field( None, description="All task ids used in creating this defect doc." ) - calc_types: Mapping[int, CalcType] = Field( # type: ignore + calc_types: Mapping[str, CalcType] = Field( # type: ignore None, description="Calculation types for all the calculations that make up this material", ) - task_types: Mapping[int, TaskType] = Field( + task_types: Mapping[str, TaskType] = Field( None, description="Task types for all the calculations that make up this material", ) - run_types: Mapping[int, RunType] = Field( + run_types: Mapping[str, RunType] = Field( None, description="Run types for all the calculations that make up this material", ) @@ -125,12 +125,12 @@ def _compare(new, old): self.entries[rt] = entry self.tasks[rt] = (defect_task_doc, bulk_task_doc) - def update_all(self, tasks, query='defect'): - for defect_task, bulk_task, dielectric in tasks: + def update_all(self, defect_tasks: List, bulk_tasks: List, dielectrics: List, query='defect'): + for defect_task, bulk_task, dielectric in zip(defect_tasks, bulk_tasks, dielectrics): self.update(defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, query=query) @classmethod - def from_tasks(cls, tasks: List, query='defect', material_id=None): + def from_tasks(cls, defect_tasks: List, bulk_tasks: List, 
dielectrics: List, query='defect', key="task_id", material_id=None): """ The standard way to create this document. Args: @@ -138,43 +138,45 @@ def from_tasks(cls, tasks: List, query='defect', material_id=None): series of DefectEntry objects. query: How to retrieve the defect object stored in the task. """ - task_group = [TaskDocument(**defect_task) for defect_task, bulk_task, dielectric in tasks] - + task_ids = [defect_task[key] for defect_task in defect_tasks] + bulk_tasks= [TaskDocument(**bulk_task['output']) for bulk_task in bulk_tasks] + defects = [cls.get_defect_from_task(query=query, task=defect_task) for defect_task in defect_tasks] + defect_tasks = [TaskDocument(**defect_task['output']) for defect_task in defect_tasks] + # Metadata - last_updated = datetime.now() or max(task.last_updated for task in task_group) - created_at = datetime.now() or min(task.completed_at for task in task_group) - task_ids = {task.task_id for task in task_group} + last_updated = datetime.now() or max(task.last_updated for task in defect_tasks) + created_at = datetime.now() or min(task.completed_at for task in defect_tasks) - deprecated_tasks = list( - {task.task_id for task in task_group if not task.is_valid} - ) + #deprecated_tasks = list( + # {task.task_id for task in task_group if not task.is_valid} + #) - run_types = {task.task_id: task.run_type for task in task_group} - task_types = {task.task_id: task.task_type for task in task_group} - calc_types = {task.task_id: task.calc_type for task in task_group} + run_types = {id: task.calcs_reversed[0].run_type for id, task in zip(task_ids, defect_tasks)} + task_types = {id: task.calcs_reversed[0].task_type for id, task in zip(task_ids, defect_tasks)} + calc_types = {id: task.calcs_reversed[0].calc_type for id, task in zip(task_ids, defect_tasks)} def _run_type(x): - return run_type(x[0]['input']['dft']).value - - def _task_type(x): - return task_type(x[0]['input']['dft']).value + return x[0].calcs_reversed[0].run_type.value def 
_sort(x): # TODO return kpoint density, currently just does supercell size - return -x[0]['nsites'], x[0]['output']['energy'] + return -x[0].nsites, x[0].output.energy entries = {} final_tasks = {} metadata = {} - for key, tasks_for_runtype in groupby(sorted(tasks, key=_run_type), key=_run_type): + for key, tasks_for_runtype in groupby(sorted(zip(defect_tasks, bulk_tasks, defects, dielectrics), key=_run_type), key=_run_type): sorted_tasks = sorted(tasks_for_runtype, key=_sort) - ents = [cls.get_defect_entry_from_tasks(t[0], t[1], t[2], query) for t in sorted_tasks] + ents = [ + cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric) + for defect_task, bulk_task, defect, dielectric in sorted_tasks + ] best_entry = ents[0] - best_defect_task, best_bulk_task, dielectric = sorted_tasks[0] - metadata[key] = {'convergence': [(sorted_tasks[i][0]['nsites'], ents[i].energy) for i in range(len(ents))]} - best_defect_task, best_bulk_task = TaskDocument(**best_defect_task), TaskDocument(**best_bulk_task) - entries[best_defect_task.run_type] = best_entry - final_tasks[best_defect_task.run_type] = (best_defect_task, best_bulk_task) + best_defect_task = sorted_tasks[0][0] + best_bulk_task = sorted_tasks[0][1] + metadata[key] = {'convergence': [(sorted_tasks[i][0].nsites, ents[i].corrected_energy) for i in range(len(ents))]} + entries[best_defect_task.calcs_reversed[0].run_type] = best_entry + final_tasks[best_defect_task.calcs_reversed[0].run_type] = (best_defect_task, best_bulk_task) data = { 'entries': entries, @@ -184,20 +186,18 @@ def _sort(x): 'last_updated': last_updated, 'created_at': created_at, 'task_ids': task_ids, - 'deprecated_tasks': deprecated_tasks, + #'deprecated_tasks': deprecated_tasks, 'tasks': final_tasks, 'material_id': material_id if material_id else best_entry.parameters['material_id'], - 'entry_ids': {rt: entries[rt].entry_id for rt in entries}, 'defect': best_entry.defect, - 'name': best_entry.defect.name, 'metadata': metadata, } - 
prim = SpacegroupAnalyzer(best_entry.defect.bulk_structure).get_primitive_standard_structure() + prim = SpacegroupAnalyzer(best_entry.defect.structure).get_primitive_standard_structure() data.update(StructureMetadata.from_structure(prim).dict()) return cls(**data) @classmethod - def get_defect_entry_from_tasks(cls, defect_task, bulk_task, dielectric=None, query='transformations.history.0.defect'): + def get_defect_entry_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskDocument, defect: Defect, dielectric=None): """ Extract a defect entry from a single pair (defect and bulk) of tasks. @@ -220,14 +220,14 @@ def get_defect_entry_from_tasks(cls, defect_task, bulk_task, dielectric=None, qu ) defect_entry = DefectEntry( - defect=cls.get_defect_from_task(query=query, task=defect_task), + defect=defect, charge_state=parameters['charge_state'], sc_entry=sc_entry, sc_defect_frac_coords=parameters['defect_frac_sc_coords'], corrections=corrections, ) - return defect_entry.as_dict() + return defect_entry @classmethod def get_correction_from_parameters(cls, parameters) -> Tuple[Dict, Dict]: @@ -240,7 +240,7 @@ def get_correction_from_parameters(cls, parameters) -> Tuple[Dict, Dict]: return corrections, metadata @classmethod - def get_freysold_correction(cls, parameters) -> Tuple[Dict, Dict]: + def get_freysoldt_correction(cls, parameters) -> Tuple[Dict, Dict]: if parameters['charge_state'] and not parameters.get("2d"): return get_freysoldt_correction( q=parameters['charge_state'], dielectric=parameters['dielectric'], @@ -279,11 +279,10 @@ def get_defect_from_task(cls, query, task): Unpack a Mongo-style query and retrieve a defect object from a task. 
""" defect = unpack(query.split('.'), task) - needed_keys = ['@module', '@class', 'structure', 'defect_site', 'charge', 'site_name'] - return MontyDecoder().process_decoded({k: v for k, v in defect.items() if k in needed_keys}) + return MontyDecoder().process_decoded(defect) @classmethod - def get_parameters_from_tasks(cls, defect_task, bulk_task): + def get_parameters_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskDocument): """ Get parameters necessary to create a defect entry from defect and bulk task dicts Args: @@ -291,9 +290,6 @@ def get_parameters_from_tasks(cls, defect_task, bulk_task): bulk_task: task dict for the bulk calculation """ - defect_task = TaskDocument(**defect_task) - bulk_task = TaskDocument(**bulk_task) - final_defect_structure = defect_task.structure final_bulk_structure = bulk_task.structure @@ -304,11 +300,9 @@ def get_parameters_from_tasks(cls, defect_task, bulk_task): defect_frac_sc_coords = DefectSiteFinder(SETTINGS.SYMPREC).get_defect_fpos(defect_structure=final_defect_structure, base_structure=final_bulk_structure) parameters = { - 'defect_energy': defect_task['output']['energy'], - 'bulk_energy': bulk_task['output']['energy'], + 'defect_energy': defect_task.output.energy, + 'bulk_energy': bulk_task.output.energy, 'final_defect_structure': final_defect_structure, - 'vbm': bulk_task['output']['vbm'], - 'cbm': bulk_task['output']['cbm'], 'charge_state': defect_task.output.structure.charge, 'defect_frac_sc_coords': defect_frac_sc_coords, 'defect_v_hartree': defect_task.cp2k_objects['v_hartree'], # TODO CP2K spec name From 132a6d4b81c287dd245ec7af4d62ffcaf86f13ec Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 31 Oct 2022 11:05:29 -0700 Subject: [PATCH 05/50] Don't perturb when bulk --- src/atomate2/cp2k/flows/defect.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index d23c914822..069d80df42 100644 --- 
a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -99,12 +99,12 @@ def __post_init__(self): if self.run_bulk == 'relax': if self.hybrid_functional: self.bulk_maker = DefectHybridCellOptMaker( - name="bulk hybrid relax", + name="bulk hybrid relax", transformations=None, initialize_with_pbe=self.initialize_with_pbe, hybrid_functional=self.hybrid_functional ) else: - self.bulk_maker = DefectCellOptMaker(name="bulk relax") + self.bulk_maker = DefectCellOptMaker(name="bulk relax", transformations=None) elif self.run_bulk == "static": if self.hybrid_functional: From a3feadd72d5594c40b0177dfd24d77ef09d71a30 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 31 Oct 2022 15:01:47 -0700 Subject: [PATCH 06/50] More robust for some reason --- src/atomate2/cp2k/flows/defect.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index 069d80df42..20a1a265bd 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -161,7 +161,9 @@ def make( self.force_diagonal,) if self.run_bulk: - bulk_job = self.bulk_maker.make(bulk_structure * sc_mat, prev_cp2k_dir=prev_cp2k_dir) + s = bulk_structure.copy() + s.make_supercell(sc_mat) + bulk_job = self.bulk_maker.make(s, prev_cp2k_dir=prev_cp2k_dir) jobs.append(bulk_job) for defect in defects: From ab4cc6a2993d1d9bfb3b7b9c1e6bf27ea19284e0 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 31 Oct 2022 16:57:30 -0700 Subject: [PATCH 07/50] Expand parents I cannot for the life of me figure out why using the combined parent breaks everything, but I guess the multiple_input_updator can only go one level deep for inheriting --- src/atomate2/cp2k/jobs/defect.py | 3 ++- src/atomate2/cp2k/sets/defect.py | 10 +++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 1d5db8f432..85f2ca5d6e 100644 --- 
a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -60,8 +60,9 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | "info.json": { "defect": deepcopy(defect), "defect_charge": charge, - "sc_mat": self.supercell_matrix} + "sc_mat": self.supercell_matrix } + } ) else: structure = deepcopy(defect) diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py index df3a2cbe4c..2e454af5dc 100644 --- a/src/atomate2/cp2k/sets/defect.py +++ b/src/atomate2/cp2k/sets/defect.py @@ -9,7 +9,7 @@ from atomate2.cp2k.sets.base import Cp2kInputGenerator, multiple_input_updators from atomate2.cp2k.sets.core import ( - StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator, + HybridSetGenerator, StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator, HybridStaticSetGenerator, HybridRelaxSetGenerator, HybridCellOptSetGenerator ) logger = logging.getLogger(__name__) @@ -41,15 +41,15 @@ class DefectCellOptSetGenerator(DefectSetGenerator, CellOptSetGenerator): @dataclass @multiple_input_updators() -class DefectHybridStaticSetGenerator(DefectSetGenerator, HybridStaticSetGenerator): +class DefectHybridStaticSetGenerator(DefectSetGenerator, StaticSetGenerator, HybridSetGenerator): pass @dataclass @multiple_input_updators() -class DefectHybridRelaxSetGenerator(DefectSetGenerator, HybridRelaxSetGenerator): - pass +class DefectHybridRelaxSetGenerator(DefectSetGenerator, RelaxSetGenerator, HybridSetGenerator): + pass @dataclass @multiple_input_updators() -class DefectHybridCellOptSetGenerator(DefectSetGenerator, HybridCellOptSetGenerator): +class DefectHybridCellOptSetGenerator(DefectSetGenerator, CellOptSetGenerator, HybridSetGenerator): pass \ No newline at end of file From 99e3a55c3f16ff74f136d515743f08c01c2b84d2 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 31 Oct 2022 16:58:08 -0700 Subject: [PATCH 08/50] Ugly but functional I'll look for a way to clean this up later --- 
src/atomate2/cp2k/flows/defect.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index 20a1a265bd..a404701bd5 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -121,15 +121,27 @@ def __post_init__(self): hybrid_functional=self.hybrid_functional, initialize_with_pbe=self.initialize_with_pbe, ) - else: - self.def_maker = DefectRelaxMaker() + self.def_maker.initialize_maker.supercell_matrix = self.supercell_matrix + self.def_maker.hybrid_maker.supercell_matrix = self.supercell_matrix + + self.def_maker.initialize_maker.max_atoms = self.max_atoms + self.def_maker.hybrid_maker.max_atoms = self.max_atoms + self.def_maker.initialize_maker.min_atoms = self.min_atoms + self.def_maker.hybrid_maker.min_atoms = self.min_atoms - self.def_maker.supercell_matrix = self.supercell_matrix - self.def_maker.max_atoms = self.max_atoms - self.def_maker.min_atoms = self.min_atoms - self.def_maker.min_length = self.min_length - self.def_maker.force_diagonal = self.force_diagonal + self.def_maker.initialize_maker.min_length = self.min_length + self.def_maker.hybrid_maker.min_length = self.min_length + + self.def_maker.initialize_maker.force_diagonal = self.force_diagonal + self.def_maker.hybrid_maker.force_diagonal = self.force_diagonal + else: + self.def_maker = DefectRelaxMaker() + self.def_maker.supercell_matrix = self.supercell_matrix + self.def_maker.max_atoms = self.max_atoms + self.def_maker.min_atoms = self.min_atoms + self.def_maker.min_length = self.min_length + self.def_maker.force_diagonal = self.force_diagonal def make( self, defects: Iterable[Defect], @@ -163,7 +175,7 @@ def make( if self.run_bulk: s = bulk_structure.copy() s.make_supercell(sc_mat) - bulk_job = self.bulk_maker.make(s, prev_cp2k_dir=prev_cp2k_dir) + bulk_job = self.bulk_maker.make(bulk_structure * sc_mat, prev_cp2k_dir=prev_cp2k_dir) 
jobs.append(bulk_job) for defect in defects: From 8daf86d9ffc3414b9926fbdc5ff7f97a291f3b07 Mon Sep 17 00:00:00 2001 From: nwinner Date: Thu, 3 Nov 2022 12:55:40 -0700 Subject: [PATCH 09/50] Updates for cluster testing --- src/atomate2/cp2k/builders/defect.py | 3 +- src/atomate2/cp2k/flows/defect.py | 2 +- src/atomate2/cp2k/jobs/defect.py | 12 +++---- src/atomate2/cp2k/schemas/defect.py | 51 ++++++++++++++++------------ 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 354502650d..2c49f1ee74 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -127,7 +127,8 @@ def __init__( 'output.output.structure', 'output.input', 'output.nsites', - 'output.cp2k_objects.v_hartree' + 'output.cp2k_objects.v_hartree', + 'output.additional_json.info.sc_mat' # TODO figure out how to remove this requirement ] self._required_bulk_properties = [ diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index a404701bd5..c780860b81 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -233,7 +233,7 @@ def collect_defect_outputs( defect_entry = DefectEntry( defect=defect, charge_state=charge, - sc_entry=ComputedStructureEntry(structure=bulk_output.structure, energy=bulk_output.output.energy) + sc_entry=ComputedStructureEntry(structure=bulk_output.structure, energy=output_with_charge.output.energy - bulk_output.output.energy) ) defect_entries.append(defect_entry) plot_data = defect_entry.get_freysoldt_correction( diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 85f2ca5d6e..c9f21889cd 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -72,7 +72,7 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | @dataclass class DefectStaticMaker(BaseDefectMaker): - name: str = "defect static" + name: str 
= field(default="defect static") input_set_generator: DefectSetGenerator = field( default_factory=DefectStaticSetGenerator ) @@ -86,7 +86,7 @@ class DefectRelaxMaker(BaseDefectMaker): the hartree potential for finite size corrections. """ - name: str = "defect relax" + name: str = field(default="defect relax") input_set_generator: Cp2kInputGenerator = field(default_factory=DefectRelaxSetGenerator) transformations: tuple[str, ...] = field(default=("PerturbStructureTransformation",)) transformation_params: tuple[dict, ...] | None = field(default=({"distance": 0.01},)) @@ -100,7 +100,7 @@ class DefectCellOptMaker(BaseDefectMaker): the hartree potential for finite size corrections. """ - name: str = "defect relax" + name: str = field(default="defect relax") input_set_generator: Cp2kInputGenerator = field(default_factory=DefectCellOptSetGenerator) transformations: tuple[str, ...] = field(default=("PerturbStructureTransformation",)) transformation_params: tuple[dict, ...] | None = field(default=({"distance": 0.01},)) @@ -108,19 +108,19 @@ class DefectCellOptMaker(BaseDefectMaker): @dataclass class DefectHybridStaticMaker(DefectStaticMaker, HybridStaticMaker): - name: str = "defect hybrid static" + name: str = field(default="defect hybrid static") input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridStaticSetGenerator) @dataclass class DefectHybridRelaxMaker(DefectRelaxMaker, HybridRelaxMaker): - name: str = "defect hybrid relax" + name: str = field(default="defect hybrid relax") input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridRelaxSetGenerator) @dataclass class DefectHybridCellOptMaker(DefectCellOptMaker, HybridCellOptMaker): - name: str = "defect hybrid cell opt" + name: str = field(default="defect hybrid cell opt") input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridCellOptSetGenerator) class GhostVacancy(Vacancy): diff --git a/src/atomate2/cp2k/schemas/defect.py 
b/src/atomate2/cp2k/schemas/defect.py index 736e592f64..0d04ab0c87 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -1,6 +1,6 @@ from datetime import datetime from tokenize import group -from typing import ClassVar, Dict, Tuple, Mapping, List +from typing import ClassVar, TypeVar, Type, Dict, Tuple, Mapping, List from pydantic import BaseModel, Field from pydantic import validator from itertools import groupby @@ -21,6 +21,10 @@ from atomate2.cp2k.schemas.calc_types.enums import CalcType, TaskType, RunType from atomate2.cp2k.schemas.task import TaskDocument +__all__ = ["DefectDoc"] + +T = TypeVar("T", bound="DefectDoc") + class DefectDoc(StructureMetadata): """ A document used to represent a single defect. e.g. a O vacancy with a -2 charge. @@ -29,9 +33,6 @@ class DefectDoc(StructureMetadata): calculation of each run_type. """ - class Config: - arbitrary_types_allowed = True - property_name: ClassVar[str] = "defect" defect: Defect = Field(None, description="Pymatgen defect object for this defect doc") @@ -40,8 +41,9 @@ class Config: material_id: str = Field(None, description="Unique material ID for the bulk material") #TODO Change to MPID + # TODO Should it be all (defect + bulk) ids? task_ids: List[str] = Field( - None, description="All task ids used in creating this defect doc." + None, description="All defect task ids used in creating this defect doc." 
) calc_types: Mapping[str, CalcType] = Field( # type: ignore @@ -57,8 +59,12 @@ class Config: description="Run types for all the calculations that make up this material", ) - tasks: Mapping[RunType, Tuple[TaskDocument, TaskDocument]] = Field( - None, description="Task documents (defect task, bulk task) for the defect entry of RunType" + best_tasks: Mapping[RunType, Tuple[str, str]] = Field( + None, description="Task ids (defect task, bulk task) for all tasks of a RunType" + ) + + all_tasks: Mapping[RunType, List[Tuple[str, str]]] = Field( + None, description="Task ids (defect task, bulk task) for all tasks of a RunType" ) entries: Mapping[RunType, DefectEntry] = Field( @@ -130,7 +136,7 @@ def update_all(self, defect_tasks: List, bulk_tasks: List, dielectrics: List, qu self.update(defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, query=query) @classmethod - def from_tasks(cls, defect_tasks: List, bulk_tasks: List, dielectrics: List, query='defect', key="task_id", material_id=None): + def from_tasks(cls: Type[T], defect_tasks: List, bulk_tasks: List, dielectrics: List, query='defect', key="task_id", material_id=None): """ The standard way to create this document. Args: @@ -138,7 +144,8 @@ def from_tasks(cls, defect_tasks: List, bulk_tasks: List, dielectrics: List, que series of DefectEntry objects. query: How to retrieve the defect object stored in the task. 
""" - task_ids = [defect_task[key] for defect_task in defect_tasks] + defect_task_ids = [defect_task[key] for defect_task in defect_tasks] + bulk_task_ids = [bulk_task[key] for bulk_task in bulk_tasks] bulk_tasks= [TaskDocument(**bulk_task['output']) for bulk_task in bulk_tasks] defects = [cls.get_defect_from_task(query=query, task=defect_task) for defect_task in defect_tasks] defect_tasks = [TaskDocument(**defect_task['output']) for defect_task in defect_tasks] @@ -151,9 +158,9 @@ def from_tasks(cls, defect_tasks: List, bulk_tasks: List, dielectrics: List, que # {task.task_id for task in task_group if not task.is_valid} #) - run_types = {id: task.calcs_reversed[0].run_type for id, task in zip(task_ids, defect_tasks)} - task_types = {id: task.calcs_reversed[0].task_type for id, task in zip(task_ids, defect_tasks)} - calc_types = {id: task.calcs_reversed[0].calc_type for id, task in zip(task_ids, defect_tasks)} + run_types = {id: task.calcs_reversed[0].run_type for id, task in zip(defect_task_ids, defect_tasks)} + task_types = {id: task.calcs_reversed[0].task_type for id, task in zip(defect_task_ids, defect_tasks)} + calc_types = {id: task.calcs_reversed[0].calc_type for id, task in zip(defect_task_ids, defect_tasks)} def _run_type(x): return x[0].calcs_reversed[0].run_type.value @@ -163,20 +170,21 @@ def _sort(x): return -x[0].nsites, x[0].output.energy entries = {} - final_tasks = {} + all_tasks = {} + best_tasks = {} metadata = {} - for key, tasks_for_runtype in groupby(sorted(zip(defect_tasks, bulk_tasks, defects, dielectrics), key=_run_type), key=_run_type): + for key, tasks_for_runtype in groupby(sorted(zip(defect_tasks, bulk_tasks, defects, dielectrics, defect_task_ids, bulk_task_ids), key=_run_type), key=_run_type): sorted_tasks = sorted(tasks_for_runtype, key=_sort) ents = [ cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric) - for defect_task, bulk_task, defect, dielectric in sorted_tasks + for defect_task, bulk_task, defect, 
dielectric, did, bid in sorted_tasks ] + rt = run_types[sorted_tasks[0][-2]] best_entry = ents[0] - best_defect_task = sorted_tasks[0][0] - best_bulk_task = sorted_tasks[0][1] + best_tasks[rt] = (sorted_tasks[0][-2], sorted_tasks[0][-1]) + all_tasks[rt] = [ (s[-2], s[-1]) for s in sorted_tasks ] metadata[key] = {'convergence': [(sorted_tasks[i][0].nsites, ents[i].corrected_energy) for i in range(len(ents))]} - entries[best_defect_task.calcs_reversed[0].run_type] = best_entry - final_tasks[best_defect_task.calcs_reversed[0].run_type] = (best_defect_task, best_bulk_task) + entries[rt] = ents[0] data = { 'entries': entries, @@ -185,9 +193,10 @@ def _sort(x): 'calc_types': calc_types, 'last_updated': last_updated, 'created_at': created_at, - 'task_ids': task_ids, + 'task_ids': defect_task_ids, #'deprecated_tasks': deprecated_tasks, - 'tasks': final_tasks, + 'all_tasks': all_tasks, + 'best_tasks': best_tasks, 'material_id': material_id if material_id else best_entry.parameters['material_id'], 'defect': best_entry.defect, 'metadata': metadata, From 804f4bbf301dca36f2b7670a7c665e66ab1488d3 Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 8 Nov 2022 11:05:41 -0800 Subject: [PATCH 10/50] copy info --- src/atomate2/cp2k/flows/defect.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index c780860b81..5098d16dfa 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -44,19 +44,25 @@ class DefectHybridStaticFlowMaker(HybridStaticFlowMaker): initialize_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) - hybrid_maker: BaseCp2kMaker = field(default_factory=DefectHybridStaticMaker) + hybrid_maker: BaseCp2kMaker = field(default=DefectHybridStaticMaker( + copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) + ) @dataclass class DefectHybridRelaxFlowMaker(HybridRelaxFlowMaker): initialize_maker: BaseCp2kMaker = 
field(default_factory=DefectStaticMaker) - hybrid_maker: BaseCp2kMaker = field(default_factory=DefectHybridRelaxMaker) + hybrid_maker: BaseCp2kMaker = field(default=DefectHybridRelaxMaker( + copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) + ) @dataclass class DefectHybridCellOptFlowMaker(HybridCellOptFlowMaker): initialize_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) - hybrid_maker: BaseCp2kMaker = field(default_factory=DefectHybridCellOptMaker) + hybrid_maker: BaseCp2kMaker = field(default=DefectHybridCellOptMaker( + copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) + ) # TODO close to being able to put this in common. Just need a switch that decides which core flow/job to use based on software @dataclass From 41a2504c113937fe7f0e474b16fa8380b6581011 Mon Sep 17 00:00:00 2001 From: nwinner Date: Sat, 26 Nov 2022 17:30:32 -0800 Subject: [PATCH 11/50] Defects --- src/atomate2/cp2k/builders/defect.py | 4 ++-- src/atomate2/cp2k/jobs/defect.py | 6 +++--- src/atomate2/cp2k/schemas/defect.py | 20 +++++++++++--------- src/atomate2/cp2k/schemas/task.py | 4 ++++ 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 2c49f1ee74..b6047e3b3a 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -543,8 +543,8 @@ def __get_item_bundle(self, task_ids): """ return [ ( - self.tasks.query_one(criteria={self.tasks.key: defect_tasks_id}), - self.tasks.query_one(criteria={self.tasks.key: bulk_tasks_id}), + self.tasks.query_one(criteria={self.tasks.key: defect_tasks_id}, load=True), + self.tasks.query_one(criteria={self.tasks.key: bulk_tasks_id}, load=True), # load all for now self.__get_dielectric(self._mpid_map[bulk_tasks_id]), ) for defect_tasks_id, bulk_tasks_id in task_ids diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index c9f21889cd..e1b4515b74 100644 --- 
a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -11,7 +11,7 @@ from pymatgen.core import Structure from pymatgen.analysis.defects.core import Defect, Vacancy -from atomate2.cp2k.sets.base import Cp2kInputGenerator +from atomate2.cp2k.sets.base import Cp2kInputGenerator, recursive_update from atomate2.cp2k.sets.defect import ( DefectSetGenerator, DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator @@ -55,8 +55,7 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | ) # provenance stuff - self.write_additional_data.update( - { + recursive_update(self.write_additional_data, { "info.json": { "defect": deepcopy(defect), "defect_charge": charge, @@ -64,6 +63,7 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | } } ) + else: structure = deepcopy(defect) structure.set_charge(charge) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 0d04ab0c87..ece81f2959 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -270,15 +270,15 @@ def get_freysoldt2d_correction(cls, parameters): dielectric = (eps_parallel - 1) / (1 - 1/eps_perp) with ScratchDir('.'): - lref = VaspVolumetricData(structure=parameters['bulk_locpot'].structure, data=parameters['bulk_locpot'].data) - ldef = VaspVolumetricData(structure=parameters['defect_locpot'].structure, data=parameters['defect_locpot'].data) + lref = VaspVolumetricData(structure=parameters['bulk_v_hartree'].structure, data=parameters['bulk_v_hartree'].data) + ldef = VaspVolumetricData(structure=parameters['defect_v_hartree'].structure, data=parameters['defect_v_hartree'].data) lref.write_file("LOCPOT.ref") ldef.write_file("LOCPOT.def") return get_freysoldt2d_correction( - q=parameters['charge_state'], dielectric=dielectric, defect_locpot="LOCPOT.def", - 
bulk_locpot="LOCPOT.ref", defect_frac_coords=parameters['defect_frac_sc_coords'], - energy_cutoff=250, slab_buffer=2 + q=parameters['charge_state'], dielectric=dielectric, defect_locpot=ldef, + bulk_locpot=lref, defect_frac_coords=parameters['defect_frac_sc_coords'], + energy_cutoff=520, slab_buffer=2 ) return {}, {} @@ -304,20 +304,22 @@ def get_parameters_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskDoc ghost = [index for index, prop in enumerate(final_defect_structure.site_properties.get("ghost")) if prop] if ghost: - defect_frac_sc_coords = final_defect_structure[ghost[0]] + defect_frac_sc_coords = final_defect_structure[ghost[0]].frac_coords else: defect_frac_sc_coords = DefectSiteFinder(SETTINGS.SYMPREC).get_defect_fpos(defect_structure=final_defect_structure, base_structure=final_bulk_structure) - parameters = { 'defect_energy': defect_task.output.energy, 'bulk_energy': bulk_task.output.energy, 'final_defect_structure': final_defect_structure, 'charge_state': defect_task.output.structure.charge, 'defect_frac_sc_coords': defect_frac_sc_coords, - 'defect_v_hartree': defect_task.cp2k_objects['v_hartree'], # TODO CP2K spec name - 'bulk_v_hartree': bulk_task.cp2k_objects['v_hartree'], # TODO CP2K spec name + 'defect_v_hartree': MontyDecoder().process_decoded(defect_task.cp2k_objects['v_hartree']), # TODO CP2K spec name + 'bulk_v_hartree': MontyDecoder().process_decoded(bulk_task.cp2k_objects['v_hartree']), # TODO CP2K spec name } + if defect_task.tags and "2d" in defect_task.tags: + parameters['2d'] = True + return parameters def unpack(query, d): diff --git a/src/atomate2/cp2k/schemas/task.py b/src/atomate2/cp2k/schemas/task.py index 72905ddd40..c1e3a088ed 100644 --- a/src/atomate2/cp2k/schemas/task.py +++ b/src/atomate2/cp2k/schemas/task.py @@ -336,6 +336,10 @@ def from_directory( analysis = AnalysisSummary.from_cp2k_calc_docs(calcs_reversed) transformations, icsd_id, tags, author = _parse_transformations(dir_name) + if tags: + 
tags.extend(additional_fields.get("tags", [])) + else: + tags = additional_fields.get('tags') custodian = _parse_custodian(dir_name) orig_inputs = _parse_orig_inputs(dir_name) From a9fcc37c23c48fadcb6cbb4ccd422a4a1ae116d6 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 28 Nov 2022 14:40:50 -0800 Subject: [PATCH 12/50] Charge If structure is passed, use charge from structure. --- src/atomate2/cp2k/jobs/defect.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index e1b4515b74..a5c849cf87 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -65,7 +65,9 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | ) else: + charge = charge if charge else defect.charge structure = deepcopy(defect) + structure.set_charge(charge) return super().make.original(self, structure=structure, prev_cp2k_dir=prev_cp2k_dir) From a0725aa2eb742d1b3e02a6edde673c9b5fe33c20 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 28 Nov 2022 16:17:43 -0800 Subject: [PATCH 13/50] Def Builder --- src/atomate2/cp2k/builders/defect.py | 112 +++++++++++++++++++-------- 1 file changed, 79 insertions(+), 33 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index b6047e3b3a..fd90e5b8ae 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -5,6 +5,7 @@ from typing import Dict, Iterator, List, Literal, Optional from copy import deepcopy from math import ceil +import numpy as np from monty.json import MontyDecoder, jsanitize from maggma.builders import Builder @@ -13,6 +14,7 @@ from pymatgen.core import Structure from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher +from pymatgen.electronic_structure.dos import CompleteDos from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from atomate.utils.utils import load_class @@ -24,7 +26,7 @@ from 
atomate2.settings import Atomate2Settings from atomate2.cp2k.schemas.task import TaskDocument -from atomate2.cp2k.schemas.defect import DefectDoc +from atomate2.cp2k.schemas.defect import DefectDoc, DefectiveMaterialDoc from atomate2.cp2k.schemas.calc_types import TaskType from atomate2.cp2k.schemas.calc_types.utils import run_type @@ -33,8 +35,6 @@ __author__ = "Nicholas Winner " -# TODO this builder is very close to being code agnostic. We need only resolve the standard key names and -# how they are fed to the DefectDoc class. e.g. VASP calcs store "locpot", but CP2K store "v_hartree" class DefectBuilder(Builder): """ The DefectBuilder collects task documents performed on structures containing a single point defect. @@ -60,7 +60,11 @@ class DefectBuilder(Builder): """ #TODO how to incorporate into settings? - DEFAULT_ALLOWED_TASKS = [ + DEFAULT_ALLOWED_DFCT_TASKS = [ + TaskType.Structure_Optimization.value, + ] + + DEFAULT_ALLOWED_BULK_TASKS = [ TaskType.Structure_Optimization.value, TaskType.Static.value ] @@ -76,7 +80,8 @@ def __init__( task_validation: Optional[Store] = None, query: Optional[Dict] = None, bulk_query: Optional[Dict] = None, - allowed_task_types: Optional[List[str]] = DEFAULT_ALLOWED_TASKS, + allowed_dfct_types: Optional[List[str]] = DEFAULT_ALLOWED_DFCT_TASKS, + allowed_bulk_types: Optional[List[str]] = DEFAULT_ALLOWED_BULK_TASKS, task_schema: Literal["cp2k"] = "cp2k", # TODO cp2k specific right now, but this will go in common eventually settings: Dict | None = None, **kwargs, @@ -103,9 +108,9 @@ def __init__( self.electronic_structure = electronic_structure self.electrostatic_potentials = electrostatic_potentials self.task_validation = task_validation - self.allowed_task_types = allowed_task_types #TODO How to incorporate into getitems? + self._allowed_dfct_types = allowed_dfct_types #TODO How to incorporate into getitems? + self._allowed_bulk_types = allowed_bulk_types #TODO How to incorporate into getitems? 
- self._allowed_task_types = {TaskType(t) for t in self.allowed_task_types} settings = settings if settings else {} self.settings = Atomate2Settings(**settings) # TODO don't think this is right self.query = query if query else {} @@ -128,7 +133,6 @@ def __init__( 'output.input', 'output.nsites', 'output.cp2k_objects.v_hartree', - 'output.additional_json.info.sc_mat' # TODO figure out how to remove this requirement ] self._required_bulk_properties = [ @@ -187,6 +191,14 @@ def optional_bulk_properties(self) -> List: def mpid_map(self) -> Dict: return self._mpid_map + @property + def allowed_dfct_types(self) -> set: + return {TaskType(t) for t in self._allowed_dfct_types} + + @property + def allowed_bulk_types(self) -> set: + return {TaskType(t) for t in self._allowed_bulk_types} + def ensure_indexes(self): """ Ensures indicies on the tasks and materials collections @@ -276,7 +288,10 @@ def get_items(self) -> Iterator[List[Dict]]: self.logger.info("Defect builder started") self.logger.info( - f"Allowed task types: {[task_type.value for task_type in self._allowed_task_types]}" + f"Allowed defect types: {[task_type.value for task_type in self.allowed_dfct_types]}" + ) + self.logger.info( + f"Allowed bulk types: {[task_type.value for task_type in self.allowed_bulk_types]}" ) self.logger.info("Setting indexes") @@ -287,7 +302,7 @@ def get_items(self) -> Iterator[List[Dict]]: self.logger.info("Finding tasks to process") - # Get defect tasks + ##### Get defect tasks ##### temp_query = self.query.copy() temp_query.update({d: {'$exists': True, "$ne": None} for d in self.required_defect_properties}) temp_query.update({self.defect_query: {'$exists': True}, "output.state": "successful"}) @@ -296,7 +311,17 @@ def get_items(self) -> Iterator[List[Dict]]: for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) } - # Get bulk tasks + # TODO Seems slow + not_allowed = { + doc[self.tasks.key] + for doc in self.tasks.query(criteria={self.tasks.key: {"$in": 
list(defect_tasks)}}) + if TaskDocument(**doc['output']).calcs_reversed[0].task_type not in self.allowed_dfct_types + } + if not_allowed: + self.logger.debug(f"{len(not_allowed)} defect tasks dropped. Not allowed TaskType") + defect_tasks = defect_tasks - not_allowed + + ##### Get bulk tasks ##### temp_query = self.bulk_query.copy() temp_query.update({d: {'$exists': True} for d in self.required_bulk_properties}) temp_query.update({self.defect_query: {'$exists': False}, "output.state": "successful"}) @@ -304,6 +329,16 @@ def get_items(self) -> Iterator[List[Dict]]: doc[self.tasks.key] for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) } + + # TODO seems slow + not_allowed = { + doc[self.tasks.key] + for doc in self.tasks.query(criteria={self.tasks.key: {"$in": list(bulk_tasks)}}) + if TaskDocument(**doc['output']).calcs_reversed[0].task_type not in self.allowed_bulk_types + } + if not_allowed: + self.logger.debug(f"{len(not_allowed)} bulk tasks dropped. Not allowed TaskType") + bulk_tasks = bulk_tasks - not_allowed # TODO Not the same validation behavior as material builders? # If validation store exists, find tasks that are invalid and remove them @@ -393,7 +428,7 @@ def process_item(self, items): defect_tasks=defect_tasks, bulk_tasks=bulk_tasks, dielectrics=dielectrics, query=self.defect_query, key=self.tasks.key, material_id=material_id ) - return defect_doc.dict() + return jsanitize(defect_doc.dict(), allow_bson=True, enum_values=True, strict=True) return {} def update_targets(self, items): @@ -412,10 +447,7 @@ def update_targets(self, items): "task_ids": item['task_ids'], } ) - self.defects.update( - docs=jsanitize(items, allow_bson=True), - key='task_ids', - ) + self.defects.update(items, key='task_ids') else: self.logger.info("No items to update") @@ -462,19 +494,25 @@ def are_equal(x, y): To decide if defects are equal. 
Either the defect objects are equal, OR two different defect objects relaxed to the same final structure (common with interstitials). -:w TODO Need a way to do the output structure comparison for a X atom defect cell TODO which can be embedded in a Y atom defect cell up to tolerance. """ + + # Defects with diff charges return true for the native __eq__ + if x['structure'].charge != y['structure'].charge: + return False + + # Are the defect objects eq. if x['defect'] == y['defect']: return True - # TODO This is needed for ghost vacancy unfortunately, since sm.fit can't distinguish ghosts - if x['defect'].defect_composition == y['defect'].defect_composition and \ - x['defect'].charge == y['defect'].charge and \ + # Are the final structures equal + # element-changes needed for ghost vacancies, since sm.fit can't distinguish them + if x['defect'].element_changes == y['defect'].element_changes and \ sm.fit(x['structure'], y['structure']): return True + return False sorted_s_list = sorted(enumerate(defects), key=lambda x: key(x[1])) @@ -682,21 +720,28 @@ def __get_pristine_supercell(self, task): - If no follow up transform exists, the calculation input will be returned If defect cannot be found in task, return the input structure. 
+ + scale_matrix = np.array(scaling_matrix, int) + if scale_matrix.shape != (3, 3): + scale_matrix = np.array(scale_matrix * np.eye(3), int) + new_lattice = Lattice(np.dot(scale_matrix, self._lattice.matrix)) """ d = unpack(query=self.defect_query, d=task) + out_structure = MontyDecoder().process_decoded(task['output']['output']['structure']) if d: defect = MontyDecoder().process_decoded(d) - sc_mat = task.get('output', {}).get('additional_json', {}).get("info", {}).get('sc_mat') s = defect.structure.copy() + sc_mat = out_structure.lattice.matrix.dot(np.linalg.inv(s.lattice.matrix)) s.make_supercell(sc_mat) return s else: - return MontyDecoder().process_decoded(task['output']['output']['structure']) + return out_structure #TODO Major problem with this builder. materials store is used to sync the diel, elec, and pd with a single material id #TODO This is a problem because the material id in vasp store is not synced to cp2k store #TODO Also the chempots needed to adjust entries must come from cp2k, but you need to give vasp to sync the others -class DefectThermoBuilder(Builder): +#TODO Thermo store is being replaced with a manual definition of chempots until further notice +class DefectiveMaterialBuilder(Builder): """ This builder creates collections of the DefectThermoDoc object. 
@@ -712,9 +757,9 @@ def __init__( defects: Store, defect_thermos: Store, materials: Store, - thermo: Store, electronic_structures: Store, dos: Store, + thermo: Dict, query: Optional[Dict] = None, **kwargs, ): @@ -731,14 +776,14 @@ def __init__( self.defect_thermos = defect_thermos self.materials = materials self.thermo = thermo - self.dos = dos self.electronic_structures = electronic_structures + self.dos = dos self.query = query if query else {} self.timestamp = None self.kwargs = kwargs - super().__init__(sources=[defects, materials, thermo, electronic_structures, dos], targets=[defect_thermos], **kwargs) + super().__init__(sources=[defects, materials, electronic_structures, dos], targets=[defect_thermos], **kwargs) def ensure_indexes(self): """ @@ -785,12 +830,13 @@ def get_items(self) -> Iterator[List[Dict]]: def filterfunc(x): # material for defect x exists - if not list(self.materials.query(criteria={'material_id': x['material_id']}, properties=None)): + if not self.materials.query_one(criteria={'material_id': x['material_id']}, properties=None): self.logger.debug(f"No material with MPID={x['material_id']} in the material store") return False - for el in load_class(x['defect']['@module'], x['defect']['@class']).from_dict(x['defect']).defect_composition: - if not list(self.thermo.query(criteria={'chemsys': str(el)}, properties=None)): + defect = MontyDecoder().process_decoded(x['defect']) + for el in defect.element_changes: + if el not in self.thermo: self.logger.debug(f"No entry for {el} in Thermo Store") return False @@ -805,7 +851,7 @@ def filterfunc(x): group = [g for g in group] try: mat = self.__get_materials(key) - thermo = self.__get_thermos(mat.composition) + thermo = self.thermo #self.__get_thermos(mat.composition) elec = self.__get_electronic_structure(group[0]['material_id']) yield (group, mat, thermo, elec) except LookupError as exception: @@ -816,10 +862,10 @@ def process_item(self, docs): Process a group of defects belonging to the same material 
into a defect thermo doc """ self.logger.info(f"Processing defects") - defects, material, thermos, elec_struc = docs + defects, material, thermo, dos = docs defects = [DefectDoc(**d) for d in defects] - thermos = [ThermoDoc(**t) for t in thermos] - defect_thermo_doc = DefectThermoDoc.from_docs(defects, thermos=thermos, electronic_structure=elec_struc) + dos = CompleteDos.from_dict(dos) + defect_thermo_doc = DefectiveMaterialDoc.from_docs(defects, thermo=thermo, dos=dos) return defect_thermo_doc.dict() def update_targets(self, items): From 725f7cae97634c6ce9169d313d7831d673b2150f Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 28 Nov 2022 16:18:25 -0800 Subject: [PATCH 14/50] DefectDoc --- src/atomate2/cp2k/schemas/defect.py | 53 +++++++++++++++++------------ 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index ece81f2959..d436d8e80d 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -9,10 +9,10 @@ from monty.tempfile import ScratchDir from pymatgen.core import Structure -from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry from pymatgen.analysis.defects.core import Defect from pymatgen.analysis.defects.corrections import get_freysoldt_correction, get_freysoldt2d_correction -from pymatgen.analysis.defects.thermo import DefectEntry, DefectSiteFinder +from pymatgen.analysis.defects.thermo import DefectEntry, DefectSiteFinder, FormationEnergyDiagram from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from atomate2 import SETTINGS @@ -67,9 +67,13 @@ class DefectDoc(StructureMetadata): None, description="Task ids (defect task, bulk task) for all tasks of a RunType" ) - entries: Mapping[RunType, DefectEntry] = Field( + defect_entries: Mapping[RunType, DefectEntry] = Field( None, description="Dictionary for tracking entries for CP2K 
calculations" ) + + bulk_entries: Mapping[RunType, ComputedStructureEntry] = Field( + None, description="Computed structure entry for the bulk calc." + ) last_updated: datetime = Field( description="Timestamp for when this document was last updated", @@ -83,14 +87,6 @@ class DefectDoc(StructureMetadata): metadata: Dict = Field(description="Metadata for this defect") - # TODO How can monty serialization incorporate into pydantic? It seems like VASP MatDocs dont need this - @validator("entries", pre=True) - def decode(cls, entries): - for e in entries: - if isinstance(entries[e], dict): - entries[e] = MontyDecoder().process_decoded({k: v for k, v in entries[e].items()}) - return entries - def update(self, defect_task, bulk_task, dielectric, query='defect'): defect_task_doc = TaskDocument(**defect_task) @@ -136,7 +132,7 @@ def update_all(self, defect_tasks: List, bulk_tasks: List, dielectrics: List, qu self.update(defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, query=query) @classmethod - def from_tasks(cls: Type[T], defect_tasks: List, bulk_tasks: List, dielectrics: List, query='defect', key="task_id", material_id=None): + def from_tasks(cls: Type[T], defect_tasks: List, bulk_tasks: List, dielectrics: List, query='defect', key="task_id", material_id=None) -> T: """ The standard way to create this document. 
Args: @@ -169,25 +165,30 @@ def _sort(x): # TODO return kpoint density, currently just does supercell size return -x[0].nsites, x[0].output.energy - entries = {} + defect_entries = {} + bulk_entries = {} all_tasks = {} best_tasks = {} metadata = {} for key, tasks_for_runtype in groupby(sorted(zip(defect_tasks, bulk_tasks, defects, dielectrics, defect_task_ids, bulk_task_ids), key=_run_type), key=_run_type): sorted_tasks = sorted(tasks_for_runtype, key=_sort) ents = [ - cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric) + ( + cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric), + cls.get_bulk_entry_from_task(bulk_task) + ) for defect_task, bulk_task, defect, dielectric, did, bid in sorted_tasks ] rt = run_types[sorted_tasks[0][-2]] - best_entry = ents[0] best_tasks[rt] = (sorted_tasks[0][-2], sorted_tasks[0][-1]) all_tasks[rt] = [ (s[-2], s[-1]) for s in sorted_tasks ] - metadata[key] = {'convergence': [(sorted_tasks[i][0].nsites, ents[i].corrected_energy) for i in range(len(ents))]} - entries[rt] = ents[0] + metadata[key] = {'convergence': [(sorted_tasks[i][0].nsites, ents[i][0].corrected_energy) for i in range(len(ents))]} + defect_entries[rt], bulk_entries[rt] = ents[0] + v = next(iter(defect_entries.values())) data = { - 'entries': entries, + 'defect_entries': defect_entries, + "bulk_entries": bulk_entries, 'run_types': run_types, 'task_types': task_types, 'calc_types': calc_types, @@ -197,11 +198,12 @@ def _sort(x): #'deprecated_tasks': deprecated_tasks, 'all_tasks': all_tasks, 'best_tasks': best_tasks, - 'material_id': material_id if material_id else best_entry.parameters['material_id'], - 'defect': best_entry.defect, + 'material_id': material_id if material_id else v.parameters['material_id'], + 'defect': v.defect, + "name": v.defect.name, 'metadata': metadata, } - prim = SpacegroupAnalyzer(best_entry.defect.structure).get_primitive_standard_structure() + prim = 
SpacegroupAnalyzer(v.defect.structure).get_primitive_standard_structure() data.update(StructureMetadata.from_structure(prim).dict()) return cls(**data) @@ -225,7 +227,7 @@ def get_defect_entry_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskD sc_entry = ComputedStructureEntry( structure=parameters['final_defect_structure'], - energy=parameters['defect_energy'] - parameters['bulk_energy'] + energy=parameters['defect_energy'] ) defect_entry = DefectEntry( @@ -238,6 +240,13 @@ def get_defect_entry_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskD return defect_entry + @classmethod + def get_bulk_entry_from_task(cls, bulk_task: TaskDocument): + return ComputedStructureEntry( + structure=bulk_task.structure, + energy=bulk_task.output.energy, + ) + @classmethod def get_correction_from_parameters(cls, parameters) -> Tuple[Dict, Dict]: corrections = {} From 441c4f87c1aadd3235926c261f19ec492af67c1e Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 28 Nov 2022 16:38:33 -0800 Subject: [PATCH 15/50] DefectiveMat --- src/atomate2/cp2k/builders/defect.py | 3 -- src/atomate2/cp2k/schemas/defect.py | 64 ++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index fd90e5b8ae..128eb72817 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -19,11 +19,8 @@ from atomate.utils.utils import load_class -from emmet.core.thermo import ThermoDoc from emmet.core.material import MaterialsDoc -from emmet.builders.settings import EmmetBuildSettings - from atomate2.settings import Atomate2Settings from atomate2.cp2k.schemas.task import TaskDocument from atomate2.cp2k.schemas.defect import DefectDoc, DefectiveMaterialDoc diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index d436d8e80d..302dcbd1ee 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py 
@@ -24,6 +24,7 @@ __all__ = ["DefectDoc"] T = TypeVar("T", bound="DefectDoc") +S = TypeVar("S", bound="DefectiveMaterialDoc") class DefectDoc(StructureMetadata): """ @@ -331,6 +332,69 @@ def get_parameters_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskDoc return parameters +class DefectiveMaterialDoc(StructureMetadata): + """Document containing all / many defect tasks for a single material ID""" + + property_name: ClassVar[str] = "defective material" + + material_id: str = Field(None, description="Unique material ID for the bulk material") #TODO Change to MPID + + formation_energy_diagrams: Mapping[RunType, FormationEnergyDiagram] = Field(None, description="") + + last_updated: datetime = Field( + description="Timestamp for when this document was last updated", + default_factory=datetime.utcnow, + ) + + created_at: datetime = Field( + description="Timestamp for when this material document was first created", + default_factory=datetime.utcnow, + ) + + metadata: Dict = Field(None, description="Metadata for this object") + + @classmethod + def from_docs(cls: Type["S"], defect_docs: DefectDoc, thermo: Dict, dos) -> S: + """ + # Metadata + metadata = {} + last_updated = datetime.now() + created_at = datetime.now() + + bulk_ents = {} + dfct_ents = {} + formation_energy_diagrams = {} + els = set() + for doc in defect_docs: + els = els | set(doc.defect.element_changes.keys()) + for rt, defect_entry in doc.defect_entries.items(): + if rt not in dfct_ents: + dfct_ents[rt] = [] + dfct_ents[rt].append(defect_entry) + bulk_ents[rt] = doc.bulk_entries[rt] + + atomic_entries = [ComputedEntry(composition=str(el), energy=thermo[el]) for el in els] + + for rt in dfct_ents: + + pd = PhaseDiagram(mp_entries) + cbm, vbm = dos.get_cbm_vbm() + + adjusted_entries = _get_adjusted_pd_entries( + phase_diagram=pd, atomic_entries=atomic_entries + ) + + formation_energy_diagrams[rt] = FormationEnergyDiagram.with_atomic_entries( + bulk_entry=bulk_ents[rt], 
defect_entries=dfct_ents[rt], + atomic_entries=atomic_entries, phase_diagram=pd, vbm=vbm, + band_gap=cbm-vbm, + ) + """ + + raise NotImplementedError + + + def unpack(query, d): if not query: return d From 1ddeec51d63aed13d92537fd640324fd1a91274c Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 28 Nov 2022 18:05:54 -0800 Subject: [PATCH 16/50] Print PDOS --- src/atomate2/cp2k/jobs/defect.py | 3 ++- src/atomate2/cp2k/sets/defect.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index a5c849cf87..43ce4717d7 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -23,7 +23,8 @@ DEFECT_TASK_DOC = { "average_v_hartree": True, - "store_volumetric_data": ("v_hartree",) + "store_volumetric_data": ("v_hartree",), + "print_pdos": True, } @dataclass diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py index 2e454af5dc..20c0fbbf12 100644 --- a/src/atomate2/cp2k/sets/defect.py +++ b/src/atomate2/cp2k/sets/defect.py @@ -22,7 +22,7 @@ class DefectSetGenerator(Cp2kInputGenerator): def get_input_updates(self, structure: Structure, *args, **kwargs) -> dict: """ """ - return {'print_v_hartree': True} + return {'print_v_hartree': True, "print_pdos": True} @dataclass @multiple_input_updators() From 9f2f7ecdb80bf3d480ac31f07c71b6f35b56e364 Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 29 Nov 2022 08:41:21 -0800 Subject: [PATCH 17/50] Defects --- src/atomate2/cp2k/builders/defect.py | 7 ++++--- src/atomate2/cp2k/jobs/defect.py | 3 +-- src/atomate2/cp2k/schemas/defect.py | 7 ++++++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 128eb72817..2f381c3d4c 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -137,7 +137,8 @@ def __init__( 'output.output.energy', 'output.output.structure', 
'output.input', - 'output.cp2k_objects.v_hartree' + 'output.cp2k_objects.v_hartree', + 'output.vbm', ] self._optional_defect_properties = [] @@ -281,7 +282,7 @@ def get_items(self) -> Iterator[List[Dict]]: task bundles bundle are all the tasks that correspond to the same defect and all possible bulk tasks that could be matched to them. -d """ + """ self.logger.info("Defect builder started") self.logger.info( @@ -938,4 +939,4 @@ def get_sg(struc, symprec=.01) -> int: try: return struc.get_space_group_info(symprec=symprec)[1] except Exception: - return -1 \ No newline at end of file + return -1 diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 43ce4717d7..275db0ef73 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -24,7 +24,6 @@ DEFECT_TASK_DOC = { "average_v_hartree": True, "store_volumetric_data": ("v_hartree",), - "print_pdos": True, } @dataclass @@ -134,4 +133,4 @@ def defect_structure(self): """Returns the defect structure with the proper oxidation state""" struct = self.structure.copy() struct.add_site_property("ghost", [i == self.defect_site_index for i in range(len(struct))]) - return struct \ No newline at end of file + return struct diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 302dcbd1ee..9732f63c8b 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -76,6 +76,8 @@ class DefectDoc(StructureMetadata): None, description="Computed structure entry for the bulk calc." ) + vbm: Mapping[RunType, float] = Field(None, description="VBM for bulk task of each run type. 
Used for aligning potential") + last_updated: datetime = Field( description="Timestamp for when this document was last updated", default_factory=datetime.utcnow, @@ -170,6 +172,7 @@ def _sort(x): bulk_entries = {} all_tasks = {} best_tasks = {} + vbm = {} metadata = {} for key, tasks_for_runtype in groupby(sorted(zip(defect_tasks, bulk_tasks, defects, dielectrics, defect_task_ids, bulk_task_ids), key=_run_type), key=_run_type): sorted_tasks = sorted(tasks_for_runtype, key=_sort) @@ -181,6 +184,7 @@ def _sort(x): for defect_task, bulk_task, defect, dielectric, did, bid in sorted_tasks ] rt = run_types[sorted_tasks[0][-2]] + vbm[rt] = sorted_tasks[0][1].output.vbm best_tasks[rt] = (sorted_tasks[0][-2], sorted_tasks[0][-1]) all_tasks[rt] = [ (s[-2], s[-1]) for s in sorted_tasks ] metadata[key] = {'convergence': [(sorted_tasks[i][0].nsites, ents[i][0].corrected_energy) for i in range(len(ents))]} @@ -202,6 +206,7 @@ def _sort(x): 'material_id': material_id if material_id else v.parameters['material_id'], 'defect': v.defect, "name": v.defect.name, + "vbm": vbm, 'metadata': metadata, } prim = SpacegroupAnalyzer(v.defect.structure).get_primitive_standard_structure() @@ -400,4 +405,4 @@ def unpack(query, d): return d if isinstance(d, List): return unpack(query[1:], d.__getitem__(int(query.pop(0)))) - return unpack(query[1:], d.__getitem__(query.pop(0))) \ No newline at end of file + return unpack(query[1:], d.__getitem__(query.pop(0))) From a4e3b01c411a1f520068a43c2fb4da495ee27de4 Mon Sep 17 00:00:00 2001 From: nwinner Date: Wed, 30 Nov 2022 16:26:39 -0800 Subject: [PATCH 18/50] Builder --- src/atomate2/cp2k/builders/defect.py | 65 +++++++++++++++++----------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 2f381c3d4c..a62ad8a8f7 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -16,8 +16,7 @@ from 
pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher from pymatgen.electronic_structure.dos import CompleteDos from pymatgen.symmetry.analyzer import SpacegroupAnalyzer - -from atomate.utils.utils import load_class +from pymatgen.io.cp2k.inputs import Cp2kInput from emmet.core.material import MaterialsDoc @@ -92,7 +91,8 @@ def __init__( materials: Store of materials documents electrostatic_potentials: Store of electrostatic potential data. These are generally stored in seperately from the tasks on GridFS due to their size. - task_validation: Store of task validation documents. + task_validation: Store of task validation documents. If true, then only tasks that have passed + validation will be considered. query: dictionary to limit tasks to be analyzed. NOT the same as the defect_query property allowed_task_types: list of task_types that can be processed settings: EmmetBuildSettings object @@ -138,7 +138,7 @@ def __init__( 'output.output.structure', 'output.input', 'output.cp2k_objects.v_hartree', - 'output.vbm', + 'output.output.vbm', ] self._optional_defect_properties = [] @@ -321,7 +321,7 @@ def get_items(self) -> Iterator[List[Dict]]: ##### Get bulk tasks ##### temp_query = self.bulk_query.copy() - temp_query.update({d: {'$exists': True} for d in self.required_bulk_properties}) + temp_query.update({d: {'$exists': True, "$ne": None} for d in self.required_bulk_properties}) temp_query.update({self.defect_query: {'$exists': False}, "output.state": "successful"}) bulk_tasks = { doc[self.tasks.key] @@ -456,7 +456,7 @@ def __filter_and_group_tasks(self, tasks): will be grouped together. Args: - tasks: task_ids for unprocessed defects + tasks: task_ids (according to self.tasks.key) for unprocessed defects returns: [ (defect, [task_ids] ), ...] 
where task_ids correspond to the same defect @@ -631,7 +631,7 @@ def __match_defects_to_bulks(self, bulk_ids, defect_ids): 'output.input', 'output.nsites', 'output.output.structure', - "output.additional_json.info.sc_mat" + 'output.calcs_reversed' ] defects = list(self.tasks.query(criteria={self.tasks.key: {'$in': list(defect_ids)}}, properties=props)) ps = self.__get_pristine_supercell(defects[0]) @@ -646,33 +646,48 @@ def __match_defects_to_bulks(self, bulk_ids, defect_ids): ) ) - # TODO add settings - sm = StructureMatcher( - primitive_cell=False, - scale=True, - attempt_supercell=False, - allow_subset=False, - comparator=ElementComparator(), - ) - - def _compare(b, d): - rtb = b.get('output').get('input').get('xc').split("+U")[0] - rtd = d.get('output').get('input').get('xc').split("+U")[0] - if rtb == rtd: - if sm.fit(self.__get_pristine_supercell(d), self.__get_pristine_supercell(b)): - return True - return False - pairs = [ (defect[self.tasks.key], bulk[self.tasks.key]) for bulk in bulks for defect in defects - if _compare(bulk, defect) + if self.__are_bulk_and_defect_commensurate(bulk, defect) ] self.logger.debug(f"Found {len(pairs)} commensurate bulk/defect pairs") return pairs + # TODO Checking for same dft settings (e.g. OT/diag) is a little cumbersome. + # Maybe, in future, task doc can be defined to have OT/diag as part of input summary + # for fast querying + def __are_bulk_and_defect_commensurate(self, b, d): + """ + Check if a bulk and defect task are commensurate. + + Checks for: + 1. Same run type. + 2. Same pristine structures with no supercell reduction + 3. 
Compatible DFT settings + """ + # TODO add settings + sm = StructureMatcher( + primitive_cell=False, + scale=True, + attempt_supercell=False, + allow_subset=False, + comparator=ElementComparator(), + ) + rtb = b.get('output').get('input').get('xc').split("+U")[0] + rtd = d.get('output').get('input').get('xc').split("+U")[0] + if rtb == rtd: + if sm.fit(self.__get_pristine_supercell(d), self.__get_pristine_supercell(b)): + cib = Cp2kInput.from_dict(b['output']['calcs_reversed'][0]['input']['cp2k_input']) + cid = Cp2kInput.from_dict(d['output']['calcs_reversed'][0]['input']['cp2k_input']) + bis_ot = cib.check("force_eval/dft/scf/ot") + dis_ot = cid.check("force_eval/dft/scf/ot") + if (bis_ot and dis_ot) or (not bis_ot and not dis_ot): + return True + return False + def __preprocess_bulk(self, task): """ Given a TaskDoc that could be a bulk for defect analysis, check to see if it can be used. Bulk From c2064b8239e2e455bc06e9389c7ccc110f82e615 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 5 Dec 2022 16:21:44 -0800 Subject: [PATCH 19/50] Defect --- src/atomate2/cp2k/builders/defect.py | 33 +- src/atomate2/cp2k/flows/defect.py | 6 +- src/atomate2/cp2k/jobs/defect.py | 12 +- src/atomate2/cp2k/schemas/defect.py | 452 +++++++++++++++++---------- src/atomate2/cp2k/sets/defect.py | 2 + 5 files changed, 309 insertions(+), 196 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index a62ad8a8f7..ac19b55bf0 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -313,7 +313,7 @@ def get_items(self) -> Iterator[List[Dict]]: not_allowed = { doc[self.tasks.key] for doc in self.tasks.query(criteria={self.tasks.key: {"$in": list(defect_tasks)}}) - if TaskDocument(**doc['output']).calcs_reversed[0].task_type not in self.allowed_dfct_types + if TaskType(doc['output']['calcs_reversed'][0]['task_type']) not in self.allowed_dfct_types } if not_allowed: self.logger.debug(f"{len(not_allowed)} 
defect tasks dropped. Not allowed TaskType") @@ -332,7 +332,7 @@ def get_items(self) -> Iterator[List[Dict]]: not_allowed = { doc[self.tasks.key] for doc in self.tasks.query(criteria={self.tasks.key: {"$in": list(bulk_tasks)}}) - if TaskDocument(**doc['output']).calcs_reversed[0].task_type not in self.allowed_bulk_types + if TaskType(doc['output']['calcs_reversed'][0]['task_type']) not in self.allowed_bulk_types } if not_allowed: self.logger.debug(f"{len(not_allowed)} bulk tasks dropped. Not allowed TaskType") @@ -770,9 +770,6 @@ def __init__( defects: Store, defect_thermos: Store, materials: Store, - electronic_structures: Store, - dos: Store, - thermo: Dict, query: Optional[Dict] = None, **kwargs, ): @@ -788,15 +785,12 @@ def __init__( self.defects = defects self.defect_thermos = defect_thermos self.materials = materials - self.thermo = thermo - self.electronic_structures = electronic_structures - self.dos = dos self.query = query if query else {} self.timestamp = None self.kwargs = kwargs - super().__init__(sources=[defects, materials, electronic_structures, dos], targets=[defect_thermos], **kwargs) + super().__init__(sources=[defects, materials], targets=[defect_thermos], **kwargs) def ensure_indexes(self): """ @@ -842,11 +836,10 @@ def get_items(self) -> Iterator[List[Dict]]: self.logger.debug(f"Found {len(all_docs)} defect docs to process") def filterfunc(x): - # material for defect x exists if not self.materials.query_one(criteria={'material_id': x['material_id']}, properties=None): self.logger.debug(f"No material with MPID={x['material_id']} in the material store") return False - + return True defect = MontyDecoder().process_decoded(x['defect']) for el in defect.element_changes: if el not in self.thermo: @@ -861,24 +854,18 @@ def filterfunc(x): sorted(all_docs, key=lambda x: x['material_id']) ), key=lambda x: x['material_id'] ): - group = [g for g in group] try: - mat = self.__get_materials(key) - thermo = self.thermo #self.__get_thermos(mat.composition) - 
elec = self.__get_electronic_structure(group[0]['material_id']) - yield (group, mat, thermo, elec) + yield list(group) except LookupError as exception: raise exception - def process_item(self, docs): + def process_item(self, defects): """ Process a group of defects belonging to the same material into a defect thermo doc """ - self.logger.info(f"Processing defects") - defects, material, thermo, dos = docs - defects = [DefectDoc(**d) for d in defects] - dos = CompleteDos.from_dict(dos) - defect_thermo_doc = DefectiveMaterialDoc.from_docs(defects, thermo=thermo, dos=dos) + defect_docs = [DefectDoc(**d) for d in defects] + self.logger.info(f"Processing {len(defect_docs)} defects") + defect_thermo_doc = DefectiveMaterialDoc.from_docs(defect_docs, material_id=defect_docs[0].material_id) return defect_thermo_doc.dict() def update_targets(self, items): @@ -892,7 +879,7 @@ def update_targets(self, items): if len(items) > 0: self.logger.info(f"Updating {len(items)} defect thermo docs") self.defect_thermos.update( - docs=jsanitize(items, allow_bson=True), + docs=jsanitize(items, allow_bson=True, enum_values=True, strict=True), key=self.defect_thermos.key, ) else: diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index 5098d16dfa..5567d60502 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -153,7 +153,9 @@ def make( self, defects: Iterable[Defect], charges: bool | Iterable[int] = False, dielectric: NDArray | int | float | None = None, - prev_cp2k_dir: str | Path | None = None): + prev_cp2k_dir: str | Path | None = None, + collect_outputs: bool = True, + ): """Make a flow to run multiple defects in order to calculate their formation energy diagram. 
@@ -194,7 +196,7 @@ def make( jobs.append(defect_job) defect_outputs[defect.name][int(charge)] = (defect, defect_job.output) - if self.run_bulk and defects: + if self.run_bulk and defects and collect_outputs: collect_job = collect_defect_outputs( defect_outputs=defect_outputs, bulk_output=bulk_job.output if self.run_bulk else None, diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 275db0ef73..1b712ed58e 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -39,21 +39,19 @@ class BaseDefectMaker(BaseCp2kMaker): @cp2k_job def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | Path | None = None): if isinstance(defect, Defect): - if isinstance(defect, Vacancy): - defect = GhostVacancy( - structure=defect.structure, site=defect.site, - multiplicity=defect.multiplicity, oxi_state=defect.oxi_state, - symprec=defect.symprec, angle_tolerance=defect.angle_tolerance - ) + structure = defect.get_supercell_structure( sc_mat=self.supercell_matrix, - dummy_species=None, + dummy_species=defect.site.species if isinstance(defect, Vacancy) else None, min_atoms=self.min_atoms, max_atoms=self.max_atoms, min_length=self.min_length, force_diagonal=self.force_diagonal, ) + if isinstance(defect, Vacancy): + structure.sites[-1].properties['ghost'] = True + # provenance stuff recursive_update(self.write_additional_data, { "info.json": { diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 9732f63c8b..c606342292 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -5,17 +5,27 @@ from pydantic import validator from itertools import groupby -from monty.json import MontyDecoder +from monty.json import MontyDecoder from monty.tempfile import ScratchDir -from pymatgen.core import Structure +from pymatgen.core import Structure, Element from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry 
-from pymatgen.analysis.defects.core import Defect -from pymatgen.analysis.defects.corrections import get_freysoldt_correction, get_freysoldt2d_correction -from pymatgen.analysis.defects.thermo import DefectEntry, DefectSiteFinder, FormationEnergyDiagram +from pymatgen.analysis.phase_diagram import PhaseDiagram +from pymatgen.analysis.defects.core import Defect, DefectType +from pymatgen.analysis.defects.corrections import ( + get_freysoldt_correction, + get_freysoldt2d_correction, +) +from pymatgen.analysis.defects.thermo import ( + DefectEntry, + DefectSiteFinder, + FormationEnergyDiagram, +) from pymatgen.symmetry.analyzer import SpacegroupAnalyzer -from atomate2 import SETTINGS +from emmet.core.utils import ValueEnum + +from atomate2 import SETTINGS from atomate2.common.schemas.structure import StructureMetadata from atomate2.cp2k.schemas.calc_types.utils import run_type, task_type, calc_type from atomate2.cp2k.schemas.calc_types.enums import CalcType, TaskType, RunType @@ -26,6 +36,7 @@ T = TypeVar("T", bound="DefectDoc") S = TypeVar("S", bound="DefectiveMaterialDoc") + class DefectDoc(StructureMetadata): """ A document used to represent a single defect. e.g. a O vacancy with a -2 charge. 
@@ -36,11 +47,19 @@ class DefectDoc(StructureMetadata): property_name: ClassVar[str] = "defect" - defect: Defect = Field(None, description="Pymatgen defect object for this defect doc") + defect: Defect = Field( + None, description="Pymatgen defect object for this defect doc" + ) + + charge: int = Field(None, description="Charge state for this defect") - name: str = Field(None, description="Name of this defect as generated by the defect object") + name: str = Field( + None, description="Name of this defect as generated by the defect object" + ) - material_id: str = Field(None, description="Unique material ID for the bulk material") #TODO Change to MPID + material_id: str = Field( + None, description="Unique material ID for the bulk material" + ) # TODO Change to MPID # TODO Should it be all (defect + bulk) ids? task_ids: List[str] = Field( @@ -71,12 +90,15 @@ class DefectDoc(StructureMetadata): defect_entries: Mapping[RunType, DefectEntry] = Field( None, description="Dictionary for tracking entries for CP2K calculations" ) - + bulk_entries: Mapping[RunType, ComputedStructureEntry] = Field( None, description="Computed structure entry for the bulk calc." ) - vbm: Mapping[RunType, float] = Field(None, description="VBM for bulk task of each run type. Used for aligning potential") + vbm: Mapping[RunType, float] = Field( + None, + description="VBM for bulk task of each run type. 
Used for aligning potential", + ) last_updated: datetime = Field( description="Timestamp for when this document was last updated", @@ -90,52 +112,64 @@ class DefectDoc(StructureMetadata): metadata: Dict = Field(description="Metadata for this defect") - def update(self, defect_task, bulk_task, dielectric, query='defect'): - - defect_task_doc = TaskDocument(**defect_task) - bulk_task_doc = TaskDocument(**bulk_task) - - rt = defect_task_doc.run_type - tt = defect_task_doc.task_type - ct = defect_task_doc.calc_type + def update(self, defect_task, bulk_task, dielectric, query="defect", key="task_id"): # Metadata - last_updated = max(dtsk.last_updated for dtsk, btsk in self.tasks.values()) if self.tasks else datetime.now() - created_at = min(dtsk.last_updated for dtsk, btsk in self.tasks.values()) if self.tasks else datetime.now() - - if defect_task_doc.task_id in self.task_ids: - return - else: - self.last_updated = last_updated - self.created_at = created_at - self.task_ids.append(defect_task_doc.task_id) - - def _run_type(x): - return run_type(x[0]['input']['dft']).value - - def _compare(new, old): - # TODO return kpoint density - return new['nsites'] > old.nsites - - if defect_task_doc.run_type not in self.tasks or _compare(defect_task, self.tasks[rt][0]): - self.run_types.update({defect_task_doc.task_id: rt}) - self.task_types.update({defect_task_doc.task_id: tt}) - self.calc_types.update({defect_task_doc.task_id: ct}) - entry = self.__class__.get_defect_entry_from_tasks( - defect_task=defect_task, - bulk_task=bulk_task, - dielectric=dielectric, - query=query - ) - self.entries[rt] = entry - self.tasks[rt] = (defect_task_doc, bulk_task_doc) - - def update_all(self, defect_tasks: List, bulk_tasks: List, dielectrics: List, query='defect'): - for defect_task, bulk_task, dielectric in zip(defect_tasks, bulk_tasks, dielectrics): - self.update(defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, query=query) + self.last_updated = datetime.now() + 
self.created_at = datetime.now() + + defect = self.get_defect_from_task(query=query, task=defect_task) + d_id = defect_task[key] + b_id = bulk_task[key] + defect_task = TaskDocument(**defect_task) + bulk_task = TaskDocument(**bulk_task) + defect_entry = self.get_defect_entry_from_tasks( + defect_task, bulk_task, defect, dielectric + ) + bulk_entry = self.get_bulk_entry_from_task(bulk_task) + + rt = defect_task.calcs_reversed[0].run_type + current_largest_sc = self.defect_entries[rt].sc_entry.composition.num_atoms + potential_largest_sc = defect_entry.sc_entry.composition.num_atoms + if ( + rt not in self.defect_entries + or potential_largest_sc > current_largest_sc + or ( + potential_largest_sc == current_largest_sc + and defect_entry.sc_entry.energy + < self.defect_entries[rt].sc_entry.energy + ) + ): + self.defect_entries[rt] = defect_entry + self.bulk_entries[rt] = bulk_entry + self.best_tasks[rt] = (d_id, b_id) + + self.all_tasks[rt].append((d_id, b_id)) + self.metadata["convergence"].append((current_largest_sc, defect_entry.corrected_energy - bulk_entry.energy)) + + def update_all( + self, defect_tasks: List, bulk_tasks: List, dielectrics: List, query="defect" + ): + for defect_task, bulk_task, dielectric in zip( + defect_tasks, bulk_tasks, dielectrics + ): + self.update( + defect_task=defect_task, + bulk_task=bulk_task, + dielectric=dielectric, + query=query, + ) @classmethod - def from_tasks(cls: Type[T], defect_tasks: List, bulk_tasks: List, dielectrics: List, query='defect', key="task_id", material_id=None) -> T: + def from_tasks( + cls: Type[T], + defect_tasks: List, + bulk_tasks: List, + dielectrics: List, + query="defect", + key="task_id", + material_id=None, + ) -> T: """ The standard way to create this document. 
Args: @@ -145,21 +179,31 @@ def from_tasks(cls: Type[T], defect_tasks: List, bulk_tasks: List, dielectrics: """ defect_task_ids = [defect_task[key] for defect_task in defect_tasks] bulk_task_ids = [bulk_task[key] for bulk_task in bulk_tasks] - bulk_tasks= [TaskDocument(**bulk_task['output']) for bulk_task in bulk_tasks] - defects = [cls.get_defect_from_task(query=query, task=defect_task) for defect_task in defect_tasks] - defect_tasks = [TaskDocument(**defect_task['output']) for defect_task in defect_tasks] - + bulk_tasks = [TaskDocument(**bulk_task["output"]) for bulk_task in bulk_tasks] + defects = [ + cls.get_defect_from_task(query=query, task=defect_task) + for defect_task in defect_tasks + ] + defect_tasks = [ + TaskDocument(**defect_task["output"]) for defect_task in defect_tasks + ] + # Metadata last_updated = datetime.now() or max(task.last_updated for task in defect_tasks) created_at = datetime.now() or min(task.completed_at for task in defect_tasks) - #deprecated_tasks = list( - # {task.task_id for task in task_group if not task.is_valid} - #) - - run_types = {id: task.calcs_reversed[0].run_type for id, task in zip(defect_task_ids, defect_tasks)} - task_types = {id: task.calcs_reversed[0].task_type for id, task in zip(defect_task_ids, defect_tasks)} - calc_types = {id: task.calcs_reversed[0].calc_type for id, task in zip(defect_task_ids, defect_tasks)} + run_types = { + id: task.calcs_reversed[0].run_type + for id, task in zip(defect_task_ids, defect_tasks) + } + task_types = { + id: task.calcs_reversed[0].task_type + for id, task in zip(defect_task_ids, defect_tasks) + } + calc_types = { + id: task.calcs_reversed[0].calc_type + for id, task in zip(defect_task_ids, defect_tasks) + } def _run_type(x): return x[0].calcs_reversed[0].run_type.value @@ -174,47 +218,85 @@ def _sort(x): best_tasks = {} vbm = {} metadata = {} - for key, tasks_for_runtype in groupby(sorted(zip(defect_tasks, bulk_tasks, defects, dielectrics, defect_task_ids, bulk_task_ids), 
key=_run_type), key=_run_type): + for key, tasks_for_runtype in groupby( + sorted( + zip( + defect_tasks, + bulk_tasks, + defects, + dielectrics, + defect_task_ids, + bulk_task_ids, + ), + key=_run_type, + ), + key=_run_type, + ): sorted_tasks = sorted(tasks_for_runtype, key=_sort) ents = [ ( - cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric), - cls.get_bulk_entry_from_task(bulk_task) + cls.get_defect_entry_from_tasks( + defect_task, bulk_task, defect, dielectric + ), + cls.get_bulk_entry_from_task(bulk_task), ) for defect_task, bulk_task, defect, dielectric, did, bid in sorted_tasks - ] + ] rt = run_types[sorted_tasks[0][-2]] vbm[rt] = sorted_tasks[0][1].output.vbm - best_tasks[rt] = (sorted_tasks[0][-2], sorted_tasks[0][-1]) - all_tasks[rt] = [ (s[-2], s[-1]) for s in sorted_tasks ] - metadata[key] = {'convergence': [(sorted_tasks[i][0].nsites, ents[i][0].corrected_energy) for i in range(len(ents))]} + best_tasks[rt] = (sorted_tasks[0][-2], sorted_tasks[0][-1]) + all_tasks[rt] = [(s[-2], s[-1]) for s in sorted_tasks] defect_entries[rt], bulk_entries[rt] = ents[0] + metadata[key] = { + "convergence": [ + ( + sorted_tasks[i][0].nsites, + defect_entries[rt].corrected_energy - bulk_entries[rt].energy, + ) + for i in range(len(ents)) + ] + } + + v = next(iter(defect_entries.values())) + metadata["defect_origin"] = ( + "intrinsic" + if all( + el in v.defect.structure.composition + for el in v.defect.element_changes.keys() + ) + else "extrinsic" + ) - v = next(iter(defect_entries.values())) data = { - 'defect_entries': defect_entries, - "bulk_entries": bulk_entries, - 'run_types': run_types, - 'task_types': task_types, - 'calc_types': calc_types, - 'last_updated': last_updated, - 'created_at': created_at, - 'task_ids': defect_task_ids, - #'deprecated_tasks': deprecated_tasks, - 'all_tasks': all_tasks, - 'best_tasks': best_tasks, - 'material_id': material_id if material_id else v.parameters['material_id'], - 'defect': v.defect, - "name": 
v.defect.name, - "vbm": vbm, - 'metadata': metadata, + "defect_entries": defect_entries, + "bulk_entries": bulk_entries, + "run_types": run_types, + "task_types": task_types, + "calc_types": calc_types, + "last_updated": last_updated, + "created_at": created_at, + "task_ids": defect_task_ids, + "all_tasks": all_tasks, + "best_tasks": best_tasks, + "material_id": material_id if material_id else v.parameters["material_id"], + "defect": v.defect, + "charge": v.charge_state, + "name": v.defect.name, + "vbm": vbm, + "metadata": metadata, } prim = SpacegroupAnalyzer(v.defect.structure).get_primitive_standard_structure() data.update(StructureMetadata.from_structure(prim).dict()) return cls(**data) @classmethod - def get_defect_entry_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskDocument, defect: Defect, dielectric=None): + def get_defect_entry_from_tasks( + cls, + defect_task: TaskDocument, + bulk_task: TaskDocument, + defect: Defect, + dielectric=None, + ): """ Extract a defect entry from a single pair (defect and bulk) of tasks. @@ -225,22 +307,24 @@ def get_defect_entry_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskD corrections will be performed, even if the defect is charged. 
query: Mongo-style query to retrieve the defect object from the defect task """ - parameters = cls.get_parameters_from_tasks(defect_task=defect_task, bulk_task=bulk_task) + parameters = cls.get_parameters_from_tasks( + defect_task=defect_task, bulk_task=bulk_task + ) if dielectric: - parameters['dielectric'] = dielectric + parameters["dielectric"] = dielectric corrections, metadata = cls.get_correction_from_parameters(parameters) sc_entry = ComputedStructureEntry( - structure=parameters['final_defect_structure'], - energy=parameters['defect_energy'] - ) + structure=parameters["final_defect_structure"], + energy=parameters["defect_energy"], + ) defect_entry = DefectEntry( defect=defect, - charge_state=parameters['charge_state'], + charge_state=parameters["charge_state"], sc_entry=sc_entry, - sc_defect_frac_coords=parameters['defect_frac_sc_coords'], + sc_defect_frac_coords=parameters["defect_frac_sc_coords"], corrections=corrections, ) @@ -265,36 +349,49 @@ def get_correction_from_parameters(cls, parameters) -> Tuple[Dict, Dict]: @classmethod def get_freysoldt_correction(cls, parameters) -> Tuple[Dict, Dict]: - if parameters['charge_state'] and not parameters.get("2d"): + if parameters["charge_state"] and not parameters.get("2d"): return get_freysoldt_correction( - q=parameters['charge_state'], dielectric=parameters['dielectric'], - defect_locpot=parameters['defect_v_hartree'], - bulk_locpot=parameters['bulk_v_hartree'], - defect_frac_coords=parameters['defect_frac_sc_coords'], - ) + q=parameters["charge_state"], + dielectric=parameters["dielectric"], + defect_locpot=parameters["defect_v_hartree"], + bulk_locpot=parameters["bulk_v_hartree"], + defect_frac_coords=parameters["defect_frac_sc_coords"], + ) return {}, {} - + @classmethod def get_freysoldt2d_correction(cls, parameters): from pymatgen.io.vasp.outputs import VolumetricData as VaspVolumetricData - if parameters['charge_state'] and parameters.get("2d"): - eps_parallel = (parameters['dielectric'][0][0] + 
parameters['dielectric'][1][1]) / 2 - eps_perp = parameters['dielectric'][2][2] - dielectric = (eps_parallel - 1) / (1 - 1/eps_perp) - with ScratchDir('.'): - - lref = VaspVolumetricData(structure=parameters['bulk_v_hartree'].structure, data=parameters['bulk_v_hartree'].data) - ldef = VaspVolumetricData(structure=parameters['defect_v_hartree'].structure, data=parameters['defect_v_hartree'].data) + if parameters["charge_state"] and parameters.get("2d"): + eps_parallel = ( + parameters["dielectric"][0][0] + parameters["dielectric"][1][1] + ) / 2 + eps_perp = parameters["dielectric"][2][2] + dielectric = (eps_parallel - 1) / (1 - 1 / eps_perp) + with ScratchDir("."): + + lref = VaspVolumetricData( + structure=parameters["bulk_v_hartree"].structure, + data=parameters["bulk_v_hartree"].data, + ) + ldef = VaspVolumetricData( + structure=parameters["defect_v_hartree"].structure, + data=parameters["defect_v_hartree"].data, + ) lref.write_file("LOCPOT.ref") ldef.write_file("LOCPOT.def") return get_freysoldt2d_correction( - q=parameters['charge_state'], dielectric=dielectric, defect_locpot=ldef, - bulk_locpot=lref, defect_frac_coords=parameters['defect_frac_sc_coords'], - energy_cutoff=520, slab_buffer=2 - ) + q=parameters["charge_state"], + dielectric=dielectric, + defect_locpot=ldef, + bulk_locpot=lref, + defect_frac_coords=parameters["defect_frac_sc_coords"], + energy_cutoff=520, + slab_buffer=2, + ) return {}, {} @classmethod @@ -302,11 +399,13 @@ def get_defect_from_task(cls, query, task): """ Unpack a Mongo-style query and retrieve a defect object from a task. 
""" - defect = unpack(query.split('.'), task) + defect = unpack(query.split("."), task) return MontyDecoder().process_decoded(defect) @classmethod - def get_parameters_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskDocument): + def get_parameters_from_tasks( + cls, defect_task: TaskDocument, bulk_task: TaskDocument + ): """ Get parameters necessary to create a defect entry from defect and bulk task dicts Args: @@ -317,34 +416,50 @@ def get_parameters_from_tasks(cls, defect_task: TaskDocument, bulk_task: TaskDoc final_defect_structure = defect_task.structure final_bulk_structure = bulk_task.structure - ghost = [index for index, prop in enumerate(final_defect_structure.site_properties.get("ghost")) if prop] + ghost = [ + index + for index, prop in enumerate( + final_defect_structure.site_properties.get("ghost") + ) + if prop + ] if ghost: defect_frac_sc_coords = final_defect_structure[ghost[0]].frac_coords else: - defect_frac_sc_coords = DefectSiteFinder(SETTINGS.SYMPREC).get_defect_fpos(defect_structure=final_defect_structure, base_structure=final_bulk_structure) + defect_frac_sc_coords = DefectSiteFinder(SETTINGS.SYMPREC).get_defect_fpos( + defect_structure=final_defect_structure, + base_structure=final_bulk_structure, + ) parameters = { - 'defect_energy': defect_task.output.energy, - 'bulk_energy': bulk_task.output.energy, - 'final_defect_structure': final_defect_structure, - 'charge_state': defect_task.output.structure.charge, - 'defect_frac_sc_coords': defect_frac_sc_coords, - 'defect_v_hartree': MontyDecoder().process_decoded(defect_task.cp2k_objects['v_hartree']), # TODO CP2K spec name - 'bulk_v_hartree': MontyDecoder().process_decoded(bulk_task.cp2k_objects['v_hartree']), # TODO CP2K spec name + "defect_energy": defect_task.output.energy, + "bulk_energy": bulk_task.output.energy, + "final_defect_structure": final_defect_structure, + "charge_state": defect_task.output.structure.charge, + "defect_frac_sc_coords": defect_frac_sc_coords, + 
"defect_v_hartree": MontyDecoder().process_decoded( + defect_task.cp2k_objects["v_hartree"] + ), # TODO CP2K spec name + "bulk_v_hartree": MontyDecoder().process_decoded( + bulk_task.cp2k_objects["v_hartree"] + ), # TODO CP2K spec name } if defect_task.tags and "2d" in defect_task.tags: - parameters['2d'] = True + parameters["2d"] = True return parameters + class DefectiveMaterialDoc(StructureMetadata): """Document containing all / many defect tasks for a single material ID""" property_name: ClassVar[str] = "defective material" - material_id: str = Field(None, description="Unique material ID for the bulk material") #TODO Change to MPID + material_id: str = Field( + None, description="Unique material ID for the bulk material" + ) # TODO Change to MPID - formation_energy_diagrams: Mapping[RunType, FormationEnergyDiagram] = Field(None, description="") + defect_docs: List[DefectDoc] = Field(None, description="Defect Docs") last_updated: datetime = Field( description="Timestamp for when this document was last updated", @@ -359,46 +474,55 @@ class DefectiveMaterialDoc(StructureMetadata): metadata: Dict = Field(None, description="Metadata for this object") @classmethod - def from_docs(cls: Type["S"], defect_docs: DefectDoc, thermo: Dict, dos) -> S: - """ - # Metadata - metadata = {} - last_updated = datetime.now() - created_at = datetime.now() - - bulk_ents = {} - dfct_ents = {} - formation_energy_diagrams = {} - els = set() - for doc in defect_docs: - els = els | set(doc.defect.element_changes.keys()) - for rt, defect_entry in doc.defect_entries.items(): - if rt not in dfct_ents: - dfct_ents[rt] = [] - dfct_ents[rt].append(defect_entry) - bulk_ents[rt] = doc.bulk_entries[rt] + def from_docs(cls: Type["S"], defect_docs: DefectDoc, material_id: str) -> S: + return cls( + defect_docs=defect_docs, + material_id=material_id, + last_updated=max(d.last_updated for d in defect_docs), + created_at=datetime.now(), + ) - atomic_entries = [ComputedEntry(composition=str(el), 
energy=thermo[el]) for el in els] + @property + def element_set(self) -> set: + els = set(Element(e) for e in self.defect_docs[0].defect.structure.symbol_set) + for d in self.defect_docs: + els = els | set(d.defect.element_changes.keys()) + return els - for rt in dfct_ents: + def get_formation_energy_diagram( + self, + run_type: RunType | str, + atomic_entries: List[ComputedEntry], + phase_diagram: PhaseDiagram, + filters: Dict | None = None, + ) -> FormationEnergyDiagram: - pd = PhaseDiagram(mp_entries) - cbm, vbm = dos.get_cbm_vbm() - - adjusted_entries = _get_adjusted_pd_entries( - phase_diagram=pd, atomic_entries=atomic_entries - ) + filters = filters if filters else {} - formation_energy_diagrams[rt] = FormationEnergyDiagram.with_atomic_entries( - bulk_entry=bulk_ents[rt], defect_entries=dfct_ents[rt], - atomic_entries=atomic_entries, phase_diagram=pd, vbm=vbm, - band_gap=cbm-vbm, - ) - """ + els = set() + defect_entries = [] + bulk_entries = [] + vbms = [] + for doc in self.defect_docs: + els = els | set(doc.defect.element_changes.keys()) + defect_entries.append(doc.defect_entries.get(run_type)) + bulk_entries.append(doc.bulk_entries.get(run_type)) + vbms.append(doc.vbm.get(run_type)) + + # TODO bulks and vbms + # form en diagram takes one bulk entry and one bulk vbm + # These, however, can be different for each defect/bulk task pair + # Need to convert the differences into energy adjustments so that + # form en diagram is consistent with all of them + + return FormationEnergyDiagram.with_atomic_entries( + bulk_entry=bulk_entries[0], + defect_entries=defect_entries, + atomic_entries=atomic_entries, + phase_diagram=phase_diagram, + vbm=vbms[0], + ) - raise NotImplementedError - - def unpack(query, d): if not query: diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py index 20c0fbbf12..62e68373dc 100644 --- a/src/atomate2/cp2k/sets/defect.py +++ b/src/atomate2/cp2k/sets/defect.py @@ -17,6 +17,8 @@ @dataclass class 
DefectSetGenerator(Cp2kInputGenerator): """ + Base input set generator for defect calculations. Adds printing of the + partial density of states and the electrostatic potential. """ def get_input_updates(self, structure: Structure, *args, **kwargs) -> dict: From 3b7b41cd9d5e1585ae4a2f11556cde289fd6b18b Mon Sep 17 00:00:00 2001 From: nwinner Date: Wed, 7 Dec 2022 21:43:16 -0800 Subject: [PATCH 20/50] Defects --- src/atomate2/cp2k/builders/defect.py | 25 +++++++++++++++++++++++-- src/atomate2/cp2k/jobs/defect.py | 2 +- src/atomate2/cp2k/schemas/defect.py | 6 +++--- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index ac19b55bf0..6f27bc0ea8 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -312,7 +312,10 @@ def get_items(self) -> Iterator[List[Dict]]: # TODO Seems slow not_allowed = { doc[self.tasks.key] - for doc in self.tasks.query(criteria={self.tasks.key: {"$in": list(defect_tasks)}}) + for doc in self.tasks.query( + criteria={self.tasks.key: {"$in": list(defect_tasks)}}, + properties=['output.calcs_reversed'] + ) if TaskType(doc['output']['calcs_reversed'][0]['task_type']) not in self.allowed_dfct_types } if not_allowed: @@ -331,7 +334,10 @@ def get_items(self) -> Iterator[List[Dict]]: # TODO seems slow not_allowed = { doc[self.tasks.key] - for doc in self.tasks.query(criteria={self.tasks.key: {"$in": list(bulk_tasks)}}) + for doc in self.tasks.query( + criteria={self.tasks.key: {"$in": list(bulk_tasks)}}, + properties=['output.calcs_reversed'] + ) if TaskType(doc['output']['calcs_reversed'][0]['task_type']) not in self.allowed_bulk_types } if not_allowed: @@ -670,6 +676,9 @@ def __are_bulk_and_defect_commensurate(self, b, d): """ # TODO add settings sm = StructureMatcher( + ltol = 1e-3, + stol = 0.1, + angle_tol = 1, primitive_cell=False, scale=True, attempt_supercell=False, @@ -678,6 +687,15 @@ def 
__are_bulk_and_defect_commensurate(self, b, d): ) rtb = b.get('output').get('input').get('xc').split("+U")[0] rtd = d.get('output').get('input').get('xc').split("+U")[0] + baux = { + dat['element']: dat.get('auxiliary_basis') + for dat in b['output']['input']['atomic_kind_info']['atomic_kinds'].values() + } + daux = { + dat['element']: dat.get('auxiliary_basis') + for dat in d['output']['input']['atomic_kind_info']['atomic_kinds'].values() + } + if rtb == rtd: if sm.fit(self.__get_pristine_supercell(d), self.__get_pristine_supercell(b)): cib = Cp2kInput.from_dict(b['output']['calcs_reversed'][0]['input']['cp2k_input']) @@ -685,6 +703,9 @@ def __are_bulk_and_defect_commensurate(self, b, d): bis_ot = cib.check("force_eval/dft/scf/ot") dis_ot = cid.check("force_eval/dft/scf/ot") if (bis_ot and dis_ot) or (not bis_ot and not dis_ot): + for el in baux: + if baux[el].upper() != daux[el].upper(): + return False return True return False diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 1b712ed58e..3147532f23 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -50,7 +50,7 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | ) if isinstance(defect, Vacancy): - structure.sites[-1].properties['ghost'] = True + structure.add_site_property("ghost", [False]*(len(structure.sites)-1) + [True]) # provenance stuff recursive_update(self.write_additional_data, { diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index c606342292..0890e71231 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -20,6 +20,7 @@ DefectEntry, DefectSiteFinder, FormationEnergyDiagram, + MultiFormationEnergyDiagram ) from pymatgen.symmetry.analyzer import SpacegroupAnalyzer @@ -495,7 +496,7 @@ def get_formation_energy_diagram( atomic_entries: List[ComputedEntry], phase_diagram: PhaseDiagram, filters: Dict | None = None, - ) -> 
FormationEnergyDiagram: + ) -> MultiFormationEnergyDiagram: filters = filters if filters else {} @@ -515,7 +516,7 @@ def get_formation_energy_diagram( # Need to convert the differences into energy adjustments so that # form en diagram is consistent with all of them - return FormationEnergyDiagram.with_atomic_entries( + return MultiFormationEnergyDiagram.with_atomic_entries( bulk_entry=bulk_entries[0], defect_entries=defect_entries, atomic_entries=atomic_entries, @@ -523,7 +524,6 @@ def get_formation_energy_diagram( vbm=vbms[0], ) - def unpack(query, d): if not query: return d From 9c8b97776c4fdf6594b836d47a4efb49ae28a524 Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 13 Dec 2022 16:09:35 -0800 Subject: [PATCH 21/50] Round sc_matrix --- src/atomate2/cp2k/builders/defect.py | 2 +- src/atomate2/cp2k/schemas/defect.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 6f27bc0ea8..d5dcee920d 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -766,7 +766,7 @@ def __get_pristine_supercell(self, task): defect = MontyDecoder().process_decoded(d) s = defect.structure.copy() sc_mat = out_structure.lattice.matrix.dot(np.linalg.inv(s.lattice.matrix)) - s.make_supercell(sc_mat) + s.make_supercell(sc_mat.round()) return s else: return out_structure diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 0890e71231..0518fb0e2b 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -524,6 +524,7 @@ def get_formation_energy_diagram( vbm=vbms[0], ) + def unpack(query, d): if not query: return d From 1009f18d41224c7a0be841c81a97b0b693a8679c Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 13 Dec 2022 16:28:14 -0800 Subject: [PATCH 22/50] Module import --- src/atomate2/cp2k/schemas/defect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 0518fb0e2b..b9dca76b69 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -12,7 +12,7 @@ from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry from pymatgen.analysis.phase_diagram import PhaseDiagram from pymatgen.analysis.defects.core import Defect, DefectType -from pymatgen.analysis.defects.corrections import ( +from pymatgen.analysis.defects.corrections.freysoldt import ( get_freysoldt_correction, get_freysoldt2d_correction, ) From 4566e9d9426073872f461a6c2cfd137030d525e9 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 16 Dec 2022 11:24:52 -0800 Subject: [PATCH 23/50] defect --- src/atomate2/cp2k/builders/defect.py | 68 ++++++----- src/atomate2/cp2k/schemas/defect.py | 170 ++++++++------------------- 2 files changed, 86 insertions(+), 152 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index d5dcee920d..939bba1661 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -404,8 +404,9 @@ def get_items(self) -> Iterator[List[Dict]]: continue doc = self.__get_defect_doc(defect) item_bundle = self.__get_item_bundle(task_ids) - material_id = self.mpid_map[item_bundle[0][1][self.tasks.key]] - yield doc, item_bundle, material_id + m = next(iter(task_ids.values()))[1] + material_id = self.mpid_map[m] + yield doc, item_bundle, material_id, task_ids def process_item(self, items): """ @@ -418,20 +419,17 @@ def process_item(self, items): returns: the defect document as a dictionary """ - defect_doc, item_bundle, material_id = items + defect_doc, item_bundle, material_id, task_ids = items self.logger.info(f"Processing group of {len(item_bundle)} defects into DefectDoc") if item_bundle: - defect_tasks, bulk_tasks, dielectrics = list(zip(*item_bundle)) - if defect_doc: - defect_doc.update_all( - defect_tasks=defect_tasks, 
bulk_tasks=bulk_tasks, - dielectrics=dielectrics, query=self.defect_query - ) - else: - defect_doc = DefectDoc.from_tasks( - defect_tasks=defect_tasks, bulk_tasks=bulk_tasks, dielectrics=dielectrics, - query=self.defect_query, key=self.tasks.key, material_id=material_id - ) + for _, (defect_task, bulk_task, dielectric) in item_bundle.items(): + if not defect_doc: + defect_doc = DefectDoc.from_tasks( + defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, + query=self.defect_query, key=self.tasks.key, material_id=material_id + ) + else: + defect_doc.update_one(defect_task, bulk_task, dielectric, query=self.defect_query, key=self.tasks.key) # TODO Atomate2Store wrapper return jsanitize(defect_doc.dict(), allow_bson=True, enum_values=True, strict=True) return {} @@ -581,16 +579,15 @@ def __get_item_bundle(self, task_ids): bulk_tasks: possible bulk tasks to match to defects defect_task_group: group of equivalent defects (defined by PointDefectComparator) - returns: [(defect task dict, bulk_task_dict, dielectric dict), ...] 
+ returns: dict {run type: (defect task dict, bulk_task_dict, dielectric dict)} """ - return [ - ( - self.tasks.query_one(criteria={self.tasks.key: defect_tasks_id}, load=True), - self.tasks.query_one(criteria={self.tasks.key: bulk_tasks_id}, load=True), # load all for now - self.__get_dielectric(self._mpid_map[bulk_tasks_id]), - ) - for defect_tasks_id, bulk_tasks_id in task_ids - ] + return { + rt: ( + self.tasks.query_one(criteria={self.tasks.key: pairs[0]}, load=True), + self.tasks.query_one(criteria={self.tasks.key: pairs[1]}, load=True), + self.__get_dielectric(self._mpid_map[pairs[1]]) + ) for rt, pairs in task_ids.items() + } def _get_mpid(self, structure): """ @@ -615,17 +612,14 @@ def _get_mpid(self, structure): return m['material_id'] return None - def __match_defects_to_bulks(self, bulk_ids, defect_ids): + def __match_defects_to_bulks(self, bulk_ids, defect_ids) -> list[tuple]: """ Given task_ids of bulk and defect tasks, match the defects to a bulk task that has commensurate: - - Composition - Number of sites - Symmetry - """ - self.logger.debug(f"Finding bulk/defect task combinations.") self.logger.debug(f"Bulk tasks: {bulk_ids}") self.logger.debug(f"Defect tasks: {defect_ids}") @@ -637,6 +631,7 @@ def __match_defects_to_bulks(self, bulk_ids, defect_ids): 'output.input', 'output.nsites', 'output.output.structure', + 'output.output.energy', 'output.calcs_reversed' ] defects = list(self.tasks.query(criteria={self.tasks.key: {'$in': list(defect_ids)}}, properties=props)) @@ -653,14 +648,27 @@ def __match_defects_to_bulks(self, bulk_ids, defect_ids): ) pairs = [ - (defect[self.tasks.key], bulk[self.tasks.key]) + (defect, bulk) for bulk in bulks for defect in defects if self.__are_bulk_and_defect_commensurate(bulk, defect) ] - self.logger.debug(f"Found {len(pairs)} commensurate bulk/defect pairs") - return pairs + + def key(x): + return -x[0]['output']['nsites'], x[0]['output']['output']['energy'] + def _run_type(x): + return 
x[0]['output']['calcs_reversed'][0]['run_type'] + + rt_pairs = {} + for rt, group in groupby(pairs, key=_run_type): + rt_pairs[rt] = [ + (defect[self.tasks.key], bulk[self.tasks.key]) + for defect, bulk in sorted(list(group), key=key) + ] + + # Return only the first (best) pair for each rt + return {rt: lst[0] for rt, lst in rt_pairs.items()} # TODO Checking for same dft settings (e.g. OT/diag) is a little cumbersome. # Maybe, in future, task doc can be defined to have OT/diag as part of input summary diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index b9dca76b69..5202822c80 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -5,6 +5,8 @@ from pydantic import validator from itertools import groupby +import numpy as np + from monty.json import MontyDecoder from monty.tempfile import ScratchDir @@ -80,14 +82,6 @@ class DefectDoc(StructureMetadata): description="Run types for all the calculations that make up this material", ) - best_tasks: Mapping[RunType, Tuple[str, str]] = Field( - None, description="Task ids (defect task, bulk task) for all tasks of a RunType" - ) - - all_tasks: Mapping[RunType, List[Tuple[str, str]]] = Field( - None, description="Task ids (defect task, bulk task) for all tasks of a RunType" - ) - defect_entries: Mapping[RunType, DefectEntry] = Field( None, description="Dictionary for tracking entries for CP2K calculations" ) @@ -111,9 +105,10 @@ class DefectDoc(StructureMetadata): default_factory=datetime.utcnow, ) - metadata: Dict = Field(description="Metadata for this defect") + metadata: Dict = Field(None, description="Metadata for this defect") - def update(self, defect_task, bulk_task, dielectric, query="defect", key="task_id"): + # TODO The sorting here should also maybe be done by builder + def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="task_id"): # Metadata self.last_updated = datetime.now() @@ -122,19 +117,20 @@ def 
update(self, defect_task, bulk_task, dielectric, query="defect", key="task_i defect = self.get_defect_from_task(query=query, task=defect_task) d_id = defect_task[key] b_id = bulk_task[key] - defect_task = TaskDocument(**defect_task) - bulk_task = TaskDocument(**bulk_task) + defect_task = TaskDocument(**defect_task['output']) + bulk_task = TaskDocument(**bulk_task['output']) # TODO Atomate2Store defect_entry = self.get_defect_entry_from_tasks( defect_task, bulk_task, defect, dielectric ) bulk_entry = self.get_bulk_entry_from_task(bulk_task) rt = defect_task.calcs_reversed[0].run_type - current_largest_sc = self.defect_entries[rt].sc_entry.composition.num_atoms + tt = defect_task.calcs_reversed[0].task_type + ct = defect_task.calcs_reversed[0].calc_type + current_largest_sc = self.defect_entries[rt].sc_entry.composition.num_atoms if rt in self.defect_entries else 0 potential_largest_sc = defect_entry.sc_entry.composition.num_atoms if ( - rt not in self.defect_entries - or potential_largest_sc > current_largest_sc + potential_largest_sc > current_largest_sc or ( potential_largest_sc == current_largest_sc and defect_entry.sc_entry.energy @@ -143,18 +139,19 @@ def update(self, defect_task, bulk_task, dielectric, query="defect", key="task_i ): self.defect_entries[rt] = defect_entry self.bulk_entries[rt] = bulk_entry - self.best_tasks[rt] = (d_id, b_id) + self.run_types[rt] = d_id + self.task_types[tt] = d_id + self.calc_types[ct] = d_id - self.all_tasks[rt].append((d_id, b_id)) - self.metadata["convergence"].append((current_largest_sc, defect_entry.corrected_energy - bulk_entry.energy)) + self.task_ids = list(set(self.task_ids) | set(d_id)) - def update_all( + def update_many( self, defect_tasks: List, bulk_tasks: List, dielectrics: List, query="defect" ): for defect_task, bulk_task, dielectric in zip( defect_tasks, bulk_tasks, dielectrics ): - self.update( + self.update_one( defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, @@ -162,15 +159,7 @@ def 
update_all( ) @classmethod - def from_tasks( - cls: Type[T], - defect_tasks: List, - bulk_tasks: List, - dielectrics: List, - query="defect", - key="task_id", - material_id=None, - ) -> T: + def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", key="task_id", material_id=None) -> T: """ The standard way to create this document. Args: @@ -178,92 +167,30 @@ def from_tasks( series of DefectEntry objects. query: How to retrieve the defect object stored in the task. """ - defect_task_ids = [defect_task[key] for defect_task in defect_tasks] - bulk_task_ids = [bulk_task[key] for bulk_task in bulk_tasks] - bulk_tasks = [TaskDocument(**bulk_task["output"]) for bulk_task in bulk_tasks] - defects = [ - cls.get_defect_from_task(query=query, task=defect_task) - for defect_task in defect_tasks - ] - defect_tasks = [ - TaskDocument(**defect_task["output"]) for defect_task in defect_tasks - ] + defect_task_id = defect_task[key] + defect = cls.get_defect_from_task(query=query, task=defect_task) + defect_task = TaskDocument(**defect_task["output"]) + bulk_task = TaskDocument(**bulk_task['output']) # Metadata - last_updated = datetime.now() or max(task.last_updated for task in defect_tasks) - created_at = datetime.now() or min(task.completed_at for task in defect_tasks) + last_updated = datetime.now() + created_at = datetime.now() - run_types = { - id: task.calcs_reversed[0].run_type - for id, task in zip(defect_task_ids, defect_tasks) - } - task_types = { - id: task.calcs_reversed[0].task_type - for id, task in zip(defect_task_ids, defect_tasks) - } - calc_types = { - id: task.calcs_reversed[0].calc_type - for id, task in zip(defect_task_ids, defect_tasks) - } - - def _run_type(x): - return x[0].calcs_reversed[0].run_type.value - - def _sort(x): - # TODO return kpoint density, currently just does supercell size - return -x[0].nsites, x[0].output.energy + rt = defect_task.calcs_reversed[0].run_type + run_types = {defect_task_id: 
defect_task.calcs_reversed[0].run_type} + task_types = {defect_task_id: defect_task.calcs_reversed[0].task_type} + calc_types = {defect_task_id: defect_task.calcs_reversed[0].calc_type} - defect_entries = {} - bulk_entries = {} - all_tasks = {} - best_tasks = {} - vbm = {} metadata = {} - for key, tasks_for_runtype in groupby( - sorted( - zip( - defect_tasks, - bulk_tasks, - defects, - dielectrics, - defect_task_ids, - bulk_task_ids, - ), - key=_run_type, - ), - key=_run_type, - ): - sorted_tasks = sorted(tasks_for_runtype, key=_sort) - ents = [ - ( - cls.get_defect_entry_from_tasks( - defect_task, bulk_task, defect, dielectric - ), - cls.get_bulk_entry_from_task(bulk_task), - ) - for defect_task, bulk_task, defect, dielectric, did, bid in sorted_tasks - ] - rt = run_types[sorted_tasks[0][-2]] - vbm[rt] = sorted_tasks[0][1].output.vbm - best_tasks[rt] = (sorted_tasks[0][-2], sorted_tasks[0][-1]) - all_tasks[rt] = [(s[-2], s[-1]) for s in sorted_tasks] - defect_entries[rt], bulk_entries[rt] = ents[0] - metadata[key] = { - "convergence": [ - ( - sorted_tasks[i][0].nsites, - defect_entries[rt].corrected_energy - bulk_entries[rt].energy, - ) - for i in range(len(ents)) - ] - } - - v = next(iter(defect_entries.values())) + defect_entries = {rt: cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric)} + bulk_entries = {rt: cls.get_bulk_entry_from_task(bulk_task)} + vbm = {rt: bulk_task.output.vbm} + metadata["defect_origin"] = ( "intrinsic" if all( - el in v.defect.structure.composition - for el in v.defect.element_changes.keys() + el in defect_entries[rt].defect.structure.composition + for el in defect_entries[rt].defect.element_changes.keys() ) else "extrinsic" ) @@ -276,17 +203,15 @@ def _sort(x): "calc_types": calc_types, "last_updated": last_updated, "created_at": created_at, - "task_ids": defect_task_ids, - "all_tasks": all_tasks, - "best_tasks": best_tasks, - "material_id": material_id if material_id else v.parameters["material_id"], - 
"defect": v.defect, - "charge": v.charge_state, - "name": v.defect.name, + "task_ids": list(defect_task_id), + "material_id": material_id, + "defect": defect_entries[rt].defect, + "charge": defect_entries[rt].charge_state, + "name": defect_entries[rt].defect.name, "vbm": vbm, "metadata": metadata, } - prim = SpacegroupAnalyzer(v.defect.structure).get_primitive_standard_structure() + prim = SpacegroupAnalyzer(defect_entries[rt].defect.structure).get_primitive_standard_structure() data.update(StructureMetadata.from_structure(prim).dict()) return cls(**data) @@ -343,21 +268,22 @@ def get_correction_from_parameters(cls, parameters) -> Tuple[Dict, Dict]: corrections = {} metadata = {} for correction in ["get_freysoldt_correction", "get_freysoldt2d_correction"]: - c, m = getattr(cls, correction)(parameters) - corrections.update(c) - metadata.update(m) + corr, met = getattr(cls, correction)(parameters) + corrections.update(corr) + metadata.update(met) return corrections, metadata @classmethod def get_freysoldt_correction(cls, parameters) -> Tuple[Dict, Dict]: if parameters["charge_state"] and not parameters.get("2d"): - return get_freysoldt_correction( + es, pot, met = get_freysoldt_correction( q=parameters["charge_state"], - dielectric=parameters["dielectric"], + dielectric=np.array(parameters["dielectric"]), # TODO pmg-analysis expects np array here defect_locpot=parameters["defect_v_hartree"], bulk_locpot=parameters["bulk_v_hartree"], defect_frac_coords=parameters["defect_frac_sc_coords"], ) + return {"electrostatic": es, "potential_alignment": pot}, met return {}, {} @classmethod @@ -384,7 +310,7 @@ def get_freysoldt2d_correction(cls, parameters): lref.write_file("LOCPOT.ref") ldef.write_file("LOCPOT.def") - return get_freysoldt2d_correction( + es, pot, met = get_freysoldt2d_correction( q=parameters["charge_state"], dielectric=dielectric, defect_locpot=ldef, @@ -393,6 +319,7 @@ def get_freysoldt2d_correction(cls, parameters): energy_cutoff=520, slab_buffer=2, ) + 
return {"electrostatic": es, "potential_alignment": pot}, met return {}, {} @classmethod @@ -413,7 +340,6 @@ def get_parameters_from_tasks( defect_task: task dict for the defect calculation bulk_task: task dict for the bulk calculation """ - final_defect_structure = defect_task.structure final_bulk_structure = bulk_task.structure From 8a2eb09de632518965569b6946098a942d222041 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 16 Dec 2022 12:07:04 -0800 Subject: [PATCH 24/50] task_ids --- src/atomate2/cp2k/schemas/defect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 5202822c80..ba959f9922 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -143,7 +143,7 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta self.task_types[tt] = d_id self.calc_types[ct] = d_id - self.task_ids = list(set(self.task_ids) | set(d_id)) + self.task_ids = list(set(self.task_ids) | {d_id}) def update_many( self, defect_tasks: List, bulk_tasks: List, dielectrics: List, query="defect" From c92f11da73e91b988e373a917614f9f8f5f0cdff Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 16 Dec 2022 12:11:15 -0800 Subject: [PATCH 25/50] task_ids --- src/atomate2/cp2k/schemas/defect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index ba959f9922..097a5ea804 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -203,7 +203,7 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", "calc_types": calc_types, "last_updated": last_updated, "created_at": created_at, - "task_ids": list(defect_task_id), + "task_ids": [defect_task_id], "material_id": material_id, "defect": defect_entries[rt].defect, "charge": defect_entries[rt].charge_state, From 
e1d5873725bae8b6b071abb7eb8db2d4bd197d03 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 19 Dec 2022 10:33:23 -0800 Subject: [PATCH 26/50] Fix --- src/atomate2/cp2k/builders/defect.py | 4 ++-- src/atomate2/cp2k/schemas/defect.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 939bba1661..305c9eb5f6 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -696,11 +696,11 @@ def __are_bulk_and_defect_commensurate(self, b, d): rtb = b.get('output').get('input').get('xc').split("+U")[0] rtd = d.get('output').get('input').get('xc').split("+U")[0] baux = { - dat['element']: dat.get('auxiliary_basis') + dat['element']: dat.get('auxiliary_basis', "") for dat in b['output']['input']['atomic_kind_info']['atomic_kinds'].values() } daux = { - dat['element']: dat.get('auxiliary_basis') + dat['element']: dat.get('auxiliary_basis', "") for dat in d['output']['input']['atomic_kind_info']['atomic_kinds'].values() } diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 097a5ea804..6257c71942 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -139,9 +139,9 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta ): self.defect_entries[rt] = defect_entry self.bulk_entries[rt] = bulk_entry - self.run_types[rt] = d_id - self.task_types[tt] = d_id - self.calc_types[ct] = d_id + self.run_types[d_id] = rt + self.task_types[d_id] = tt + self.calc_types[d_id] = ct self.task_ids = list(set(self.task_ids) | {d_id}) From 302cff84efe5fc94815fbd174a357f19ac1bab9c Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 19 Dec 2022 11:16:17 -0800 Subject: [PATCH 27/50] defects --- src/atomate2/cp2k/builders/defect.py | 8 +++++--- src/atomate2/cp2k/schemas/defect.py | 10 ++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git 
a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 305c9eb5f6..029ab4ded0 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -696,13 +696,15 @@ def __are_bulk_and_defect_commensurate(self, b, d): rtb = b.get('output').get('input').get('xc').split("+U")[0] rtd = d.get('output').get('input').get('xc').split("+U")[0] baux = { - dat['element']: dat.get('auxiliary_basis', "") + dat['element']: dat.get('auxiliary_basis') for dat in b['output']['input']['atomic_kind_info']['atomic_kinds'].values() } daux = { - dat['element']: dat.get('auxiliary_basis', "") + dat['element']: dat.get('auxiliary_basis') for dat in d['output']['input']['atomic_kind_info']['atomic_kinds'].values() } + baux = baux.upper() if baux else baux + daux = daux.upper() if daux else daux if rtb == rtd: if sm.fit(self.__get_pristine_supercell(d), self.__get_pristine_supercell(b)): @@ -712,7 +714,7 @@ def __are_bulk_and_defect_commensurate(self, b, d): dis_ot = cid.check("force_eval/dft/scf/ot") if (bis_ot and dis_ot) or (not bis_ot and not dis_ot): for el in baux: - if baux[el].upper() != daux[el].upper(): + if baux[el] != daux[el]: return False return True return False diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 6257c71942..cd9392fcac 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -142,6 +142,7 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta self.run_types[d_id] = rt self.task_types[d_id] = tt self.calc_types[d_id] = ct + self.vbm[rt] = bulk_task.output.vbm self.task_ids = list(set(self.task_ids) | {d_id}) @@ -431,10 +432,11 @@ def get_formation_energy_diagram( bulk_entries = [] vbms = [] for doc in self.defect_docs: - els = els | set(doc.defect.element_changes.keys()) - defect_entries.append(doc.defect_entries.get(run_type)) - bulk_entries.append(doc.bulk_entries.get(run_type)) - 
vbms.append(doc.vbm.get(run_type)) + if doc.defect_entries.get(run_type): + els = els | set(doc.defect.element_changes.keys()) + defect_entries.append(doc.defect_entries.get(run_type)) + bulk_entries.append(doc.bulk_entries.get(run_type)) + vbms.append(doc.vbm.get(run_type)) # TODO bulks and vbms # form en diagram takes one bulk entry and one bulk vbm From 0abd29de3e5db8b5adde0589dcc73b2009f34998 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 19 Dec 2022 11:17:40 -0800 Subject: [PATCH 28/50] No upper --- src/atomate2/cp2k/builders/defect.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 029ab4ded0..ef24a33948 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -703,8 +703,6 @@ def __are_bulk_and_defect_commensurate(self, b, d): dat['element']: dat.get('auxiliary_basis') for dat in d['output']['input']['atomic_kind_info']['atomic_kinds'].values() } - baux = baux.upper() if baux else baux - daux = daux.upper() if daux else daux if rtb == rtd: if sm.fit(self.__get_pristine_supercell(d), self.__get_pristine_supercell(b)): From e4e456a357545d06c9679028a8094f63d57c4dfe Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 20 Dec 2022 12:21:40 -0800 Subject: [PATCH 29/50] builder test --- src/atomate2/cp2k/builders/defect.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index ef24a33948..51be413765 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -486,6 +486,7 @@ def __filter_and_group_tasks(self, tasks): # TODO remove oxidation state because spins/oxidation cause errors in comparison. 
# but they shouldnt if those props are close in value d['structure'].remove_oxidation_states() + d['defect'].user_charges = [d['structure'].charge] def key(x): s = x['defect'].structure @@ -505,10 +506,6 @@ def are_equal(x, y): if x['structure'].charge != y['structure'].charge: return False - # Are the defect objects eq. - if x['defect'] == y['defect']: - return True - # Are the final structures equal # element-changes needed for ghost vacancies, since sm.fit can't distinguish them if x['defect'].element_changes == y['defect'].element_changes and \ @@ -555,10 +552,24 @@ def __get_defect_doc(self, defect): for doc in self.defects.query(criteria={'material_id': material_id}, properties=None) ] for doc in docs: - if defect == doc.defect: + if self.__defect_match(defect, doc.defect): return doc return None + def __defect_match(self, x, y): + + sm = StructureMatcher() + + # Defects with diff charges return true for the native __eq__ + if x.user_charges[0] != y.user_charges[0]: + return False + + if x.element_changes == y.element_changes and \ + sm.fit(x.defect_structure, y.defect_structure): + return True + + return False + # TODO should move to returning dielectric doc or continue returning the total diel tensor? 
def __get_dielectric(self, key): """ From 66ef79336b86daeb341c98cb2bdfa326ffaf22c2 Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 20 Dec 2022 13:54:21 -0800 Subject: [PATCH 30/50] 2d debug --- src/atomate2/cp2k/builders/defect.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 51be413765..94a1ea37f8 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -403,6 +403,8 @@ def get_items(self) -> Iterator[List[Dict]]: if not task_ids: continue doc = self.__get_defect_doc(defect) + if doc: + self.logger.info(f"DOC IS {doc.defect.__repr__()}") item_bundle = self.__get_item_bundle(task_ids) m = next(iter(task_ids.values()))[1] material_id = self.mpid_map[m] @@ -557,13 +559,12 @@ def __get_defect_doc(self, defect): return None def __defect_match(self, x, y): - + """Match two defects, including there charges""" sm = StructureMatcher() - - # Defects with diff charges return true for the native __eq__ if x.user_charges[0] != y.user_charges[0]: return False + # Elem. 
changes needed to distinguish ghost vacancies if x.element_changes == y.element_changes and \ sm.fit(x.defect_structure, y.defect_structure): return True From abf89aafbbdae265bc023753377502775f1879c0 Mon Sep 17 00:00:00 2001 From: nwinner Date: Wed, 21 Dec 2022 16:02:58 -0800 Subject: [PATCH 31/50] task ids --- src/atomate2/cp2k/builders/defect.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 94a1ea37f8..d2a135763b 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -408,7 +408,7 @@ def get_items(self) -> Iterator[List[Dict]]: item_bundle = self.__get_item_bundle(task_ids) m = next(iter(task_ids.values()))[1] material_id = self.mpid_map[m] - yield doc, item_bundle, material_id, task_ids + yield doc, item_bundle, material_id, defect_task_group def process_item(self, items): """ @@ -432,6 +432,7 @@ def process_item(self, items): ) else: defect_doc.update_one(defect_task, bulk_task, dielectric, query=self.defect_query, key=self.tasks.key) # TODO Atomate2Store wrapper + defect_doc.task_ids = list(set(task_ids + defect_doc.task_ids)) # TODO should I store the bulk id too? 
return jsanitize(defect_doc.dict(), allow_bson=True, enum_values=True, strict=True) return {} From a852fcbee12221c74e7df245b07eb30374a7c6ad Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 23 Dec 2022 10:15:44 -0800 Subject: [PATCH 32/50] First pass at defect validation schema --- src/atomate2/cp2k/schemas/defect.py | 65 ++++++++++++++++++----------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index cd9392fcac..26644009eb 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -11,9 +11,11 @@ from monty.tempfile import ScratchDir from pymatgen.core import Structure, Element +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry +from pymatgen.io.cp2k.utils import get_truncated_coulomb_cutoff from pymatgen.analysis.phase_diagram import PhaseDiagram -from pymatgen.analysis.defects.core import Defect, DefectType +from pymatgen.analysis.defects.core import Defect from pymatgen.analysis.defects.corrections.freysoldt import ( get_freysoldt_correction, get_freysoldt2d_correction, @@ -21,16 +23,12 @@ from pymatgen.analysis.defects.thermo import ( DefectEntry, DefectSiteFinder, - FormationEnergyDiagram, MultiFormationEnergyDiagram ) -from pymatgen.symmetry.analyzer import SpacegroupAnalyzer - -from emmet.core.utils import ValueEnum +from pymatgen.analysis.defects.finder import DefectSiteFinder from atomate2 import SETTINGS from atomate2.common.schemas.structure import StructureMetadata -from atomate2.cp2k.schemas.calc_types.utils import run_type, task_type, calc_type from atomate2.cp2k.schemas.calc_types.enums import CalcType, TaskType, RunType from atomate2.cp2k.schemas.task import TaskDocument @@ -38,7 +36,7 @@ T = TypeVar("T", bound="DefectDoc") S = TypeVar("S", bound="DefectiveMaterialDoc") - +V = TypeVar("V", bound="DefectValidation") class 
DefectDoc(StructureMetadata): """ @@ -49,26 +47,19 @@ class DefectDoc(StructureMetadata): """ property_name: ClassVar[str] = "defect" - defect: Defect = Field( None, description="Pymatgen defect object for this defect doc" ) - charge: int = Field(None, description="Charge state for this defect") - name: str = Field( None, description="Name of this defect as generated by the defect object" ) - material_id: str = Field( None, description="Unique material ID for the bulk material" ) # TODO Change to MPID - - # TODO Should it be all (defect + bulk) ids? task_ids: List[str] = Field( None, description="All defect task ids used in creating this defect doc." ) - calc_types: Mapping[str, CalcType] = Field( # type: ignore None, description="Calculation types for all the calculations that make up this material", @@ -81,31 +72,26 @@ class DefectDoc(StructureMetadata): None, description="Run types for all the calculations that make up this material", ) - defect_entries: Mapping[RunType, DefectEntry] = Field( None, description="Dictionary for tracking entries for CP2K calculations" ) - bulk_entries: Mapping[RunType, ComputedStructureEntry] = Field( None, description="Computed structure entry for the bulk calc." ) - vbm: Mapping[RunType, float] = Field( None, description="VBM for bulk task of each run type. 
Used for aligning potential", ) - last_updated: datetime = Field( description="Timestamp for when this document was last updated", default_factory=datetime.utcnow, ) - created_at: datetime = Field( description="Timestamp for when this material document was first created", default_factory=datetime.utcnow, ) - metadata: Dict = Field(None, description="Metadata for this defect") + valid: Mapping[RunType, Dict] = Field(None, description="Whether each run type has a valid entry") # TODO The sorting here should also maybe be done by builder def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="task_id"): @@ -119,7 +105,7 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta b_id = bulk_task[key] defect_task = TaskDocument(**defect_task['output']) bulk_task = TaskDocument(**bulk_task['output']) # TODO Atomate2Store - defect_entry = self.get_defect_entry_from_tasks( + defect_entry, valid = self.get_defect_entry_from_tasks( defect_task, bulk_task, defect, dielectric ) bulk_entry = self.get_bulk_entry_from_task(bulk_task) @@ -143,6 +129,7 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta self.task_types[d_id] = tt self.calc_types[d_id] = ct self.vbm[rt] = bulk_task.output.vbm + self.valid[rt] = valid self.task_ids = list(set(self.task_ids) | {d_id}) @@ -183,7 +170,9 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", calc_types = {defect_task_id: defect_task.calcs_reversed[0].calc_type} metadata = {} - defect_entries = {rt: cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric)} + defect_entry, valid = cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric) + valid = {rt: valid} + defect_entries = {rt: defect_entry} bulk_entries = {rt: cls.get_bulk_entry_from_task(bulk_task)} vbm = {rt: bulk_task.output.vbm} @@ -211,6 +200,7 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", "name": 
defect_entries[rt].defect.name, "vbm": vbm, "metadata": metadata, + "valid": valid, } prim = SpacegroupAnalyzer(defect_entries[rt].defect.structure).get_primitive_standard_structure() data.update(StructureMetadata.from_structure(prim).dict()) @@ -254,8 +244,8 @@ def get_defect_entry_from_tasks( sc_defect_frac_coords=parameters["defect_frac_sc_coords"], corrections=corrections, ) - - return defect_entry + valid = DefectValidation().process_entry(parameters) + return defect_entry, valid @classmethod def get_bulk_entry_from_task(cls, bulk_task: TaskDocument): @@ -292,7 +282,7 @@ def get_freysoldt2d_correction(cls, parameters): from pymatgen.io.vasp.outputs import VolumetricData as VaspVolumetricData - if parameters["charge_state"] and parameters.get("2d"): + if False: #parameters["charge_state"] and parameters.get("2d"): eps_parallel = ( parameters["dielectric"][0][0] + parameters["dielectric"][1][1] ) / 2 @@ -361,6 +351,7 @@ def get_parameters_from_tasks( parameters = { "defect_energy": defect_task.output.energy, "bulk_energy": bulk_task.output.energy, + "initial_defect_structure": defect_task.input.structure, "final_defect_structure": final_defect_structure, "charge_state": defect_task.output.structure.charge, "defect_frac_sc_coords": defect_frac_sc_coords, @@ -377,6 +368,30 @@ def get_parameters_from_tasks( return parameters +class DefectValidation(BaseModel): + """Validate a task document for defect processing""" + + MAX_ATOMIC_RELAXATION: float = Field( + 0.02, + description="Threshold for the mean absolute displacement of atoms outside a defect's radius of isolution" + ) + + def process_entry(self, parameters) -> V: + v = {} + v.update(self._atomic_relaxation(parameters)) + return v + + def _atomic_relaxation(self, parameters): + in_struc = parameters["initial_defect_structure"] + out_struc = parameters["final_defect_structure"] + sites = out_struc.get_sites_in_sphere(parameters['defect_frac_sc_coords'], get_truncated_coulomb_cutoff(in_struc), 
include_index=True) + inside_sphere = [site.index for site in sites] + outside_sphere = [i for i in range(len(out_struc)) if i not in inside_sphere] + distances = np.array([site.distance(in_struc[i]) for i, site in enumerate(out_struc)]) + distances_outside = distances[outside_sphere] + if np.mean(distances_outside) > self.MAX_ATOMIC_RELAXATION: + return {"atomic_relaxation": False} + return {"atomic_relaxation": True} class DefectiveMaterialDoc(StructureMetadata): """Document containing all / many defect tasks for a single material ID""" From 32a83e9cb449086dbba82ee7bbddc84ea6ff0b86 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 23 Dec 2022 10:30:03 -0800 Subject: [PATCH 33/50] Store bulk defect pair drop others --- src/atomate2/cp2k/schemas/defect.py | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 26644009eb..a0835f5e8c 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -57,21 +57,11 @@ class DefectDoc(StructureMetadata): material_id: str = Field( None, description="Unique material ID for the bulk material" ) # TODO Change to MPID + defect_ids: Mapping[RunType, str] = Field(None, description="Map run types of defect entry to task id") + bulk_ids: Mapping[RunType, str] = Field(None, description="Map run types of bulk entry to task id") task_ids: List[str] = Field( None, description="All defect task ids used in creating this defect doc." 
) - calc_types: Mapping[str, CalcType] = Field( # type: ignore - None, - description="Calculation types for all the calculations that make up this material", - ) - task_types: Mapping[str, TaskType] = Field( - None, - description="Task types for all the calculations that make up this material", - ) - run_types: Mapping[str, RunType] = Field( - None, - description="Run types for all the calculations that make up this material", - ) defect_entries: Mapping[RunType, DefectEntry] = Field( None, description="Dictionary for tracking entries for CP2K calculations" ) @@ -93,7 +83,6 @@ class DefectDoc(StructureMetadata): metadata: Dict = Field(None, description="Metadata for this defect") valid: Mapping[RunType, Dict] = Field(None, description="Whether each run type has a valid entry") - # TODO The sorting here should also maybe be done by builder def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="task_id"): # Metadata @@ -124,10 +113,9 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta ) ): self.defect_entries[rt] = defect_entry + self.defect_ids[rt] = d_id self.bulk_entries[rt] = bulk_entry - self.run_types[d_id] = rt - self.task_types[d_id] = tt - self.calc_types[d_id] = ct + self.bulk_ids[b_id] = b_id self.vbm[rt] = bulk_task.output.vbm self.valid[rt] = valid @@ -158,6 +146,7 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", defect_task_id = defect_task[key] defect = cls.get_defect_from_task(query=query, task=defect_task) defect_task = TaskDocument(**defect_task["output"]) + bulk_task_id = bulk_task[key] bulk_task = TaskDocument(**bulk_task['output']) # Metadata @@ -165,9 +154,6 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", created_at = datetime.now() rt = defect_task.calcs_reversed[0].run_type - run_types = {defect_task_id: defect_task.calcs_reversed[0].run_type} - task_types = {defect_task_id: defect_task.calcs_reversed[0].task_type} - 
calc_types = {defect_task_id: defect_task.calcs_reversed[0].calc_type} metadata = {} defect_entry, valid = cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric) @@ -188,9 +174,8 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", data = { "defect_entries": defect_entries, "bulk_entries": bulk_entries, - "run_types": run_types, - "task_types": task_types, - "calc_types": calc_types, + "defect_ids": {rt: defect_task_id}, + "bulk_ids": {rt: bulk_task_id}, "last_updated": last_updated, "created_at": created_at, "task_ids": [defect_task_id], From eaa6361ea265cef65049ba61da01dc96fed81b04 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 23 Dec 2022 17:18:58 -0800 Subject: [PATCH 34/50] Defects --- src/atomate2/cp2k/builders/defect.py | 81 +++++++++++++++++----------- src/atomate2/cp2k/schemas/defect.py | 32 ++++++----- 2 files changed, 68 insertions(+), 45 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index d2a135763b..9f2ccce408 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -1,10 +1,7 @@ from datetime import datetime -from itertools import chain, groupby, combinations -from re import A -from tkinter import W +from itertools import groupby from typing import Dict, Iterator, List, Literal, Optional -from copy import deepcopy -from math import ceil + import numpy as np from monty.json import MontyDecoder, jsanitize @@ -14,7 +11,6 @@ from pymatgen.core import Structure from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher -from pymatgen.electronic_structure.dos import CompleteDos from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from pymatgen.io.cp2k.inputs import Cp2kInput @@ -24,7 +20,6 @@ from atomate2.cp2k.schemas.task import TaskDocument from atomate2.cp2k.schemas.defect import DefectDoc, DefectiveMaterialDoc from atomate2.cp2k.schemas.calc_types import TaskType -from 
atomate2.cp2k.schemas.calc_types.utils import run_type from emmet.core.electronic_structure import ElectronicStructureDoc @@ -64,7 +59,7 @@ class DefectBuilder(Builder): TaskType.Structure_Optimization.value, TaskType.Static.value ] - + def __init__( self, tasks: Store, @@ -253,7 +248,7 @@ def prechunk(self, number_splits: int) -> Iterator[Dict]: for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) } - N = ceil(len(defect_tasks) / number_splits) + N = np.ceil(len(defect_tasks) / number_splits) for task_chunk in grouper(defect_tasks, N): yield {"query": {"task_id": {"$in": task_chunk + list(bulk_tasks)}}} @@ -372,7 +367,6 @@ def get_items(self) -> Iterator[List[Dict]]: for d in self.defects.query({}, ["task_ids"]) for t_id in d.get("task_ids", []) } - all_tasks = defect_tasks | bulk_tasks self.logger.debug("All tasks: {}".format(len(all_tasks))) @@ -496,25 +490,11 @@ def key(x): return get_sg(s), s.composition.reduced_composition def are_equal(x, y): - """ - To decide if defects are equal. Either the defect objects are - equal, OR two different defect objects relaxed to the same final structure - (common with interstitials). - - TODO Need a way to do the output structure comparison for a X atom defect cell - TODO which can be embedded in a Y atom defect cell up to tolerance. 
- """ - - # Defects with diff charges return true for the native __eq__ + """To decide if defects are equal.""" if x['structure'].charge != y['structure'].charge: return False - - # Are the final structures equal - # element-changes needed for ghost vacancies, since sm.fit can't distinguish them - if x['defect'].element_changes == y['defect'].element_changes and \ - sm.fit(x['structure'], y['structure']): + if x['defect'] == y['defect']: return True - return False sorted_s_list = sorted(enumerate(defects), key=lambda x: key(x[1])) @@ -533,7 +513,7 @@ def are_equal(x, y): (defects[i]['defect'], [defects[i][self.tasks.key] for i in matches]) ) - self.logger.debug(f"All groups {all_groups}") + self.logger.debug(f"{len(all_groups)} groups") return all_groups def __get_defect_from_task(self, task): @@ -654,7 +634,7 @@ def __match_defects_to_bulks(self, bulk_ids, defect_ids) -> list[tuple]: self.tasks.query( criteria={ self.tasks.key: {'$in': list(bulk_ids)}, - 'output.composition_reduced': jsanitize(ps.composition.to_reduced_dict), + 'output.formula_pretty': jsanitize(ps.composition.reduced_formula), }, properties=props ) @@ -792,10 +772,7 @@ def __get_pristine_supercell(self, task): else: return out_structure -#TODO Major problem with this builder. 
materials store is used to sync the diel, elec, and pd with a single material id -#TODO This is a problem because the material id in vasp store is not synced to cp2k store -#TODO Also the chempots needed to adjust entries must come from cp2k, but you need to give vasp to sync the others -#TODO Thermo store is being replaced with a manual definition of chempots until further notice + class DefectiveMaterialBuilder(Builder): """ @@ -960,6 +937,46 @@ def __get_thermos(self, composition) -> List: return list(self.thermo.query(criteria={'elements': {"$size": 1}}, properties=None)) +class DefectValidator(Builder): + + def __init__( + self, + tasks: Store, + defect_validation: Store, + chunk_size: int = 1000, + defect_query = 'output.additional_json.info.defect', + ): + self.tasks = tasks + self.defect_validation = defect_validation + self.chunk_size = chunk_size + self.defect_query = defect_query + super().__init__(sources=tasks, targets=defect_validation, chunk_size=chunk_size) + + def get_items(self): + self.logger.info("Getting tasks") + tids = list(self.tasks.query(criteria={self.defect_query: {"$exists": True}}, properties=[self.tasks.key])) + self.logger.info(f"{len(tids)} to process") + for t in self.tasks.query(): + yield t + + def process_item(self, item): + from atomate2.cp2k.schemas.defect import DefectValidation + tid = item[self.tasks.key] + return jsanitize(DefectValidation.process_task(item, tid).dict(), allow_bson=True, enum_values=True, strict=True) + + def update_targets(self, items: List): + """ + Inserts the new task_types into the task_types collection + """ + items = [item for item in items if item] + if len(items) > 0: + self.logger.info(f"Updating {len(items)} defects") + self.defect_validation.update(items, key=self.defect_validation.key) + else: + self.logger.info("No items to update") + return super().update_targets(items) + + def unpack(query, d): """ Unpack a mongo-style query into dictionary retrieval diff --git 
a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index a0835f5e8c..5740f9e6ed 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -1,6 +1,6 @@ from datetime import datetime from tokenize import group -from typing import ClassVar, TypeVar, Type, Dict, Tuple, Mapping, List +from typing import ClassVar, TypeVar, Type, Dict, Tuple, Mapping, List, Callable from pydantic import BaseModel, Field from pydantic import validator from itertools import groupby @@ -15,7 +15,7 @@ from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry from pymatgen.io.cp2k.utils import get_truncated_coulomb_cutoff from pymatgen.analysis.phase_diagram import PhaseDiagram -from pymatgen.analysis.defects.core import Defect +from pymatgen.analysis.defects.core import Defect, Adsorbate from pymatgen.analysis.defects.corrections.freysoldt import ( get_freysoldt_correction, get_freysoldt2d_correction, @@ -115,7 +115,7 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta self.defect_entries[rt] = defect_entry self.defect_ids[rt] = d_id self.bulk_entries[rt] = bulk_entry - self.bulk_ids[b_id] = b_id + self.bulk_ids[rt] = b_id self.vbm[rt] = bulk_task.output.vbm self.valid[rt] = valid @@ -229,6 +229,7 @@ def get_defect_entry_from_tasks( sc_defect_frac_coords=parameters["defect_frac_sc_coords"], corrections=corrections, ) + parameters['defect'] = defect valid = DefectValidation().process_entry(parameters) return defect_entry, valid @@ -361,9 +362,12 @@ class DefectValidation(BaseModel): description="Threshold for the mean absolute displacement of atoms outside a defect's radius of isolution" ) + DESORPTION_DISTANCE: float = Field(3, description="Distance to consider adsorbate as desorbed") + def process_entry(self, parameters) -> V: v = {} v.update(self._atomic_relaxation(parameters)) + v.update(self._desorption(parameters)) return v def _atomic_relaxation(self, parameters): @@ 
-378,6 +382,15 @@ def _atomic_relaxation(self, parameters): return {"atomic_relaxation": False} return {"atomic_relaxation": True} + def _desorption(self, parameters): + if isinstance(parameters['defect'], Adsorbate): + out_struc = parameters["final_defect_structure"] + defect_site = out_struc.get_sites_in_sphere(parameters['defect_frac_sc_coords'], 0.1, include_index=True)[0] + distances = [defect_site.distance(site) for site in out_struc] + if all(d > self.DESORPTION_DISTANCE for d in distances): + return {'desorption': False} + return {'desorption': True} + class DefectiveMaterialDoc(StructureMetadata): """Document containing all / many defect tasks for a single material ID""" @@ -422,28 +435,21 @@ def get_formation_energy_diagram( run_type: RunType | str, atomic_entries: List[ComputedEntry], phase_diagram: PhaseDiagram, - filters: Dict | None = None, + filters: List[Callable, None] = None, ) -> MultiFormationEnergyDiagram: - filters = filters if filters else {} - + filters = filters if filters else lambda _: True els = set() defect_entries = [] bulk_entries = [] vbms = [] - for doc in self.defect_docs: + for doc in filter(lambda x: all(f(x) for f in filters), self.defect_docs): if doc.defect_entries.get(run_type): els = els | set(doc.defect.element_changes.keys()) defect_entries.append(doc.defect_entries.get(run_type)) bulk_entries.append(doc.bulk_entries.get(run_type)) vbms.append(doc.vbm.get(run_type)) - # TODO bulks and vbms - # form en diagram takes one bulk entry and one bulk vbm - # These, however, can be different for each defect/bulk task pair - # Need to convert the differences into energy adjustments so that - # form en diagram is consistent with all of them - return MultiFormationEnergyDiagram.with_atomic_entries( bulk_entry=bulk_entries[0], defect_entries=defect_entries, From a9388e8ff7c3068d69c46bc265da469fb48283c1 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 30 Dec 2022 21:44:20 -0800 Subject: [PATCH 35/50] Updates --- 
src/atomate2/cp2k/schemas/defect.py | 12 +++++++++--- src/atomate2/cp2k/sets/defect.py | 6 ++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 5740f9e6ed..aa946c3d2a 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -365,12 +365,14 @@ class DefectValidation(BaseModel): DESORPTION_DISTANCE: float = Field(3, description="Distance to consider adsorbate as desorbed") def process_entry(self, parameters) -> V: + """Gets a dictionary of {validator: result}. Result true for passing, false for failing.""" v = {} v.update(self._atomic_relaxation(parameters)) v.update(self._desorption(parameters)) return v def _atomic_relaxation(self, parameters): + """Returns false if the mean displacement outside the isolation radius is greater than the cutoff""" in_struc = parameters["initial_defect_structure"] out_struc = parameters["final_defect_structure"] sites = out_struc.get_sites_in_sphere(parameters['defect_frac_sc_coords'], get_truncated_coulomb_cutoff(in_struc), include_index=True) @@ -383,10 +385,14 @@ def _atomic_relaxation(self, parameters): return {"atomic_relaxation": True} def _desorption(self, parameters): + """Returns false if any atom is too far from all other atoms.""" if isinstance(parameters['defect'], Adsorbate): out_struc = parameters["final_defect_structure"] - defect_site = out_struc.get_sites_in_sphere(parameters['defect_frac_sc_coords'], 0.1, include_index=True)[0] - distances = [defect_site.distance(site) for site in out_struc] + defect_site = out_struc.get_sites_in_sphere( + out_struc.lattice.get_cartesian_coords(parameters['defect_frac_sc_coords']), + 0.1, include_index=True + )[0] + distances = [defect_site.distance(site) for i, site in enumerate(out_struc) if i != defect_site.index] if all(d > self.DESORPTION_DISTANCE for d in distances): return {'desorption': False} return {'desorption': True} @@ -435,7 +441,7 @@ def 
get_formation_energy_diagram( run_type: RunType | str, atomic_entries: List[ComputedEntry], phase_diagram: PhaseDiagram, - filters: List[Callable, None] = None, + filters: List[Callable] | None = None, ) -> MultiFormationEnergyDiagram: filters = filters if filters else lambda _: True diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py index 62e68373dc..0fc8574c86 100644 --- a/src/atomate2/cp2k/sets/defect.py +++ b/src/atomate2/cp2k/sets/defect.py @@ -10,7 +10,6 @@ from atomate2.cp2k.sets.base import Cp2kInputGenerator, multiple_input_updators from atomate2.cp2k.sets.core import ( HybridSetGenerator, StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator, - HybridStaticSetGenerator, HybridRelaxSetGenerator, HybridCellOptSetGenerator ) logger = logging.getLogger(__name__) @@ -22,9 +21,8 @@ class DefectSetGenerator(Cp2kInputGenerator): """ def get_input_updates(self, structure: Structure, *args, **kwargs) -> dict: - """ - """ - return {'print_v_hartree': True, "print_pdos": True} + """Get input updates""" + return {'print_v_hartree': True, "print_pdos": True, "print_dos": True} @dataclass @multiple_input_updators() From 38862bbe05fb670fad833da28033978df1df9caf Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 3 Jan 2023 09:12:56 -0800 Subject: [PATCH 36/50] Make list --- src/atomate2/cp2k/schemas/defect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index aa946c3d2a..4e6120dfab 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -444,7 +444,7 @@ def get_formation_energy_diagram( filters: List[Callable] | None = None, ) -> MultiFormationEnergyDiagram: - filters = filters if filters else lambda _: True + filters = filters if filters else [lambda _: True] els = set() defect_entries = [] bulk_entries = [] From a52a676ec2dbd08be06aded0f399e5b0e4833e35 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 6 
Jan 2023 10:30:15 -0800 Subject: [PATCH 37/50] basic def test --- tests/cp2k/sets/test_defect.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 tests/cp2k/sets/test_defect.py diff --git a/tests/cp2k/sets/test_defect.py b/tests/cp2k/sets/test_defect.py new file mode 100644 index 0000000000..631c6825f2 --- /dev/null +++ b/tests/cp2k/sets/test_defect.py @@ -0,0 +1,18 @@ +import pytest + +def test_input_generators(si_structure): + from atomate2.cp2k.sets.defect import ( + DefectSetGenerator, DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, + DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator + ) + + # check that all generators give the correct printing + for gen in [ + DefectSetGenerator(), DefectStaticSetGenerator(), DefectRelaxSetGenerator(), + DefectCellOptSetGenerator(), DefectHybridStaticSetGenerator(), + DefectHybridRelaxSetGenerator(), DefectHybridCellOptSetGenerator() + ]: + input_set = gen.get_input_set(si_structure) + assert input_set.cp2k_input.check("FORCE_EVAL/DFT/PRINT/PDOS") or input_set.cp2k_input.check("FORCE_EVAL/DFT/PRINT/DOS") + assert input_set.cp2k_input.check("FORCE_EVAL/DFT/PRINT/V_HARTREE_CUBE") + From 802308eb919f953eb7fddacf8597638dace310af Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 6 Jan 2023 13:42:37 -0800 Subject: [PATCH 38/50] Use __post_init__ instead --- src/atomate2/cp2k/sets/defect.py | 53 +++++++++++++++++++------------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py index 0fc8574c86..33f06e5019 100644 --- a/src/atomate2/cp2k/sets/defect.py +++ b/src/atomate2/cp2k/sets/defect.py @@ -7,16 +7,19 @@ from pymatgen.core import Structure -from atomate2.cp2k.sets.base import Cp2kInputGenerator, multiple_input_updators +from atomate2.cp2k.sets.base import Cp2kInputGenerator from atomate2.cp2k.sets.core import ( - HybridSetGenerator, StaticSetGenerator, 
RelaxSetGenerator, CellOptSetGenerator, -) + StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator, + HybridStaticSetGenerator, HybridRelaxSetGenerator, HybridCellOptSetGenerator +) logger = logging.getLogger(__name__) +DEFECT_SET_UPDATES = {'print_v_hartree': True, "print_pdos": True, "print_dos": True} + @dataclass class DefectSetGenerator(Cp2kInputGenerator): """ - Base input set generator for defect calculations. Adds printing of the + Base input set generator for defect calculations. Adds printing of the partial density of states and the electrostatic potential. """ @@ -25,31 +28,37 @@ def get_input_updates(self, structure: Structure, *args, **kwargs) -> dict: return {'print_v_hartree': True, "print_pdos": True, "print_dos": True} @dataclass -@multiple_input_updators() -class DefectStaticSetGenerator(DefectSetGenerator, StaticSetGenerator): - pass +class DefectStaticSetGenerator(StaticSetGenerator): + + def __post_init__(self): + self.user_input_settings.update(DEFECT_SET_UPDATES) @dataclass -@multiple_input_updators() -class DefectRelaxSetGenerator(DefectSetGenerator, RelaxSetGenerator): - pass +class DefectRelaxSetGenerator(RelaxSetGenerator): + + def __post_init__(self): + self.user_input_settings.update(DEFECT_SET_UPDATES) @dataclass -@multiple_input_updators() -class DefectCellOptSetGenerator(DefectSetGenerator, CellOptSetGenerator): - pass +class DefectCellOptSetGenerator(CellOptSetGenerator): + + def __post_init__(self): + self.user_input_settings.update(DEFECT_SET_UPDATES) @dataclass -@multiple_input_updators() -class DefectHybridStaticSetGenerator(DefectSetGenerator, StaticSetGenerator, HybridSetGenerator): - pass +class DefectHybridStaticSetGenerator(HybridStaticSetGenerator): + + def __post_init__(self): + self.user_input_settings.update(DEFECT_SET_UPDATES) @dataclass -@multiple_input_updators() -class DefectHybridRelaxSetGenerator(DefectSetGenerator, RelaxSetGenerator, HybridSetGenerator): - pass +class 
DefectHybridRelaxSetGenerator(HybridRelaxSetGenerator): + + def __post_init__(self): + self.user_input_settings.update(DEFECT_SET_UPDATES) @dataclass -@multiple_input_updators() -class DefectHybridCellOptSetGenerator(DefectSetGenerator, CellOptSetGenerator, HybridSetGenerator): - pass \ No newline at end of file +class DefectHybridCellOptSetGenerator(HybridCellOptSetGenerator): + + def __post_init__(self): + self.user_input_settings.update(DEFECT_SET_UPDATES) \ No newline at end of file From 791381c73b846807dbd27f915c467929e6746c94 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 9 Jan 2023 10:54:08 -0800 Subject: [PATCH 39/50] whitespace --- tests/cp2k/sets/test_defect.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/cp2k/sets/test_defect.py b/tests/cp2k/sets/test_defect.py index 631c6825f2..d2f42b505b 100644 --- a/tests/cp2k/sets/test_defect.py +++ b/tests/cp2k/sets/test_defect.py @@ -5,14 +5,13 @@ def test_input_generators(si_structure): DefectSetGenerator, DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator ) - + # check that all generators give the correct printing for gen in [ - DefectSetGenerator(), DefectStaticSetGenerator(), DefectRelaxSetGenerator(), + DefectSetGenerator(), DefectStaticSetGenerator(), DefectRelaxSetGenerator(), DefectCellOptSetGenerator(), DefectHybridStaticSetGenerator(), DefectHybridRelaxSetGenerator(), DefectHybridCellOptSetGenerator() ]: input_set = gen.get_input_set(si_structure) assert input_set.cp2k_input.check("FORCE_EVAL/DFT/PRINT/PDOS") or input_set.cp2k_input.check("FORCE_EVAL/DFT/PRINT/DOS") assert input_set.cp2k_input.check("FORCE_EVAL/DFT/PRINT/V_HARTREE_CUBE") - From 2928336c6ac741ea3d2c02d49771f013f1d92b0b Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 10 Jan 2023 11:17:18 -0800 Subject: [PATCH 40/50] Defect jobs --- src/atomate2/cp2k/flows/defect.py | 62 
+++++++++++++------------------ src/atomate2/cp2k/jobs/defect.py | 43 ++++++++++----------- 2 files changed, 46 insertions(+), 59 deletions(-) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index 5567d60502..79e5ae27f3 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -19,20 +19,9 @@ from pymatgen.analysis.defects.thermo import DefectEntry from pymatgen.analysis.defects.supercells import get_sc_fromstruct -from atomate2.cp2k.jobs.base import BaseCp2kMaker -from atomate2.cp2k.jobs.core import StaticMaker, HybridStaticMaker, RelaxMaker, HybridRelaxMaker, CellOptMaker, HybridCellOptMaker - -from atomate2.cp2k.schemas.defect import DefectDoc -from atomate2.cp2k.sets.core import ( - StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator -) - -from atomate2.cp2k.sets.defect import ( - DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, - DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator -) +from atomate2.cp2k.jobs.base import BaseCp2kMaker from atomate2.cp2k.jobs.defect import ( - BaseDefectMaker, DefectStaticMaker, DefectRelaxMaker, DefectCellOptMaker, + DefectStaticMaker, DefectRelaxMaker, DefectCellOptMaker, DefectHybridStaticMaker, DefectHybridRelaxMaker, DefectHybridCellOptMaker ) @@ -43,23 +32,23 @@ @dataclass class DefectHybridStaticFlowMaker(HybridStaticFlowMaker): - initialize_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) + pbe_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) hybrid_maker: BaseCp2kMaker = field(default=DefectHybridStaticMaker( copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) ) -@dataclass +@dataclass class DefectHybridRelaxFlowMaker(HybridRelaxFlowMaker): - initialize_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) + pbe_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) hybrid_maker: BaseCp2kMaker = 
field(default=DefectHybridRelaxMaker( copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) ) -@dataclass +@dataclass class DefectHybridCellOptFlowMaker(HybridCellOptFlowMaker): - initialize_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) + pbe_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) hybrid_maker: BaseCp2kMaker = field(default=DefectHybridCellOptMaker( copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) ) @@ -73,7 +62,7 @@ class FormationEnergyMaker(Maker): Parameters ---------- - name: This flow's name. i.e. "defect formation energy" + name: This flow's name. i.e. "defect formation energy" run_bulk: whether to run the bulk supercell as a static ("static") calculation, a full relaxation ("relax"), or to skip it (False) hybrid_functional: If provided, this activates hybrid version of the @@ -91,10 +80,10 @@ class FormationEnergyMaker(Maker): """ name: str = "defect formation energy" - run_bulk: Literal["static", "relax"] | bool = field(default="static") + run_bulk: Literal["static", "relax"] | bool = field(default="static") hybrid_functional: str | None = field(default=None) initialize_with_pbe: bool = field(default=True) - + supercell_matrix: NDArray = field(default=None) min_atoms: int = field(default=80) max_atoms: int = field(default=240) @@ -106,7 +95,7 @@ def __post_init__(self): if self.hybrid_functional: self.bulk_maker = DefectHybridCellOptMaker( name="bulk hybrid relax", transformations=None, - initialize_with_pbe=self.initialize_with_pbe, + initialize_with_pbe=self.initialize_with_pbe, hybrid_functional=self.hybrid_functional ) else: @@ -114,7 +103,7 @@ def __post_init__(self): elif self.run_bulk == "static": if self.hybrid_functional: - self.bulk_maker = DefectHybridStaticFlowMaker( + self.bulk_maker = DefectHybridStaticFlowMaker( name='bulk hybrid static', initialize_with_pbe=self.initialize_with_pbe, hybrid_functional=self.hybrid_functional, @@ -127,20 +116,21 @@ def __post_init__(self): 
hybrid_functional=self.hybrid_functional, initialize_with_pbe=self.initialize_with_pbe, ) - self.def_maker.initialize_maker.supercell_matrix = self.supercell_matrix + self.def_maker.pbe_maker.supercell_matrix = self.supercell_matrix self.def_maker.hybrid_maker.supercell_matrix = self.supercell_matrix - self.def_maker.initialize_maker.max_atoms = self.max_atoms + self.def_maker.pbe_maker.max_atoms = self.max_atoms self.def_maker.hybrid_maker.max_atoms = self.max_atoms - self.def_maker.initialize_maker.min_atoms = self.min_atoms + self.def_maker.pbe_maker.min_atoms = self.min_atoms self.def_maker.hybrid_maker.min_atoms = self.min_atoms - self.def_maker.initialize_maker.min_length = self.min_length + self.def_maker.pbe_maker.min_length = self.min_length self.def_maker.hybrid_maker.min_length = self.min_length - self.def_maker.initialize_maker.force_diagonal = self.force_diagonal + self.def_maker.pbe_maker.force_diagonal = self.force_diagonal self.def_maker.hybrid_maker.force_diagonal = self.force_diagonal + else: self.def_maker = DefectRelaxMaker() self.def_maker.supercell_matrix = self.supercell_matrix @@ -150,13 +140,13 @@ def __post_init__(self): self.def_maker.force_diagonal = self.force_diagonal def make( - self, defects: Iterable[Defect], - charges: bool | Iterable[int] = False, + self, defects: Iterable[Defect], + charges: bool | Iterable[int] = False, dielectric: NDArray | int | float | None = None, prev_cp2k_dir: str | Path | None = None, collect_outputs: bool = True, ): - """Make a flow to run multiple defects in order to calculate their formation + """Make a flow to run multiple defects in order to calculate their formation energy diagram. 
Parameters @@ -176,8 +166,8 @@ def make( sc_mat = self.supercell_matrix if self.supercell_matrix else \ get_sc_fromstruct( - bulk_structure, self.min_atoms, - self.max_atoms, self.min_length, + bulk_structure, self.min_atoms, + self.max_atoms, self.min_length, self.force_diagonal,) if self.run_bulk: @@ -192,7 +182,9 @@ def make( else: chgs = charges if charges else [0] for charge in chgs: - defect_job = self.def_maker.make(deepcopy(defect), charge) + dfct = deepcopy(defect) + dfct.user_charges = [charge] + defect_job = self.def_maker.make(dfct) jobs.append(defect_job) defect_outputs[defect.name][int(charge)] = (defect, defect_job.output) @@ -205,7 +197,6 @@ def make( jobs.append(collect_job) else: collect_job = None - return Flow( jobs=jobs, name=self.name, @@ -274,4 +265,3 @@ def ensure_defects_same_structure(defects: Iterable[Defect]): elif struct != defect.structure: raise ValueError("All defects must have the same host structure.") return struct - diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 3147532f23..188e6ecf50 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -13,7 +13,7 @@ from pymatgen.analysis.defects.core import Defect, Vacancy from atomate2.cp2k.sets.base import Cp2kInputGenerator, recursive_update from atomate2.cp2k.sets.defect import ( - DefectSetGenerator, DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, + DefectSetGenerator, DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator ) from atomate2.cp2k.jobs.base import BaseCp2kMaker, cp2k_job @@ -26,7 +26,7 @@ "store_volumetric_data": ("v_hartree",), } -@dataclass +@dataclass class BaseDefectMaker(BaseCp2kMaker): task_document_kwargs: dict = field(default_factory=lambda: DEFECT_TASK_DOC) @@ -37,12 +37,12 @@ class BaseDefectMaker(BaseCp2kMaker): force_diagonal: bool = 
field(default=False) @cp2k_job - def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | Path | None = None): + def make(self, defect: Defect | Structure, prev_cp2k_dir: str | Path | None = None): if isinstance(defect, Defect): structure = defect.get_supercell_structure( - sc_mat=self.supercell_matrix, - dummy_species=defect.site.species if isinstance(defect, Vacancy) else None, + sc_mat=self.supercell_matrix, + dummy_species=defect.site.species if isinstance(defect, Vacancy) else None, min_atoms=self.min_atoms, max_atoms=self.max_atoms, min_length=self.min_length, @@ -52,19 +52,26 @@ def make(self, defect: Defect | Structure, charge: int = 0, prev_cp2k_dir: str | if isinstance(defect, Vacancy): structure.add_site_property("ghost", [False]*(len(structure.sites)-1) + [True]) + if defect.user_charges: + if len(defect.user_charges) > 1: + raise ValueError("Multiple user charges found. Individual defect jobs can only contain 1.") + else: + charge = defect.user_charges[0] + else: + charge = 0 + # provenance stuff recursive_update(self.write_additional_data, { "info.json": { - "defect": deepcopy(defect), - "defect_charge": charge, + "defect": deepcopy(defect), "sc_mat": self.supercell_matrix } } ) - + else: - charge = charge if charge else defect.charge structure = deepcopy(defect) + charge = structure.charge structure.set_charge(charge) return super().make.original(self, structure=structure, prev_cp2k_dir=prev_cp2k_dir) @@ -106,29 +113,19 @@ class DefectCellOptMaker(BaseDefectMaker): transformation_params: tuple[dict, ...] 
| None = field(default=({"distance": 0.01},)) @dataclass -class DefectHybridStaticMaker(DefectStaticMaker, HybridStaticMaker): - +class DefectHybridStaticMaker(BaseDefectMaker): + name: str = field(default="defect hybrid static") input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridStaticSetGenerator) @dataclass -class DefectHybridRelaxMaker(DefectRelaxMaker, HybridRelaxMaker): +class DefectHybridRelaxMaker(BaseDefectMaker): name: str = field(default="defect hybrid relax") input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridRelaxSetGenerator) @dataclass -class DefectHybridCellOptMaker(DefectCellOptMaker, HybridCellOptMaker): +class DefectHybridCellOptMaker(BaseDefectMaker): name: str = field(default="defect hybrid cell opt") input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridCellOptSetGenerator) - -class GhostVacancy(Vacancy): - """Custom override of vacancy to deal with basis set superposition error.""" - - @property - def defect_structure(self): - """Returns the defect structure with the proper oxidation state""" - struct = self.structure.copy() - struct.add_site_property("ghost", [i == self.defect_site_index for i in range(len(struct))]) - return struct From 4d5109530858faa80714ec2d4396c4eb7050a381 Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 10 Jan 2023 11:17:32 -0800 Subject: [PATCH 41/50] lint --- src/atomate2/cp2k/flows/defect.py | 133 +++++++++++++++++++----------- src/atomate2/cp2k/jobs/defect.py | 95 ++++++++++++++------- 2 files changed, 154 insertions(+), 74 deletions(-) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index 79e5ae27f3..1faefcf425 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -1,57 +1,71 @@ - """Flows used in the calculation of defect properties.""" from __future__ import annotations -from copy import deepcopy import logging +from copy import deepcopy from dataclasses import 
dataclass, field -from typing import Iterable, Literal, Mapping from pathlib import Path -from numpy.typing import NDArray -import itertools +from typing import Iterable, Literal, Mapping -from jobflow import Flow, Job, Maker, OutputReference, job -from pymatgen.core.structure import Structure -from pymatgen.io.common import VolumetricData -from pymatgen.entries.computed_entries import ComputedStructureEntry +from jobflow import Flow, Maker, OutputReference, job +from numpy.typing import NDArray from pymatgen.analysis.defects.core import Defect -from pymatgen.analysis.defects.thermo import DefectEntry from pymatgen.analysis.defects.supercells import get_sc_fromstruct +from pymatgen.analysis.defects.thermo import DefectEntry +from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.io.common import VolumetricData +from atomate2.cp2k.flows.core import ( + HybridCellOptFlowMaker, + HybridRelaxFlowMaker, + HybridStaticFlowMaker, +) from atomate2.cp2k.jobs.base import BaseCp2kMaker from atomate2.cp2k.jobs.defect import ( - DefectStaticMaker, DefectRelaxMaker, DefectCellOptMaker, - DefectHybridStaticMaker, DefectHybridRelaxMaker, DefectHybridCellOptMaker + DefectCellOptMaker, + DefectHybridCellOptMaker, + DefectHybridRelaxMaker, + DefectHybridStaticMaker, + DefectRelaxMaker, + DefectStaticMaker, ) -from atomate2.cp2k.flows.core import HybridStaticFlowMaker, HybridRelaxFlowMaker, HybridCellOptFlowMaker - logger = logging.getLogger(__name__) + @dataclass class DefectHybridStaticFlowMaker(HybridStaticFlowMaker): pbe_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) - hybrid_maker: BaseCp2kMaker = field(default=DefectHybridStaticMaker( - copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) + hybrid_maker: BaseCp2kMaker = field( + default=DefectHybridStaticMaker( + copy_cp2k_kwargs={"additional_cp2k_files": ("info.json",)} ) + ) + @dataclass class DefectHybridRelaxFlowMaker(HybridRelaxFlowMaker): pbe_maker: BaseCp2kMaker = 
field(default_factory=DefectStaticMaker) - hybrid_maker: BaseCp2kMaker = field(default=DefectHybridRelaxMaker( - copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) + hybrid_maker: BaseCp2kMaker = field( + default=DefectHybridRelaxMaker( + copy_cp2k_kwargs={"additional_cp2k_files": ("info.json",)} ) + ) + @dataclass class DefectHybridCellOptFlowMaker(HybridCellOptFlowMaker): pbe_maker: BaseCp2kMaker = field(default_factory=DefectStaticMaker) - hybrid_maker: BaseCp2kMaker = field(default=DefectHybridCellOptMaker( - copy_cp2k_kwargs={'additional_cp2k_files': ("info.json",)}) + hybrid_maker: BaseCp2kMaker = field( + default=DefectHybridCellOptMaker( + copy_cp2k_kwargs={"additional_cp2k_files": ("info.json",)} ) + ) + # TODO close to being able to put this in common. Just need a switch that decides which core flow/job to use based on software @dataclass @@ -91,23 +105,26 @@ class FormationEnergyMaker(Maker): force_diagonal: bool = field(default=False) def __post_init__(self): - if self.run_bulk == 'relax': + if self.run_bulk == "relax": if self.hybrid_functional: self.bulk_maker = DefectHybridCellOptMaker( - name="bulk hybrid relax", transformations=None, + name="bulk hybrid relax", + transformations=None, initialize_with_pbe=self.initialize_with_pbe, - hybrid_functional=self.hybrid_functional - ) + hybrid_functional=self.hybrid_functional, + ) else: - self.bulk_maker = DefectCellOptMaker(name="bulk relax", transformations=None) + self.bulk_maker = DefectCellOptMaker( + name="bulk relax", transformations=None + ) elif self.run_bulk == "static": if self.hybrid_functional: self.bulk_maker = DefectHybridStaticFlowMaker( - name='bulk hybrid static', + name="bulk hybrid static", initialize_with_pbe=self.initialize_with_pbe, hybrid_functional=self.hybrid_functional, - ) + ) else: self.bulk_maker = DefectStaticMaker(name="bulk static") @@ -140,12 +157,13 @@ def __post_init__(self): self.def_maker.force_diagonal = self.force_diagonal def make( - self, defects: 
Iterable[Defect], + self, + defects: Iterable[Defect], charges: bool | Iterable[int] = False, dielectric: NDArray | int | float | None = None, prev_cp2k_dir: str | Path | None = None, collect_outputs: bool = True, - ): + ): """Make a flow to run multiple defects in order to calculate their formation energy diagram. @@ -161,19 +179,29 @@ def make( The workflow to calculate the formation energy diagram. """ jobs, defect_outputs = [], {} - defect_outputs = {defect.name: {} for defect in defects} # TODO DEFECT NAMES ARE NOT UNIQUE HASHES + defect_outputs = { + defect.name: {} for defect in defects + } # TODO DEFECT NAMES ARE NOT UNIQUE HASHES bulk_structure = ensure_defects_same_structure(defects) - sc_mat = self.supercell_matrix if self.supercell_matrix else \ - get_sc_fromstruct( - bulk_structure, self.min_atoms, - self.max_atoms, self.min_length, - self.force_diagonal,) + sc_mat = ( + self.supercell_matrix + if self.supercell_matrix + else get_sc_fromstruct( + bulk_structure, + self.min_atoms, + self.max_atoms, + self.min_length, + self.force_diagonal, + ) + ) if self.run_bulk: s = bulk_structure.copy() s.make_supercell(sc_mat) - bulk_job = self.bulk_maker.make(bulk_structure * sc_mat, prev_cp2k_dir=prev_cp2k_dir) + bulk_job = self.bulk_maker.make( + bulk_structure * sc_mat, prev_cp2k_dir=prev_cp2k_dir + ) jobs.append(bulk_job) for defect in defects: @@ -192,8 +220,8 @@ def make( collect_job = collect_defect_outputs( defect_outputs=defect_outputs, bulk_output=bulk_job.output if self.run_bulk else None, - dielectric=dielectric - ) + dielectric=dielectric, + ) jobs.append(collect_job) else: collect_job = None @@ -203,10 +231,13 @@ def make( output=jobs[-1].output if collect_job else None, ) + # TODO this is totally code agnostic and should be in common @job def collect_defect_outputs( - defect_outputs: Mapping[str, Mapping[int, OutputReference]], bulk_output: OutputReference, dielectric: NDArray | int | float | None + defect_outputs: Mapping[str, Mapping[int, 
OutputReference]], + bulk_output: OutputReference, + dielectric: NDArray | int | float | None, ) -> dict: """Collect all the outputs from the defect calculations. This job will combine the structure and entry fields to create a @@ -222,7 +253,9 @@ def collect_defect_outputs( """ outputs = {"results": {}} if not dielectric: - logger.warn("Dielectric constant not provided. Defect formation energies will be uncorrected.") + logger.warn( + "Dielectric constant not provided. Defect formation energies will be uncorrected." + ) for defect_name, defects_with_charges in defect_outputs.items(): defect_entries = [] fnv_plots = {} @@ -232,21 +265,29 @@ def collect_defect_outputs( defect_entry = DefectEntry( defect=defect, charge_state=charge, - sc_entry=ComputedStructureEntry(structure=bulk_output.structure, energy=output_with_charge.output.energy - bulk_output.output.energy) + sc_entry=ComputedStructureEntry( + structure=bulk_output.structure, + energy=output_with_charge.output.energy - bulk_output.output.energy, + ), ) defect_entries.append(defect_entry) plot_data = defect_entry.get_freysoldt_correction( - defect_locpot=VolumetricData.from_dict(output_with_charge.cp2k_objects['v_hartree']), - bulk_locpot=VolumetricData.from_dict(output_with_charge.cp2k_objects['v_hartree']), - dielectric=dielectric - ) + defect_locpot=VolumetricData.from_dict( + output_with_charge.cp2k_objects["v_hartree"] + ), + bulk_locpot=VolumetricData.from_dict( + output_with_charge.cp2k_objects["v_hartree"] + ), + dielectric=dielectric, + ) fnv_plots[int(charge)] = plot_data outputs["results"][defect.name] = dict( defect=defect, defect_entries=defect_entries, fnv_plots=fnv_plots ) return outputs -#TODO should be in common + +# TODO should be in common def ensure_defects_same_structure(defects: Iterable[Defect]): """Ensure that the defects are valid. 
Parameters diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 188e6ecf50..04898f46a0 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -3,21 +3,25 @@ from __future__ import annotations import logging -from pathlib import Path -from dataclasses import dataclass, field from copy import deepcopy -from tkinter import W -from numpy.typing import NDArray +from dataclasses import dataclass, field +from pathlib import Path -from pymatgen.core import Structure +from numpy.typing import NDArray from pymatgen.analysis.defects.core import Defect, Vacancy +from pymatgen.core import Structure + +from atomate2.cp2k.jobs.base import BaseCp2kMaker, cp2k_job from atomate2.cp2k.sets.base import Cp2kInputGenerator, recursive_update from atomate2.cp2k.sets.defect import ( - DefectSetGenerator, DefectStaticSetGenerator, DefectRelaxSetGenerator, DefectCellOptSetGenerator, - DefectHybridStaticSetGenerator, DefectHybridRelaxSetGenerator, DefectHybridCellOptSetGenerator + DefectCellOptSetGenerator, + DefectHybridCellOptSetGenerator, + DefectHybridRelaxSetGenerator, + DefectHybridStaticSetGenerator, + DefectRelaxSetGenerator, + DefectSetGenerator, + DefectStaticSetGenerator, ) -from atomate2.cp2k.jobs.base import BaseCp2kMaker, cp2k_job -from atomate2.cp2k.jobs.core import HybridStaticMaker, HybridRelaxMaker, HybridCellOptMaker logger = logging.getLogger(__name__) @@ -26,6 +30,7 @@ "store_volumetric_data": ("v_hartree",), } + @dataclass class BaseDefectMaker(BaseCp2kMaker): @@ -42,7 +47,9 @@ def make(self, defect: Defect | Structure, prev_cp2k_dir: str | Path | None = No structure = defect.get_supercell_structure( sc_mat=self.supercell_matrix, - dummy_species=defect.site.species if isinstance(defect, Vacancy) else None, + dummy_species=defect.site.species + if isinstance(defect, Vacancy) + else None, min_atoms=self.min_atoms, max_atoms=self.max_atoms, min_length=self.min_length, @@ -50,23 +57,29 @@ def make(self, 
defect: Defect | Structure, prev_cp2k_dir: str | Path | None = No ) if isinstance(defect, Vacancy): - structure.add_site_property("ghost", [False]*(len(structure.sites)-1) + [True]) + structure.add_site_property( + "ghost", [False] * (len(structure.sites) - 1) + [True] + ) if defect.user_charges: if len(defect.user_charges) > 1: - raise ValueError("Multiple user charges found. Individual defect jobs can only contain 1.") + raise ValueError( + "Multiple user charges found. Individual defect jobs can only contain 1." + ) else: charge = defect.user_charges[0] else: charge = 0 # provenance stuff - recursive_update(self.write_additional_data, { - "info.json": { - "defect": deepcopy(defect), - "sc_mat": self.supercell_matrix + recursive_update( + self.write_additional_data, + { + "info.json": { + "defect": deepcopy(defect), + "sc_mat": self.supercell_matrix, } - } + }, ) else: @@ -74,7 +87,10 @@ def make(self, defect: Defect | Structure, prev_cp2k_dir: str | Path | None = No charge = structure.charge structure.set_charge(charge) - return super().make.original(self, structure=structure, prev_cp2k_dir=prev_cp2k_dir) + return super().make.original( + self, structure=structure, prev_cp2k_dir=prev_cp2k_dir + ) + @dataclass class DefectStaticMaker(BaseDefectMaker): @@ -82,7 +98,8 @@ class DefectStaticMaker(BaseDefectMaker): name: str = field(default="defect static") input_set_generator: DefectSetGenerator = field( default_factory=DefectStaticSetGenerator - ) + ) + @dataclass class DefectRelaxMaker(BaseDefectMaker): @@ -94,9 +111,16 @@ class DefectRelaxMaker(BaseDefectMaker): """ name: str = field(default="defect relax") - input_set_generator: Cp2kInputGenerator = field(default_factory=DefectRelaxSetGenerator) - transformations: tuple[str, ...] = field(default=("PerturbStructureTransformation",)) - transformation_params: tuple[dict, ...] 
| None = field(default=({"distance": 0.01},)) + input_set_generator: Cp2kInputGenerator = field( + default_factory=DefectRelaxSetGenerator + ) + transformations: tuple[str, ...] = field( + default=("PerturbStructureTransformation",) + ) + transformation_params: tuple[dict, ...] | None = field( + default=({"distance": 0.01},) + ) + @dataclass class DefectCellOptMaker(BaseDefectMaker): @@ -108,24 +132,39 @@ class DefectCellOptMaker(BaseDefectMaker): """ name: str = field(default="defect relax") - input_set_generator: Cp2kInputGenerator = field(default_factory=DefectCellOptSetGenerator) - transformations: tuple[str, ...] = field(default=("PerturbStructureTransformation",)) - transformation_params: tuple[dict, ...] | None = field(default=({"distance": 0.01},)) + input_set_generator: Cp2kInputGenerator = field( + default_factory=DefectCellOptSetGenerator + ) + transformations: tuple[str, ...] = field( + default=("PerturbStructureTransformation",) + ) + transformation_params: tuple[dict, ...] | None = field( + default=({"distance": 0.01},) + ) + @dataclass class DefectHybridStaticMaker(BaseDefectMaker): name: str = field(default="defect hybrid static") - input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridStaticSetGenerator) + input_set_generator: DefectSetGenerator = field( + default_factory=DefectHybridStaticSetGenerator + ) + @dataclass class DefectHybridRelaxMaker(BaseDefectMaker): name: str = field(default="defect hybrid relax") - input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridRelaxSetGenerator) + input_set_generator: DefectSetGenerator = field( + default_factory=DefectHybridRelaxSetGenerator + ) + @dataclass class DefectHybridCellOptMaker(BaseDefectMaker): name: str = field(default="defect hybrid cell opt") - input_set_generator: DefectSetGenerator = field(default_factory=DefectHybridCellOptSetGenerator) + input_set_generator: DefectSetGenerator = field( + default_factory=DefectHybridCellOptSetGenerator + ) From 
7317dab4d93ed52d1f8c784189f0335393bcafc4 Mon Sep 17 00:00:00 2001 From: nwinner Date: Thu, 12 Jan 2023 10:04:45 -0800 Subject: [PATCH 42/50] dfct --- src/atomate2/cp2k/jobs/defect.py | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 04898f46a0..5637f79504 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -147,24 +147,57 @@ class DefectCellOptMaker(BaseDefectMaker): class DefectHybridStaticMaker(BaseDefectMaker): name: str = field(default="defect hybrid static") + hybrid_functional: str = "PBE0" input_set_generator: DefectSetGenerator = field( default_factory=DefectHybridStaticSetGenerator ) + def __post_init__(self): + """Update the input settings with hybrid_functional attribute""" + self.input_set_generator.user_input_settings.update( + {"activate_hybrid": {"hybrid_functional": self.hybrid_functional}} + ) + @dataclass class DefectHybridRelaxMaker(BaseDefectMaker): name: str = field(default="defect hybrid relax") + hybrid_functional: str = "PBE0" input_set_generator: DefectSetGenerator = field( default_factory=DefectHybridRelaxSetGenerator ) + transformations: tuple[str, ...] = field( + default=("PerturbStructureTransformation",) + ) + transformation_params: tuple[dict, ...] | None = field( + default=({"distance": 0.01},) + ) + + def __post_init__(self): + """Update the input settings with hybrid_functional attribute""" + self.input_set_generator.user_input_settings.update( + {"activate_hybrid": {"hybrid_functional": self.hybrid_functional}} + ) @dataclass class DefectHybridCellOptMaker(BaseDefectMaker): name: str = field(default="defect hybrid cell opt") + hybrid_functional: str = "PBE0" input_set_generator: DefectSetGenerator = field( default_factory=DefectHybridCellOptSetGenerator ) + transformations: tuple[str, ...] = field( + default=("PerturbStructureTransformation",) + ) + transformation_params: tuple[dict, ...] 
| None = field( + default=({"distance": 0.01},) + ) + + def __post_init__(self): + """Update the input settings with hybrid_functional attribute""" + self.input_set_generator.user_input_settings.update( + {"activate_hybrid": {"hybrid_functional": self.hybrid_functional}} + ) \ No newline at end of file From 7e79c82f5fa4caf5b921dd5e7c3c0dcd65af08c2 Mon Sep 17 00:00:00 2001 From: nwinner Date: Mon, 16 Jan 2023 18:00:00 -0800 Subject: [PATCH 43/50] Remove endline --- src/atomate2/cp2k/jobs/defect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 5637f79504..39a4542679 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -200,4 +200,4 @@ def __post_init__(self): """Update the input settings with hybrid_functional attribute""" self.input_set_generator.user_input_settings.update( {"activate_hybrid": {"hybrid_functional": self.hybrid_functional}} - ) \ No newline at end of file + ) From 8f8e86fc49808bb448cafde83437e18f749f1c04 Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 17 Jan 2023 11:06:59 -0800 Subject: [PATCH 44/50] trim --- src/atomate2/cp2k/builders/defect.py | 547 +++++++++++++++++---------- src/atomate2/cp2k/schemas/defect.py | 151 +++++--- 2 files changed, 430 insertions(+), 268 deletions(-) diff --git a/src/atomate2/cp2k/builders/defect.py b/src/atomate2/cp2k/builders/defect.py index 9f2ccce408..cc829b1d84 100644 --- a/src/atomate2/cp2k/builders/defect.py +++ b/src/atomate2/cp2k/builders/defect.py @@ -3,25 +3,20 @@ from typing import Dict, Iterator, List, Literal, Optional import numpy as np -from monty.json import MontyDecoder, jsanitize - +from emmet.core.electronic_structure import ElectronicStructureDoc +from emmet.core.material import MaterialsDoc from maggma.builders import Builder from maggma.stores import Store from maggma.utils import grouper - -from pymatgen.core import Structure +from monty.json import MontyDecoder, 
jsanitize from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher -from pymatgen.symmetry.analyzer import SpacegroupAnalyzer +from pymatgen.core import Structure from pymatgen.io.cp2k.inputs import Cp2kInput +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer -from emmet.core.material import MaterialsDoc - -from atomate2.settings import Atomate2Settings -from atomate2.cp2k.schemas.task import TaskDocument -from atomate2.cp2k.schemas.defect import DefectDoc, DefectiveMaterialDoc from atomate2.cp2k.schemas.calc_types import TaskType - -from emmet.core.electronic_structure import ElectronicStructureDoc +from atomate2.cp2k.schemas.defect import DefectDoc, DefectiveMaterialDoc +from atomate2.settings import Atomate2Settings __author__ = "Nicholas Winner " @@ -50,15 +45,15 @@ class DefectBuilder(Builder): 6.) Update the defect store """ - #TODO how to incorporate into settings? + # TODO how to incorporate into settings? DEFAULT_ALLOWED_DFCT_TASKS = [ - TaskType.Structure_Optimization.value, - ] + TaskType.Structure_Optimization.value, + ] DEFAULT_ALLOWED_BULK_TASKS = [ - TaskType.Structure_Optimization.value, - TaskType.Static.value - ] + TaskType.Structure_Optimization.value, + TaskType.Static.value, + ] def __init__( self, @@ -72,8 +67,10 @@ def __init__( query: Optional[Dict] = None, bulk_query: Optional[Dict] = None, allowed_dfct_types: Optional[List[str]] = DEFAULT_ALLOWED_DFCT_TASKS, - allowed_bulk_types: Optional[List[str]] = DEFAULT_ALLOWED_BULK_TASKS, - task_schema: Literal["cp2k"] = "cp2k", # TODO cp2k specific right now, but this will go in common eventually + allowed_bulk_types: Optional[List[str]] = DEFAULT_ALLOWED_BULK_TASKS, + task_schema: Literal[ + "cp2k" + ] = "cp2k", # TODO cp2k specific right now, but this will go in common eventually settings: Dict | None = None, **kwargs, ): @@ -100,11 +97,15 @@ def __init__( self.electronic_structure = electronic_structure self.electrostatic_potentials = electrostatic_potentials 
self.task_validation = task_validation - self._allowed_dfct_types = allowed_dfct_types #TODO How to incorporate into getitems? - self._allowed_bulk_types = allowed_bulk_types #TODO How to incorporate into getitems? + self._allowed_dfct_types = ( + allowed_dfct_types # TODO How to incorporate into getitems? + ) + self._allowed_bulk_types = ( + allowed_bulk_types # TODO How to incorporate into getitems? + ) settings = settings if settings else {} - self.settings = Atomate2Settings(**settings) # TODO don't think this is right + self.settings = Atomate2Settings(**settings) # TODO don't think this is right self.query = query if query else {} self.bulk_query = bulk_query if bulk_query else {} self.timestamp = None @@ -113,33 +114,49 @@ def __init__( self.kwargs = kwargs # TODO Long term, schemas should be part of the matching and grouping process so that a builder can be run on a mixture - self.query.update({'output.@module': f"atomate2.{self.task_schema}.schemas.task", "output.@class": "TaskDocument"}) - self.bulk_query.update({'output.@module': f"atomate2.{self.task_schema}.schemas.task", "output.@class": "TaskDocument"}) - self._defect_query = 'output.additional_json.info.defect' + self.query.update( + { + "output.@module": f"atomate2.{self.task_schema}.schemas.task", + "output.@class": "TaskDocument", + } + ) + self.bulk_query.update( + { + "output.@module": f"atomate2.{self.task_schema}.schemas.task", + "output.@class": "TaskDocument", + } + ) + self._defect_query = "output.additional_json.info.defect" self._required_defect_properties = [ self._defect_query, self.tasks.key, - 'output.output.energy', - 'output.output.structure', - 'output.input', - 'output.nsites', - 'output.cp2k_objects.v_hartree', - ] + "output.output.energy", + "output.output.structure", + "output.input", + "output.nsites", + "output.cp2k_objects.v_hartree", + ] self._required_bulk_properties = [ self.tasks.key, - 'output.output.energy', - 'output.output.structure', - 'output.input', - 
'output.cp2k_objects.v_hartree', - 'output.output.vbm', - ] + "output.output.energy", + "output.output.structure", + "output.input", + "output.cp2k_objects.v_hartree", + "output.output.vbm", + ] self._optional_defect_properties = [] self._optional_bulk_properties = [] - sources = [tasks, dielectric, electronic_structure, materials, electrostatic_potentials] + sources = [ + tasks, + dielectric, + electronic_structure, + materials, + electrostatic_potentials, + ] if self.task_validation: sources.append(self.task_validation) super().__init__(sources=sources, targets=[defects], **kwargs) @@ -151,7 +168,7 @@ def defect_query(self) -> str: """ return self._defect_query - #TODO Hartree pot should be required but only for charged defects + # TODO Hartree pot should be required but only for charged defects @property def required_defect_properties(self) -> List: """ @@ -191,7 +208,7 @@ def allowed_dfct_types(self) -> set: @property def allowed_bulk_types(self) -> set: return {TaskType(t) for t in self._allowed_bulk_types} - + def ensure_indexes(self): """ Ensures indicies on the tasks and materials collections @@ -201,7 +218,7 @@ def ensure_indexes(self): self.tasks.ensure_index(self.tasks.key) self.tasks.ensure_index("output.last_updated") self.tasks.ensure_index("output.state") - self.tasks.ensure_index("output.formula_pretty") # TODO is necessary? + self.tasks.ensure_index("output.formula_pretty") # TODO is necessary? 
# Search index for materials self.materials.ensure_index("material_id") @@ -219,7 +236,7 @@ def ensure_indexes(self): def prechunk(self, number_splits: int) -> Iterator[Dict]: - tag_query = {} + tag_query = {} if len(self.settings.BUILD_TAGS) > 0 and len(self.settings.EXCLUDED_TAGS) > 0: tag_query["$and"] = [ {"tags": {"$in": self.settings.BUILD_TAGS}}, @@ -231,21 +248,29 @@ def prechunk(self, number_splits: int) -> Iterator[Dict]: # Get defect tasks temp_query = self.query.copy() temp_query.update(tag_query) - temp_query.update({d: {'$exists': True, "$ne": None} for d in self.required_defect_properties}) - temp_query.update({self.defect_query: {'$exists': True}, "state": "successful"}) + temp_query.update( + {d: {"$exists": True, "$ne": None} for d in self.required_defect_properties} + ) + temp_query.update({self.defect_query: {"$exists": True}, "state": "successful"}) defect_tasks = { doc[self.tasks.key] - for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + for doc in self.tasks.query( + criteria=temp_query, properties=[self.tasks.key] + ) } # Get bulk tasks temp_query = self.bulk_query.copy() temp_query.update(tag_query) - temp_query.update({d: {'$exists': True} for d in self.required_bulk_properties}) - temp_query.update({self.defect_query: {'$exists': False}, "state": "successful"}) + temp_query.update({d: {"$exists": True} for d in self.required_bulk_properties}) + temp_query.update( + {self.defect_query: {"$exists": False}, "state": "successful"} + ) bulk_tasks = { doc[self.tasks.key] - for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + for doc in self.tasks.query( + criteria=temp_query, properties=[self.tasks.key] + ) } N = np.ceil(len(defect_tasks) / number_splits) @@ -263,9 +288,9 @@ def get_items(self) -> Iterator[List[Dict]]: 1. Get all tasks with standard "defect" query tag 2. Filter all tasks by skipping tasks which are already in the Defect Store 3. Get all tasks that could be used as bulk - 4. 
Filter all bulks which do not have corresponding Dielectric and + 4. Filter all bulks which do not have corresponding Dielectric and ElectronicStructure data (if a band gap exists for that task). - 5. Group defect tasks by defect matching + 5. Group defect tasks by defect matching 6. Given defect object in a group, bundle them with bulk tasks identified with structure matching 7. Yield the item bundles @@ -274,7 +299,7 @@ def get_items(self) -> Iterator[List[Dict]]: Iterator of (defect documents, task bundles) The defect document is an existing defect doc to be updated with new data, or None - + task bundles bundle are all the tasks that correspond to the same defect and all possible bulk tasks that could be matched to them. """ @@ -297,46 +322,64 @@ def get_items(self) -> Iterator[List[Dict]]: ##### Get defect tasks ##### temp_query = self.query.copy() - temp_query.update({d: {'$exists': True, "$ne": None} for d in self.required_defect_properties}) - temp_query.update({self.defect_query: {'$exists': True}, "output.state": "successful"}) + temp_query.update( + {d: {"$exists": True, "$ne": None} for d in self.required_defect_properties} + ) + temp_query.update( + {self.defect_query: {"$exists": True}, "output.state": "successful"} + ) defect_tasks = { doc[self.tasks.key] - for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + for doc in self.tasks.query( + criteria=temp_query, properties=[self.tasks.key] + ) } # TODO Seems slow not_allowed = { - doc[self.tasks.key] + doc[self.tasks.key] for doc in self.tasks.query( - criteria={self.tasks.key: {"$in": list(defect_tasks)}}, - properties=['output.calcs_reversed'] - ) - if TaskType(doc['output']['calcs_reversed'][0]['task_type']) not in self.allowed_dfct_types + criteria={self.tasks.key: {"$in": list(defect_tasks)}}, + properties=["output.calcs_reversed"], + ) + if TaskType(doc["output"]["calcs_reversed"][0]["task_type"]) + not in self.allowed_dfct_types } if not_allowed: - 
self.logger.debug(f"{len(not_allowed)} defect tasks dropped. Not allowed TaskType") + self.logger.debug( + f"{len(not_allowed)} defect tasks dropped. Not allowed TaskType" + ) defect_tasks = defect_tasks - not_allowed ##### Get bulk tasks ##### temp_query = self.bulk_query.copy() - temp_query.update({d: {'$exists': True, "$ne": None} for d in self.required_bulk_properties}) - temp_query.update({self.defect_query: {'$exists': False}, "output.state": "successful"}) + temp_query.update( + {d: {"$exists": True, "$ne": None} for d in self.required_bulk_properties} + ) + temp_query.update( + {self.defect_query: {"$exists": False}, "output.state": "successful"} + ) bulk_tasks = { doc[self.tasks.key] - for doc in self.tasks.query(criteria=temp_query, properties=[self.tasks.key]) + for doc in self.tasks.query( + criteria=temp_query, properties=[self.tasks.key] + ) } - + # TODO seems slow not_allowed = { - doc[self.tasks.key] + doc[self.tasks.key] for doc in self.tasks.query( criteria={self.tasks.key: {"$in": list(bulk_tasks)}}, - properties=['output.calcs_reversed'] - ) - if TaskType(doc['output']['calcs_reversed'][0]['task_type']) not in self.allowed_bulk_types + properties=["output.calcs_reversed"], + ) + if TaskType(doc["output"]["calcs_reversed"][0]["task_type"]) + not in self.allowed_bulk_types } if not_allowed: - self.logger.debug(f"{len(not_allowed)} bulk tasks dropped. Not allowed TaskType") + self.logger.debug( + f"{len(not_allowed)} bulk tasks dropped. Not allowed TaskType" + ) bulk_tasks = bulk_tasks - not_allowed # TODO Not the same validation behavior as material builders? 
@@ -344,9 +387,7 @@ def get_items(self) -> Iterator[List[Dict]]: if self.task_validation: validated = { doc[self.tasks.key] - for doc in self.task_validation.query( - {}, [self.task_validation.key] - ) + for doc in self.task_validation.query({}, [self.task_validation.key]) } defect_tasks = defect_tasks.intersection(validated) @@ -358,7 +399,7 @@ def get_items(self) -> Iterator[List[Dict]]: {"is_valid": False}, [self.task_validation.key] ) } - self.logger.info("Removing {} invalid tasks".format(len(invalid_ids))) + self.logger.info(f"Removing {len(invalid_ids)} invalid tasks") defect_tasks = defect_tasks - invalid_ids bulk_tasks = bulk_tasks - invalid_ids @@ -369,11 +410,11 @@ def get_items(self) -> Iterator[List[Dict]]: } all_tasks = defect_tasks | bulk_tasks - self.logger.debug("All tasks: {}".format(len(all_tasks))) - self.logger.debug("Bulk tasks before filter: {}".format(len(bulk_tasks))) + self.logger.debug(f"All tasks: {len(all_tasks)}") + self.logger.debug(f"Bulk tasks before filter: {len(bulk_tasks)}") bulk_tasks = set(filter(self.__preprocess_bulk, bulk_tasks)) - self.logger.debug("Bulk tasks after filter: {}".format(len(bulk_tasks))) - self.logger.debug("All defect tasks: {}".format(len(defect_tasks))) + self.logger.debug(f"Bulk tasks after filter: {len(bulk_tasks)}") + self.logger.debug(f"All defect tasks: {len(defect_tasks)}") unprocessed_defect_tasks = defect_tasks - processed_defect_tasks if not unprocessed_defect_tasks: @@ -383,8 +424,12 @@ def get_items(self) -> Iterator[List[Dict]]: self.logger.info("No compatible bulk calculations. 
Exiting.") return - self.logger.info(f"Found {len(unprocessed_defect_tasks)} unprocessed defect tasks") - self.logger.info(f"Found {len(bulk_tasks)} bulk tasks with dielectric properties") + self.logger.info( + f"Found {len(unprocessed_defect_tasks)} unprocessed defect tasks" + ) + self.logger.info( + f"Found {len(bulk_tasks)} bulk tasks with dielectric properties" + ) # Set total for builder bars to have a total self.total = len(unprocessed_defect_tasks) @@ -392,10 +437,12 @@ def get_items(self) -> Iterator[List[Dict]]: # yield list of defects that are of the same type, matched to an appropriate bulk calc self.logger.info(f"Starting defect matching.") - for defect, defect_task_group in self.__filter_and_group_tasks(unprocessed_defect_tasks): + for defect, defect_task_group in self.__filter_and_group_tasks( + unprocessed_defect_tasks + ): task_ids = self.__match_defects_to_bulks(bulk_tasks, defect_task_group) if not task_ids: - continue + continue doc = self.__get_defect_doc(defect) if doc: self.logger.info(f"DOC IS {doc.defect.__repr__()}") @@ -416,18 +463,34 @@ def process_item(self, items): returns: the defect document as a dictionary """ defect_doc, item_bundle, material_id, task_ids = items - self.logger.info(f"Processing group of {len(item_bundle)} defects into DefectDoc") + self.logger.info( + f"Processing group of {len(item_bundle)} defects into DefectDoc" + ) if item_bundle: for _, (defect_task, bulk_task, dielectric) in item_bundle.items(): if not defect_doc: defect_doc = DefectDoc.from_tasks( - defect_task=defect_task, bulk_task=bulk_task, dielectric=dielectric, - query=self.defect_query, key=self.tasks.key, material_id=material_id - ) + defect_task=defect_task, + bulk_task=bulk_task, + dielectric=dielectric, + query=self.defect_query, + key=self.tasks.key, + material_id=material_id, + ) else: - defect_doc.update_one(defect_task, bulk_task, dielectric, query=self.defect_query, key=self.tasks.key) # TODO Atomate2Store wrapper - defect_doc.task_ids = 
list(set(task_ids + defect_doc.task_ids)) # TODO should I store the bulk id too? - return jsanitize(defect_doc.dict(), allow_bson=True, enum_values=True, strict=True) + defect_doc.update_one( + defect_task, + bulk_task, + dielectric, + query=self.defect_query, + key=self.tasks.key, + ) # TODO Atomate2Store wrapper + defect_doc.task_ids = list( + set(task_ids + defect_doc.task_ids) + ) # TODO should I store the bulk id too? + return jsanitize( + defect_doc.dict(), allow_bson=True, enum_values=True, strict=True + ) return {} def update_targets(self, items): @@ -443,10 +506,10 @@ def update_targets(self, items): item.update({"_bt": self.timestamp}) self.defects.remove_docs( { - "task_ids": item['task_ids'], + "task_ids": item["task_ids"], } ) - self.defects.update(items, key='task_ids') + self.defects.update(items, key="task_ids") else: self.logger.info("No items to update") @@ -463,37 +526,36 @@ def __filter_and_group_tasks(self, tasks): [ (defect, [task_ids] ), ...] where task_ids correspond to the same defect """ - props = [ - self.defect_query, - self.tasks.key, - 'output.structure' - ] + props = [self.defect_query, self.tasks.key, "output.structure"] self.logger.debug(f"Finding equivalent tasks for {len(tasks)} defects") - sm = StructureMatcher(allow_subset=False) #TODO build settings + sm = StructureMatcher(allow_subset=False) # TODO build settings defects = [ { - self.tasks.key: t[self.tasks.key], 'defect': self.__get_defect_from_task(t), - 'structure': Structure.from_dict(t['output']['structure']) + self.tasks.key: t[self.tasks.key], + "defect": self.__get_defect_from_task(t), + "structure": Structure.from_dict(t["output"]["structure"]), } - for t in self.tasks.query(criteria={self.tasks.key: {'$in': list(tasks)}}, properties=props) + for t in self.tasks.query( + criteria={self.tasks.key: {"$in": list(tasks)}}, properties=props + ) ] for d in defects: # TODO remove oxidation state because spins/oxidation cause errors in comparison. 
# but they shouldnt if those props are close in value - d['structure'].remove_oxidation_states() - d['defect'].user_charges = [d['structure'].charge] + d["structure"].remove_oxidation_states() + d["defect"].user_charges = [d["structure"].charge] def key(x): - s = x['defect'].structure + s = x["defect"].structure return get_sg(s), s.composition.reduced_composition def are_equal(x, y): """To decide if defects are equal.""" - if x['structure'].charge != y['structure'].charge: + if x["structure"].charge != y["structure"].charge: return False - if x['defect'] == y['defect']: + if x["defect"] == y["defect"]: return True return False @@ -506,11 +568,21 @@ def are_equal(x, y): while len(unmatched) > 0: i, refs = unmatched.pop(0) matches = [i] - inds = list(filter(lambda j: are_equal(refs, unmatched[j][1]), list(range(len(unmatched))))) + inds = list( + filter( + lambda j: are_equal(refs, unmatched[j][1]), + list(range(len(unmatched))), + ) + ) matches.extend([unmatched[i][0] for i in inds]) - unmatched = [unmatched[i] for i in range(len(unmatched)) if i not in inds] + unmatched = [ + unmatched[i] for i in range(len(unmatched)) if i not in inds + ] all_groups.append( - (defects[i]['defect'], [defects[i][self.tasks.key] for i in matches]) + ( + defects[i]["defect"], + [defects[i][self.tasks.key] for i in matches], + ) ) self.logger.debug(f"{len(all_groups)} groups") @@ -532,7 +604,9 @@ def __get_defect_doc(self, defect): material_id = self._get_mpid(defect.structure) docs = [ DefectDoc(**doc) - for doc in self.defects.query(criteria={'material_id': material_id}, properties=None) + for doc in self.defects.query( + criteria={"material_id": material_id}, properties=None + ) ] for doc in docs: if self.__defect_match(defect, doc.defect): @@ -546,8 +620,9 @@ def __defect_match(self, x, y): return False # Elem. 
changes needed to distinguish ghost vacancies - if x.element_changes == y.element_changes and \ - sm.fit(x.defect_structure, y.defect_structure): + if x.element_changes == y.element_changes and sm.fit( + x.defect_structure, y.defect_structure + ): return True return False @@ -559,11 +634,13 @@ def __get_dielectric(self, key): and retrieve the total dielectric tensor for defect analysis. If no dielectric exists, as would be the case for metallic systems, return None. """ - for diel in self.dielectric.query(criteria={"material_id": key}, properties=['total']): - return diel['total'] + for diel in self.dielectric.query( + criteria={"material_id": key}, properties=["total"] + ): + return diel["total"] return None - #TODO retrieving the electrostatic potential is by far the most expesive part of the builder. Any way to reduce? + # TODO retrieving the electrostatic potential is by far the most expesive part of the builder. Any way to reduce? def __get_item_bundle(self, task_ids): """ Gets a group of items that can be processed together into a defect document. 
@@ -576,11 +653,12 @@ def __get_item_bundle(self, task_ids): """ return { rt: ( - self.tasks.query_one(criteria={self.tasks.key: pairs[0]}, load=True), - self.tasks.query_one(criteria={self.tasks.key: pairs[1]}, load=True), - self.__get_dielectric(self._mpid_map[pairs[1]]) - ) for rt, pairs in task_ids.items() - } + self.tasks.query_one(criteria={self.tasks.key: pairs[0]}, load=True), + self.tasks.query_one(criteria={self.tasks.key: pairs[1]}, load=True), + self.__get_dielectric(self._mpid_map[pairs[1]]), + ) + for rt, pairs in task_ids.items() + } def _get_mpid(self, structure): """ @@ -592,17 +670,22 @@ def _get_mpid(self, structure): returns: material_id, if one exists, else None """ - sga = SpacegroupAnalyzer(structure, symprec=self.settings.SYMPREC) # TODO Add angle tolerance + sga = SpacegroupAnalyzer( + structure, symprec=self.settings.SYMPREC + ) # TODO Add angle tolerance mats = self.materials.query( criteria={ - 'chemsys': structure.composition.chemical_system, - }, properties=['structure', 'material_id'] + "chemsys": structure.composition.chemical_system, + }, + properties=["structure", "material_id"], ) # TODO coudl more than one material match true? 
- sm = StructureMatcher() # TODO add tolerances + sm = StructureMatcher( + primitive_cell=True, comparator=ElementComparator() + ) # TODO add tolerances for m in mats: - if sm.fit(structure, Structure.from_dict(m['structure'])): - return m['material_id'] + if sm.fit(structure, Structure.from_dict(m["structure"])): + return m["material_id"] return None def __match_defects_to_bulks(self, bulk_ids, defect_ids) -> list[tuple]: @@ -620,26 +703,30 @@ def __match_defects_to_bulks(self, bulk_ids, defect_ids) -> list[tuple]: # TODO mongo projection on array doesn't work (see above) props = [ self.tasks.key, - self.defect_query, - 'output.input', - 'output.nsites', - 'output.output.structure', - 'output.output.energy', - 'output.calcs_reversed' + self.defect_query, + "output.input", + "output.nsites", + "output.output.structure", + "output.output.energy", + "output.calcs_reversed", ] - defects = list(self.tasks.query(criteria={self.tasks.key: {'$in': list(defect_ids)}}, properties=props)) + defects = list( + self.tasks.query( + criteria={self.tasks.key: {"$in": list(defect_ids)}}, properties=props + ) + ) ps = self.__get_pristine_supercell(defects[0]) - ps.remove_oxidation_states() # TODO might cause problems + ps.remove_oxidation_states() # TODO might cause problems bulks = list( self.tasks.query( criteria={ - self.tasks.key: {'$in': list(bulk_ids)}, - 'output.formula_pretty': jsanitize(ps.composition.reduced_formula), + self.tasks.key: {"$in": list(bulk_ids)}, + "output.formula_pretty": jsanitize(ps.composition.reduced_formula), }, - properties=props + properties=props, ) - ) - + ) + pairs = [ (defect, bulk) for bulk in bulks @@ -649,22 +736,23 @@ def __match_defects_to_bulks(self, bulk_ids, defect_ids) -> list[tuple]: self.logger.debug(f"Found {len(pairs)} commensurate bulk/defect pairs") def key(x): - return -x[0]['output']['nsites'], x[0]['output']['output']['energy'] + return -x[0]["output"]["nsites"], x[0]["output"]["output"]["energy"] + def _run_type(x): - return 
x[0]['output']['calcs_reversed'][0]['run_type'] + return x[0]["output"]["calcs_reversed"][0]["run_type"] rt_pairs = {} for rt, group in groupby(pairs, key=_run_type): rt_pairs[rt] = [ - (defect[self.tasks.key], bulk[self.tasks.key]) + (defect[self.tasks.key], bulk[self.tasks.key]) for defect, bulk in sorted(list(group), key=key) - ] + ] # Return only the first (best) pair for each rt return {rt: lst[0] for rt, lst in rt_pairs.items()} - # TODO Checking for same dft settings (e.g. OT/diag) is a little cumbersome. - # Maybe, in future, task doc can be defined to have OT/diag as part of input summary + # TODO Checking for same dft settings (e.g. OT/diag) is a little cumbersome. + # Maybe, in future, task doc can be defined to have OT/diag as part of input summary # for fast querying def __are_bulk_and_defect_commensurate(self, b, d): """ @@ -673,41 +761,47 @@ def __are_bulk_and_defect_commensurate(self, b, d): Checks for: 1. Same run type. 2. Same pristine structures with no supercell reduction - 3. Compatible DFT settings + 3. 
Compatible DFT settings """ # TODO add settings sm = StructureMatcher( - ltol = 1e-3, - stol = 0.1, - angle_tol = 1, + ltol=1e-3, + stol=0.1, + angle_tol=1, primitive_cell=False, scale=True, attempt_supercell=False, allow_subset=False, comparator=ElementComparator(), ) - rtb = b.get('output').get('input').get('xc').split("+U")[0] - rtd = d.get('output').get('input').get('xc').split("+U")[0] + rtb = b.get("output").get("input").get("xc").split("+U")[0] + rtd = d.get("output").get("input").get("xc").split("+U")[0] baux = { - dat['element']: dat.get('auxiliary_basis') - for dat in b['output']['input']['atomic_kind_info']['atomic_kinds'].values() - } + dat["element"]: dat.get("auxiliary_basis") + for dat in b["output"]["input"]["atomic_kind_info"]["atomic_kinds"].values() + } daux = { - dat['element']: dat.get('auxiliary_basis') - for dat in d['output']['input']['atomic_kind_info']['atomic_kinds'].values() - } + dat["element"]: dat.get("auxiliary_basis") + for dat in d["output"]["input"]["atomic_kind_info"]["atomic_kinds"].values() + } - if rtb == rtd: - if sm.fit(self.__get_pristine_supercell(d), self.__get_pristine_supercell(b)): - cib = Cp2kInput.from_dict(b['output']['calcs_reversed'][0]['input']['cp2k_input']) - cid = Cp2kInput.from_dict(d['output']['calcs_reversed'][0]['input']['cp2k_input']) - bis_ot = cib.check("force_eval/dft/scf/ot") - dis_ot = cid.check("force_eval/dft/scf/ot") - if (bis_ot and dis_ot) or (not bis_ot and not dis_ot): - for el in baux: - if baux[el] != daux[el]: - return False - return True + if rtb == rtd: + if sm.fit( + self.__get_pristine_supercell(d), self.__get_pristine_supercell(b) + ): + cib = Cp2kInput.from_dict( + b["output"]["calcs_reversed"][0]["input"]["cp2k_input"] + ) + cid = Cp2kInput.from_dict( + d["output"]["calcs_reversed"][0]["input"]["cp2k_input"] + ) + bis_ot = cib.check("force_eval/dft/scf/ot") + dis_ot = cid.check("force_eval/dft/scf/ot") + if (bis_ot and dis_ot) or (not bis_ot and not dis_ot): + for el in baux: + if 
baux[el] != daux[el]: + return False + return True return False def __preprocess_bulk(self, task): @@ -720,10 +814,17 @@ def __preprocess_bulk(self, task): (3) If bulk is not a metal, electronic structure document must exist in the store """ - self.logger.debug("Preprocessing bulk task {}".format(task)) - t = next(self.tasks.query(criteria={self.tasks.key: task}, properties=['output.output.structure', 'mpid'])) + self.logger.debug(f"Preprocessing bulk task {task}") + t = next( + self.tasks.query( + criteria={self.tasks.key: task}, + properties=["output.output.structure", "mpid"], + ) + ) - struc = Structure.from_dict(t.get('output').get('output').get('structure')) # TODO specific to atomate2 + struc = Structure.from_dict( + t.get("output").get("output").get("structure") + ) # TODO specific to atomate2 mpid = self._get_mpid(struc) if not mpid: self.logger.debug(f"No material id found for bulk task {task}") @@ -732,18 +833,20 @@ def __preprocess_bulk(self, task): self.logger.debug(f"Material ID: {mpid}") elec = self.electronic_structure.query_one( - properties=['band_gap'], criteria={self.electronic_structure.key: mpid} - ) + properties=["band_gap"], criteria={self.electronic_structure.key: mpid} + ) if not elec: self.logger.debug(f"Electronic structure data not found for {mpid}") return False # TODO right now pulling dos from electronic structure, should just pull summary document - if elec['band_gap'] > 0: + if elec["band_gap"] > 0: diel = self.__get_dielectric(mpid) if not diel: - self.logger.info(f"Task {task} for {mpid} ({struc.composition.reduced_formula}) requires " - f"dielectric properties, but none found in dielectric store") + self.logger.info( + f"Task {task} for {mpid} ({struc.composition.reduced_formula}) requires " + f"dielectric properties, but none found in dielectric store" + ) return False return True @@ -762,7 +865,9 @@ def __get_pristine_supercell(self, task): new_lattice = Lattice(np.dot(scale_matrix, self._lattice.matrix)) """ d = 
unpack(query=self.defect_query, d=task) - out_structure = MontyDecoder().process_decoded(task['output']['output']['structure']) + out_structure = MontyDecoder().process_decoded( + task["output"]["output"]["structure"] + ) if d: defect = MontyDecoder().process_decoded(d) s = defect.structure.copy() @@ -785,12 +890,12 @@ class DefectiveMaterialBuilder(Builder): """ def __init__( - self, - defects: Store, - defect_thermos: Store, - materials: Store, - query: Optional[Dict] = None, - **kwargs, + self, + defects: Store, + defect_thermos: Store, + materials: Store, + query: Optional[Dict] = None, + **kwargs, ): """ Args: @@ -809,7 +914,9 @@ def __init__( self.timestamp = None self.kwargs = kwargs - super().__init__(sources=[defects, materials], targets=[defect_thermos], **kwargs) + super().__init__( + sources=[defects, materials], targets=[defect_thermos], **kwargs + ) def ensure_indexes(self): """ @@ -848,19 +955,23 @@ def get_items(self) -> Iterator[List[Dict]]: temp_query = dict(self.query) temp_query["state"] = "successful" - #unprocessed_defect_tasks = all_tasks - processed_defect_tasks + # unprocessed_defect_tasks = all_tasks - processed_defect_tasks all_docs = [doc for doc in self.defects.query(self.query)] self.logger.debug(f"Found {len(all_docs)} defect docs to process") def filterfunc(x): - if not self.materials.query_one(criteria={'material_id': x['material_id']}, properties=None): - self.logger.debug(f"No material with MPID={x['material_id']} in the material store") + if not self.materials.query_one( + criteria={"material_id": x["material_id"]}, properties=None + ): + self.logger.debug( + f"No material with MPID={x['material_id']} in the material store" + ) return False return True - defect = MontyDecoder().process_decoded(x['defect']) - for el in defect.element_changes: + defect = MontyDecoder().process_decoded(x["defect"]) + for el in defect.element_changes: if el not in self.thermo: self.logger.debug(f"No entry for {el} in Thermo Store") return False @@ 
-868,10 +979,8 @@ def filterfunc(x): return True for key, group in groupby( - filter( - filterfunc, - sorted(all_docs, key=lambda x: x['material_id']) - ), key=lambda x: x['material_id'] + filter(filterfunc, sorted(all_docs, key=lambda x: x["material_id"])), + key=lambda x: x["material_id"], ): try: yield list(group) @@ -884,7 +993,9 @@ def process_item(self, defects): """ defect_docs = [DefectDoc(**d) for d in defects] self.logger.info(f"Processing {len(defect_docs)} defects") - defect_thermo_doc = DefectiveMaterialDoc.from_docs(defect_docs, material_id=defect_docs[0].material_id) + defect_thermo_doc = DefectiveMaterialDoc.from_docs( + defect_docs, material_id=defect_docs[0].material_id + ) return defect_thermo_doc.dict() def update_targets(self, items): @@ -917,8 +1028,10 @@ def __get_electronic_structure(self, material_id): criteria={self.electronic_structures.key: material_id}, properties=None, ) - t_id = ElectronicStructureDoc(**dosdoc).dos.total['1'].task_id - dos = self.dos.query_one(criteria={'task_id': int(t_id)}, properties=None) #TODO MPID str/int issues + t_id = ElectronicStructureDoc(**dosdoc).dos.total["1"].task_id + dos = self.dos.query_one( + criteria={"task_id": int(t_id)}, properties=None + ) # TODO MPID str/int issues return dos def __get_materials(self, key) -> List: @@ -926,7 +1039,7 @@ def __get_materials(self, key) -> List: Given a group of DefectDocs, use the bulk material_id to get materials in the chemsys from the materials store. 
""" - bulk = self.materials.query_one(criteria={'material_id': key}, properties=None) + bulk = self.materials.query_one(criteria={"material_id": key}, properties=None) if not bulk: raise LookupError( f"The bulk material ({key}) for these defects cannot be found in the materials store" @@ -934,35 +1047,48 @@ def __get_materials(self, key) -> List: return MaterialsDoc(**bulk) def __get_thermos(self, composition) -> List: - return list(self.thermo.query(criteria={'elements': {"$size": 1}}, properties=None)) + return list( + self.thermo.query(criteria={"elements": {"$size": 1}}, properties=None) + ) class DefectValidator(Builder): - def __init__( - self, - tasks: Store, - defect_validation: Store, + self, + tasks: Store, + defect_validation: Store, chunk_size: int = 1000, - defect_query = 'output.additional_json.info.defect', - ): + defect_query="output.additional_json.info.defect", + ): self.tasks = tasks self.defect_validation = defect_validation self.chunk_size = chunk_size self.defect_query = defect_query - super().__init__(sources=tasks, targets=defect_validation, chunk_size=chunk_size) + super().__init__( + sources=tasks, targets=defect_validation, chunk_size=chunk_size + ) def get_items(self): self.logger.info("Getting tasks") - tids = list(self.tasks.query(criteria={self.defect_query: {"$exists": True}}, properties=[self.tasks.key])) + tids = list( + self.tasks.query( + criteria={self.defect_query: {"$exists": True}}, + properties=[self.tasks.key], + ) + ) self.logger.info(f"{len(tids)} to process") - for t in self.tasks.query(): - yield t - + yield from self.tasks.query() + def process_item(self, item): from atomate2.cp2k.schemas.defect import DefectValidation + tid = item[self.tasks.key] - return jsanitize(DefectValidation.process_task(item, tid).dict(), allow_bson=True, enum_values=True, strict=True) + return jsanitize( + DefectValidation.process_task(item, tid).dict(), + allow_bson=True, + enum_values=True, + strict=True, + ) def update_targets(self, items: 
List): """ @@ -994,8 +1120,9 @@ def unpack(query, d): return unpack(query.split("."), d) return unpack(query[1:], d.__getitem__(query.pop(0))) + # TODO SHOULD GO IN COMMON -def get_sg(struc, symprec=.01) -> int: +def get_sg(struc, symprec=0.01) -> int: """helper function to get spacegroup with a loose tolerance""" try: return struc.get_space_group_info(symprec=symprec)[1] diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 4e6120dfab..800f50dbf4 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -1,35 +1,30 @@ from datetime import datetime -from tokenize import group -from typing import ClassVar, TypeVar, Type, Dict, Tuple, Mapping, List, Callable -from pydantic import BaseModel, Field -from pydantic import validator -from itertools import groupby +from typing import Callable, ClassVar, Dict, List, Mapping, Tuple, Type, TypeVar import numpy as np - from monty.json import MontyDecoder from monty.tempfile import ScratchDir - -from pymatgen.core import Structure, Element -from pymatgen.symmetry.analyzer import SpacegroupAnalyzer -from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry -from pymatgen.io.cp2k.utils import get_truncated_coulomb_cutoff -from pymatgen.analysis.phase_diagram import PhaseDiagram -from pymatgen.analysis.defects.core import Defect, Adsorbate +from pydantic import BaseModel, Field +from pymatgen.analysis.defects.core import Adsorbate, Defect from pymatgen.analysis.defects.corrections.freysoldt import ( - get_freysoldt_correction, get_freysoldt2d_correction, + get_freysoldt_correction, ) +from pymatgen.analysis.defects.finder import DefectSiteFinder from pymatgen.analysis.defects.thermo import ( DefectEntry, DefectSiteFinder, - MultiFormationEnergyDiagram + MultiFormationEnergyDiagram, ) -from pymatgen.analysis.defects.finder import DefectSiteFinder +from pymatgen.analysis.phase_diagram import PhaseDiagram +from pymatgen.core import 
Element +from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry +from pymatgen.io.cp2k.utils import get_truncated_coulomb_cutoff +from pymatgen.symmetry.analyzer import SpacegroupAnalyzer from atomate2 import SETTINGS from atomate2.common.schemas.structure import StructureMetadata -from atomate2.cp2k.schemas.calc_types.enums import CalcType, TaskType, RunType +from atomate2.cp2k.schemas.calc_types.enums import RunType from atomate2.cp2k.schemas.task import TaskDocument __all__ = ["DefectDoc"] @@ -38,6 +33,7 @@ S = TypeVar("S", bound="DefectiveMaterialDoc") V = TypeVar("V", bound="DefectValidation") + class DefectDoc(StructureMetadata): """ A document used to represent a single defect. e.g. a O vacancy with a -2 charge. @@ -57,8 +53,12 @@ class DefectDoc(StructureMetadata): material_id: str = Field( None, description="Unique material ID for the bulk material" ) # TODO Change to MPID - defect_ids: Mapping[RunType, str] = Field(None, description="Map run types of defect entry to task id") - bulk_ids: Mapping[RunType, str] = Field(None, description="Map run types of bulk entry to task id") + defect_ids: Mapping[RunType, str] = Field( + None, description="Map run types of defect entry to task id" + ) + bulk_ids: Mapping[RunType, str] = Field( + None, description="Map run types of bulk entry to task id" + ) task_ids: List[str] = Field( None, description="All defect task ids used in creating this defect doc." 
) @@ -81,9 +81,13 @@ class DefectDoc(StructureMetadata): default_factory=datetime.utcnow, ) metadata: Dict = Field(None, description="Metadata for this defect") - valid: Mapping[RunType, Dict] = Field(None, description="Whether each run type has a valid entry") + valid: Mapping[RunType, Dict] = Field( + None, description="Whether each run type has a valid entry" + ) - def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="task_id"): + def update_one( + self, defect_task, bulk_task, dielectric, query="defect", key="task_id" + ): # Metadata self.last_updated = datetime.now() @@ -92,25 +96,25 @@ def update_one(self, defect_task, bulk_task, dielectric, query="defect", key="ta defect = self.get_defect_from_task(query=query, task=defect_task) d_id = defect_task[key] b_id = bulk_task[key] - defect_task = TaskDocument(**defect_task['output']) - bulk_task = TaskDocument(**bulk_task['output']) # TODO Atomate2Store + defect_task = TaskDocument(**defect_task["output"]) + bulk_task = TaskDocument(**bulk_task["output"]) # TODO Atomate2Store defect_entry, valid = self.get_defect_entry_from_tasks( defect_task, bulk_task, defect, dielectric ) bulk_entry = self.get_bulk_entry_from_task(bulk_task) rt = defect_task.calcs_reversed[0].run_type - tt = defect_task.calcs_reversed[0].task_type - ct = defect_task.calcs_reversed[0].calc_type - current_largest_sc = self.defect_entries[rt].sc_entry.composition.num_atoms if rt in self.defect_entries else 0 + defect_task.calcs_reversed[0].task_type + defect_task.calcs_reversed[0].calc_type + current_largest_sc = ( + self.defect_entries[rt].sc_entry.composition.num_atoms + if rt in self.defect_entries + else 0 + ) potential_largest_sc = defect_entry.sc_entry.composition.num_atoms - if ( - potential_largest_sc > current_largest_sc - or ( - potential_largest_sc == current_largest_sc - and defect_entry.sc_entry.energy - < self.defect_entries[rt].sc_entry.energy - ) + if potential_largest_sc > current_largest_sc or ( + 
potential_largest_sc == current_largest_sc + and defect_entry.sc_entry.energy < self.defect_entries[rt].sc_entry.energy ): self.defect_entries[rt] = defect_entry self.defect_ids[rt] = d_id @@ -135,7 +139,15 @@ def update_many( ) @classmethod - def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", key="task_id", material_id=None) -> T: + def from_tasks( + cls: Type[T], + defect_task, + bulk_task, + dielectric, + query="defect", + key="task_id", + material_id=None, + ) -> T: """ The standard way to create this document. Args: @@ -147,16 +159,18 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", defect = cls.get_defect_from_task(query=query, task=defect_task) defect_task = TaskDocument(**defect_task["output"]) bulk_task_id = bulk_task[key] - bulk_task = TaskDocument(**bulk_task['output']) + bulk_task = TaskDocument(**bulk_task["output"]) # Metadata - last_updated = datetime.now() - created_at = datetime.now() + last_updated = datetime.now() + created_at = datetime.now() rt = defect_task.calcs_reversed[0].run_type metadata = {} - defect_entry, valid = cls.get_defect_entry_from_tasks(defect_task, bulk_task, defect, dielectric) + defect_entry, valid = cls.get_defect_entry_from_tasks( + defect_task, bulk_task, defect, dielectric + ) valid = {rt: valid} defect_entries = {rt: defect_entry} bulk_entries = {rt: cls.get_bulk_entry_from_task(bulk_task)} @@ -187,7 +201,9 @@ def from_tasks(cls: Type[T], defect_task, bulk_task, dielectric, query="defect", "metadata": metadata, "valid": valid, } - prim = SpacegroupAnalyzer(defect_entries[rt].defect.structure).get_primitive_standard_structure() + prim = SpacegroupAnalyzer( + defect_entries[rt].defect.structure + ).get_primitive_standard_structure() data.update(StructureMetadata.from_structure(prim).dict()) return cls(**data) @@ -229,7 +245,7 @@ def get_defect_entry_from_tasks( sc_defect_frac_coords=parameters["defect_frac_sc_coords"], corrections=corrections, ) - 
parameters['defect'] = defect + parameters["defect"] = defect valid = DefectValidation().process_entry(parameters) return defect_entry, valid @@ -255,7 +271,9 @@ def get_freysoldt_correction(cls, parameters) -> Tuple[Dict, Dict]: if parameters["charge_state"] and not parameters.get("2d"): es, pot, met = get_freysoldt_correction( q=parameters["charge_state"], - dielectric=np.array(parameters["dielectric"]), # TODO pmg-analysis expects np array here + dielectric=np.array( + parameters["dielectric"] + ), # TODO pmg-analysis expects np array here defect_locpot=parameters["defect_v_hartree"], bulk_locpot=parameters["bulk_v_hartree"], defect_frac_coords=parameters["defect_frac_sc_coords"], @@ -268,7 +286,7 @@ def get_freysoldt2d_correction(cls, parameters): from pymatgen.io.vasp.outputs import VolumetricData as VaspVolumetricData - if False: #parameters["charge_state"] and parameters.get("2d"): + if parameters["charge_state"] and parameters.get("2d"): eps_parallel = ( parameters["dielectric"][0][0] + parameters["dielectric"][1][1] ) / 2 @@ -354,19 +372,22 @@ def get_parameters_from_tasks( return parameters + class DefectValidation(BaseModel): """Validate a task document for defect processing""" MAX_ATOMIC_RELAXATION: float = Field( - 0.02, - description="Threshold for the mean absolute displacement of atoms outside a defect's radius of isolution" - ) + 0.02, + description="Threshold for the mean absolute displacement of atoms outside a defect's radius of isolution", + ) - DESORPTION_DISTANCE: float = Field(3, description="Distance to consider adsorbate as desorbed") + DESORPTION_DISTANCE: float = Field( + 3, description="Distance to consider adsorbate as desorbed" + ) def process_entry(self, parameters) -> V: """Gets a dictionary of {validator: result}. 
Result true for passing, false for failing.""" - v = {} + v = {} v.update(self._atomic_relaxation(parameters)) v.update(self._desorption(parameters)) return v @@ -375,10 +396,16 @@ def _atomic_relaxation(self, parameters): """Returns false if the mean displacement outside the isolation radius is greater than the cutoff""" in_struc = parameters["initial_defect_structure"] out_struc = parameters["final_defect_structure"] - sites = out_struc.get_sites_in_sphere(parameters['defect_frac_sc_coords'], get_truncated_coulomb_cutoff(in_struc), include_index=True) + sites = out_struc.get_sites_in_sphere( + parameters["defect_frac_sc_coords"], + get_truncated_coulomb_cutoff(in_struc), + include_index=True, + ) inside_sphere = [site.index for site in sites] outside_sphere = [i for i in range(len(out_struc)) if i not in inside_sphere] - distances = np.array([site.distance(in_struc[i]) for i, site in enumerate(out_struc)]) + distances = np.array( + [site.distance(in_struc[i]) for i, site in enumerate(out_struc)] + ) distances_outside = distances[outside_sphere] if np.mean(distances_outside) > self.MAX_ATOMIC_RELAXATION: return {"atomic_relaxation": False} @@ -386,16 +413,24 @@ def _atomic_relaxation(self, parameters): def _desorption(self, parameters): """Returns false if any atom is too far from all other atoms.""" - if isinstance(parameters['defect'], Adsorbate): + if isinstance(parameters["defect"], Adsorbate): out_struc = parameters["final_defect_structure"] - defect_site = out_struc.get_sites_in_sphere( - out_struc.lattice.get_cartesian_coords(parameters['defect_frac_sc_coords']), - 0.1, include_index=True - )[0] - distances = [defect_site.distance(site) for i, site in enumerate(out_struc) if i != defect_site.index] + defect_site = out_struc.get_sites_in_sphere( + out_struc.lattice.get_cartesian_coords( + parameters["defect_frac_sc_coords"] + ), + 0.1, + include_index=True, + )[0] + distances = [ + defect_site.distance(site) + for i, site in enumerate(out_struc) + if i != 
defect_site.index + ] if all(d > self.DESORPTION_DISTANCE for d in distances): - return {'desorption': False} - return {'desorption': True} + return {"desorption": False} + return {"desorption": True} + class DefectiveMaterialDoc(StructureMetadata): """Document containing all / many defect tasks for a single material ID""" @@ -431,7 +466,7 @@ def from_docs(cls: Type["S"], defect_docs: DefectDoc, material_id: str) -> S: @property def element_set(self) -> set: - els = set(Element(e) for e in self.defect_docs[0].defect.structure.symbol_set) + els = {Element(e) for e in self.defect_docs[0].defect.structure.symbol_set} for d in self.defect_docs: els = els | set(d.defect.element_changes.keys()) return els From bc3e3775cb4f9a96e07d387ef3bddeacfd8c729d Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 20 Jan 2023 11:48:03 -0800 Subject: [PATCH 45/50] Temporary v_hartree solution for 2d --- src/atomate2/cp2k/schemas/defect.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 800f50dbf4..e2a2466a36 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -294,6 +294,11 @@ def get_freysoldt2d_correction(cls, parameters): dielectric = (eps_parallel - 1) / (1 - 1 / eps_perp) with ScratchDir("."): + # TODO builder ensure structures are commensurate, but the sxdefectalign2d requires exact match + # between structures (to about 6 digits of precision). 
No good solution right now, + # Just setting def lattice with bulk lattice, which will shift the locpot data + parameters["defect_v_hartree"].structure.lattice = parameters["bulk_v_hartree"].structure + lref = VaspVolumetricData( structure=parameters["bulk_v_hartree"].structure, data=parameters["bulk_v_hartree"].data, From e41ee3df267b33b510a7a549bf4f32a6fa515b72 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 20 Jan 2023 11:48:22 -0800 Subject: [PATCH 46/50] lint --- src/atomate2/cp2k/schemas/defect.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index e2a2466a36..8312a1c9dc 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -297,7 +297,9 @@ def get_freysoldt2d_correction(cls, parameters): # TODO builder ensure structures are commensurate, but the sxdefectalign2d requires exact match # between structures (to about 6 digits of precision). No good solution right now, # Just setting def lattice with bulk lattice, which will shift the locpot data - parameters["defect_v_hartree"].structure.lattice = parameters["bulk_v_hartree"].structure + parameters["defect_v_hartree"].structure.lattice = parameters[ + "bulk_v_hartree" + ].structure lref = VaspVolumetricData( structure=parameters["bulk_v_hartree"].structure, From 7a392347da5c1d772f307e8f3bc366359336a38e Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 20 Jan 2023 15:12:18 -0800 Subject: [PATCH 47/50] updates --- src/atomate2/cp2k/jobs/defect.py | 30 ++++---------------- src/atomate2/cp2k/sets/defect.py | 48 +++++++++++++++++--------------- 2 files changed, 30 insertions(+), 48 deletions(-) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 39a4542679..37af4f658c 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -19,7 +19,6 @@ DefectHybridRelaxSetGenerator, DefectHybridStaticSetGenerator, 
DefectRelaxSetGenerator, - DefectSetGenerator, DefectStaticSetGenerator, ) @@ -96,7 +95,7 @@ def make(self, defect: Defect | Structure, prev_cp2k_dir: str | Path | None = No class DefectStaticMaker(BaseDefectMaker): name: str = field(default="defect static") - input_set_generator: DefectSetGenerator = field( + input_set_generator: Cp2kInputGenerator = field( default_factory=DefectStaticSetGenerator ) @@ -148,23 +147,16 @@ class DefectHybridStaticMaker(BaseDefectMaker): name: str = field(default="defect hybrid static") hybrid_functional: str = "PBE0" - input_set_generator: DefectSetGenerator = field( + input_set_generator: Cp2kInputGenerator = field( default_factory=DefectHybridStaticSetGenerator ) - def __post_init__(self): - """Update the input settings with hybrid_functional attribute""" - self.input_set_generator.user_input_settings.update( - {"activate_hybrid": {"hybrid_functional": self.hybrid_functional}} - ) - - @dataclass class DefectHybridRelaxMaker(BaseDefectMaker): name: str = field(default="defect hybrid relax") hybrid_functional: str = "PBE0" - input_set_generator: DefectSetGenerator = field( + input_set_generator: Cp2kInputGenerator = field( default_factory=DefectHybridRelaxSetGenerator ) transformations: tuple[str, ...] = field( @@ -174,19 +166,13 @@ class DefectHybridRelaxMaker(BaseDefectMaker): default=({"distance": 0.01},) ) - def __post_init__(self): - """Update the input settings with hybrid_functional attribute""" - self.input_set_generator.user_input_settings.update( - {"activate_hybrid": {"hybrid_functional": self.hybrid_functional}} - ) - @dataclass class DefectHybridCellOptMaker(BaseDefectMaker): name: str = field(default="defect hybrid cell opt") hybrid_functional: str = "PBE0" - input_set_generator: DefectSetGenerator = field( + input_set_generator: Cp2kInputGenerator = field( default_factory=DefectHybridCellOptSetGenerator ) transformations: tuple[str, ...] 
= field( @@ -194,10 +180,4 @@ class DefectHybridCellOptMaker(BaseDefectMaker): ) transformation_params: tuple[dict, ...] | None = field( default=({"distance": 0.01},) - ) - - def __post_init__(self): - """Update the input settings with hybrid_functional attribute""" - self.input_set_generator.user_input_settings.update( - {"activate_hybrid": {"hybrid_functional": self.hybrid_functional}} - ) + ) \ No newline at end of file diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py index 33f06e5019..3aee7ed977 100644 --- a/src/atomate2/cp2k/sets/defect.py +++ b/src/atomate2/cp2k/sets/defect.py @@ -6,6 +6,7 @@ from dataclasses import dataclass from pymatgen.core import Structure +from pymatgen.io.cp2k.utils import get_truncated_coulomb_cutoff from atomate2.cp2k.sets.base import Cp2kInputGenerator from atomate2.cp2k.sets.core import ( @@ -16,49 +17,50 @@ DEFECT_SET_UPDATES = {'print_v_hartree': True, "print_pdos": True, "print_dos": True} -@dataclass -class DefectSetGenerator(Cp2kInputGenerator): - """ - Base input set generator for defect calculations. Adds printing of the - partial density of states and the electrostatic potential. 
- """ - - def get_input_updates(self, structure: Structure, *args, **kwargs) -> dict: - """Get input updates""" - return {'print_v_hartree': True, "print_pdos": True, "print_dos": True} - @dataclass class DefectStaticSetGenerator(StaticSetGenerator): - def __post_init__(self): - self.user_input_settings.update(DEFECT_SET_UPDATES) + def get_input_updates(self, *args, **kwargs) -> dict: + updates = super().get_input_updates(*args, **kwargs) + updates.update(DEFECT_SET_UPDATES) + return updates @dataclass class DefectRelaxSetGenerator(RelaxSetGenerator): - def __post_init__(self): - self.user_input_settings.update(DEFECT_SET_UPDATES) + def get_input_updates(self, *args, **kwargs) -> dict: + updates = super().get_input_updates(*args, **kwargs) + updates.update(DEFECT_SET_UPDATES) + return updates @dataclass class DefectCellOptSetGenerator(CellOptSetGenerator): - def __post_init__(self): - self.user_input_settings.update(DEFECT_SET_UPDATES) + def get_input_updates(self, *args, **kwargs) -> dict: + updates = super().get_input_updates(*args, **kwargs) + updates.update(DEFECT_SET_UPDATES) + return updates @dataclass class DefectHybridStaticSetGenerator(HybridStaticSetGenerator): - def __post_init__(self): - self.user_input_settings.update(DEFECT_SET_UPDATES) + def get_input_updates(self, *args, **kwargs) -> dict: + updates = super().get_input_updates(*args, **kwargs) + updates.update(DEFECT_SET_UPDATES) + return updates @dataclass class DefectHybridRelaxSetGenerator(HybridRelaxSetGenerator): - def __post_init__(self): - self.user_input_settings.update(DEFECT_SET_UPDATES) + def get_input_updates(self, *args, **kwargs) -> dict: + updates = super().get_input_updates(*args, **kwargs) + updates.update(DEFECT_SET_UPDATES) + return updates @dataclass class DefectHybridCellOptSetGenerator(HybridCellOptSetGenerator): - def __post_init__(self): - self.user_input_settings.update(DEFECT_SET_UPDATES) \ No newline at end of file + def get_input_updates(self, *args, **kwargs) -> dict: + 
updates = super().get_input_updates(*args, **kwargs) + updates.update(DEFECT_SET_UPDATES) + return updates \ No newline at end of file From df484551ae8e9934133db32b4cf49c88c71e8162 Mon Sep 17 00:00:00 2001 From: nwinner Date: Fri, 20 Jan 2023 15:12:32 -0800 Subject: [PATCH 48/50] lint --- src/atomate2/cp2k/jobs/defect.py | 3 ++- src/atomate2/cp2k/sets/defect.py | 29 +++++++++++++++-------------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/atomate2/cp2k/jobs/defect.py b/src/atomate2/cp2k/jobs/defect.py index 37af4f658c..fcaeec9817 100644 --- a/src/atomate2/cp2k/jobs/defect.py +++ b/src/atomate2/cp2k/jobs/defect.py @@ -151,6 +151,7 @@ class DefectHybridStaticMaker(BaseDefectMaker): default_factory=DefectHybridStaticSetGenerator ) + @dataclass class DefectHybridRelaxMaker(BaseDefectMaker): @@ -180,4 +181,4 @@ class DefectHybridCellOptMaker(BaseDefectMaker): ) transformation_params: tuple[dict, ...] | None = field( default=({"distance": 0.01},) - ) \ No newline at end of file + ) diff --git a/src/atomate2/cp2k/sets/defect.py b/src/atomate2/cp2k/sets/defect.py index 3aee7ed977..0ac4200512 100644 --- a/src/atomate2/cp2k/sets/defect.py +++ b/src/atomate2/cp2k/sets/defect.py @@ -5,62 +5,63 @@ import logging from dataclasses import dataclass -from pymatgen.core import Structure -from pymatgen.io.cp2k.utils import get_truncated_coulomb_cutoff - -from atomate2.cp2k.sets.base import Cp2kInputGenerator from atomate2.cp2k.sets.core import ( - StaticSetGenerator, RelaxSetGenerator, CellOptSetGenerator, - HybridStaticSetGenerator, HybridRelaxSetGenerator, HybridCellOptSetGenerator + CellOptSetGenerator, + HybridCellOptSetGenerator, + HybridRelaxSetGenerator, + HybridStaticSetGenerator, + RelaxSetGenerator, + StaticSetGenerator, ) + logger = logging.getLogger(__name__) -DEFECT_SET_UPDATES = {'print_v_hartree': True, "print_pdos": True, "print_dos": True} +DEFECT_SET_UPDATES = {"print_v_hartree": True, "print_pdos": True, "print_dos": True} + @dataclass class 
DefectStaticSetGenerator(StaticSetGenerator): - def get_input_updates(self, *args, **kwargs) -> dict: updates = super().get_input_updates(*args, **kwargs) updates.update(DEFECT_SET_UPDATES) return updates + @dataclass class DefectRelaxSetGenerator(RelaxSetGenerator): - def get_input_updates(self, *args, **kwargs) -> dict: updates = super().get_input_updates(*args, **kwargs) updates.update(DEFECT_SET_UPDATES) return updates + @dataclass class DefectCellOptSetGenerator(CellOptSetGenerator): - def get_input_updates(self, *args, **kwargs) -> dict: updates = super().get_input_updates(*args, **kwargs) updates.update(DEFECT_SET_UPDATES) return updates + @dataclass class DefectHybridStaticSetGenerator(HybridStaticSetGenerator): - def get_input_updates(self, *args, **kwargs) -> dict: updates = super().get_input_updates(*args, **kwargs) updates.update(DEFECT_SET_UPDATES) return updates + @dataclass class DefectHybridRelaxSetGenerator(HybridRelaxSetGenerator): - def get_input_updates(self, *args, **kwargs) -> dict: updates = super().get_input_updates(*args, **kwargs) updates.update(DEFECT_SET_UPDATES) return updates + @dataclass class DefectHybridCellOptSetGenerator(HybridCellOptSetGenerator): - def get_input_updates(self, *args, **kwargs) -> dict: updates = super().get_input_updates(*args, **kwargs) updates.update(DEFECT_SET_UPDATES) - return updates \ No newline at end of file + return updates From d17ec26425c239335f4230856eaf0df26b770d32 Mon Sep 17 00:00:00 2001 From: nwinner Date: Tue, 7 Mar 2023 10:24:41 -0800 Subject: [PATCH 49/50] defects --- src/atomate2/cp2k/flows/defect.py | 25 ++++++++------- src/atomate2/cp2k/schemas/defect.py | 47 +++++++++++++++++------------ src/atomate2/cp2k/sets/base.py | 2 +- 3 files changed, 42 insertions(+), 32 deletions(-) diff --git a/src/atomate2/cp2k/flows/defect.py b/src/atomate2/cp2k/flows/defect.py index 1faefcf425..adc705037f 100644 --- a/src/atomate2/cp2k/flows/defect.py +++ b/src/atomate2/cp2k/flows/defect.py @@ -67,7 +67,8 @@ 
class DefectHybridCellOptFlowMaker(HybridCellOptFlowMaker): ) -# TODO close to being able to put this in common. Just need a switch that decides which core flow/job to use based on software +# TODO close to being able to put this in common. Just need a switch that decides +# which core flow/job to use based on software @dataclass class FormationEnergyMaker(Maker): """ @@ -85,12 +86,14 @@ class FormationEnergyMaker(Maker): initialize_with_pbe: If hybrid functional is provided, this enables the use of a static PBE run before the hybrid calc to provide a starting guess for CP2K HF module. - supercell_matrix: If provided, the defect supercell wil lbe created + supercell_matrix: If provided, the defect supercell will be created by this 3x3 matrix. Else other parameters will be used. max_atoms: Maximum number of atoms allowed in the supercell. min_atoms: Minimum number of atoms allowed in the supercell. - min_length: Minimum length of the smallest supercell lattice vector. - force_diagonal: If True, return a transformation with a diagonal transformation matrix. + min_length: Minimum length of the smallest supercell lattice + vector. + force_diagonal: If True, return a transformation with a + diagonal transformation matrix. """ name: str = "defect formation energy" @@ -178,8 +181,8 @@ def make( flow: Flow The workflow to calculate the formation energy diagram. 
""" - jobs, defect_outputs = [], {} - defect_outputs = { + jobs = [] + defect_outputs: dict[str, dict[int, tuple[Defect, OutputReference]]] = { defect.name: {} for defect in defects } # TODO DEFECT NAMES ARE NOT UNIQUE HASHES bulk_structure = ensure_defects_same_structure(defects) @@ -205,8 +208,8 @@ def make( jobs.append(bulk_job) for defect in defects: - if charges == True: - chgs = defect.get_charge_states() if charges else [0] + if charges is True: + chgs = defect.get_charge_states() else: chgs = charges if charges else [0] for charge in chgs: @@ -251,7 +254,7 @@ def collect_defect_outputs( dielectric: The dielectric constant used to construct the formation energy diagram. """ - outputs = {"results": {}} + outputs: dict[str, dict[str, dict]] = {"results": {}} if not dielectric: logger.warn( "Dielectric constant not provided. Defect formation energies will be uncorrected." @@ -261,7 +264,7 @@ def collect_defect_outputs( fnv_plots = {} for charge, defect_and_output in defects_with_charges.items(): defect, output_with_charge = defect_and_output - logger.info(f"Processing {defect.name} with charge state={charge}") + logger.info(f"Processing {defect_name} with charge state={charge}") defect_entry = DefectEntry( defect=defect, charge_state=charge, @@ -281,7 +284,7 @@ def collect_defect_outputs( dielectric=dielectric, ) fnv_plots[int(charge)] = plot_data - outputs["results"][defect.name] = dict( + outputs["results"][defect_name] = dict( defect=defect, defect_entries=defect_entries, fnv_plots=fnv_plots ) return outputs diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 8312a1c9dc..112341dc60 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Callable, ClassVar, Dict, List, Mapping, Tuple, Type, TypeVar +from typing import Callable, ClassVar, Dict, List, Mapping, Set, Tuple, Type, TypeVar import numpy as np from monty.json 
import MontyDecoder @@ -11,11 +11,7 @@ get_freysoldt_correction, ) from pymatgen.analysis.defects.finder import DefectSiteFinder -from pymatgen.analysis.defects.thermo import ( - DefectEntry, - DefectSiteFinder, - MultiFormationEnergyDiagram, -) +from pymatgen.analysis.defects.thermo import DefectEntry, MultiFormationEnergyDiagram from pymatgen.analysis.phase_diagram import PhaseDiagram from pymatgen.core import Element from pymatgen.entries.computed_entries import ComputedEntry, ComputedStructureEntry @@ -25,7 +21,7 @@ from atomate2 import SETTINGS from atomate2.common.schemas.structure import StructureMetadata from atomate2.cp2k.schemas.calc_types.enums import RunType -from atomate2.cp2k.schemas.task import TaskDocument +from atomate2.cp2k.schemas.task import Cp2kObject, TaskDocument __all__ = ["DefectDoc"] @@ -221,8 +217,8 @@ def get_defect_entry_from_tasks( Args: defect_task: task dict for the defect calculation bulk_task: task dict for the bulk calculation - dielectric: Dielectric doc if the defect is charged. If not present, no dielectric - corrections will be performed, even if the defect is charged. + dielectric: Dielectric doc if the defect is charged. If not present, no + dielectric corrections will be performed, even if the defect is charged. query: Mongo-style query to retrieve the defect object from the defect task """ parameters = cls.get_parameters_from_tasks( @@ -294,12 +290,14 @@ def get_freysoldt2d_correction(cls, parameters): dielectric = (eps_parallel - 1) / (1 - 1 / eps_perp) with ScratchDir("."): - # TODO builder ensure structures are commensurate, but the sxdefectalign2d requires exact match - # between structures (to about 6 digits of precision). No good solution right now, - # Just setting def lattice with bulk lattice, which will shift the locpot data + # TODO builder ensure structures are commensurate, but the + # sxdefectalign2d requires exact match between structures + # (to about 6 digits of precision). 
No good solution right now, + # Just setting def lattice with bulk lattice, which will shift + # the locpot data parameters["defect_v_hartree"].structure.lattice = parameters[ "bulk_v_hartree" - ].structure + ].structure.lattice lref = VaspVolumetricData( structure=parameters["bulk_v_hartree"].structure, @@ -337,7 +335,8 @@ def get_parameters_from_tasks( cls, defect_task: TaskDocument, bulk_task: TaskDocument ): """ - Get parameters necessary to create a defect entry from defect and bulk task dicts + Get parameters necessary to create a defect entry from defect and bulk + task dicts Args: defect_task: task dict for the defect calculation bulk_task: task dict for the bulk calculation @@ -367,10 +366,10 @@ def get_parameters_from_tasks( "charge_state": defect_task.output.structure.charge, "defect_frac_sc_coords": defect_frac_sc_coords, "defect_v_hartree": MontyDecoder().process_decoded( - defect_task.cp2k_objects["v_hartree"] + defect_task.cp2k_objects[Cp2kObject.v_hartree] # type: ignore ), # TODO CP2K spec name "bulk_v_hartree": MontyDecoder().process_decoded( - bulk_task.cp2k_objects["v_hartree"] + bulk_task.cp2k_objects[Cp2kObject.v_hartree] # type: ignore ), # TODO CP2K spec name } @@ -392,15 +391,21 @@ class DefectValidation(BaseModel): 3, description="Distance to consider adsorbate as desorbed" ) - def process_entry(self, parameters) -> V: - """Gets a dictionary of {validator: result}. Result true for passing, false for failing.""" + def process_entry(self, parameters) -> Dict: + """ + Gets a dictionary of {validator: result}. Result true for passing, + false for failing. + """ v = {} v.update(self._atomic_relaxation(parameters)) v.update(self._desorption(parameters)) return v def _atomic_relaxation(self, parameters): - """Returns false if the mean displacement outside the isolation radius is greater than the cutoff""" + """ + Returns false if the mean displacement outside the isolation radius is greater + than the cutoff. 
+ """ in_struc = parameters["initial_defect_structure"] out_struc = parameters["final_defect_structure"] sites = out_struc.get_sites_in_sphere( @@ -487,10 +492,12 @@ def get_formation_energy_diagram( ) -> MultiFormationEnergyDiagram: filters = filters if filters else [lambda _: True] - els = set() + els: Set[Element] = set() defect_entries = [] bulk_entries = [] vbms = [] + if isinstance(run_type, str): + run_type = RunType(run_type) for doc in filter(lambda x: all(f(x) for f in filters), self.defect_docs): if doc.defect_entries.get(run_type): els = els | set(doc.defect.element_changes.keys()) diff --git a/src/atomate2/cp2k/sets/base.py b/src/atomate2/cp2k/sets/base.py index e7a685dbb2..8ac5d83f75 100644 --- a/src/atomate2/cp2k/sets/base.py +++ b/src/atomate2/cp2k/sets/base.py @@ -189,7 +189,7 @@ class Cp2kInputGenerator(InputGenerator): user_input_settings: dict = field(default_factory=dict) user_kpoints_settings: dict | Kpoints = field(default_factory=dict) auto_kspacing: bool = True - use_structure_charge: bool = False + use_structure_charge: bool = True sort_structure: bool = True symprec: float = SETTINGS.SYMPREC force_gamma: bool = False From e856206f393849205e578c40cafcf29844029bba Mon Sep 17 00:00:00 2001 From: nwinner Date: Sun, 26 Mar 2023 16:15:07 -0700 Subject: [PATCH 50/50] freysoldt --- src/atomate2/cp2k/schemas/defect.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/atomate2/cp2k/schemas/defect.py b/src/atomate2/cp2k/schemas/defect.py index 112341dc60..e78d01da96 100644 --- a/src/atomate2/cp2k/schemas/defect.py +++ b/src/atomate2/cp2k/schemas/defect.py @@ -176,7 +176,7 @@ def from_tasks( "intrinsic" if all( el in defect_entries[rt].defect.structure.composition - for el in defect_entries[rt].defect.element_changes.keys() + for el in defect_entries[rt].defect.element_changes ) else "extrinsic" ) @@ -265,7 +265,7 @@ def get_correction_from_parameters(cls, parameters) -> Tuple[Dict, Dict]: @classmethod def 
get_freysoldt_correction(cls, parameters) -> Tuple[Dict, Dict]: if parameters["charge_state"] and not parameters.get("2d"): - es, pot, met = get_freysoldt_correction( + result = get_freysoldt_correction( q=parameters["charge_state"], dielectric=np.array( parameters["dielectric"] @@ -274,7 +274,7 @@ def get_freysoldt_correction(cls, parameters) -> Tuple[Dict, Dict]: bulk_locpot=parameters["bulk_v_hartree"], defect_frac_coords=parameters["defect_frac_sc_coords"], ) - return {"electrostatic": es, "potential_alignment": pot}, met + return {"freysoldt": result.correction_energy}, result.metadata return {}, {} @classmethod @@ -310,7 +310,7 @@ def get_freysoldt2d_correction(cls, parameters): lref.write_file("LOCPOT.ref") ldef.write_file("LOCPOT.def") - es, pot, met = get_freysoldt2d_correction( + result = get_freysoldt2d_correction( q=parameters["charge_state"], dielectric=dielectric, defect_locpot=ldef, @@ -319,7 +319,7 @@ def get_freysoldt2d_correction(cls, parameters): energy_cutoff=520, slab_buffer=2, ) - return {"electrostatic": es, "potential_alignment": pot}, met + return {"freysoldt": result.correction_energy}, result.metadata return {}, {} @classmethod @@ -339,7 +339,7 @@ def get_parameters_from_tasks( task dicts Args: defect_task: task dict for the defect calculation - bulk_task: task dict for the bulk calculation + bulk_task: task dict for the bulk calculation. 
""" final_defect_structure = defect_task.structure final_bulk_structure = bulk_task.structure @@ -380,7 +380,7 @@ def get_parameters_from_tasks( class DefectValidation(BaseModel): - """Validate a task document for defect processing""" + """Validate a task document for defect processing.""" MAX_ATOMIC_RELAXATION: float = Field( 0.02, @@ -445,7 +445,7 @@ def _desorption(self, parameters): class DefectiveMaterialDoc(StructureMetadata): - """Document containing all / many defect tasks for a single material ID""" + """Document containing all / many defect tasks for a single material ID.""" property_name: ClassVar[str] = "defective material"