Source code for lobsterpy.featurize.utils

# Copyright (c) lobsterpy development team
# Distributed under the terms of a BSD 3-Clause "New" or "Revised" License

"""This package provides the modules for featurizing Lobster data ready for ML studies."""

from __future__ import annotations

from pathlib import Path
from typing import NamedTuple
from warnings import warn

import numpy as np
from mendeleev import element
from monty.os.path import zpath

# Warning text emitted when the structure has to be read from POSCAR.
# The sentences are joined with single spaces into one message.
POSCAR_WARNING = " ".join(
    (
        "Falling back to POSCAR, translations between individual atoms may differ from LOBSTER outputs.",
        "Please note that translations in the LOBSTER outputs are consistent with CONTCAR",
        "(also with POSCAR.lobster.vasp or POSCAR.vasp : written by LOBSTER >=v5).",
    )
)


class CoxxFingerprint(NamedTuple):
    """
    Immutable record holding the data of one Coxx fingerprint.

    The fields are positional, in the order they are documented here.

    :param energies: energy values the fingerprint is defined on.
    :param coxx: Coxx values, one per entry of ``energies``.
    :param fp_type: type of the Coxx fingerprint.
    :param spin_type: spin type the fingerprint refers to.
    :param n_bins: number of bins used for the fingerprint.
    :param bin_width: width of a single bin of the fingerprint.
    """

    energies: np.ndarray
    coxx: np.ndarray
    fp_type: str
    spin_type: str
    n_bins: int
    bin_width: float
def get_file_paths(
    path_to_lobster_calc: str | Path = "",
    requested_files: list[str] | None = None,
    use_lso_dos: bool = True,
) -> dict:
    """
    Get file paths for LobsterPy featurizations, raise Exception if not all of requested paths exist.

    Every requested item is either one of the known keys (e.g. ``"cohpcar"``,
    ``"structure"``), which is translated to its default file name, or any other
    string, which is used as a literal file name inside the calc directory.
    For each file the plain path is tried first; only if it is absent is the
    path returned by ``zpath`` (zipped variant) checked.

    :param path_to_lobster_calc: path to root LOBSTER calc directory
    :param requested_files: files to return paths for.
        ``None`` (default) is treated like an empty list.
    :param use_lso_dos: solely required for BatchDosFeaturizer.
        Will force featurizer to use DOSCAR.LSO.lobster instead of DOSCAR.lobster.

    :return: dict that assigns each item of requested_files its path
    :raises Exception: if at least one requested file could not be found;
        the message lists the names of all missing files.
    """
    default_values = {
        "structure": "CONTCAR",
        "cohpcar": "COHPCAR.lobster",
        "icohplist": "ICOHPLIST.lobster",
        "cobicar": "COBICAR.lobster",
        "icobilist": "ICOBILIST.lobster",
        "coopcar": "COOPCAR.lobster",
        "icooplist": "ICOOPLIST.lobster",
        "charge": "CHARGE.lobster",
        "madelung": "MadelungEnergies.lobster",
        "doscar": ("DOSCAR.LSO.lobster" if use_lso_dos else "DOSCAR.lobster"),
        "lobsterin": "lobsterin",
        "lobsterout": "lobsterout",
        "bandoverlaps": "bandOverlaps.lobster",
        "potcar": "POTCAR",
        "vasprun": "vasprun.xml",
        "incar": "INCAR",
    }

    lobster_path = Path(path_to_lobster_calc)
    file_paths: dict = {}
    missing_files: list = []

    for file in requested_files or []:
        # Items that are not a known key are interpreted as literal file names.
        file_str = default_values.get(file, file)

        if file == "structure":
            # The structure may be stored under several names, so the lookup is
            # delegated; the helper raises when no structure file is found.
            try:
                file_paths[file] = get_structure_path(lobster_path=lobster_path)
            except Exception:
                missing_files.append(default_values["structure"])
            continue

        file_path = lobster_path / file_str
        if not file_path.exists():
            # Fall back to the zipped variant of the file, if there is one.
            file_path = Path(zpath(file_path.as_posix()))
            if not file_path.exists():
                # Report the resolved name; indexing default_values here would
                # raise KeyError for literal file names.
                missing_files.append(file_str)
                continue
        file_paths[file] = file_path

    if missing_files:
        raise Exception(f"Files {missing_files} not found in {lobster_path.name}.")

    return file_paths
def get_structure_path(lobster_path: Path) -> Path:
    """
    Search iteratively for (unzipped / zipped) structure file.

    Candidate names are tried in the order CONTCAR, POSCAR.lobster,
    POSCAR.lobster.vasp, POSCAR. For each name the plain file is checked
    before the path returned by ``zpath`` (zipped variant), so CONTCAR in
    either form wins over every POSCAR variant. A warning is emitted when
    the plain POSCAR is the file that is finally used.

    :param lobster_path: path to root LOBSTER calc directory

    :return: path to structure file
    :raises FileNotFoundError: if none of the candidate files exists
        (``FileNotFoundError`` is a subclass of ``Exception``).
    """
    candidates = ("CONTCAR", "POSCAR.lobster", "POSCAR.lobster.vasp", "POSCAR")

    for filename in candidates:
        structure_path = lobster_path / filename
        if not structure_path.exists():
            # Only consult zpath when the plain file is absent.
            structure_path = Path(zpath(structure_path.as_posix()))
            if not structure_path.exists():
                continue

        if filename == "POSCAR":
            warn(POSCAR_WARNING)
        return structure_path

    raise FileNotFoundError(
        f"No structure file ({', '.join(candidates)}; unzipped or zipped) found in {lobster_path}."
    )
def get_reduced_mass(atom_pair: list[str]) -> float:
    """
    Return the reduced mass ``m1 * m2 / (m1 + m2)`` of two atoms.

    The masses are the ``atomic_weight`` values of the first two entries
    of ``atom_pair``.

    :param atom_pair: list of atomic species symbols in string

    :return: reduced mass
    """
    first_species = element(atom_pair[0])
    second_species = element(atom_pair[1])

    mass_a = first_species.atomic_weight
    mass_b = second_species.atomic_weight

    return (mass_a * mass_b) / (mass_a + mass_b)
def get_electronegativities(atom_pair: list[str]) -> list[float]:
    """
    Look up the Allen electronegativity of both atoms of a pair.

    Only the first two entries of ``atom_pair`` are used; the result keeps
    their order.

    :param atom_pair: list of atomic species symbols in string

    :return: list of Allen electronegativities
    """
    species_pair = (element(atom_pair[0]), element(atom_pair[1]))
    return [species.electronegativity_allen() for species in species_pair]
def sort_dict_by_value(input_dict: dict[str, float]) -> dict:
    """
    Return a new dictionary whose entries are ordered by ascending value.

    Entries with equal values keep their original relative order, because
    the underlying sort is stable.

    :param input_dict: input dictionary

    :return: sorted dictionary
    """
    keys_by_value = sorted(input_dict, key=input_dict.get)
    return {key: input_dict[key] for key in keys_by_value}