Source code for gnrs.gnrsutil.volume_estimation

"""
This module is based on the PyMoVE algorithm implemented in: 
[PyMoVE](https://github.com/manny405/PyMoVE)
predicting the unit cell volume of a molecule.

This source code is licensed under the BSD-3-Clause license found in the
LICENSE file in the root directory of this source tree.
"""
from __future__ import annotations

__author__ = ["Yi Yang", "Rithwik Tom"]
__email__ = "yiy5@andrew.cmu.edu"
__group__ = "https://www.noamarom.com/"

import yaml
from importlib.resources import files

import numpy as np
from scipy.spatial.distance import cdist
from ase.data import vdw_radii, atomic_numbers
from gnrs.gnrsutil.molecule_bonding import BondNeighborhood
from gnrs.core.structure import Structure

_PYMOVE_MODEL = yaml.safe_load(
    files("gnrs.gnrsutil").joinpath("pymove_model.yaml").read_text()
)
_FRAGMENT_COEF: dict[str, float] = _PYMOVE_MODEL["fragment_coef"]


[docs] def predict_cell_volume(mol_path, seed=42) -> float: """ Predict the unit cell volume of a molecule. Args: mol_path: Path to the molecule file. Returns: Predicted unit cell volume. """ struct = Structure() struct.build_geo_from_atom_file(mol_path) mve = MoleculeVolumeEstimator(seed=seed) cell_volume = mve.calc(struct) return cell_volume
[docs] class MoleculeVolumeEstimator: """ Class for performing molecular volume estimation using vdW radii. Args: tol: Tolerance to converge the estimation. iterations: Maximum number of iterations to converge. batch: Number of samples to process in MC method at a single time. vdW: Array of vdW radii indexed by the atomic number. seed: Random seed. """
[docs] def __init__( self, tol: float = 1e-3, iterations: int = int(1e8), batch: int = 200000, vdW: np.ndarray = vdw_radii, seed: int = 42, ): self.iterations = int(iterations) self.batch = int(batch) self.vdW = vdW self.tol = tol self.rng = np.random.default_rng(seed)
[docs] def calc(self, molecule_struct: Structure) -> float: """ Calculates the predicted molecular volume for a Structure object. """ volume = self.MC(molecule_struct) if volume is None: return None volume = self.bond_neighberhood_model(molecule_struct, volume) return volume
[docs] def bond_neighberhood_model( self, molecule_struct: Structure, MC_volume: float ) -> float: """ Applies linear correction to the input volume from CSD analysis. """ fragment_coef = _FRAGMENT_COEF intercept = 0.0 # Start with MC_volume contribution + intercept volume = MC_volume * fragment_coef["MC_volume"] + intercept # Calc structure fragments and counts bn = BondNeighborhood() fragments, counts = bn.calc(molecule_struct) # Accumulate contributions for frag, count in zip(fragments, counts): if frag in fragment_coef: volume += count * fragment_coef[frag] return volume
[docs] def MC(self, molecule_struct: Structure) -> float: """ Monte Carlo method for volume estimation using vdW radii. """ geo = molecule_struct.get_geo_array() # Get radii array for each element radii = np.array( [self.vdW[atomic_numbers[ele]] for ele in molecule_struct.geometry["element"]] ) radii_sq = radii ** 2 # Set sample region min_region = np.min(geo, axis=0) - 5.0 max_region = np.max(geo, axis=0) + 5.0 region_range = max_region - min_region region_volume = float(np.prod(region_range)) # MC tracking vdW_in_total = 0 total_samples = 0 prev_volume = None for start in range(0, self.iterations, self.batch): batch_size = min(self.batch, self.iterations - start) # Generate random points, uniform in bounding box points = self.rng.uniform(size=(batch_size, 3)) * region_range + min_region dist_sq = cdist(geo, points, metric="sqeuclidean") # Check if any atom is close enough: dist_sq <= radii^2 # Broadcasting radii_sq inside = np.any(dist_sq <= radii_sq[:, None], axis=0) vdW_in_total += np.count_nonzero(inside) total_samples += batch_size # Check convergence current_volume = (vdW_in_total / total_samples) * region_volume if prev_volume is not None: err = abs(current_volume - prev_volume) if err <= self.tol: break prev_volume = current_volume molecule_volume = (vdW_in_total / total_samples) * region_volume return molecule_volume