Source code for gnrs.gnrsutil.molecule_bonding

"""
This module implements the molecule bonding module.

This source code is licensed under the BSD-3-Clause license found in the
LICENSE file in the root directory of this source tree.
"""
from __future__ import annotations

__author__ = ["Yi Yang", "Rithwik Tom"]
__email__ = "yiy5@andrew.cmu.edu"
__group__ = "https://www.noamarom.com/"

import yaml
from importlib.resources import files

import numpy as np
import networkx as nx
from ase.io import read
from ase.data import atomic_numbers, vdw
from ase.neighborlist import NeighborList, natural_cutoffs

# Update Hydrogen radii — 1.1 A instead of the default 1.2 A.
# Rationale: https://pubs.acs.org/doi/full/10.1021/jp953141%2B
try:
    vdw_radii = vdw.vdw_radii.copy()
    vdw_radii[1] = 1.1
except (TypeError, AttributeError):
    vdw_radii = None
import gnrs.output as gout

_H_BOND_PARAMS = yaml.safe_load(
    files("gnrs.gnrsutil").joinpath("h_bond.yaml").read_text()
)

intermolecular_dist = {
    "vdw": {},
    "h_bond": _H_BOND_PARAMS["h_bond"],
}

[docs] def construct_pair_keys(elements: list[str]) -> np.ndarray: """ Constructs pair_key matrix for every element in the argument """ atom_keys = np.array(elements) pair_keys = np.char.add(atom_keys, np.array(["-"], dtype="<U2")) pair_keys = np.char.add(pair_keys, atom_keys[:, None]) return pair_keys
[docs] def construct_intermolecular_dist_dict() -> None: """ Constructs the intermolecular distance dictionary with vdW radii. """ global intermolecular_dist # Construct vdw contact distances for pairwise_dist dictionary pair_keys = construct_pair_keys([x for x in atomic_numbers.keys()]) pair_keys = pair_keys.ravel() for pair in pair_keys: a1 = atomic_numbers[pair.split("-")[0]] a2 = atomic_numbers[pair.split("-")[1]] try: r1 = vdw_radii[a1] r2 = vdw_radii[a2] except IndexError: continue if np.isnan(r1) or np.isnan(r2): continue intermolecular_dist["vdw"][pair] = r1 + r2
construct_intermolecular_dist_dict()
[docs] def get_vdw_distance_cutoff_matrix(mol_path: str | list[str], z: int, sr: float, natural_cutoff_mult: float) -> tuple[np.ndarray, list[str]]: """ Get the van der Waals distance cutoff matrix. Args: mol_path: Path to the molecule file. z: Number of molecules in the unit cell. sr: Fraction of the van der Waals distance cutoff. natural_cutoff_mult: Multiplier for the natural cutoffs. Returns: cutoff_matrix: Van der Waals distance cutoff matrix. hbond_keys: List of hydrogen bond keys. """ if isinstance(mol_path, list): mol = [] for pth in mol_path: m = read(pth) mol.append(m) else: mol = read(mol_path) mb = MoleculeBonding(mol, natural_cutoff_mult=natural_cutoff_mult) cutoff_matrix = mb.get_crystal_cutoff_matrix(z, vdw_mult=sr) cutoff_matrix = np.array(cutoff_matrix, dtype="float32") hbond_keys = np.unique(mb.hbond_key).tolist() return cutoff_matrix, hbond_keys
[docs] class MoleculeBonding: """ Identifies all bonded atoms in a molecule or structure using ASE NeighborList Arguments --------- atoms: ASE atoms object OR list of ASE atoms object Structure object to build molecule bonding natural_cutoff_mult: float Multiplier to covalent distances for identifying molecular bonding skin: float For ase.neighborlists. If skin is not zero, then extra neighbors outside the cutoff can be returned. """
[docs] def __init__(self, atoms, natural_cutoff_mult=1.2, skin=0): self.atoms = atoms self.hbond_key = np.array([]) if isinstance(self.atoms, list): ele_list = [] self.bonding_list = np.array([]) mol_idx = 0 for mol in self.atoms: mol_bond = self._get_bonding( mol, natural_cutoff_mult=natural_cutoff_mult ) self.bonding_list = np.concatenate( (self.bonding_list, mol_bond + mol_idx) ) ele_list += mol.get_chemical_symbols() mol_idx += len(mol) self.ele = np.array(ele_list, dtype=str) else: self.bonding_list = self._get_bonding( atoms, natural_cutoff_mult=natural_cutoff_mult ) self.ele = np.array(atoms.get_chemical_symbols()) # Donor and acceptor elements for identifying hydrogen bonds self.donor_elements = ["N", "O"] self.acceptor_elements = ["N", "O"]
[docs] def get_cutoff_matrix(self, vdw_mult=0.85): """ Returns NxN matrix expressing a cutoff distance between intermolecular contacts in a crystal system of the given molecule. Arguments --------- vdw_mult: float Multiplicative factor for the cutoff distance for vdw type contacts. A cutoff value of 0.85 is well supported by statistical analysis for these types of contacts. """ # Get pair_key_matrix and construct initial vdw cutoff pair_key_matrix = construct_pair_keys(self.ele) cutoff_matrix = np.zeros(pair_key_matrix.shape) for index, value in np.ndenumerate(pair_key_matrix): cutoff_matrix[index] = intermolecular_dist["vdw"][value] cutoff_matrix = cutoff_matrix * vdw_mult # Add hydrogen bond cutoff distances donor_idx, acceptor_idx = self._get_hydrogen_bond_idx() gout.emit(f"Donor atom indices = {donor_idx}") gout.emit(f"Acceptor atom indices = {acceptor_idx}") if len(donor_idx) > 0: acceptor_ele = self.ele[acceptor_idx] donor_ele = self.ele[np.concatenate(self.bonding_list[donor_idx])] hbond_key = np.char.add(donor_ele, "H-") self.hbond_key = np.char.add(hbond_key[:, None], acceptor_ele) hbond_values = np.zeros(self.hbond_key.shape) for index, value in np.ndenumerate(self.hbond_key): hbond_values[index] = intermolecular_dist["h_bond"][value] # Create pairwise index grid for indexing into cutoff_matrix ixgrid1 = np.ix_(donor_idx, acceptor_idx) ixgrid2 = np.ix_(acceptor_idx, donor_idx) cutoff_matrix[ixgrid1] = hbond_values cutoff_matrix[ixgrid2] = hbond_values.T return cutoff_matrix
[docs] def get_cutoff_matrix_vdw(self, vdw_mult=0.85): """ Returns NxN matrix expressing a cutoff distance between intermolecular contacts in a crystal system of the given molecule. Only uses vdw contact distances. Arguments --------- vdw_mult: float Multiplicative factor for the cutoff distance for vdw type contacts. A cutoff value of 0.85 is well supported by statistical analysis for these types of contacts. """ # Get pair_key_matrix and construct initial vdw cutoff pair_key_matrix = construct_pair_keys(self.ele) cutoff_matrix = np.zeros(pair_key_matrix.shape) for index, value in np.ndenumerate(pair_key_matrix): cutoff_matrix[index] = intermolecular_dist["vdw"][value] cutoff_matrix = cutoff_matrix * vdw_mult return cutoff_matrix
[docs] def get_crystal_cutoff_matrix(self, nmpc, vdw_mult=0.85): """ Copies the intermolecular distance matrix from MoleculeBonding.get_cutoff_matrix into the correct size for a specific number of molecules in the unit cell. """ cutoff_matrix = self.get_cutoff_matrix(vdw_mult=vdw_mult) return np.tile(cutoff_matrix, (nmpc, nmpc))
def _get_bonding(self, atoms, natural_cutoff_mult=1, skin=0): # Use ASE neighborlist class to identify bonding of atoms cutOff = natural_cutoffs(atoms, mult=natural_cutoff_mult) neighborList = NeighborList( cutOff, self_interaction=False, bothways=True, skin=0 ) neighborList.update(atoms) # Construct bonding list indexed by atom in struct bonding_list = [[] for x in range(len(atoms))] for i in range(len(atoms)): bonding_list[i] = neighborList.get_neighbors(i)[0] bonding_list = np.array(bonding_list, dtype=object) return bonding_list def _get_hydrogen_bond_idx(self): """ For all hydrogen atoms in the system, identify the index of any which can participate in hydrogen. This is defined as hydrogens which are bonded to polar elements: nitrogen, oxygen. Returns a list of indices for donors and acceptors of hydrogen bonds """ self.donor_idx = [] self.acceptor_idx = [] for i, ele in enumerate(self.ele): # Donors definition if ele == "H": bond_idx = self.bonding_list[i] elements = self.ele[bond_idx] for h_ele in self.donor_elements: if h_ele in elements: self.donor_idx.append(i) break # Acceptor definition elif ele in self.acceptor_elements: bonding = self.bonding_list[i] # Check for terminal oxygen or bridging oxygen if ele == "O": if len(bonding) == 1: self.acceptor_idx.append(i) elif len(bonding) == 2: bond_ele = self.ele[bonding] unique_ele = np.unique(bond_ele) # C-O-C if len(unique_ele) == 1 and unique_ele[0] == "C": self.acceptor_idx.append(i) # H-O-H elif len(unique_ele) == 1 and unique_ele[0] == "H": self.acceptor_idx.append(i) # R-O-H elif "H" in bond_ele: self.acceptor_idx.append(i) # Check for terminal nitrogen if ele == "N": if len(bonding) <= 2: self.acceptor_idx.append(i) return self.donor_idx, self.acceptor_idx
[docs] class BondNeighborhood: """ Returns the bonding neighborhood of each atom for a structure. User is allowed to define a radius that the algorithm traverses to build the neighborhood for each atom. If the radius is 0, this would correspond to just return the atoms im the system. Arguments --------- radius: int Radius is the number of edges the model is allowed to traverse on the graph representation of the molecule. mb_kwargs: dict Keyword arguments for the molecule bonding module. The default values are the recommended settings. A user may potentially want to decrease the natural_cutoff_mult. This value is multiplied by covalent bond radii in the MoleculeBonding class. It's highly recommended that the skin value is kept at 0. For more details see ibslib.molecules.MoleculeBonding """
[docs] def __init__(self, radius=1, mb_kwargs={"natural_cutoff_mult": 1.2, "skin": 0}): self.radius = radius self.mb_kwargs = mb_kwargs if radius != 1: raise Exception("Radius greater than 1 not implemented")
[docs] def calc(self, struct): self.struct = struct self.ele = struct.geometry["element"] g = self._build_graph(struct) n = self._calc_neighbors(g) n = self._sort(g, n) fragments = self._construct_fragment_list(n) fragments, count = np.unique(fragments, return_counts=True) return fragments.tolist(), count
def _build_graph(self, struct): """ Builds networkx graph of a structures bonding. """ mb = MoleculeBonding(struct.get_ase_atoms(), **self.mb_kwargs) g = nx.Graph() # Add node to graph for each atom in struct self.ele = struct.geometry["element"] g.add_nodes_from(range(len(self.ele))) # Add edges for i, bond_list in enumerate(mb.bonding_list): [g.add_edge(i, x) for x in bond_list] return g def _calc_neighbors(self, g): """ Calculates neighbors for each node in the graph. Uses the radius which was declared when BondNeighborhood was initialized. Ordering of the neighbors follows: 1. Terminal atom alphabetical 2. Self 3. Bonded atom alphabetical 4. If continuing from bonded group, print terminal groups of the bonded group. 5. If there's ambiguity about which should come next, place in alphabetical order. When the radius is small, there's less ambiguity. If the radius becomes large there will be more. Although, only a radius of 1 is currently implemented. """ neighbors = [[[]] for x in g.nodes] for i, idx_list in enumerate(neighbors): neighbor_list = [x for x in g.adj[i]] neighbor_list.append(i) idx_list[0] += neighbor_list return neighbors def _sort(self, g, neighbor_list): """ Sorts neighborlist according to definition in _calc_neighbors. Only works for a radius of 1. Arguments --------- g: nx.Graph neighbor_list: list of int List of adjacent nodes plus the node itself as i """ sorted_list_final = [[[]] for x in g.nodes] for i, temp in enumerate(neighbor_list): # Preparing things which aren't writting well idx_list = temp[0] current_node = i terminal_groups = [] bonded_groups = [] for idx in idx_list: if g.degree(idx) == 1: terminal_groups.append(idx) else: bonded_groups.append(idx) terminal_ele = self.ele[terminal_groups] alphabet_idx = np.argsort(terminal_ele) terminal_groups = [terminal_groups[x] for x in alphabet_idx] sorted_list = terminal_groups if current_node not in terminal_groups: sorted_list.append(current_node) remove_idx = bonded_groups.index(current_node) del bonded_groups[remove_idx] bonded_ele = self.ele[bonded_groups] alphabet_idx = np.argsort(bonded_ele) bonded_groups = [bonded_groups[x] for x in alphabet_idx] sorted_list += bonded_groups sorted_list_final[i][0] = sorted_list return sorted_list_final def _construct_fragment_list(self, n): fragment_list = [self.struct.geometry["element"][tuple(x)] for x in n] fragment_list = ["".join(x) for x in fragment_list] return fragment_list