Skip to content

Commit

Permalink
Merge pull request #6 from duartegroup/tom
Browse files Browse the repository at this point in the history
Minor improvements
  • Loading branch information
t-young31 authored Sep 16, 2020
2 parents 4ced91b + cded7cc commit 8c2369d
Show file tree
Hide file tree
Showing 22 changed files with 400 additions and 193 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ strings, which can be generated directly from ChemDraw, as
![alt text](cgbind/common/SMILES_generation.png)


To generate a simple Pd<sub>2</sub>L<sub>4</sub> construct and print the geometry geometry
To generate a simple Pd<sub>2</sub>L<sub>4</sub> construct and print the geometry
```python
from cgbind import Linker, Cage

Expand Down
2 changes: 1 addition & 1 deletion cgbind/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from cgbind.cage_subt import CageSubstrateComplex
from cgbind.templates import Template

__version__ = '1.0.1'
__version__ = '1.0.2'

__all__ = ['Config',
'Constants',
Expand Down
52 changes: 33 additions & 19 deletions cgbind/add_substrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,21 @@

def cage_subst_repulsion_func(cage, substrate, cage_coords, subst_coords, with_attraction=True):
"""
Determine the energy using two-body atom-atom repulsion derived from noble gas dimers where
Determine the energy using two-body atom-atom repulsion derived from noble
gas dimers where
V_rep(r) = exp(- r/b + a)
where a and b are parameters determined by the atom pairs. Parameters are suitable to generate V_rep in kcal mol-1
where a and b are parameters determined by the atom pairs. Parameters are
suitable to generate V_rep in kcal mol-1
:param cage: (Cage object)
:param substrate: (Substrate object)
:param cage_coords: (list(np.ndarray)) Cage coordinates
:param subst_coords: (list(np.ndarray)) Substrate coordinates
:param with_attraction: (bool) do or don't return the energy with a constant attractive term based on the number of
substrate atoms in the structure
:param with_attraction: (bool) do or don't return the energy with a
constant attractive term based on the number of
substrate atoms in the structure
:return: energy: (float) Potential energy (V_rep) in kcal mol-1
"""
Expand All @@ -36,16 +39,18 @@ def cage_subst_repulsion_func(cage, substrate, cage_coords, subst_coords, with_a
sum_vdw_radii = np.add.outer(np.array(cage.vdw_radii),
np.array(substrate.vdw_radii))

# Magic numbers derived from fitting potentials to noble gas dimers and plotting against the sum of vdw radii
# Magic numbers derived from fitting potentials to noble gas dimers and
# plotting against the sum of vdw radii
b_mat = 0.083214 * sum_vdw_radii - 0.003768
a_mat = 11.576415 * (0.175541 * sum_vdw_radii + 0.316642)
exponent_mat = -(dist_mat / b_mat) + a_mat

energy_mat = np.exp(exponent_mat)
energy = np.sum(energy_mat)

# E is negative for favourable binding but this is a purely repulsive function so subtract a number..
# which is determined from the best classifier for 102 binding affinities (see cgbind paper) 0.4 kcal mol-1
# E is negative for favourable binding but this is a purely repulsive
# function so subtract a number.. which is determined from the best
# classifier for 102 binding affinities (see cgbind paper) 0.4 kcal mol-1
if with_attraction:
return energy - 0.4 * substrate.n_atoms

Expand All @@ -54,9 +59,10 @@ def cage_subst_repulsion_func(cage, substrate, cage_coords, subst_coords, with_a

def cage_subst_repulsion_and_electrostatic_func(cage, substrate, cage_coords, subst_coords):
"""
Determine the energy of adding a substrate to a cage based on V_rep + V_att where the attractive term
is electrostatic and uses the sum of q_i q_j / r_ij interaction energies where q_i is the partial atomic
charge on atom i.
Determine the energy of adding a substrate to a cage based on V_rep + V_att
where the attractive term is electrostatic and uses the sum of
q_i q_j / r_ij interaction energies where q_i is the partial atomic charge
on atom i.
:param cage: (Cage object)
:param substrate: (Substrate object)
Expand All @@ -81,17 +87,20 @@ def cage_subst_repulsion_and_electrostatic_func(cage, substrate, cage_coords, su

def add_substrate_com(cagesubt):
"""
Add a substrate the centre of a cage defined by its centre of mass (com) will minimise the energy with respect to
rotation of the substrate and the substrate conformer using cagesubt.energy_func. Will rotate cagesubt.n_init_geom
Add a substrate the centre of a cage defined by its centre of mass (com)
will minimise the energy with respect to rotation of the substrate and the
substrate conformer using cagesubt.energy_func. Will rotate cagesubt.n_init_geom
times and use cagesubt.n_subst_confs number of substrate conformers
:param cagesubt: (CageSubstrateComplex object)
:return: xyzs: (list(list))
"""
logger.info(f'Adding substrate to the cage COM and minimising the energy with {cagesubt.energy_func.__name__}')
logger.info(f'Adding substrate to the cage COM and minimising the energy '
f'with {cagesubt.energy_func.__name__}')

# Minimum energy initialisation and the x parameter array (angles to rotate about the x, y, z axes)
# Minimum energy initialisation and the x parameter array (angles to
# rotate about the x, y, z axes)
min_energy, curr_x = 9999999999.9, np.zeros(3)

# Optimum (minimum energy) conformer
Expand All @@ -117,9 +126,11 @@ def add_substrate_com(cagesubt):
for _ in range(cagesubt.n_init_geom):
rot_angles = 2.0 * np.pi * np.random.rand(3) # rand generates in [0, 1] so multiply with

# Minimise the energy with a BFGS minimiser supporting bounds on the values (rotation is periodic)
# Minimise the energy with a BFGS minimiser supporting bounds on
# the values (rotation is periodic)
result = minimize(get_energy, x0=np.array(rot_angles),
args=(c, s, cagesubt.energy_func, cage_coords, subst_coords), method='L-BFGS-B',
args=(c, s, cagesubt.energy_func, cage_coords, subst_coords),
method='L-BFGS-B',
bounds=Bounds(lb=0.0, ub=2*np.pi), tol=0.01)

energy = result.fun
Expand Down Expand Up @@ -160,7 +171,8 @@ def get_centered_substrate_coords(substrate):

def cat_cage_subst_coords(cage, substrate, cage_coords, substrate_coords):
"""
Concatenate some coordinates into a set of xyzs by adding back the atom labels from the original xyzs
Concatenate some coordinates into a set of xyzs by adding back the atom
labels from the original xyzs
:param cage:
:param substrate:
Expand All @@ -178,7 +190,8 @@ def cat_cage_subst_coords(cage, substrate, cage_coords, substrate_coords):


def get_rotated_subst_coords(x, subst_coords):
"""Get substrate coordinates that have been rotated by x[0] radians in the x axis etc."""
"""Get substrate coordinates that have been rotated by x[0] radians in the
x axis etc."""

x_rot, y_rot, z_rot = x

Expand All @@ -192,7 +205,8 @@ def get_rotated_subst_coords(x, subst_coords):

def get_energy(x, cage, substrate, energy_func, cage_coords, subst_coords):
"""
Calculate the energy in kcal mol-1 for a particular x, which contains the rotations in x, y, z cartesian directions
Calculate the energy in kcal mol-1 for a particular x, which contains the
rotations in x, y, z cartesian directions
"""
rot_substrate_coords = get_rotated_subst_coords(x, subst_coords)
energy = energy_func(cage, substrate, cage_coords, rot_substrate_coords)
Expand Down
15 changes: 9 additions & 6 deletions cgbind/architectures.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,22 @@ class Arch:

def __init__(self, name, n_metals, n_linkers):
"""
Architecture class holding the name (self.name), self.n_metals and self.n_linkers
Architecture class holding the name (self.name), self.n_metals and
self.n_linkers
:ivar self.name: (str)
:ivar self.n_metals: (int)
:ivar self.n_linkers: (int)
:param name: (str) Name of the architecture
:param n_metals: (int) Number of metals in the architecture. e.g. 2 for M2L4
:param n_linkers: (int) Number of linkers in the architecture. e.g. 4 for M2L4
:param n_metals: (int) Number of metals in the architecture.
e.g. 2 for M2L4
:param n_linkers: (int) Number of linkers in the architecture.
e.g. 4 for M2L4
"""
self.name = name
self.n_metals = n_metals
self.n_linkers = n_linkers
self.name = str(name)
self.n_metals = int(n_metals)
self.n_linkers = int(n_linkers)


archs = []
Expand Down
2 changes: 1 addition & 1 deletion cgbind/atoms.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(self, atomic_symbol, x=0.0, y=0.0, z=0.0, coord=None):
'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr',
'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og']

# TODO: Make this less empirical... e.g. tabulate all M-L bond enthalpies using CCSDT//DFT
# TODO: Make this less empirical... e.g. tabulate all M-L bond enthalpies
metals_and_favored_heteroatoms = {
'Pd': ['P', 'N', 'S', 'O', 'F', 'Cl']
}
Expand Down
10 changes: 6 additions & 4 deletions cgbind/bonds.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ def get_bond_list_from_atoms(atoms, relative_tolerance=0.2):
:param relative_tolerance: (float)
:return: (list(tuple(int)))
"""
logger.info(f'Getting bond list from xyzs. Maximum bond is {1 + relative_tolerance}x average')
logger.info(f'Getting bond list from xyzs. Maximum bond is '
f'{1 + relative_tolerance}x average')

bond_list = []

Expand All @@ -31,16 +32,17 @@ def get_bond_list_from_atoms(atoms, relative_tolerance=0.2):


def get_avg_bond_length(atom_i_label, atom_j_label):
"""Get the average bond length from either a molecule and a bond or two atom labels (e.g. atom_i_label = 'C'
atom_j_label = 'H')"""
"""Get the average bond length from either a molecule and a bond or two
atom labels (e.g. atom_i_label = 'C' atom_j_label = 'H')"""
key1, key2 = atom_i_label + atom_j_label, atom_j_label + atom_i_label

if key1 in avg_bond_lengths.keys():
return avg_bond_lengths[key1]
elif key2 in avg_bond_lengths.keys():
return avg_bond_lengths[key2]
else:
logger.warning(f'Couldn\'t find a default bond length for ({atom_i_label},{atom_j_label})')
logger.warning(f'Couldn\'t find a default bond length for '
f'({atom_i_label},{atom_j_label}) using 1.5 Å')
return 1.5


Expand Down
32 changes: 22 additions & 10 deletions cgbind/build.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,22 @@
from copy import deepcopy
import numpy as np
from scipy.optimize import minimize, Bounds
from scipy.spatial import distance_matrix
from cgbind.log import logger
from cgbind.atoms import Atom
from cgbind.config import Config
from cgbind.geom import get_centered_matrix
from cgbind.geom import get_rot_mat_kabsch
from cgbind.x_motifs import get_shifted_template_x_motif_coords
from cgbind.molecule import BaseStruct
from cgbind.exceptions import CannotBuildCage


def get_fitted_linker_coords_and_cost(linker, template_x_coords, coords_to_fit, curr_coords):
"""
For a linker get the best mapping onto a list of template X coords (e.g. NCN motifs in a pyridyl donor) these will
this can be achieved in normal or reverse order of the coordinates as to maximise the distance to the rest of the
metallocage structure. Also returns a measure of the repulsion to the rest of the cage structure
For a linker get the best mapping onto a list of template X coords
(e.g. NCN motifs in a pyridyl donor) these will this can be achieved in
normal or reverse order of the coordinates as to maximise the distance to
the rest of the metallocage structure. Also returns a measure of the
repulsion to the rest of the cage structure
:param linker: (Linker object)
:param template_x_coords: (list(np.ndarray))
Expand Down Expand Up @@ -59,7 +60,8 @@ def get_kfitted_coords_and_cost(linker, template_x_coords, coords_to_fit, return
:param linker: (Linker object)
:param template_x_coords: (list(np.ndarray))
:param coords_to_fit: (list(np.ndarray)) must have len() = len(linker_template.x_xyzs)
:param return_cost: (bool) return just the cost function, which is the sum of squares of ∆dists
:param return_cost: (bool) return just the cost function, which is the sum
of squares of ∆dists
:return: (np.ndarray) n_atoms x 3
"""
assert len(template_x_coords) == len(coords_to_fit)
Expand Down Expand Up @@ -95,8 +97,9 @@ def get_kfitted_coords_and_cost(linker, template_x_coords, coords_to_fit, return

def get_linker_atoms_and_cost(linker, template_linker, current_atoms, x_coords=None):
"""
Get the xyzs of a linker that is fitted to a template_linker object and the associated cost function – i.e. the
repulsion to the current cage structure
Get the xyzs of a linker that is fitted to a template_linker object and
the associated cost function – i.e. the repulsion to the current cage
structure
:param linker: (Linker)
:param template_linker: (Template.Linker)
Expand Down Expand Up @@ -130,8 +133,9 @@ def get_linker_atoms_and_cost(linker, template_linker, current_atoms, x_coords=N

def cost_fitted_x_motifs(dr, linker, linker_template, x_coords):
"""
For a linker compute the cost function (RMSD) for fitting all the coordinates in the x motifs to a template which
which be shifted by dr in the corresponding shift_vec
For a linker compute the cost function (RMSD) for fitting all the
coordinates in the x motifs to a template which which be shifted by dr in
the corresponding shift_vec
:param linker: (object)
:param linker_template: (object)
Expand Down Expand Up @@ -160,6 +164,7 @@ def build_homoleptic_cage(cage, max_cost):
logger.info(f'Have {len(cage.linkers[0].possibilities)} linkers to fit')

min_cost, best_linker = 99999999.9, None
atoms, cage_cost = [], 99999999.9

# For all the possible linker conformer / Xmotif set possibilities
for linker in cage.linkers[0].possibilities:
Expand Down Expand Up @@ -209,6 +214,13 @@ def build_homoleptic_cage(cage, max_cost):

# Add the metals from the template shifted by dr
for metal in cage.cage_template.metals:

if cage.dr is None:
raise CannotBuildCage('Cage had no shift distance (∆r)')

if metal.shift_vec is None:
raise CannotBuildCage('Template shift vector not defined')

metal_coord = cage.dr * metal.shift_vec / np.linalg.norm(metal.shift_vec) + metal.coord
atoms.append(Atom(cage.metal, coord=metal_coord))

Expand Down
Loading

0 comments on commit 8c2369d

Please sign in to comment.