TEMPLATES¶
%%time
from retropaths.reactions.template import ReactionTemplate
from retropaths.reactions.changes import Changes
from retropaths.reactions.rules import Rules
from retropaths.reactions.conditions import Conditions, Acidity, Solvent
from ipywidgets import interact
from IPython.core.display import HTML
import retropaths.helper_functions as hf
from retropaths.molecules.molecule import Molecule
import itertools
from retropaths.reactions.template_utilities import template_generator, special_cartesian_product
# Load the reaction-template library from disk.
# NOTE(review): `pload` and the `.p` suffix suggest this is a pickle — if so, only load files from a trusted source.
library = hf.pload('../data/reactions.p')
# HTML snippet containing a <script> tag for d3.js v3; presumably needed by the draw(mode='d3') calls below — confirm.
HTML('<script src="//d3js.org/d3.v3.min.js"></script>')
CPU times: user 6.22 s, sys: 625 ms, total: 6.85 s Wall time: 6.99 s
MATCHING GROUPS¶
# The library of templates is built around a matching_group_data object
mg = library.matching
print(mg)
Matching groups loaded: (H+, Hydrogen cation) (O, Water) (HCl, Chloridic Acid) (A1AA1, 3-membered-ring) (A1A=A1, 3-membered cyclopropene) (A1AAA1, 4-membered-ring) (A1AAAA1, 5-membered-ring) (A1A=AA1, A-Cyclobutene) (A1A(ar)AA1, A-Cyclobutene-Aromatic) (a1A=Aa1, A-Cyclobutadiene) (a1A(ar)Aa1, A-Cyclobutadiene) (A1A(ar)A(ar)A1, A-Cyclobutadiene) (A1AA=AA1, A-Cyclopentene) (A1AA(ar)AA1, A-Cyclo-aromatic-5) (A1=AA=AA1, A-Cyclopentadiene) (A1=AA(ar)AA1, A-Cyclopentene-Aromatic) (A1(ar)AA(ar)AA1, A-Cyclo-Double-Aromatic-pentene) (A1(ar)A(ar)AAA1, bridged_meta_tether_2) (A1(ar)A(ar)AA=A1, A-Cyclo-Double-Aromatic-Pentene3) (A1(ar)A(ar)A(ar)AAA1, bridged_para_tether_2) (A1(ar)A(ar)A(ar)AAAA1, bridged_para_tether_3) (A1(ar)A(ar)A(ar)AAAAA1, bridged_para_tether_4) (A1(ar)A(ar)A(ar)AAAAAA1, bridged_para_tether_5) (A1(ar)A(ar)AA1, bridged_meta_tether_1) (A1(ar)A(ar)AAAA1, bridged_meta_tether_3) (A1(ar)A(ar)AAAAA1, bridged_meta_tether_4) (A1(ar)A(ar)AAAAAA1, bridged_meta_tether_5) (A1LA1, 3-membered-ring-L) (LH, Hydrogen) (MeL, Methyl) (PhL, Phenyl) (OHL, Hydroxyl) (FL, Fluorine) (ClL, Chlorine) (BrL, Bromine) (IL, Iodine) (C(L)(=CR2), Alkene-R) ((C=O)[L][R], Carbonyl) (C(=O)(OR)[L], Carboxyl) (C(=O)(OL)[R], Carboxyl-H) (C#N[L], Nitrile) (S([L])[R], Thiolate) (O([L])[R], Alkoxide) (RCOL, Long-Alkoxide) ([N-]=[N+]=N[L], Azide) (NR3L, Ammonia) (PR3L, Phosphine) (PR2L, Phosphine) (NR2L, Amino) (NO2L, Nitrite) ([N+]O2L, Nitronium) (NO3[L], Nitrate) (ONOL, ONO) (NOL, Nitroso) (CONR2L, Amide) (CONRRL, N-Amide) (C=ONR2OL, O-Carbamate) (C=OORNRL, N-Carbamate) ((S=O)[L][R], Sulfoxide) (OSO2[L], Sulfonate) (LSO2R, Sulfonyl) (HSO4[L], Sulfate) (SO4[L], Sulfate) (C(=O)[R][L], Acetyl) (LOSO2[R], Sulfonate) (R, Alkane) (RCL, Long-Alkane) (RCL-, Long-Alkane-(-)) (Ar-6C, Aryl-Phenyl) (ArL, Aryl-Phenyl) (CO-(=C), Enolate) (COH(=C), Enol) (OL-, Alkoxide-) (H2NCOOL, Carbamic Acid) (C(=O)(OL)(OR), Carbonate-Ester) (LP(=O)(OR)2, Phosphonate) (C(=S)(SR)[L], Dithioate) (C(=S)(SL)[R], Dithioate-H) 
(LOOH, Peroxide) (LOOR, Peroxide) (SO2RNRL, Sulfonamide) ([O+]([H])([H])[L], Water-Lg) (H2OL+, Water-Lg) ([O+]([H])([R])[L], Alcohol) (OR2L+, Ether) (RCOHL+, Long-Protonated-Alkoxide) (C(=O)[O+]([L])[H], Protonated-Carboxyl-H) (C=[O+][L], Protonated-Carbonyl) (C(=[O+][L])N, Protonated-Amide) (C=NL+, Protonated-Iminium) (HL+, H+) (C, Carbon) (C-Aromatic, Aromatic Carbon) (OCZ, Ester-Oxygen) (CZZZL, Carbon) (NZZL, Nitrogen) (OZL, Oxygen) (OHZ, Alcohol) (C(=O)(L)Z, Carbonyl Carbon) (C-Alkene, Alkene Carbon) (C-Imine, Imine Carbon) (C-Alkyne, Alkyne Carbon) (C-Nitrile, Nitrile Carbon) (SZL, Sulfur) (PZL, Phosphorus) (HL, H) (CA3L, A-Aliphatic-Carbon) (CA2L, A-Aromatic-Carbon) (AL-aromatic, L-aromatic) (AL, L-A) (CA2VL, Vinyl-Group) (CA2VL-no-O, Vinyl-Group-No-Oxygen) (CCAL, A-Carbon=Carbon) (COAL, A-Oxygen=Carbon) (CNAL, A-Nitrogen=Carbon) (NA2L, A-Aliphatic-Nitrogen) (OAL, A-Aliphatic-Oxygen) (O([L])[A], A-Alkoxide) (C(=O)(L)A, A-Carbonyl) (L-CCC=O, Gamma-Carbonyl) (L-ArCCC=O, Aromatic-Gamma-Carbonyl) (L-CC=O, Beta-Carbonyl2) (L-CO, Beta-Alcohol) (L-CCO, Gamma-Alcohol) (L-CS, Beta-Alcohol) (L-CCS, Gamma-Thiol) (L-CN, Beta-Amine) (L-CCN, Gamma-Amine) (CX3L, Halocarbon)
# mg.draw(label) <- this draws the single matching group. Let's use an interactive function.
@interact(label=list(mg.data.keys()))
def draw_matching(label):
    """Print the matching group stored under ``label`` and return its drawing.

    ``label`` is one of the keys of ``mg.data``; the ``interact`` decorator
    offers those keys as the selectable values.
    """
    # Look the entry up first so an unknown label fails before anything is printed.
    single_matching_group = mg.data[label]
    print(f'Matching group: {single_matching_group}')
    drawing = mg.draw(label)
    return drawing
interactive(children=(Dropdown(description='label', options=('H+', 'O', 'HCl', 'A1AA1', 'A1A=A1', 'A1AAA1', 'A…
CONDITIONS¶
cond = Conditions() # I create the default conditions
cond
Acidity all_pH | Solvent any
acid_water = Conditions(pH=Acidity.acid, solvent=Solvent.water) # these conditions are acidic, with water as the solvent.
acid_water
Acidity acid | Solvent water
acid_water.is_compatible(cond) # <- watch out this is NOT COMMUTATIVE. An Acid/Water pot is NOT compatible with an "any" pot.
False
cond.is_compatible(acid_water) # <- the reverse is true, an "any" pot can be Acid/Water
True
MOLECULES with R groups¶
a = Molecule.from_smiles('C1(C2=C(C(N1C)=O)C=CC=C2)=O')
b = Molecule.from_smiles('NN')
mol = a + b
mol.draw(mode='d3')
# We can add R groups using the substitute_groups method. It takes as input a list of substitutions.
reactants = mol.substitute_groups([[12, 'R1'],
[15, 'R2'],
[16, 'R3'],
[17, 'R4'],
[18, 'R15']]) # NOTE: we call one R group 'R15', names do not matter (as long as they're not repeated)
reactants.draw(mode='d3')
RULES¶
rules = Rules() # create the Rules object
rules.add_new_R_pattern_to_enforce_rule('R1', ['HL', 'CA3L'], mg) # Add an enforce for R1 to be HL or CA3L matching groups
rules
Condition to enforce:
R1 = H, A-Aliphatic-Carbon
CHANGES¶
dic_changes = {'delete': [(19, 21), (20, 23), (0, 4), (3, 4)],
'single': [(0, 19), (3, 20), (4, 21), (4, 23)],
'double': [],
'aromatic': [],
'triple': [],
'charges': []}
changes = Changes.from_dict(dic_changes)
changes
Changes:
Bonds:
delete -> [(19, 21), (20, 23), (0, 4), (3, 4)]
single -> [(0, 19), (3, 20), (4, 21), (4, 23)]
# changes can be APPLIED to a molecule.
# NOTE: This function does NOT give you all the regioisomers!!! It only updates the exact bonds that are listed in the changes.
new_mol = reactants.update_edges_and_charges(changes)
Molecule.draw_list([reactants, new_mol], names=['Reactant molecule', 'Product after application'])
Reactant molecule
Product after application
rt = ReactionTemplate.from_components('Reaction name goes here',
reactants,
dic_changes,
acid_water,
rules=rules,
MG=mg)
rt.draw(size=(400, 400))
Reaction name goes here
References: 0
Condition to enforce:
R1 = H, A-Aliphatic-Carbon
Reactants
Products
# let's take a new molecule now
# NOTE: this molecule does NOT give you any products, because it breaks the rules: Cl is neither H nor an aliphatic carbon.
mol = Molecule.from_smiles('NN.C1(C2=C(C(N1CCl)=O)C=CC=C2)=O')
prod = rt.apply_forward(mol)
Molecule.draw_list([mol] + prod, names=['Reactants'] + ['Products']*len(prod))
Reactants
# Substitutions on the other R groups are not restricted, so mol2 actually gets a product
mol2 = Molecule.from_smiles('NN.C1(C2=C(C(N1C)=O)C=C(Cl)C=C2)=O')
prod2 = rt.apply_forward(mol2)
Molecule.draw_list([mol2] + prod2, names=['Reactants'] + [f'Product {i+1}' for i, _ in enumerate(prod2)])
Reactants
Product 1
The save method is very important, because it does some basic tests and precalculates the list of changes.¶
When the template is saved, Rs are remapped, tests are run and regioisomers are calculated.
# If you uncomment and run this cell, the template rt is serialized to disk in the current ('.') folder.
# rt.save()
This is what happens when you save.¶
- Rs are remapped
rt.rename_rs_from_map() # Note how R15 is now R5
rt.draw()
Reaction name goes here
References: 0
Condition to enforce:
R1 = H, A-Aliphatic-Carbon
Reactants
Products
Regioisomers precalculation¶
The template is created with a single change object, but all the regioisomers are precalculated when the template is created. Products and changes from products to reactants are calculated as well.
reaction_name = "A new template with some regioselectivity"
# Reactant pattern: two fragments combined, then atoms 5-8 replaced by R groups.
a = Molecule.from_smiles('OCCN')
b = Molecule.from_smiles('N(O)(=O)')
p_precursor = (a + b).substitute_groups([
    [5, 'R1'],
    [6, 'R2'],
    [7, 'R3'],
    [8, 'R4'],
])
# Default conditions and a fresh Rules object with no R-group rule added.
conditions = Conditions()
rules = Rules()
# Only ONE set of bond changes is written down here.
changes_d = {
    'delete': [(0, 4), (1, 6), (2, 3), (3, 9), (3, 10), (11, 13), (11, 12)],
    'single': [(2, 6), (4, 12), (9, 13), (10, 13)],
    'double': [(0, 1)],
    'aromatic': [],
    'triple': [(3, 11)],
    'charges': [],
}
reaction_template = ReactionTemplate.from_components(
    reaction_name,
    p_precursor,
    changes_d,
    conditions,
    rules=rules,
    MG=mg,
)
reaction_template.draw(mode='d3', size=(300, 300), node_index=True)
# NOTE: changes_react_to_prod contains 2 entries!!
# We told the program that the R2-C bond should be broken, but the program recognized that this template should apply to R1, too!!!
reaction_template.changes_react_to_prod
[Changes(bonds=BondsChanges(delete=[(0, 4), (1, 6), (2, 3), (3, 9), (3, 10), (11, 13), (11, 12)], single=[(2, 6), (4, 12), (9, 13), (10, 13)], aromatic=[], double=[(0, 1)], triple=[(3, 11)]), charges=ChargeChanges(charges=[])), Changes(bonds=BondsChanges(delete=[(0, 4), (1, 5), (2, 3), (3, 9), (3, 10), (11, 13), (11, 12)], single=[(2, 5), (4, 12), (9, 13), (10, 13)], aromatic=[], double=[(0, 1)], triple=[(3, 11)]), charges=ChargeChanges(charges=[]))]
# From product to reactants, there are 3 identical possibilities. R2, R3 and R4 are identical. Those are also precalculated.
reaction_template.changes_prod_to_react
[Changes(bonds=BondsChanges(delete=[(2, 8), (4, 12), (9, 13), (10, 13), (3, 11)], single=[(0, 4), (1, 8), (2, 3), (3, 9), (3, 10), (11, 12), (0, 1)], aromatic=[], double=[(11, 13)], triple=[]), charges=ChargeChanges(charges=[])), Changes(bonds=BondsChanges(delete=[(2, 6), (4, 12), (9, 13), (10, 13), (3, 11)], single=[(0, 4), (1, 6), (2, 3), (3, 9), (3, 10), (11, 12), (0, 1)], aromatic=[], double=[(11, 13)], triple=[]), charges=ChargeChanges(charges=[])), Changes(bonds=BondsChanges(delete=[(2, 7), (4, 12), (9, 13), (10, 13), (3, 11)], single=[(0, 4), (1, 7), (2, 3), (3, 9), (3, 10), (11, 12), (0, 1)], aromatic=[], double=[(11, 13)], triple=[]), charges=ChargeChanges(charges=[]))]
DEBUG¶
# This is what happens when an invalid reaction template is checked with default_test()
# (presumably the same kind of basic test that save() runs — confirm).
# NOTE: the "empty changes template" would apply infinitely many times.
dic_changes2 = {'delete': [],
                'single': [],
                'double': [],
                'aromatic': [],
                'triple': [],
                'charges': []}  # <- Empty Changes
rt = ReactionTemplate.from_components('Reaction_name_goes_here', reactants, dic_changes2, acid_water, rules=rules, MG=mg)
try:
    rt.default_test()  # <- This will fail, because Products and reactants are the same molecules.
except AssertionError as e:
    # Re-raise the failed assertion as StopExecution so the cell halts here.
    raise hf.StopExecution(e)  # <- suppresses verbose output.
dic_changes2 = {'delete': [(1, 2)],
'single': [],
'double': [],
'aromatic': [],
'triple': [],
'charges': []} # <- Not empty anymore
rt = ReactionTemplate.from_components('Reaction_name_goes_here', reactants, dic_changes2, acid_water, rules=rules, MG=mg) # <- this would be a stupid template
rt.products.nodes[10]['charge'] = 100 # let's mess with the total charge.
rt.save() # <- nah, I will not save this!
Basic test failed. Template Reaction_name_goes_here has been NOT written to disc.
# Identical Rs MUST have the same rules applied to them. If this is not the case, it means that the template should be changed!!
reaction_name = "A_new_template_with_some_regioselectivity"
conditions = Conditions()
rules = Rules()
# R1 and R2 are identical R groups, but I add a restriction Rule ONLY to R1 !
rules.add_new_R_pattern_to_enforce_rule('R1', ['HL', 'CA3L'], mg)
a = Molecule.from_smiles('OCCN')
b = Molecule.from_smiles('N(O)(=O)')
p_precursor = (a + b).substitute_groups([[5, 'R1'], [6, 'R2'], [7, 'R3'], [8, 'R4']])
changes_d = {'delete': [(0, 4), (1, 6), (2, 3), (3, 9), (3, 10), (11, 13), (11, 12)],
'single': [(2, 6), (4, 12), (9, 13), (10, 13)],
'double': [(0, 1)],
'aromatic': [],
'triple': [(3, 11)],
'charges': [],
}
reaction_template = ReactionTemplate.from_components(reaction_name, p_precursor, changes_d, conditions, rules=rules, MG=mg)
reaction_template.draw(size=(600,600)) # NOTE: The program WILL temporarily create the template, so it is possible to visualize and change it
A_new_template_with_some_regioselectivity
References: 0
Condition to enforce:
R1 = H, A-Aliphatic-Carbon
Reactants
Products
# But, you guessed it, this template will NOT be saved until it passes all tests.
try:
    reaction_template.save()
except AssertionError as e:
    # Turn the failed assertion into StopExecution, which this notebook uses to stop a cell while suppressing verbose output.
    raise hf.StopExecution(e)
Template Generator¶
# First create the template with special characters like "Nu" and "Lg"
reaction_name = "Example Template Generator"
# Literature references attached to the example template's conditions.
# Each source URL appears exactly once.
doi_list = ['http://www1.biologie.uni-hamburg.de/b-online/library/newton/Chy251_253/Lectures/Sn2LeavingGroups/Sn2LeavingGroups.html',
            'https://chem.libretexts.org/Bookshelves/Organic_Chemistry/Book%3A_Organic_Chemistry_-_A_Carbonyl_Early_Approach_(McMichael)/01%3A_Chapters/1.24%3A_Nucleophilic_Substitution_SN2_SN1',
            ]
# Keyword arguments for Conditions, unpacked with ** below.
# NOTE(review): 'temperature' and 'pH' are bare integers here, whereas an earlier cell
# passes pH=Acidity.acid — confirm which levels 3 and 4 stand for.
conditions_thing = {'temperature': 3,
                    'light': False,
                    'pH': 4,
                    'solvent': Solvent.no_water,
                    'catalyst': '',
                    'doi': doi_list}
conditions = Conditions(**conditions_thing)
a = Molecule.from_smiles('[HH]') # electrophile
b = Molecule.from_smiles("[HH]") # nucleophile
p_without_r = b + a
p1_precursor = p_without_r.substitute_groups([[1, 'Nu'], [3, 'Lg'], [2, 'R1']])
changes_d = {'delete': [(2, 3), (0, 1)],
'single': [(0, 3), (1, 2)],
'double': [],
'aromatic': [],
'triple': [],
'charges': []}
rules = Rules()
prova = ReactionTemplate.from_components(reaction_name, p1_precursor, changes_d, conditions, rules=rules, MG=mg)
prova.draw(size=(300, 600), charges=True, node_index=True)
# Now define lists of matching groups for those special characters
Lg = ['IL', 'BrL', 'ClL', 'LOSO2[R]']
Nu = ['C#N[L]', 'S([L])[R]', 'NR2L', 'O([L])[R]', 'OHL']
product = list(itertools.product(Lg, Nu))
# Can define a reactivity order e.g. upper diagonal of matrix
combos = special_cartesian_product(product)
# One replacement list per (leaving group, nucleophile) pair; pairs that would
# use the same matching group for both placeholders are skipped.
repl_list = [[('Lg', combo[0]), ('Nu', combo[1])] for combo in combos if combo[0] != combo[1]]
changes = Changes.from_dict(changes_d)
# Keep the generator output under its own name instead of reusing `a`.
generated = template_generator(p1_precursor, changes, repl_list, mg)
# Collect the per-template drawings and join them once at the end, rather than
# growing a string with += inside the loop.
drawings = []
# `generated` is unpacked into three sequences that run in parallel with repl_list.
# The loop variable is `template_changes` so it does not clobber the generic
# `changes` object that was passed to template_generator above.
for (_, nu), name, p1, template_changes in zip(repl_list, *generated):
    # NOTE(review): `conditions` was already built with Solvent.no_water; this reset is
    # redundant unless from_components mutates it — confirm, then hoist out of the loop.
    conditions.solvent = Solvent.no_water
    full_name = reaction_name + '-' + name
    rules = Rules.create_default_rules_for_molecule(p1, mg, allow_aromatic=True)
    rules.add_new_R_pattern_to_enforce_rule('R1', ['CA3L'], mg)
    # nu is the ('Nu', matching_group) pair, so nu[1] is the matching-group label.
    if nu[1] == 'S([L])[R]':
        rules.add_new_R_pattern_to_enforce_rule('R16', ['CA3L', 'CA2L'], mg)
    # more rules can be added here
    prova = ReactionTemplate.from_components(full_name, p1, template_changes.to_dict(), conditions, rules=rules, MG=mg)
    # prova.save(folder=saving_folder)
    drawings.append(prova.draw(size=(400, 400), mode='d3', node_index=True, charges=True, string_mode=True))
bs = ''.join(drawings)
HTML(bs)
Example Template Generator-Lg-Iodine-and-Nu-Nitrile
Condition to enforce:
R1 = A-Aliphatic-Carbon
Reactants
Products
Example Template Generator-Lg-Iodine-and-Nu-Thiolate
Condition to enforce:
R1 = A-Aliphatic-Carbon
R16 = A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Iodine-and-Nu-Amino
Condition to enforce:
R1 = A-Aliphatic-Carbon
R21 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
R22 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Iodine-and-Nu-Alkoxide
Condition to enforce:
R1 = A-Aliphatic-Carbon
R17 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Iodine-and-Nu-Hydroxyl
Condition to enforce:
R1 = A-Aliphatic-Carbon
Reactants
Products
Example Template Generator-Lg-Bromine-and-Nu-Nitrile
Condition to enforce:
R1 = A-Aliphatic-Carbon
Reactants
Products
Example Template Generator-Lg-Bromine-and-Nu-Thiolate
Condition to enforce:
R1 = A-Aliphatic-Carbon
R16 = A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Bromine-and-Nu-Amino
Condition to enforce:
R1 = A-Aliphatic-Carbon
R21 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
R22 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Bromine-and-Nu-Alkoxide
Condition to enforce:
R1 = A-Aliphatic-Carbon
R17 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Bromine-and-Nu-Hydroxyl
Condition to enforce:
R1 = A-Aliphatic-Carbon
Reactants
Products
Example Template Generator-Lg-Chlorine-and-Nu-Nitrile
Condition to enforce:
R1 = A-Aliphatic-Carbon
Reactants
Products
Example Template Generator-Lg-Chlorine-and-Nu-Thiolate
Condition to enforce:
R1 = A-Aliphatic-Carbon
R16 = A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Chlorine-and-Nu-Amino
Condition to enforce:
R1 = A-Aliphatic-Carbon
R21 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
R22 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Chlorine-and-Nu-Alkoxide
Condition to enforce:
R1 = A-Aliphatic-Carbon
R17 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Chlorine-and-Nu-Hydroxyl
Condition to enforce:
R1 = A-Aliphatic-Carbon
Reactants
Products
Example Template Generator-Lg-Sulfonate-and-Nu-Nitrile
Condition to enforce:
R1 = A-Aliphatic-Carbon
R23 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Sulfonate-and-Nu-Thiolate
Condition to enforce:
R1 = A-Aliphatic-Carbon
R16 = A-Aliphatic-Carbon, A-Aromatic-Carbon
R23 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Sulfonate-and-Nu-Amino
Condition to enforce:
R1 = A-Aliphatic-Carbon
R21 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
R22 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
R23 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Sulfonate-and-Nu-Alkoxide
Condition to enforce:
R1 = A-Aliphatic-Carbon
R17 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
R23 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products
Example Template Generator-Lg-Sulfonate-and-Nu-Hydroxyl
Condition to enforce:
R1 = A-Aliphatic-Carbon
R23 = H, A-Aliphatic-Carbon, A-Aromatic-Carbon
Reactants
Products