Molecule

This is the main file of the molecule object

`Molecule`

Bases: nx.Graph

Basic class for manipulating molecules as graphs. A molecule is an undirected graph whose nodes carry an 'element' attribute (the atom type) and, in some cases, a unique integer id.

Source code in retropaths/molecules/molecule.py

class Molecule(nx.Graph):
    """
    Basic class for manipulating molecules as graphs.
    Molecules are undirected graphs with node attribute 'element', being the atom type and sometimes a unique integer id.
    """

    def __init__(self, name: str = '', smi: str = ''):
        """
        Build an empty molecule graph and attach its bookkeeping attributes.

        Args:
            name (str): a name for the molecule
            smi (str): a SMILES string stored on the instance so it does not
                need to be recalculated
        """
        # Plain Python attributes living next to the networkx graph data.
        self.chemical_name, self._smiles = name, smi
        super().__init__()

    def __str__(self):
        """
        This just prints out the debug string for a networkx graph
        """
        edge_data = self.edges.data()
        return f"nodes = {self.nodes.data()}\nedges = {edge_data}\n"

    @classmethod
    def from_debug_string(cls, nodes: list, edges: list) -> Molecule:
        """
        So you can copy paste and parse a molecule printed by the __str__() method

        Args:
            nodes: the nodes list printed by str()
            edges: the edges list printed by str()

        Returns:
            molecule: The Molecule Object
        """
        mol = cls()
        for node, dictionary in nodes:
            mol.add_node(node, **dictionary)

        for a, b, bo in edges:
            mol.add_edge(a, b, **bo)
        mol.set_neighbors()
        return mol

    def __repr__(self):
        rep = None
        if self.chemical_name != "":
            rep = self.chemical_name
        elif self._smiles != "":
            rep = self.smiles
        else:
            rep = self.__str__()
        return rep

    def replace_A_with_element_matching(self) -> Molecule:
        """Return a copy in which every 'A' atom is given a concrete element.

        A node whose 'element' is "A" receives the first entry of its own
        'element_matching' attribute as its new element.

        Returns:
            Molecule: the modified copy
        """
        concrete = self.copy()
        for index in self.nodes:
            attributes = self.nodes[index]
            if attributes["element"] != "A":
                continue
            concrete.nodes[index]["element"] = attributes["element_matching"][0]
        return concrete

    def get_bond_changes(self, mol: Molecule) -> BondChanges:
        """Compute which bonds form and which break when going from self to mol.

        Bonds are compared as unordered pairs of node labels, so (a, b) and
        (b, a) are the same bond. Bond attributes are not compared.

        Args:
            mol (Molecule): the molecule which has bond changes with respect to self

        Returns:
            BondChanges: built from (forming, breaking), where `forming` lists
                the edges of mol that are absent from self and `breaking` lists
                the edges of self that are absent from mol
        """
        # Index each edge set once. The lookups below are then constant time,
        # and node labels only need to be hashable, not mutually orderable.
        own_bonds = {frozenset(edge) for edge in self.edges}
        other_bonds = {frozenset(edge) for edge in mol.edges}

        forming = [edge for edge in mol.edges if frozenset(edge) not in own_bonds]
        breaking = [edge for edge in self.edges if frozenset(edge) not in other_bonds]
        return BondChanges(forming, breaking)

    @property
    def molecular_formula(self) -> MolecularFormula:
        """Molecular formula built from the element of every node of self.

        Returns:
            MolecularFormula: this is a molecular formula object
        """
        elements = self.list_of_elements()
        return MolecularFormula.from_list_of_elements(elements)

    @classmethod
    def from_rdmol(cls, rdmol, smi, name=None):
        """Build a molecule graph from an RDKit molecule.

        Every RDKit atom becomes a node (element and formal charge) and every
        RDKit bond becomes an edge. For each atom, as many explicit 'H' nodes
        as its GetTotalNumHs() count are then appended and single-bonded to it.

        Args:
            rdmol: an rdkit Chem.rdchem.Mol instance
            smi: SMILES passed to the constructor; the stored SMILES is
                replaced by create_smiles() before returning
            name (optional): name passed to the constructor. Defaults to None.

        Returns:
            Molecule: the new molecule
        """
        new_mol = cls(name=name, smi=smi)
        assert isinstance(rdmol, Chem.rdchem.Mol), "rdmol must be Rdkit molecule"

        # one (atomic number, formal charge, hydrogen count) triple per atom
        atom_list = [(atom.GetAtomicNum(), atom.GetFormalCharge(), atom.GetTotalNumHs()) for atom in rdmol.GetAtoms()]
        bond_list = [(bond.GetEndAtomIdx(), bond.GetBeginAtomIdx(), bond.GetBondTypeAsDouble()) for bond in rdmol.GetBonds()]

        for index, (atomic_number, formal_charge, _) in enumerate(atom_list):
            new_mol.add_node(index, neighbors=0, element=from_number_to_element(atomic_number), charge=formal_charge)
        for end_index, begin_index, order in bond_list:
            new_mol.add_edge(end_index, begin_index, bond_order=bond_ord_number_to_string(order))

        # hydrogens are numbered right after the atoms added above
        first_hydrogen_index = len(new_mol)
        hydrogen_index = first_hydrogen_index
        for parent_index in range(first_hydrogen_index):
            hydrogen_count = atom_list[parent_index][2]
            for _ in range(hydrogen_count):
                new_mol.add_node(hydrogen_index, neighbors=0, element='H', charge=0)
                new_mol.add_edge(hydrogen_index, parent_index, bond_order='single')
                hydrogen_index += 1

        # the neighbors is a number set to have a better isomorphism.
        new_mol.set_neighbors()

        # Need to create smiles to canonicalize
        new_mol._smiles = new_mol.create_smiles()
        return new_mol

    @classmethod
    def from_oemol(cls, oemol, smi, name=None):
        """Build a molecule graph from an OpenEye molecule.

        oechem.OEAddExplicitHydrogens is called on `oemol` first; every atom
        then becomes a node (element and formal charge) and every bond an edge.
        A bond that reports IsAromatic() is stored with order 1.5, any other
        bond with its GetOrder() value.

        Args:
            oemol: the OpenEye molecule
            smi: SMILES passed to the constructor; the stored SMILES is
                replaced by create_smiles() before returning
            name (optional): name passed to the constructor. Defaults to None.

        Returns:
            Molecule: the new molecule
        """
        new_mol = cls(name=name, smi=smi)

        # add in hydrogen atoms
        oechem.OEAddExplicitHydrogens(oemol)

        # get list of atoms and list of bonds
        atom_list = [(atom.GetAtomicNum(), atom.GetFormalCharge()) for atom in oemol.GetAtoms()]
        bond_list = [
            (bond.GetBgnIdx(), bond.GetEndIdx(), 1.5 if bond.IsAromatic() else bond.GetOrder())
            for bond in oemol.GetBonds()
        ]

        # adding nodes and edges in the same format as retropaths
        for index, (atomic_number, formal_charge) in enumerate(atom_list):
            new_mol.add_node(index, neighbors=0, element=from_number_to_element(atomic_number), charge=formal_charge)
        for begin_index, end_index, order in bond_list:
            new_mol.add_edge(begin_index, end_index, bond_order=bond_ord_number_to_string(order))

        # the neighbors is a number set to have a better isomorphism.
        new_mol.set_neighbors()

        # Need to recreate the smiles because order of graphs matters
        new_mol._smiles = new_mol.create_smiles()
        return new_mol

    @classmethod
    def from_smiles(cls, smi: str, name: str = '') -> Molecule:
        """creates a molecule object from a smiles string

        RDKit is used when the OE_LICENSE environment variable is not set,
        OpenEye otherwise.

        Args:
            smi (str): the SMILES string
            name (str, optional): The common name of the molecule. Defaults to ''.

        Raises:
            ValueError: when the smiles is not valid

        Returns:
            Molecule: a retropath molecule object
        """
        if 'OE_LICENSE' not in os.environ:
            rdmol = Chem.MolFromSmiles(smi)
            if rdmol is None:
                # RDKit reports a parse failure by returning None. Raise the
                # documented ValueError here rather than letting from_rdmol()
                # fail on its isinstance assertion.
                raise ValueError("Not a valid SMILES string according to RDKit:", smi)
            return cls.from_rdmol(rdmol, smi, name)

        # Use openeye smiles tools to get a canonical smiles string and an OpenEye Molecule instance
        oemol = oechem.OEGraphMol()
        if not oechem.OESmilesToMol(oemol, smi):
            raise ValueError("Not a valid SMILES string according to OpenEye:", smi)
        smi_canon = oechem.OEMolToSmiles(oemol)
        return cls.from_oemol(oemol, smi_canon, name)

    @classmethod
    def from_list_of_smiles_to_single_graph(cls, list_of_smiles: list[str]) -> Molecule:
        """this creates a single graph from a list of smiles

        Each SMILES is parsed with from_smiles() and added, in order, to an
        initially empty molecule.

        Args:
            list_of_smiles (list[str]): a list of SMILES strings

        Returns:
            Molecule: retropaths molecule object
        """
        # Materialise once, so a generator is not exhausted by the type check
        # below before the molecules are built.
        list_of_smiles = list(list_of_smiles)
        # isinstance also accepts str subclasses, unlike an exact type match
        assert all(isinstance(x, str) for x in list_of_smiles), "Hey, I need a list of strings"

        single_graph = cls()
        for molecule in [cls.from_smiles(x) for x in list_of_smiles]:
            single_graph += molecule
        return single_graph

    @classmethod
    def add_list_of_molecules(cls, list_molec: list[Molecule]) -> Molecule:
        """This functions creates a graph from a list of molecules.
        If the list is empty, it will return the empty molecule.

        Args:
            list_molec (list[Molecule]): a list of molecules

        Returns:
            Molecule: retropaths molecule object
        """
        assert type(list_molec)
        final_molecule = Molecule()
        for x in list_molec:
            final_molecule += x
        return final_molecule

    def copy(self, reindex=False, start=0) -> Molecule:
        """Copy the graph, bringing the non-networkx attributes across.

        Args:
            reindex (bool, optional): convert the node labels to integers starting with start. Defaults to False.
            start (int, optional): initial value for the reindexing. Defaults to 0.

        Returns:
            Molecule: retropaths molecule object
        """
        # relabelled copy when reindex is truthy, plain networkx copy otherwise
        duplicate = nx.convert_node_labels_to_integers(self, first_label=start) if reindex else super().copy()

        # neither branch above transfers these two attributes
        duplicate._smiles = self._smiles
        duplicate.chemical_name = self.chemical_name
        return duplicate

    def update_edges_and_charges(self, changes: Changes) -> Molecule:
        """Return a copy of self with bond and charge changes applied.

        `changes.bonds.dict()` is a dictionary with keys in
        {delete, single, double, aromatic, triple} whose values are lists of
        (i, j) pairs: edges to remove for "delete", edges to add with that bond
        order for the other keys. `changes.charges.charges` yields
        (atom index, delta) pairs that are added to the node 'charge'.

        Args:
            changes (Changes): it is a retropath changes object

        Returns:
            Molecule: a molecule with changes applied
        """
        updated = self.copy()
        bond_edits = changes.bonds.dict()

        # removals first, then additions in a fixed bond-order sequence
        for atom_a, atom_b in bond_edits.get("delete", ()):
            updated.remove_edge(atom_a, atom_b)

        for order in ("single", "double", "aromatic", "triple"):
            for atom_a, atom_b in bond_edits.get(order, ()):
                updated.add_edge(atom_a, atom_b, bond_order=order)

        updated.set_neighbors()

        # each charge entry is a delta, not an absolute value
        for atom, delta in changes.charges.charges:
            updated.nodes[atom]["charge"] += delta

        return updated

    def graph_difference(self, subtracted_molecule: Molecule) -> Molecule:
        """Remove one match of subtracted_molecule from a copy of self.

        Only the first subgraph isomorphism found is used: the nodes it maps
        to are deleted from the returned copy.

        Args:
            subtracted_molecule (Molecule): a molecule graph

        Returns:
            Molecule: the self molecule graph without subtracted_molecule
        """
        working_copy = self.copy()
        matches = subtracted_molecule.get_subgraph_isomorphisms_of(working_copy)

        assert len(matches) > 0, f"The graph difference between {self.force_smiles()} and {subtracted_molecule.force_smiles()} needs a second look."

        remainder = working_copy.copy()

        # remove the nodes
        for matched_node in matches[0].reverse_mapping.values():
            remainder.remove_node(matched_node)

        return remainder

    def graph_difference_with_list_and_duplicates(self, mols: list[Molecule]) -> Molecule:
        '''
        Graph difference between self and a list of molecules.

        For each molecule of the list, graph_difference() is applied again and
        again for as long as that molecule is still subgraph-isomorphic to
        what remains.
        '''
        remaining = self.copy()
        for fragment in mols:
            while True:
                if not fragment.is_subgraph_isomorphic_to(remaining):
                    break
                remaining = remaining.graph_difference(fragment)
        return remaining

    def __add__(self, mol2: Molecule) -> Molecule:
        """mol1 + mol2 creates a single graph holding both molecules.

        **NOTE**: atoms will be renumbered by this operation ! The nodes of
        self become 0 .. len(self) - 1 and the nodes of mol2 follow directly
        after, each in its own iteration order.

        Args:
            mol2 (Molecule): a molecule

        Returns:
            Molecule: a molecule that is the sum of self and mol2
        """
        first_size, second_size = len(self), len(mol2)

        # consecutive, non-overlapping integer labels for the two operands
        first_labels = dict(zip(self.nodes(), range(first_size)))
        second_labels = dict(zip(mol2.nodes(), range(first_size, first_size + second_size)))

        return nx.compose(
            nx.relabel_nodes(self, first_labels),
            nx.relabel_nodes(mol2, second_labels),
        )

    def __eq__(self, other_molecule) -> bool:
        """Equality without hydrogens is waaay faster. Also, hydrogens are always a terminal.
        But the problem comes when H-H or H+ get to do the isomorphism. In this case I do NOT want to collapse them.

        Args:
            other_molecule: a molecule object

        Returns:
            bool: returns if mols are equals (isomorphic).
        """
        if not isinstance(other_molecule, Molecule):
            raise NotImplementedError

        if len(self) < 3 or len(other_molecule) < 3:
            first = self
            second = other_molecule
        else:
            first = self.remove_Hs()
            second = other_molecule.remove_Hs()
        GM = SubGraphMatcher(first)
        is_this_equal = GM.is_isomorphic(second)
        return is_this_equal

    def __mul__(self, other: int) -> Molecule:
        """In case one wants more of the same molecule

        Args:
            other (int): how many molecules

        Returns:
            Molecule: a retropaths molecule
        """
        assert isinstance(other, int)
        final = self.__class__()
        for _ in range(other):
            final += self
        return final

    def draw(self,
             mode="rdkit",
             size=None,
             string_mode=False,
             node_index=True,
             percentage=None,
             force=None,
             fixed_bond_length=None,
             fixedScale=None,
             fontSize=None,
             lineWidth=None,
             charges=True,
             neighbors=False):
        """
        Draw the graph.

        mode can be 'd3' for interactive force directed graphs, or 'rdkit' or
        'oe' for chemdraw style images. With string_mode the markup is
        returned as a string; otherwise it is wrapped for display (SVG for
        'rdkit', HTML for 'oe'). Any other mode raises ValueError.
        """
        if mode == "d3":
            canvas = size or (500, 500)
            graph_copy = self.copy()
            nodes, links = molecule_to_d3json(graph_copy, node_index, charges=charges, neighbors=neighbors)
            return draw_d3(nodes, links, size=canvas, string_mode=string_mode, percentage=percentage, force_layout_charge=force)

        if mode == "rdkit":
            canvas = size or (300, 300)
            # Stand-in element symbols, in order, for the labels R1 .. R22.
            stand_ins = (
                "Sg", "Rf", "Ne", "Ar", "Kr", "Ru", "Rn", "Og", "Fr", "At", "Db",
                "Hs", "Bh", "Mt", "Rg", "Cn", "Hf", "U", "W", "Pu", "Am", "Cm",
            )
            wild_names = {symbol: f"R{number}" for number, symbol in enumerate(stand_ins, start=1)}
            smiles = self.replace_r_with_wild_names(wild_names).force_smiles()
            svg_str = moldrawsvg(smiles, wild_names, molSize=canvas, fixed_bond_length=fixed_bond_length, fixedScale=fixedScale, fontSize=fontSize, lineWidth=lineWidth)
            return svg_str if string_mode else SVG(svg_str)

        if mode != "oe":
            raise ValueError(f'mode must be one of "oe", "d3" or "rdkit", received {mode}')

        # 'oe' mode draws on a fixed 400 x 400 canvas
        width, height = 400, 400

        oe_mol = oechem.OEGraphMol()
        oechem.OESmilesToMol(oe_mol, self.smiles)
        oedepict.OEPrepareDepiction(oe_mol)

        options = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
        options.SetMargins(10)
        display = oedepict.OE2DMolDisplay(oe_mol, options)

        font = oedepict.OEFont(oedepict.OEFontFamily_Default, oedepict.OEFontStyle_Default, 12,
                               oedepict.OEAlignment_Center, oechem.OEDarkRed)

        # every atom gets a toggle text showing its index
        for atom_display in display.GetAtomDisplays():
            toggle_text = f"{atom_display.GetAtom().GetIdx()}"
            oedepict.OEDrawSVGToggleText(display, atom_display, toggle_text, font)

        stream = oechem.oeosstream()
        oedepict.OERenderMolecule(stream, "svg", display)
        rendered = stream.str()

        markup = f'<div style="width: {100}%; display: table;"> <div style="display: table-row;">'
        markup += f'{rendered.decode()}</div></div>'
        return markup if string_mode else HTML(markup)

    def to_svg(self, folder=Path("."), file_name=None):
        """Write the 'rdkit' mode drawing of this molecule to an SVG file.

        Args:
            folder (Path | str, optional): destination directory. Defaults to
                the current directory.
            file_name (str, optional): name of the file. Defaults to
                "<smiles>.svg", where <smiles> is force_smiles().
        """
        if file_name is None:
            file_name = f'{self.force_smiles()}.svg'
        # Path() takes both str and Path, so a plain string folder works too
        full_path = Path(folder) / file_name
        with open(full_path, "w") as f:
            f.write(self.draw(mode="rdkit", string_mode=True))

    def to_png(self, folder=Path("."), file_name=None):
        """Render the 'rdkit' mode drawing of this molecule to a PNG file.

        The SVG markup from draw() is handed to svg2png, which writes the file.

        Args:
            folder (Path | str, optional): destination directory. Defaults to
                the current directory.
            file_name (str, optional): name of the file. Defaults to
                "<smiles>.png", where <smiles> is force_smiles().
        """
        if file_name is None:
            file_name = f'{self.force_smiles()}.png'
        # Path() takes both str and Path, so a plain string folder works too
        full_path = str(Path(folder) / file_name)
        svg_code = self.draw(mode="rdkit", string_mode=True)
        svg2png(bytestring=svg_code, write_to=full_path)

    @staticmethod
    def draw_list_smiles(smis, **kwargs):
        """Draw a list of SMILES strings with draw_list().

        Each SMILES is parsed with from_smiles(); the SMILES themselves are
        passed as the names, and every extra keyword argument is forwarded.
        """
        molecules = []
        for smi in smis:
            molecules.append(Molecule.from_smiles(smi))
        return Molecule.draw_list(molecules, names=smis, **kwargs)

    @staticmethod
    def draw_list(molecule_list,
                  names=[],
                  mode="rdkit",
                  title="",
                  charges=False,
                  size=(650, 650),
                  width=100,
                  columns=5,
                  fixed_bond_length=None,
                  string_mode=False,
                  node_index=True,
                  neighbors=False,
                  arrows=False,
                  borders=False,
                  display_title=True,
                  arrow_size=2,
                  ):
        """
        Draws a list of molecules
        """
        if len(molecule_list) == 0:
            print("This list is empty")
            molecule_list.append(Molecule())
            names.append("")

        true_mol_len = len(molecule_list)
        while len(molecule_list) < columns:
            molecule_list.append(Molecule())
            names.append("")

        if not fixed_bond_length:
            fixed_bond_length = [None for x in range(len(molecule_list))]
        else:
            len_fb = len(fixed_bond_length)
            if len_fb < len(molecule_list):
                diff = len(molecule_list) - len_fb
                fixed_bond_length = [*fixed_bond_length, *[100 for x in range(diff)]]

        if columns <= len(molecule_list):
            how_many_columns = columns
        else:
            how_many_columns = len(molecule_list)

        cell_width = 100.0 / how_many_columns

        borders_string = 'border: 1px solid black;' if borders else ''
        title_string = f'<h2>{title}</h2>' if display_title else ''

        sstring = f'{title_string}<div style="width: {width}%; display: table;"> <div style="display: table-row;">'

        for i, mol in enumerate(molecule_list):
            if i % how_many_columns == 0:
                sstring += '</div><div style="display: table-row;">'

            try:
                name = f'<p style="text-align: center;">{names[i]}</p>'
            except IndexError:
                name = ""
            this_border_string = borders_string if not mol.is_empty() else ''  # I do not want to draw border on empty molecules.
            sstring += (
                f'<div style="width: {cell_width}%; display: table-cell;{this_border_string}"> \
                {mol.draw(mode=mode, string_mode=True, size=size, fixed_bond_length=fixed_bond_length[i], charges=charges, neighbors=neighbors, node_index=node_index,percentage=0.8)} {name} </div>'
            )
            if arrows and i < true_mol_len - 1:
                # sstring += f'<div style="width: 0%; display: table-cell; vertical-align: middle;"><font size="+{arrow_size}">⟶</font></div>'
                sstring += f'<div style="width: 0%; display: table-cell; vertical-align: middle;"><font size="+{arrow_size}">&#8594;</font></div>'

        sstring += "</div></div>"
        if string_mode:
            return sstring
        else:
            return HTML(sstring)

    def renumber_indexes(self, swaps):
        '''
        Relabel the nodes through nx.relabel_nodes.

        swaps is a mapping such as {2:3, 3:2, 5:4, 4:6, 6:5}; the returned
        molecule has the VALUES of the mapping where the KEYS were.
        '''
        return nx.relabel_nodes(self, swaps)

    def change_element_name(self, lab1, lab2):
        """
        Return a copy in which every node whose 'element' equals lab1 has it
        replaced by lab2.

        lab1 and lab2 -> str
        """
        renamed = self.copy()
        for index in renamed.nodes():
            attributes = renamed.nodes[index]
            if attributes["element"] == lab1:
                attributes["element"] = lab2
        return renamed

    def get_bond_order(self, first_atom, second_atom):
        '''Return the 'bond_order' attribute of the edge between two node indexes.'''
        bond = self.edges[first_atom, second_atom]
        return bond["bond_order"]

    def get_element(self, index_atom):
        '''Return the 'element' attribute of the node at one index.'''
        atom = self.nodes[index_atom]
        return atom["element"]

    def are_R_labels_equivalent(self, first_label, second_label):
        """
        Given two R labels, this test returns if they are equivalent.
        Equivalent here means that they would be treated as identical or symmetric by ismags
        so for instance two Rs single bond connected to the same Carbon are equivalent.

        The two labels are renamed to Q1/Q2 in one copy and to Q2/Q1 in
        another; both copies go through qify(number=False) and are then
        compared with is_isomorphic_to().
        """
        known_labels = self.which_r_groups
        both_known = first_label in known_labels and second_label in known_labels
        assert both_known, f"Watch out, one of the two labels {first_label} or {second_label} is not in {known_labels} in the method are_R_labels_equivalent."

        def tagged(as_q1, as_q2):
            # copy of self with the two labels renamed to the Q1 / Q2 markers
            return self.change_element_name(as_q1, "Q1").change_element_name(as_q2, "Q2").qify(number=False)

        straight = tagged(first_label, second_label)
        swapped = tagged(second_label, first_label)
        return swapped.is_isomorphic_to(straight)

    def is_isomorphic_to(self, mol):
        """Return SubGraphMatcher(self).is_isomorphic(mol)."""
        return SubGraphMatcher(self).is_isomorphic(mol)

    def largest_common_subgraph(self, mol):
        """Return SubGraphMatcher(self).largest_common_subgraph(mol)."""
        return SubGraphMatcher(self).largest_common_subgraph(mol)

    def is_subgraph_isomorphic_to(self, mol, timeout_seconds=10):
        """
        Compares self to mol and returns True if self is isomorphic to a subgraph of mol
        a.is_subgraph_isomorphic_to(b)
        means that a is a subgraph of b

        The R-groups of self are removed (remove_r_groups) before matching;
        timeout_seconds is handed to the SubGraphMatcher.
        """
        matcher = SubGraphMatcher(mol, timeout_seconds=timeout_seconds)
        template = self.remove_r_groups()
        return matcher.is_subgraph_isomorphic(template)

    def get_subgraph_isomorphisms_of(self, target, verbosity=0) -> list[IsomorphismMappings]:
        """
        a.get_subgraph_isomorphisms_of(b)
        gives the isomorphic map of A being a subgraph of B
        self.get_subgraph_isomorphisms_of(target)
        tells if the template molecule SELF is a subgraph of TARGET molecule

        The R-groups of self are removed before matching, and each raw result
        of the matcher is wrapped in an IsomorphismMappings.
        """
        matcher = SubGraphMatcher(target, verbosity=verbosity)
        template = self.remove_r_groups()
        wrapped = []
        for raw_mapping in matcher.get_subgraph_isomorphisms(template):
            wrapped.append(IsomorphismMappings(raw_mapping))
        return wrapped

    def create_smiles(self):
        """Return the SMILES of every connected piece, sorted and joined with '.'."""
        fragment_smiles = [graph_to_smiles(piece) for piece in self.separate_graph_in_pieces()]
        fragment_smiles.sort()
        return ".".join(fragment_smiles)

    def which_atoms_are_in(self):
        """returns the set of unique elements"""
        found = set()
        for index in self.nodes:
            found.add(self.nodes[index]["element"])
        return found

    def list_of_elements(self):
        """returns the list of elements, one entry per node, in node order"""
        elements = []
        for index in self.nodes:
            elements.append(self.nodes[index]["element"])
        return elements

    def is_empty(self):
        """Return True when the molecule graph has no nodes."""
        return not len(self.nodes)

    def separate_graph_in_pieces(self) -> list[Molecule]:
        """
        Return one independent graph (a copy of the subgraph) per connected
        component of self.
        """
        pieces = []
        for component in nx.connected_components(self):
            pieces.append(self.subgraph(component).copy())
        return pieces

    def deduplicate_single_graph(self):
        '''
        This takes a molecule graph and returns a single graph with no duplicates.

        The graph is split into its connected pieces, the pieces go through
        delete_duplicates(), and what is left is added back together.
        '''
        pieces = self.separate_graph_in_pieces()
        unique_pieces = self.delete_duplicates(pieces)
        return Molecule.add_list_of_molecules(unique_pieces)

    def give_me_free_index(self):
        """Call the module-level give_me_free_index helper with naturals(0) and self.

        substitute_group() consumes the returned object with next().
        """
        candidates = naturals(0)
        return give_me_free_index(candidates, self)

    def substitute_group(self, i, label, cut=None, copy=True, force_bond_order="single", charge=0, element_matching=None):
        """
        Replace node i, or the fragment hanging off an edge of node i, with a
        node labeled "label".

        i :: int <- the node to be replaced.
        label :: Str <- the label of new node.
        cut :: (int,int) <- This selects an edge to be cut in case there is an ambiguity
        copy :: Bool <- will it create a new graph or not?
        force_bond_order :: Str <- bond order of the edge to the new node (cut mode only)
        charge :: int <- charge of the new node (cut mode only)
        element_matching <- stored on the node; mandatory when label is "A"

        Without `cut`, node i is simply renamed, provided it appears in at
        most one edge; otherwise a message is printed and nothing changes.
        With `cut`, a new node is bonded to the end of `cut` that is not i,
        the cut edge is removed, and only the connected component holding
        that end is kept.
        """
        mol = self.copy() if copy else self

        if label == "A":
            assert element_matching is not None, "An A atom must be added with element_matching flag."

        counter = mol.give_me_free_index()
        new_index = next(counter)

        if cut is None:
            how_many_edges_connected = sum(i in edge for edge in mol.edges)
            if how_many_edges_connected > 1:
                print(f"Removing link {i} is ambiguous on {self}, has it has {how_many_edges_connected} edges. You should define the cut=(x,y) keyword\n\n")
            else:
                # renaming node only
                mol.nodes[i]["element"] = f"{label}"
                if element_matching:
                    mol.nodes[i]["element_matching"] = element_matching
            return mol

        # The edge may be given in either order: keep the end that is not i.
        attach_node = cut[1] if i == cut[0] else cut[0]

        mol.add_node(new_index, neighbors=0, element=f"{label}", charge=charge, element_matching=element_matching)
        mol.add_edge(new_index, attach_node, bond_order=force_bond_order)
        mol.remove_edge(attach_node, i)
        set_id = nx.node_connected_component(mol, attach_node)
        # subgraph() alone returns a read-only view tied to `mol`. Copying it,
        # as separate_graph_in_pieces() does, gives an independent molecule
        # that can be edited further.
        mol = mol.subgraph(set_id).copy()
        mol.set_neighbors()
        return mol

    def substitute_groups(self, list_of_simple_substitutions, element_matching=None):
        """
        Applies, in order, the changes in list_of_simple_substitutions.
        Each change is a pair (i, s) which causes node i to be replaced with a node with label 's'.
        Every step works on the result of the previous one; self is returned
        when the list is empty.
        """
        current = self
        for index, label in list_of_simple_substitutions:
            current = current.substitute_group(index, label, element_matching=element_matching)
        return current

    def substitute_bond(self, i, j, bond_order="single", copy=True):
        """
        Change the type of the edge (i, j): the edge is removed and added
        again with the given bond_order.
        copy :: Bool <- will it create a new graph or not?
        """
        target = self.copy() if copy else self
        target.remove_edge(i, j)
        target.add_edge(i, j, bond_order=bond_order)
        return target

    def substitute_element(self, node_index, element):
        """Return a copy in which the node at node_index has the given element."""
        replaced = self.copy()
        replaced.nodes[node_index].update(element=element)
        return replaced

    def remap_element_names(self, mapping):
        """renames the element names in place, given a dictionary of rename mapping

        Elements that are not keys of mapping are left as they are. self is
        modified and returned.
        """
        for index in self.nodes:
            attributes = self.nodes[index]
            current = attributes["element"]
            if current not in mapping:
                continue
            attributes["element"] = mapping[current]
        return self

    def remap_element_names_reverse(self, mapping):
        """
        renames the element name given a dictionary of rename mapping
        This is the version with inverted mapping, due to double naming
        mapping can be a dictionary like this.

        {'R1':'R1',
         'R2':'R2',
         'R3':'R17',
         'R4':'R17'}

        For every (key, value) pair, the first node still carrying the value
        as element is renamed to the key, on a copy of self.
        """
        remapped = self.copy()
        for new_label, current_label in mapping.items():
            # Only the first match is renamed, so a later pair with the same
            # value picks the next node still carrying it.
            candidates = [index for index in remapped.nodes if remapped.nodes[index]["element"] == current_label]
            first_match = candidates[0]
            remapped.nodes[first_match]["element"] = new_label
        return remapped

    def remove_r_groups(self):
        """
        If this molecule is a template, remove the R-groups and return a valid molecule fragment.

        A node counts as an R-group when its atom type contains "R" or "X".
        A copy is returned in every case.
        """
        stripped = self.copy()
        if not self.is_template:
            return stripped

        doomed = [index for index, atom_type in self.atom_types.items() if "R" in atom_type or "X" in atom_type]
        for index in doomed:
            stripped.remove_node(index)
        return stripped

    def remove_Hs(self):
        """
        Return a copy of this molecule without its hydrogen nodes.

        A node is removed when its atom type is exactly "H". The previous
        substring test ("H" in type) also removed every other atom type that
        contains an H, such as "He", "Hf", "Hg", "Ho" or "Hs".
        """
        # NOTE(review): assumes atom_types maps node index -> element string,
        # as the substring tests in remove_r_groups() suggest; confirm.
        mol = self.copy()
        hydrogens = [index for index, atom_type in self.atom_types.items() if atom_type == "H"]
        for index in hydrogens:
            mol.remove_node(index)
        return mol

    def remove_fragments(self, remove):
        """
        Remove connected components, molecular fragments, which contain the nodes listed in 'remove'
        remove - an integer or a list of integers

        Works on a copy, which is returned; node labels are kept as they are.
        Several listed nodes may belong to the same fragment.
        """
        # A plain copy. Passing anything as the first argument of copy()
        # would switch on its reindexing and renumber the nodes that
        # `remove` refers to.
        new_graph = self.copy()
        if hasattr(remove, "__iter__"):
            # it is a list of removals
            already_removed = set()
            for single_remove in remove:
                if single_remove in already_removed:
                    # went away together with an earlier node's fragment
                    continue
                set_id = nx.node_connected_component(new_graph, single_remove)
                already_removed.update(set_id)
                new_graph.remove_nodes_from(set_id)

        elif isinstance(remove, int):
            set_id = nx.node_connected_component(new_graph, remove)
            new_graph.remove_nodes_from(set_id)

        return new_graph

    def replace_L_with_R(self):
        """
        Return a copy in which every node with element "L2" becomes "R1".

        This needs to be refactored, CRA used it for polymer generation.
        """
        relabelled = self.copy()
        for index in relabelled.nodes():
            attributes = relabelled.nodes[index]
            if attributes["element"] == "L2":
                attributes["element"] = "R1"
        return relabelled

    def replace_R_groups_with_L_molecule(self, L_molecules):
        """
        This needs to be refactored. CRA used it for polymer generation.

        Pairs the R labels of self with the graphs of L_molecules, in order.
        For every pair the graph is added to the working copy, then the R node
        and the first "L" node are both removed and their single neighbors are
        joined by a single bond.
        """
        labels = self.which_r_groups
        assembled = self.copy()

        # For each R label, splice in the matching graph from L_molecules
        for label, l_graph in zip(labels, L_molecules):

            assembled += l_graph

            r_nodes = [node for node in assembled.nodes if assembled.nodes[node]["element"] == label]
            assert len(r_nodes) == 1, f"{label} Something strange with duplicate R values (or missing)."
            r_node = r_nodes[0]

            r_neighbors = list(assembled.neighbors(r_node))
            assert len(r_neighbors) == 1, "To use this functionality you need to substitute an hydrogen or something that only has one neighbor."
            molecule_side = r_neighbors[0]

            # the first "L" node found is the one used
            l_nodes = [node for node in assembled.nodes if assembled.nodes[node]["element"] == "L"]
            l_node = l_nodes[0]
            group_side = list(assembled.neighbors(l_node))[0]

            assembled.remove_node(l_node)
            assembled.remove_node(r_node)
            assembled.add_edge(molecule_side, group_side, bond_order="single")
            assembled.set_neighbors()
        return assembled

    def replace_R_groups_with_matching_group(self, MG, MG_list):
        """
        Return a copy in which every R group is replaced by a matching group.

        The sorted R labels of the molecule are paired, in order, with the entries
        of `MG_list`. For each pair the matching-group graph (with its A atoms
        replaced through `replace_A()`) is added, the R atom and the single "L"
        atom are deleted, and their former neighbors are joined by a single bond.

        Args:
            MG: object whose `data[symbol]` gives the matching group for a symbol.
            MG_list: sequence of objects exposing a `symbol` attribute, one per R label.

        Returns:
            The substituted copy.
        """
        r_labels = self.which_r_groups
        symbols = [entry.symbol for entry in MG_list]
        assert len(r_labels) == len(symbols), f"labels {len(r_labels)} and mg_label_list {len(symbols)} must be of equal length"
        result = self.copy()

        for r_label, symbol in zip(r_labels, symbols):
            # Adding renumbers the atoms, so every index is looked up afterwards.
            result += MG.data[symbol].replace_A().graph

            r_atoms = [atom for atom in result.nodes if result.nodes[atom]["element"] == r_label]
            assert len(r_atoms) == 1, f"{r_label} Something strange with duplicate R values (or missing)."
            r_atom = r_atoms[0]

            r_neighbors = list(result.neighbors(r_atom))
            assert len(r_neighbors) == 1, "To use this functionality you need to substitute an hydrogen or something that only has one neighbor."
            backbone_atom = r_neighbors[0]

            l_atoms = [atom for atom in result.nodes if result.nodes[atom]["element"] == "L"]
            assert len(l_atoms) == 1, "To use this functionality you need to select a Matching group with only one L group."
            l_atom = l_atoms[0]
            group_atom = list(result.neighbors(l_atom))[0]

            result.remove_node(l_atom)
            result.remove_node(r_atom)
            result.add_edge(backbone_atom, group_atom, bond_order="single")
            result.set_neighbors()
        return result

    def replace_R_groups_from_rules(self, rules, MG, seed=42):
        """
        Return a copy in which every R group is replaced by a randomly chosen matching group.

        For each R label one entry of `rules.enforce.dictionary[label]` is picked
        with `random.choice`. A label that has no entry yet first gets one through
        `rules.add_new_R_pattern_to_enforce_rule(label, ["HL"], MG)`.

        NOTE: this reseeds the module-level `random` generator with `seed`.

        Args:
            rules: rules object exposing `enforce.dictionary` and `add_new_R_pattern_to_enforce_rule`.
            MG: matching-group data, forwarded to `replace_R_groups_with_matching_group`.
            seed: seed given to `random.seed` before the choices are made.

        Returns:
            The substituted molecule.
        """
        assert self.is_template, "The molecule must be a template molecule"
        random.seed(seed)

        chosen_groups = []
        for r_label in self.which_r_groups:
            already_known = r_label in rules.enforce.dictionary
            if not already_known:
                rules.add_new_R_pattern_to_enforce_rule(r_label, ["HL"], MG)
            candidates = rules.enforce.dictionary[r_label]
            chosen_groups.append(random.choice(candidates))

        return self.replace_R_groups_with_matching_group(MG, chosen_groups)

    def replace_r_with_rf(self):
        """
        Return a copy in which every element starting with "R" is renamed "Rf".

        This serves the purpose of tricking rdkit, making the template molecule a little heavier.
        """
        disguised = self.copy()
        for atom in disguised.nodes:
            attributes = disguised.nodes[atom]
            if attributes["element"][0] == "R":
                attributes["element"] = "Rf"
        return disguised

    def replace_r_with_h(self):
        """
        Turn an R molecule into a normal molecule by renaming every R atom to "H".

        This gives the simplest possible instance of a template. Every element
        starting with "R" is replaced, including R groups whose rules exclude hydrogen.
        """
        hydrogenated = self.copy()
        r_atoms = [atom for atom in hydrogenated.nodes() if hydrogenated.nodes[atom]["element"][0] == "R"]
        for atom in r_atoms:
            hydrogenated.nodes[atom]["element"] = "H"
        return hydrogenated

    def replace_r_with_methyl(self):
        """
        Return a copy in which every R atom is replaced by a methyl group.

        This can eventually become a function where the substituting group is passed in.
        One methyl (built from the SMILES "C" with atom 4 turned into "L") is added
        per R atom; each R/L pair is then removed and their neighbors are bonded.
        """
        mol2 = self.copy()
        mol2 += Molecule()  # hack for renumbering.
        methyl = Molecule.from_smiles("C").substitute_group(4, "L")

        r_atoms = [atom for atom in mol2.nodes if mol2.nodes[atom]["element"][0] == "R"]
        for _ in r_atoms:
            mol2 += methyl
        l_atoms = [atom for atom in mol2.nodes if mol2.nodes[atom]["element"] == "L"]

        assert len(r_atoms) == len(l_atoms), "this should never happen"
        for r_atom, l_atom in zip(r_atoms, l_atoms):
            backbone_atom = list(mol2.neighbors(r_atom))[0]
            methyl_carbon = list(mol2.neighbors(l_atom))[0]
            mol2.add_edge(backbone_atom, methyl_carbon, bond_order="single")
            mol2.remove_node(l_atom)
            mol2.remove_node(r_atom)
        return mol2

    def replace_r_with_wild_names(self, d):
        """
        Return a copy in which every R label is replaced by its key in `d`.

        Args:
            d: mapping whose values are R labels; the matching key becomes the new element name.

        Raises:
            ValueError: if an element starting with "R" is not among the values of `d`.
        """
        name_for_label = {label: name for name, label in d.items()}
        mol = self.copy()
        for atom in mol.nodes:
            element = mol.nodes[atom]["element"]
            if element[0] != "R":
                continue
            if element not in name_for_label:
                raise ValueError(f"Wtf is this atom -> {element}? You cannot draw in rdkit over R22")
            mol.nodes[atom]["element"] = name_for_label[element]
        return mol

    def qify(self, number=True):
        """
        Transform an R molecule into a Q molecule.

        Used in the apply permute, to make every R molecule different (with different
        substitution), and in the database search, to substitute R with something
        that is neither R nor C or H.

        Args:
            number: when True every "R" in the label becomes "Q" (so "R1" -> "Q1");
                when False the whole label becomes "Q".

        Returns:
            The relabelled copy.
        """
        q_mol = self.copy()
        for atom in q_mol.nodes():
            current = q_mol.nodes[atom]["element"]
            if current[0] != "R":
                continue
            q_mol.nodes[atom]["element"] = current.replace("R", "Q") if number else "Q"
        return q_mol

    def deqify(self):
        """
        Turn a qified molecule back into an R molecule.

        The original R numbers are NOT restored: every atom whose element starts
        with "Q" is named "R" followed by its node index.
        """
        r_mol = self.copy()
        q_atoms = [atom for atom in r_mol.nodes() if r_mol.nodes[atom]["element"][0] == "Q"]
        for atom in q_atoms:
            r_mol.nodes[atom]["element"] = f"R{atom}"
        return r_mol

    def collapse_nodes(self, list_of_collapses):
        """
        Collapse groups of atoms into single labelled nodes (used by the template maker).

        Args:
            list_of_collapses: list of tuples `(node, linked_node, new_label)`, e.g.
                [(6, 4, 'N-Me'), (22, 23, 'Me'), (28, 29, 'Me')].
                (6, 4, 'N-Me') means: cut the bond 6-4, replace the piece that
                contains node 6 with one node 6 named 'N-Me', and restore the
                link 6-4 as a single bond.

        Returns:
            The collapsed copy. Each new node stores the removed piece in its
            'expand' attribute.
        """
        collapsed = self.copy()
        for node, linked_node, new_label in list_of_collapses:
            collapsed.remove_edge(node, linked_node)

            # After the cut, exactly one piece must hold the node being collapsed.
            candidates = [piece for piece in collapsed.separate_graph_in_pieces() if node in piece.nodes]
            assert len(candidates) == 1, "The piece should be long 1"
            removed_piece = candidates[0]

            collapsed.remove_nodes_from(removed_piece.nodes)
            collapsed.add_node(node, neighbors=1, element=f"{new_label}", expand=removed_piece, charge=0)
            collapsed.add_edge(node, linked_node, bond_order="single")
        return collapsed

    def expand_nodes(self):
        """
        Expand the nodes that carry an 'expand' attribute (as written by collapse_nodes).

        Each such node must have exactly one neighbor. The node is removed, the
        stored piece is composed into the graph, and the bond between the node
        index and its former neighbor is restored as a single bond.

        Returns:
            The expanded molecule.
        """
        expanded = self.copy()

        for node in self.nodes:
            attributes = expanded.nodes[node]
            if "expand" not in attributes:
                continue

            anchors = list(expanded.neighbors(node))
            assert len(anchors) == 1, "The list of neighbors should be long 1"
            anchor = anchors[0]

            stored_piece = attributes["expand"]
            expanded.remove_nodes_from([node])
            expanded = nx.compose(expanded, stored_piece)
            expanded.add_edge(node, anchor, bond_order="single", neighbors=1)

        return expanded

    def save_smiles(self, path=None):
        """
        Write the SMILES string of the molecule to `path`, overwriting the file.

        Raises:
            ValueError: if no path is given.
        """
        if path is not None:
            with open(path, "w") as handle:
                handle.write(self.smiles)
            return
        raise ValueError("Must provide a path")

    def smiles_from_multiple_molecules(mol):
        """
        Return one SMILES string for a graph that may hold several molecules.

        The SMILES of every separate piece is recomputed, the strings are sorted
        and joined with ".". It is used in the pot to have uniqueness.
        """
        pieces = mol.separate_graph_in_pieces()
        piece_smiles = sorted(piece.force_smiles() for piece in pieces)
        return ".".join(piece_smiles)

    def force_smiles(self):
        """
        Recalculate the SMILES string, bypassing the value cached in `_smiles`.
        """
        recomputed = self.create_smiles()
        return recomputed

    def get_neighbors_of_node(self, ind):
        """
        Return the list of atoms bonded to the atom with index `ind`.
        """
        adjacent = self.neighbors(ind)
        return list(adjacent)

    def get_L_atom_index(self):
        """
        Return the indexes of all atoms whose element is "L" (an empty list if there are none).
        """
        l_atoms = []
        for atom in self.nodes:
            if self.nodes[atom]["element"] == "L":
                l_atoms.append(atom)
        return l_atoms

    @property
    def atom_types(self):
        """Dictionary node -> 'element' for every node that has an 'element' attribute."""
        return {node: data["element"] for node, data in self.nodes.items() if "element" in data}

    @property
    def neighbors_number(self):
        """Dictionary node -> 'neighbors' for every node that has a 'neighbors' attribute."""
        return {node: data["neighbors"] for node, data in self.nodes.items() if "neighbors" in data}

    @property
    def num_heavy_atoms(self):
        """Number of atoms whose element is not "H"."""
        return sum(1 for element in self.atom_types.values() if element != "H")

    @property
    def bond_order(self):
        """Dictionary (u, v) -> 'bond_order' for every edge that has a 'bond_order' attribute."""
        return {(u, v): data["bond_order"] for u, v, data in self.edges(data=True) if "bond_order" in data}

    @property
    def is_template(self):
        """
        Check whether a Molecule is in the form of a template molecule.

        A molecule is a template when one of its element labels contains a capital
        "R" or a capital "X" and is not the symbol of a real chemical element.

        Returns:
            bool: True if at least one placeholder label is present.
        """
        # Real element symbols that must not be mistaken for R placeholders.
        # "Rf" is deliberately NOT listed: replace_r_with_rf uses it as a stand-in
        # for R groups, so it keeps being treated as a placeholder.
        real_elements_with_r = {"Ar", "Br", "Cr", "Kr", "Rb", "Ru", "Ir", "Rh", "Re", "Ra", "Rn", "Rg"}
        real_elements_with_x = {"Xe"}

        atoms = set(self.atom_types.values())
        has_r_placeholder = any("R" in a for a in atoms if a not in real_elements_with_r)
        has_x_placeholder = any("X" in a for a in atoms if a not in real_elements_with_x)
        return has_r_placeholder or has_x_placeholder

    @property
    def smiles(self):
        """
        Return the SMILES representation, computing it only when it is not cached.

        The cached `_smiles` is returned as is when it is non-empty or when the
        molecule is a template. Otherwise the string is created and cached; if the
        creation fails the error is printed and an empty string is stored.
        """
        if self._smiles != "" or self.is_template:
            return self._smiles

        try:
            computed = self.create_smiles()
        except Exception as error:
            print("Error making smiles:")
            print(error)
            computed = ""
        self._smiles = computed
        return self._smiles

    @property
    def empirical_formula(self):
        """
        Build a formula string from the graph.

        Every distinct 'element' label is followed by the number of atoms that
        carry it (the count is always written, also when it is 1). Elements are
        ordered by decreasing count; ties keep the order of first appearance.
        """
        counts = Counter(self.nodes[atom]['element'] for atom in self.nodes)
        return "".join(str(element) + str(count) for element, count in counts.most_common())

    @property
    def which_r_groups(self):
        """
        Return the sorted list of element labels that start with "R", e.g. ['R1', 'R3'].
        """
        names = [element for element in self.atom_types.values() if element[0] == "R"]
        names.sort()
        return names

    @property
    def r_groups(self):
        """
        Return a dictionary node -> element for the nodes whose 'element' starts with "R".
        """
        selected = {}
        for node, element in self.atom_types.items():
            if element[0] == "R":
                selected[node] = element
        return selected

    @property
    def weight(self):
        """Sum of `get_atom(element)` over all the atoms of the molecule."""
        total = 0
        for atom in self.nodes:
            total = total + get_atom(self.nodes[atom]["element"])
        return total

    def set_neighbors(self):
        """
        Store on every node, under 'neighbors', the number of atoms bonded to it.

        The molecule is modified in place and also returned, so calls can be chained.
        """
        for atom in self.nodes:
            bonded = sum(1 for _ in self.neighbors(atom))
            self.nodes[atom]["neighbors"] = bonded
        return self

    # TODO HERE HERE
    def expand_functional_groups(self, FG, verbosity=0):
        """
        Perform acronym substitution: replace symbols like Me, Ph, and Boc with their full atom equivalents.

        Args:
            FG: a FunctionalGroupData instance; `FG.data[label].graph` is used as the graph of a group.
            verbosity: when greater than 0, the intermediate indexes are printed.

        Returns:
            A reindexed copy of the molecule in which the functional-group nodes are expanded.

        Raises:
            ValueError: if, after adding a group, more than one node has an element starting with "A".
        """
        mol = self.copy()

        # find number of functional groups in the molecule
        # ("C" and "H" are never expanded, even when they are keys of FG.data)
        node_dict = nx.get_node_attributes(self, "element")
        num_fg = len([(k, v) for k, v in node_dict.items() if v in FG.data and v not in ["C", "H"]])

        # One group is expanded per iteration. The candidates are recomputed every
        # time because the graph is reindexed at the end of each iteration.
        for i in range(num_fg):

            node_dict = nx.get_node_attributes(mol, "element")
            substitutions = [(k, v) for k, v in node_dict.items() if v in FG.data and v not in ["C", "H"]]

            # always expand the first remaining acronym
            node, label = substitutions[0]

            if verbosity > 0:
                print("-----------------", i, node)
                print("substitutions", substitutions)

            # add functional group to mol
            fg = FG.data[label].graph
            mol = mol + fg

            # find "A" nodes in combined molecule graph
            # NOTE(review): this matches every element starting with "A" (so also labels
            # such as "Al" or "Ar"), and an empty result makes a_node[0] raise IndexError.
            a_node = [(k, v) for k, v in mol.nodes.data() if v["element"][0] == "A"]
            if len(a_node) > 1:
                raise ValueError("Found more than one A in a functional group", mol)
            a_node = a_node[0][0]

            if verbosity > 0:
                print("a_node", a_node)

            # find A node's neighbour
            a_attach = list(nx.neighbors(mol, a_node))[0]
            if verbosity > 0:
                print("a_attach", a_attach)

            # find functional group connection
            fg_attach = list(nx.neighbors(mol, node))[0]
            if verbosity > 0:
                print("node", node)
                print("fg_attach", fg_attach)

            # update mm graph: cut the two placeholder bonds and join the two
            # attachment atoms with a single bond
            changes = {"delete": [(a_node, a_attach), (node, fg_attach)], "single": [(fg_attach, a_attach)], "double": [], "aromatic": [], "triple": [], "charges": []}
            mol = mol.update_edges_and_charges(Changes.from_dict(changes))
            # drop the "A" placeholder and the acronym node
            mol.remove_nodes_from([a_node, node])

            mol = mol.copy(reindex=True)

        return mol

    def remove_hydrogens(self, list_of_hydrogen_to_remove):
        """
        Create anions by removing the given (acidic) hydrogens.

        Each listed atom must have exactly one neighbor: that neighbor's charge is
        lowered by one and the listed atom is deleted.

        Args:
            list_of_hydrogen_to_remove: indexes of the hydrogens to remove.

        Returns:
            The deprotonated copy.
        """
        anion = self.copy()
        for hydrogen in list_of_hydrogen_to_remove:
            attached = list(anion.neighbors(hydrogen))
            assert len(attached) == 1, f"Watch out, an hydrogen is linked to more than atom in {anion}"
            heavy_atom = attached[0]
            anion.nodes[heavy_atom]["charge"] -= 1
            anion.remove_node(hydrogen)
        return anion

    @property
    def charge(self):
        """Total charge: the sum of the 'charge' attribute over all atoms."""
        total = 0
        for atom in self.nodes:
            total = total + self.nodes[atom]["charge"]
        return total

    def neutralize(self):
        '''
        Wrapper that neutralizes the amine cations first and then the anions of every piece.
        '''
        without_amine_cations = self.neutralize_amine_cations()
        return without_amine_cations.neutralize_each_anion_piece_with_H()

    def neutralize_anions_with_H(self):
        """
        Return a copy in which the net negative charge is neutralized by adding hydrogens.

        We cannot blindly neutralize anions, because of groups like NO2, where O is -1
        and N is +1. Groups like N=N=N need more care still, so the procedure is:

        1. On a scratch copy, every positively charged atom whose charged neighbors
           sum to a negative charge is paired with the first of those neighbors
           (one charge unit is cancelled on each).
        2. Every atom that is still negative on the scratch copy receives, on the
           result, one hydrogen per negative charge.

        Nothing is changed when the total charge of the molecule is not negative.

        Returns:
            The neutralized copy, with the 'neighbors' attributes refreshed and the
            cached smiles cleared.

        Raises:
            NeutralizationError: if the molecule had a negative total charge and the
                result is not neutral after the hydrogens were added.
        """
        new_mol = self.copy()
        new_mol._smiles = ""  # the graph may change, so the cached smiles cannot be reused
        neutralizable = self.copy()  # scratch copy: only its charges are edited

        counter = new_mol.give_me_free_index()
        if new_mol.charge < 0:
            cations = [x for x in neutralizable.nodes if neutralizable.nodes[x]["charge"] > 0]
            for cation_index in cations:
                # NOTE(review): this list holds every charged neighbor, positive ones included.
                anion_indexes = [x for x in list(neutralizable.neighbors(cation_index)) if neutralizable.nodes[x]["charge"] != 0]
                sum_neigh_charges = sum(neutralizable.nodes[x]["charge"] for x in anion_indexes)
                if sum_neigh_charges < 0:  # AV this zero is controversial. Good luck with it.
                    # the cation has anions next to it: cancel one pair of charges
                    neutralizable.nodes[anion_indexes[0]]["charge"] += 1
                    neutralizable.nodes[cation_index]["charge"] -= 1

            anions = [x for x in neutralizable.nodes if neutralizable.nodes[x]["charge"] < 0]
            for anion_index in anions:
                number_of_negative_charges = -(neutralizable.nodes[anion_index]["charge"])  # we know it is negative
                for _ in range(number_of_negative_charges):
                    index = next(counter)
                    # neighbors=0 is a placeholder: set_neighbors() below writes the real value
                    new_mol.add_node(index, neighbors=0, element="H", charge=0)
                    new_mol.add_edge(index, anion_index, bond_order="single")
                    new_mol.nodes[anion_index]["charge"] += 1

            if new_mol.charge != 0:
                # The exception used to be built without being raised, so failures went unnoticed.
                raise NeutralizationError(f"something went wrong in neutralize_anions_with_H with molecule {self.smiles}, with charge {new_mol.charge}")
        new_mol.set_neighbors()
        return new_mol

    def neutralize_each_anion_piece_with_H(self) -> Molecule:
        """
        Neutralize the anions of every separate piece and return the recombined molecule.

        Raises:
            NeutralizationError: if the recombined molecule still has a negative charge.
        """
        neutral_pieces = [piece.neutralize_anions_with_H() for piece in self.separate_graph_in_pieces()]
        recombined = Molecule.add_list_of_molecules(neutral_pieces)
        if recombined.charge < 0:
            raise NeutralizationError('After neutralizing the pieces, I did not get a neutral result.')
        return recombined

    def neutralize_amine_cations(self) -> Molecule:
        """
        Apply neutralize_amine_cation to every separate piece and return the recombined molecule.
        """
        neutral_pieces = [piece.neutralize_amine_cation() for piece in self.separate_graph_in_pieces()]
        return Molecule.add_list_of_molecules(neutral_pieces)

    def neutralize_amine_cation(self):
        """
        Return a copy in which +1 nitrogens lose one hydrogen and become neutral.

        A nitrogen is left alone when its charge is not exactly 1 or when the
        charges of its neighbors sum to a negative value.

        Raises:
            NoHydrogenAttachedToNitrogenError: if a nitrogen that should be
                neutralized has no hydrogen attached.
        """
        new_mol = self.copy()
        nitrogens = [atom for atom in new_mol.nodes if new_mol.nodes[atom]['element'] == 'N']
        for nitrogen in nitrogens:
            neighbor_charge = sum(new_mol.nodes[x]['charge'] for x in new_mol.neighbors(nitrogen))
            if new_mol.nodes[nitrogen]['charge'] != 1 or neighbor_charge < 0:
                continue

            attached_hydrogens = [x for x in new_mol.neighbors(nitrogen) if new_mol.nodes[x]['element'] == 'H']
            if not attached_hydrogens:
                raise NoHydrogenAttachedToNitrogenError(f'{self.force_smiles()} could not be neutralized')
            new_mol.remove_node(attached_hydrogens[0])
            new_mol.nodes[nitrogen]['charge'] -= 1
        return new_mol

    def add_hydrogen(self, index):
        """
        Return a copy with one new hydrogen single-bonded to atom `index`.

        The hydrogen takes the first index given by `give_me_free_index()`.
        The 'neighbors' attributes are not refreshed (the new atom is stored with neighbors=0).
        """
        with_hydrogen = self.copy()
        hydrogen = next(with_hydrogen.give_me_free_index())
        with_hydrogen.add_node(hydrogen, neighbors=0, element="H", charge=0)
        with_hydrogen.add_edge(index, hydrogen, bond_order="single")
        return with_hydrogen

    def add_hydrogens(self, indexes, how_many):
        """
        Return a copy with `how_many[i]` hydrogens added to atom `indexes[i]`.

        The two sequences are read in parallel; pairing stops at the shorter one.
        """
        targets = [atom for atom, count in zip(indexes, how_many) for _ in range(count)]
        updated = self.copy()
        for atom in targets:
            updated = updated.add_hydrogen(atom)
        return updated

    def add_A_atom(self, index, element_matching=["C", "H"]):
        """
        Return a copy with one new "A" atom single-bonded to atom `index`.

        Args:
            index: the atom the new "A" atom is attached to.
            element_matching: elements stored in the 'element_matching' attribute
                of the new atom.

        Returns:
            The copy with the extra atom. The 'neighbors' attributes are not
            refreshed (the new atom is stored with neighbors=0).
        """
        new_mol = self.copy()
        counter = new_mol.give_me_free_index()
        new_index = next(counter)
        # Store a private copy: otherwise every atom created with the default
        # argument would share (and could corrupt) the same list object.
        new_mol.add_node(new_index, neighbors=0, element="A", charge=0, element_matching=list(element_matching))
        new_mol.add_edge(index, new_index, bond_order="single")
        return new_mol

    def add_A_atoms(self, indexes, how_many, element_matching=["C", "H"]):
        """
        Return a copy with `how_many[i]` "A" atoms added to atom `indexes[i]`.

        Args:
            indexes: atoms that receive the new "A" atoms.
            how_many: number of "A" atoms for each entry of `indexes`
                (read in parallel; pairing stops at the shorter sequence).
            element_matching: elements stored on every new atom.

        Returns:
            The copy with the extra atoms.
        """
        new_mol = self.copy()
        for index, num in zip(indexes, how_many):
            for _ in range(num):
                # A fresh list per atom, so the atoms never share one mutable list.
                new_mol = new_mol.add_A_atom(index, element_matching=list(element_matching))
        return new_mol

    def is_lower_than_number_of_element(self, n: int, element: str = 'C') -> bool:
        '''
        Return True if the molecule contains at most `n` atoms of type `element`.
        '''
        occurrences = sum(1 for atom in self.nodes if self.nodes[atom]['element'] == element)
        return occurrences <= n

    @staticmethod
    def delete_duplicates(graph_list: list[Molecule], verbosity=0) -> list[Molecule]:
        """
        Return the unique molecules of `graph_list`, in order of first appearance.

        Uniqueness is decided with the equality of Molecule (isomorphism). Molecules
        equal to the empty molecule are dropped as well.

        Args:
            graph_list: the molecules to deduplicate.
            verbosity: when greater than 0, progress messages are printed.
        """
        verbose = verbosity > 0
        unique_list: list[Molecule] = []
        for graph in graph_list:
            if verbose:
                print(f"Processing {graph.smiles} now for the deduplication")
            if graph in unique_list:
                continue
            if graph != Molecule():
                if verbose:
                    print(f"{graph.smiles} is not found in the list. Appending it now to:\n{unique_list}")
                unique_list.append(graph)
        return unique_list

    def satisfy_bradt_good_enough(self, cycles=None, verbosity=0) -> bool:
        '''
        Approximate bridgehead check: return True when every detected bridgehead atom has only single bonds.

        warning, this cannot possibly catch all the bridgeheads nor all cases where the bridge is bigger than 7 carbons.
        This is checking all nx basis cycles, keeping the nodes that are only present in one cycle.

        Args:
            cycles: list of cycles (lists of node indexes); computed with nx.cycle_basis when None.
            verbosity: when greater than 0, intermediate values are printed.

        Returns:
            bool: True when there is at most one cycle, or when all the edges of
            every detected bridgehead have bond_order 'single'.
        '''
        if verbosity > 0:
            print(self.smiles)
        if cycles is None:
            cycles = nx.cycle_basis(self)
        # with zero or one cycle there is nothing to check
        if len(cycles) <= 1:
            return True

        if verbosity > 0:
            print(f'{cycles=}\n')
        bridge_heads = set()
        banish = set()  # atoms that are directly bonded to each other, hence not bridgeheads
        for list_index in cycles:
            # every atom that belongs to one of the other cycles
            all_others = set()
            for xs in cycles:
                if xs != list_index:
                    for x in xs:
                        all_others.add(x)

            if verbosity > 0:
                print(f'  {all_others=}\n  {list_index=}\n')
            # Among the atoms that are exclusive to this cycle, look at the previous and
            # next atom along the cycle; remember them when they are shared with another
            # cycle. Only the last match of each kind is kept.
            prev_real = None
            nexT_real = None
            for i, index in enumerate(list_index):
                if index not in all_others:
                    prev = list_index[i-1]  # i-1 == -1 wraps around to the last atom
                    if prev in all_others:
                        prev_real = prev
                    nexT = list_index[(i + 1) % len(list_index)]
                    if nexT in all_others:
                        nexT_real = nexT
            if verbosity > 0:
                print(f'  {prev_real=}\n  {nexT_real=}\n')
            try:
                if prev_real is not None and nexT_real is not None:
                    # this lookup raises KeyError when the two atoms are not bonded
                    self.edges[(prev_real, nexT_real)]
                    banish.add(prev_real)
                    banish.add(nexT_real)
            except KeyError:
                # not bonded to each other: treat both atoms as bridgeheads
                bridge_heads.add(prev_real)
                bridge_heads.add(nexT_real)

        bridge_heads_filtered = [x for x in bridge_heads if x not in banish]
        if verbosity > 0:
            print(f'{bridge_heads=}\n{bridge_heads_filtered=}')
        booleans = []
        for index in bridge_heads_filtered:
            # True when every edge that touches this bridgehead is a single bond
            this_bool = all([self.edges[edge]['bond_order'] == 'single' for edge in self.edges if index in edge])
            booleans.append(this_bool)
        return all(booleans)

`empirical_formula` `property`

Calculates the empirical formula from the graph

`is_template` `property`

Checks whether a Molecule is in the form of a template molecule.

`molecular_formula: MolecularFormula` `property`

Generates the molecular formula of the self molecule

Returns:

Name	Type	Description
`MolecularFormula`	`MolecularFormula`	this is a molecular formula object

`r_groups` `property`

returns all the nodes whose 'element' attribute contains 'R'

`smiles` `property`

Return a SMILES representation, tries to avoid recomputing the smiles string

`which_r_groups` `property`

returns a list of R names from molecule ['R1','R3']

`add(mol2)`

when you do: mol1 + mol2 it creates a single graph with both molecules

NOTE: atoms will be renumbered by this operation !

Parameters:

Name	Type	Description	Default
`mol2`	`Molecule`	a molecule	required

Returns:

Name	Type	Description
`Molecule`	`Molecule`	a molecule that is the sum of self and mol2

Source code in retropaths/molecules/molecule.py

def __add__(self, mol2: Molecule) -> Molecule:
    """Merge two molecules into a single graph (`mol1 + mol2`).

    **NOTE**: atoms will be renumbered by this operation !
    The atoms of `self` take the indexes 0..len(self)-1 and the atoms of
    `mol2` the indexes that follow.

    Args:
        mol2 (Molecule): a molecule

    Returns:
        Molecule: a molecule that is the sum of self and mol2
    """
    offset = len(self)

    # consecutive integer labels: self first, mol2 right after
    left_labels = dict(zip(self.nodes(), range(offset)))
    right_labels = dict(zip(mol2.nodes(), range(offset, offset + len(mol2))))

    left = nx.relabel_nodes(self, left_labels)
    right = nx.relabel_nodes(mol2, right_labels)
    return nx.compose(left, right)

`eq(other_molecule)`

Equality without hydrogens is much faster, and hydrogens are always terminal atoms. The problem comes when H-H or H+ take part in the isomorphism check: in that case the hydrogens must NOT be collapsed.

Parameters:

Name	Type	Description	Default
`other_molecule`		a molecule object	required

Returns:

Name	Type	Description
`bool`	`bool`	returns if mols are equals (isomorphic).

Source code in retropaths/molecules/molecule.py

def __eq__(self, other_molecule) -> bool:
    """Two molecules are equal when their graphs are isomorphic.

    Equality without hydrogens is much faster, and hydrogens are always terminal.
    The problem comes when H-H or H+ take part in the isomorphism: in that case
    the hydrogens must NOT be collapsed, so molecules with fewer than 3 atoms
    are compared as they are.

    Args:
        other_molecule: a molecule object

    Returns:
        bool: returns if mols are equals (isomorphic). `NotImplemented` is
        returned for objects that are not molecules, so that Python falls back
        to its default comparison (`mol == 5` is False instead of an error).
    """
    if not isinstance(other_molecule, Molecule):
        return NotImplemented

    if len(self) < 3 or len(other_molecule) < 3:
        first = self
        second = other_molecule
    else:
        first = self.remove_Hs()
        second = other_molecule.remove_Hs()
    GM = SubGraphMatcher(first)
    is_this_equal = GM.is_isomorphic(second)
    return is_this_equal

`init(name='', smi='')`

Constructs a molecule as a subclass of networkx Graph type

Parameters:

Name	Type	Description	Default
`name`	`str`	a name for the molecule	`''`
`smi`	`str`	stores the SMILES so it does not need to be recalculated	`''`

Returns:

Name	Type	Description
`molecule`	`Molecule`	The Molecule Object

Source code in retropaths/molecules/molecule.py

def __init__(self, name: str = '', smi: str = ''):
    """
    Construct an empty molecule as a subclass of the networkx Graph type.

    Args:
        name (str): a name for the molecule
        smi (str): stores the smiles so it does not need to be recalculated
    """
    self.chemical_name, self._smiles = name, smi
    super().__init__()

`mul(other)`

In case one wants more of the same molecule

Parameters:

Name	Type	Description	Default
`other`	`int`	how many molecules	required

Returns:

Name	Type	Description
`Molecule`	`Molecule`	a retropaths molecule

Source code in retropaths/molecules/molecule.py

def __mul__(self, other: int) -> Molecule:
    """Return a graph that holds `other` copies of this molecule.

    Args:
        other (int): how many molecules

    Returns:
        Molecule: a retropaths molecule (empty when `other` is not positive)
    """
    assert isinstance(other, int)
    repeated = self.__class__()
    copies_left = other
    while copies_left > 0:
        repeated += self
        copies_left -= 1
    return repeated

`str()`

This just prints out the debug string for a networkx graph

Source code in retropaths/molecules/molecule.py

def __str__(self):
    """
    Return the debug string of the graph: its nodes and edges with all their attributes.
    """
    edge_data = self.edges.data()
    node_data = self.nodes.data()
    return f"nodes = {node_data}\nedges = {edge_data}\n"

`add_list_of_molecules(list_molec)` `classmethod`

This function creates a graph from a list of molecules. If the list is empty, it returns the empty molecule.

Parameters:

Name	Type	Description	Default
`list_molec`	`list[Molecule]`	a list of molecules	required

Returns:

Name	Type	Description
`Molecule`	`Molecule`	retropaths molecule object

Source code in retropaths/molecules/molecule.py

@classmethod
def add_list_of_molecules(cls, list_molec: list[Molecule]) -> Molecule:
    """Create a single graph from a list of molecules.

    The molecules are added one after the other, starting from the empty
    molecule, so an empty list gives the empty molecule.

    Args:
        list_molec (list[Molecule]): a list of molecules

    Returns:
        Molecule: retropaths molecule object
    """
    # The former `assert type(list_molec)` could never fail (a type object is
    # always truthy), so it was dropped instead of pretending to validate.
    final_molecule = Molecule()
    for x in list_molec:
        final_molecule += x
    return final_molecule

`are_R_labels_equivalent(first_label, second_label)`

Given two R labels, this test returns if they are equivalent. Equivalent here means that they would be treated as identical or symmetric by ismags so for instance two Rs single bond connected to the same Carbon are equivalent.

Source code in retropaths/molecules/molecule.py

def are_R_labels_equivalent(self, first_label, second_label):
    """
    Tell whether two R labels of the molecule are equivalent.

    Equivalent here means that they would be treated as identical or symmetric by ismags,
    so for instance two Rs single bond connected to the same Carbon are equivalent.
    The check tags the two labels as Q1/Q2, then as Q2/Q1, turns every other R
    into "Q" and compares the two graphs for isomorphism.
    """
    known_labels = self.which_r_groups
    first_known = first_label in known_labels
    second_known = second_label in known_labels
    assert first_known and second_known, f"Watch out, one of the two labels {first_label} or {second_label} is not in {known_labels} in the method are_R_labels_equivalent."

    def tagged(as_q1, as_q2):
        # mark the two labels of interest, then anonymize the remaining R groups
        return self.change_element_name(as_q1, "Q1").change_element_name(as_q2, "Q2").qify(number=False)

    straight = tagged(first_label, second_label)
    swapped = tagged(second_label, first_label)
    return swapped.is_isomorphic_to(straight)

`change_element_name(lab1, lab2)`

`lab1` and `lab2` are strings: every atom whose 'element' is `lab1` is renamed to `lab2`.

Source code in retropaths/molecules/molecule.py

def change_element_name(self, lab1, lab2):
    """
    Return a copy in which every atom whose 'element' is `lab1` is renamed to `lab2`.

    lab1 and lab2 -> str
    """
    renamed = self.copy()
    matching = [atom for atom in renamed.nodes() if renamed.nodes[atom]["element"] == lab1]
    for atom in matching:
        renamed.nodes[atom]["element"] = lab2
    return renamed

`collapse_nodes(list_of_collapses)`

This method is used by the template maker to collapse certain groups. `list_of_collapses` is a list of tuples like this: [(6, 4, 'N-Me'), (22,23,'Me'), (28,29,'Me')]. The tuple (6, 4, 'N-Me') means: collapse at node 6 with the new name 'N-Me', keeping the link at 6-4. This transforms the graph by replacing the collapsed piece with a single labelled node.

Source code in retropaths/molecules/molecule.py

def collapse_nodes(self, list_of_collapses):
    """
    Collapse whole groups of atoms into single labelled nodes (used by the template maker).

    list_of_collapses is a list of (node, linked_node, new_label) tuples, e.g.
    [(6, 4, 'N-Me'), (22, 23, 'Me'), (28, 29, 'Me')].
    For (6, 4, 'N-Me') the bond 6-4 is cut, the fragment that contains node 6 is
    removed, and a single node 6 named 'N-Me' is put back, single bonded to node 4.
    The removed fragment is stored in the 'expand' attribute of the new node.

    Returns a new molecule; self is not modified.
    """
    collapsed = self.copy()
    for node, linked_node, new_label in list_of_collapses:
        # Cutting the kept bond first makes the group its own connected piece.
        collapsed.remove_edge(node, linked_node)
        fragments = collapsed.separate_graph_in_pieces()
        holding_node = [fragment for fragment in fragments if node in fragment.nodes]
        assert len(holding_node) == 1, "The piece should be long 1"
        piece = holding_node[0]

        # Swap the whole fragment for one placeholder node that remembers it.
        collapsed.remove_nodes_from(piece.nodes)
        collapsed.add_node(node, neighbors=1, element=f"{new_label}", expand=piece, charge=0)
        collapsed.add_edge(node, linked_node, bond_order="single")
    return collapsed

`copy(reindex=False, start=0)`

Overload the nx.Graph.copy() method to bring the non-networkx attributes across

Parameters:

Name	Type	Description	Default
`reindex`	`bool`	convert the node labels to integers starting with start. Defaults to False.	`False`
`start`	`int`	initial value for the reindexing. Defaults to 0.	`0`

Returns:

Name	Type	Description
`Molecule`	`Molecule`	retropaths molecule object

Source code in retropaths/molecules/molecule.py

def copy(self, reindex=False, start=0) -> Molecule:
    """Copy the graph and carry the non-networkx attributes over to the copy.

    Args:
        reindex (bool, optional): when true, the copy has its node labels converted
            to consecutive integers beginning at start. Defaults to False.
        start (int, optional): first integer label used when reindexing. Defaults to 0.

    Returns:
        Molecule: the copy, with the same _smiles and chemical_name as self
    """
    duplicate = (
        nx.convert_node_labels_to_integers(self, first_label=start)
        if reindex
        else super().copy()
    )
    # Neither networkx call knows about these two attributes.
    duplicate._smiles = self._smiles
    duplicate.chemical_name = self.chemical_name
    return duplicate

`deduplicate_single_graph()`

This takes a molecule graph and returns a single graph with no duplicates.

Source code in retropaths/molecules/molecule.py

def deduplicate_single_graph(self):
    """
    Split the graph in its pieces, drop the duplicated pieces and
    join what is left back into a single graph.
    """
    pieces = self.separate_graph_in_pieces()
    unique_pieces = self.delete_duplicates(pieces)
    return Molecule.add_list_of_molecules(unique_pieces)

`delete_duplicates(graph_list, verbosity=0)` `staticmethod`

This function takes a list of molecule graphs and returns the list of unique molecule graphs, in their original order. Empty molecules are dropped.

Source code in retropaths/molecules/molecule.py

@staticmethod
def delete_duplicates(graph_list: list[Molecule], verbosity=0) -> list[Molecule]:
    """
    Return the molecules of graph_list without repetitions, in their original order.

    A molecule is kept when it is not already in the result (using the
    molecule equality) and when it differs from an empty Molecule().
    With verbosity > 0 the progress is printed.
    """
    verbose = verbosity > 0
    kept: list[Molecule] = []
    for candidate in graph_list:
        if verbose:
            print(f"Processing {candidate.smiles} now for the deduplication")
        if candidate in kept:
            continue
        if candidate != Molecule():
            if verbose:
                print(f"{candidate.smiles} is not found in the list. Appending it now to:\n{kept}")
            kept.append(candidate)
    return kept

`deqify()`

Converts a qified molecule back into an R molecule. This DOES NOT KEEP THE SAME NUMBER: each Q node is renamed to R followed by its node index.

Source code in retropaths/molecules/molecule.py

def deqify(self):
    """
    Turn a qified molecule back into an R molecule.
    This DOES NOT KEEP THE SAME NUMBER: every node whose element starts
    with "Q" is renamed to "R" followed by its own node index.
    """
    restored = self.copy()
    for node, attributes in restored.nodes(data=True):
        if attributes["element"][0] == "Q":
            attributes["element"] = f"R{node}"
    return restored

`draw(mode='rdkit', size=None, string_mode=False, node_index=True, percentage=None, force=None, fixed_bond_length=None, fixedScale=None, fontSize=None, lineWidth=None, charges=True, neighbors=False)`

Draw the graph, mode can be 'd3' for interactive force directed graphs or 'rdkit' or 'oe' for chemdraw style images

Source code in retropaths/molecules/molecule.py

def draw(self,
         mode="rdkit",
         size=None,
         string_mode=False,
         node_index=True,
         percentage=None,
         force=None,
         fixed_bond_length=None,
         fixedScale=None,
         fontSize=None,
         lineWidth=None,
         charges=True,
         neighbors=False):
    """
    Draw the molecule.

    mode selects the backend:
        'd3'    force directed graph drawn by draw_d3 (default size 500x500)
        'rdkit' SVG drawn by moldrawsvg from the SMILES (default size 300x300)
        'oe'    OpenEye SVG on a fixed 400x400 canvas, with the atom index as toggle text

    With string_mode=True the markup is returned as a string (for 'd3' the flag is
    passed on to draw_d3), otherwise the 'rdkit' and 'oe' modes wrap it in SVG / HTML.
    Any other mode raises ValueError.
    """
    if mode == "d3":
        canvas = size or (500, 500)
        graph_copy = self.copy()
        nodes, links = molecule_to_d3json(graph_copy, node_index, charges=charges, neighbors=neighbors)
        return draw_d3(nodes, links, size=canvas, string_mode=string_mode, percentage=percentage, force_layout_charge=force)

    if mode == "rdkit":
        canvas = size or (300, 300)
        # Element symbols standing in for R1, R2, ... R22, in this order.
        stand_ins = ["Sg", "Rf", "Ne", "Ar", "Kr", "Ru", "Rn", "Og", "Fr", "At", "Db",
                     "Hs", "Bh", "Mt", "Rg", "Cn", "Hf", "U", "W", "Pu", "Am", "Cm"]
        wild_names = {symbol: f"R{position}" for position, symbol in enumerate(stand_ins, start=1)}
        smiles = self.replace_r_with_wild_names(wild_names).force_smiles()
        svg_str = moldrawsvg(smiles, wild_names, molSize=canvas, fixed_bond_length=fixed_bond_length, fixedScale=fixedScale, fontSize=fontSize, lineWidth=lineWidth)
        return svg_str if string_mode else SVG(svg_str)

    if mode == "oe":
        width, height = 400, 400

        oemol = oechem.OEGraphMol()
        oechem.OESmilesToMol(oemol, self.smiles)
        oedepict.OEPrepareDepiction(oemol)

        options = oedepict.OE2DMolDisplayOptions(width, height, oedepict.OEScale_AutoScale)
        options.SetMargins(10)
        display = oedepict.OE2DMolDisplay(oemol, options)

        index_font = oedepict.OEFont(oedepict.OEFontFamily_Default, oedepict.OEFontStyle_Default, 12,
                                     oedepict.OEAlignment_Center, oechem.OEDarkRed)

        # One toggle text per atom, showing the atom index.
        for atom_display in display.GetAtomDisplays():
            toggletext = f"{atom_display.GetAtom().GetIdx()}"
            oedepict.OEDrawSVGToggleText(display, atom_display, toggletext, index_font)

        stream = oechem.oeosstream()
        oedepict.OERenderMolecule(stream, "svg", display)
        rendered = stream.str()

        markup = '<div style="width: 100%; display: table;"> <div style="display: table-row;">'
        markup += f'{rendered.decode()}</div></div>'
        return markup if string_mode else HTML(markup)

    raise ValueError(f'mode must be one of "oe", "d3" or "rdkit", received {mode}')

`draw_list(molecule_list, names=[], mode='rdkit', title='', charges=False, size=(650, 650), width=100, columns=5, fixed_bond_length=None, string_mode=False, node_index=True, neighbors=False, arrows=False, borders=False, display_title=True, arrow_size=2)` `staticmethod`

Draws a list of molecules

Source code in retropaths/molecules/molecule.py

@staticmethod
def draw_list(molecule_list,
              names=None,
              mode="rdkit",
              title="",
              charges=False,
              size=(650, 650),
              width=100,
              columns=5,
              fixed_bond_length=None,
              string_mode=False,
              node_index=True,
              neighbors=False,
              arrows=False,
              borders=False,
              display_title=True,
              arrow_size=2,
              ):
    """
    Draws a list of molecules as an HTML table.

    Args:
        molecule_list: the molecules to draw. It is not modified.
        names: optional captions, one per molecule. It is not modified.
        mode: drawing backend passed to Molecule.draw.
        title: title shown above the table when display_title is true.
        charges, size, node_index, neighbors: passed to Molecule.draw.
        width: width of the whole table, in percent.
        columns: number of cells per row; short lists are padded with empty molecules.
        fixed_bond_length: optional list with one value per molecule; missing
            trailing values are filled with 100.
        string_mode: return the markup as a string instead of an HTML object.
        arrows: draw an arrow between consecutive (real) molecules.
        borders: draw a border around the non empty cells.
        display_title: whether the title is written.
        arrow_size: relative font size of the arrows.

    Returns:
        The HTML markup as str when string_mode is true, otherwise HTML(markup).
    """
    # Pad private copies only: the caller's lists (and the default value of
    # names) must not grow from one call to the next.
    molecule_list = list(molecule_list)
    names = [] if names is None else list(names)

    if len(molecule_list) == 0:
        print("This list is empty")
        molecule_list.append(Molecule())
        names.append("")

    # Number of molecules before the row is padded with empty placeholders.
    true_mol_len = len(molecule_list)
    while len(molecule_list) < columns:
        molecule_list.append(Molecule())
        names.append("")

    if not fixed_bond_length:
        fixed_bond_length = [None] * len(molecule_list)
    else:
        missing = len(molecule_list) - len(fixed_bond_length)
        if missing > 0:
            fixed_bond_length = [*fixed_bond_length, *([100] * missing)]

    how_many_columns = min(columns, len(molecule_list))
    cell_width = 100.0 / how_many_columns

    borders_string = 'border: 1px solid black;' if borders else ''
    title_string = f'<h2>{title}</h2>' if display_title else ''

    parts = [f'{title_string}<div style="width: {width}%; display: table;"> <div style="display: table-row;">']

    for i, mol in enumerate(molecule_list):
        if i % how_many_columns == 0:
            parts.append('</div><div style="display: table-row;">')

        try:
            name = f'<p style="text-align: center;">{names[i]}</p>'
        except IndexError:
            name = ""
        this_border_string = borders_string if not mol.is_empty() else ''  # I do not want to draw border on empty molecules.
        parts.append(
            f'<div style="width: {cell_width}%; display: table-cell;{this_border_string}"> \
            {mol.draw(mode=mode, string_mode=True, size=size, fixed_bond_length=fixed_bond_length[i], charges=charges, neighbors=neighbors, node_index=node_index,percentage=0.8)} {name} </div>'
        )
        # Arrows only between real molecules, never towards the padding.
        if arrows and i < true_mol_len - 1:
            parts.append(f'<div style="width: 0%; display: table-cell; vertical-align: middle;"><font size="+{arrow_size}">&#8594;</font></div>')

    parts.append("</div></div>")
    sstring = "".join(parts)
    if string_mode:
        return sstring
    else:
        return HTML(sstring)

`expand_functional_groups(FG, verbosity=0)`

Perform acronym substitution, replace symbols like Me, Ph, and Boc with their full atom equivalents FG - a FunctionalGroupData instance

Source code in retropaths/molecules/molecule.py

def expand_functional_groups(self, FG, verbosity=0):
    """
    Perform acronym substitution, replace symbols like Me, Ph, and Boc with their full atom equivalents.

    Args:
        FG: a FunctionalGroupData instance. FG.data is looked up by element label and
            each entry exposes the graph of the group as `.graph`.
        verbosity: when greater than 0, the intermediate indexes are printed.

    Returns:
        A new molecule with the functional groups expanded. The result is reindexed
        after every substitution, so node indexes generally differ from those of self.

    Raises:
        ValueError: when more than one node whose element starts with "A" is found
            after a functional group graph has been added.
    """
    mol = self.copy()

    # Count the nodes to substitute: labels known to FG.data, except plain "C" and "H".
    node_dict = nx.get_node_attributes(self, "element")
    num_fg = len([(k, v) for k, v in node_dict.items() if v in FG.data and v not in ["C", "H"]])

    for i in range(num_fg):

        # The list is recomputed at every pass because mol is reindexed at the end
        # of each iteration; only its first entry is processed.
        node_dict = nx.get_node_attributes(mol, "element")
        substitutions = [(k, v) for k, v in node_dict.items() if v in FG.data and v not in ["C", "H"]]

        node, label = substitutions[0]

        if verbosity > 0:
            print("-----------------", i, node)
            print("substitutions", substitutions)

        # Add the graph of the functional group to mol.
        fg = FG.data[label].graph
        mol = mol + fg

        # Find the "A" node of the combined graph.
        # NOTE(review): the test is on the first character only, so any element
        # starting with "A" would match as well; confirm this cannot happen here.
        # NOTE(review): an empty result is not handled and would fail on the
        # indexing below.
        a_node = [(k, v) for k, v in mol.nodes.data() if v["element"][0] == "A"]
        if len(a_node) > 1:
            raise ValueError("Found more than one A in a functional group", mol)
        a_node = a_node[0][0]

        if verbosity > 0:
            print("a_node", a_node)

        # First neighbour of the A node.
        a_attach = list(nx.neighbors(mol, a_node))[0]
        if verbosity > 0:
            print("a_attach", a_attach)

        # First neighbour of the acronym node.
        fg_attach = list(nx.neighbors(mol, node))[0]
        if verbosity > 0:
            print("node", node)
            print("fg_attach", fg_attach)

        # Drop the two placeholder bonds, single bond the two attachment atoms
        # together, then remove the placeholder nodes themselves.
        changes = {"delete": [(a_node, a_attach), (node, fg_attach)], "single": [(fg_attach, a_attach)], "double": [], "aromatic": [], "triple": [], "charges": []}
        mol = mol.update_edges_and_charges(Changes.from_dict(changes))
        mol.remove_nodes_from([a_node, node])

        # Relabel the nodes as consecutive integers before the next pass.
        mol = mol.copy(reindex=True)

    return mol

`expand_nodes()`

This method is used to expand the groups that were collapsed by `collapse_nodes`.

Source code in retropaths/molecules/molecule.py

def expand_nodes(self):
    """
    Undo collapse_nodes: every node carrying an 'expand' attribute is replaced
    by the fragment stored in that attribute, and the single bond towards its
    only neighbor is restored.

    Returns a new molecule; self is not modified.
    """
    expanded = self.copy()

    for node in self.nodes:
        attributes = expanded.nodes[node]
        if "expand" not in attributes:
            continue

        neighbors_of_node = list(expanded.neighbors(node))
        assert len(neighbors_of_node) == 1, "The list of neighbors should be long 1"
        neigh = neighbors_of_node[0]

        # Replace the placeholder node with the stored fragment, then re-link it.
        piece = attributes["expand"]
        expanded.remove_nodes_from([node])
        expanded = nx.compose(expanded, piece)
        expanded.add_edge(node, neigh, bond_order="single", neighbors=1)

    return expanded

`force_smiles()`

In case I really want to recalculate the SMILES.

Source code in retropaths/molecules/molecule.py

def force_smiles(self):
    """
    Recalculate the smiles by calling create_smiles and return it.
    """
    recalculated = self.create_smiles()
    return recalculated

`from_debug_string(nodes, edges)` `classmethod`

So you can copy paste and parse a molecule printed by the str() method

Parameters:

Name	Type	Description	Default
`nodes`	`list`	the nodes list printed by str()	required
`edges`	`list`	the edges list printed by str()	required

Returns:

Name	Type	Description
`molecule`	`Molecule`	The Molecule Object

Source code in retropaths/molecules/molecule.py

@classmethod
def from_debug_string(cls, nodes: list, edges: list) -> Molecule:
    """
    Rebuild a molecule from the two lists printed by the __str__() method,
    so that a printed molecule can be copy pasted back into code.

    Args:
        nodes: the nodes list printed by str(), as (node, attributes) pairs
        edges: the edges list printed by str(), as (node, node, attributes) triples

    Returns:
        molecule: The Molecule Object
    """
    rebuilt = cls()
    for node, attributes in nodes:
        rebuilt.add_node(node, **attributes)
    for first, second, bond_attributes in edges:
        rebuilt.add_edge(first, second, **bond_attributes)
    rebuilt.set_neighbors()
    return rebuilt

`from_list_of_smiles_to_single_graph(list_of_smiles)` `classmethod`

this creates a single graph from a list of smiles

Parameters:

Name	Type	Description	Default
`list_of_smiles`	`list[str]`	a list of SMILES strings	required

Returns:

Name	Type	Description
`Molecule`	`Molecule`	retropaths molecule object

Source code in retropaths/molecules/molecule.py

@classmethod
def from_list_of_smiles_to_single_graph(cls, list_of_smiles: list[str]) -> Molecule:
    """Create a single graph from a list of smiles.

    Every SMILES is converted with from_smiles and added to one molecule.

    Args:
        list_of_smiles (list[str]): a list of SMILES strings. Subclasses of
            str are accepted too.

    Returns:
        Molecule: retropaths molecule object containing all the molecules
    """
    # isinstance (rather than an exact type comparison) also accepts str subclasses.
    assert all(isinstance(x, str) for x in list_of_smiles), "Hey, I need a list of strings"
    single_graph = cls()
    for smi in list_of_smiles:
        single_graph += cls.from_smiles(smi)
    return single_graph

`from_smiles(smi, name='')` `classmethod`

creates a molecule object from a smiles string

Parameters:

Name	Type	Description	Default
`smi`	`str`	the SMILES string	required
`name`	`str`	The common name of the molecule. Defaults to ''.	`''`

Raises:

Type	Description
`ValueError`	when the smiles is not valid

Returns:

Name	Type	Description
`Molecule`	`Molecule`	a retropath molecule object

Source code in retropaths/molecules/molecule.py

@classmethod
def from_smiles(cls, smi: str, name: str = '') -> Molecule:
    """Create a molecule object from a smiles string.

    RDKit is used when the OE_LICENSE environment variable is not set,
    OpenEye otherwise. With OpenEye the molecule stores the canonical
    smiles, with RDKit the smiles is stored as given.

    Args:
        smi (str): the SMILES string
        name (str, optional): The common name of the molecule. Defaults to ''.

    Raises:
        ValueError: when the smiles is not valid

    Returns:
        Molecule: a retropath molecule object
    """
    if 'OE_LICENSE' not in os.environ:
        rdmol = Chem.MolFromSmiles(smi)
        # RDKit signals a parsing failure by returning None instead of raising.
        if rdmol is None:
            raise ValueError("Not a valid SMILES string according to RDKit:", smi)
        return cls.from_rdmol(rdmol, smi, name)

    # Use openeye smiles tools to get a canonical smiles string and an OpenEye Molecule instance
    oemol = oechem.OEGraphMol()
    if not oechem.OESmilesToMol(oemol, smi):
        raise ValueError("Not a valid SMILES string according to OpenEye:", smi)
    smi_canon = oechem.OEMolToSmiles(oemol)
    return cls.from_oemol(oemol, smi_canon, name)

`get_L_atom_index()`

given a molecule returns the position of the L atom, or the empty list

Source code in retropaths/molecules/molecule.py

def get_L_atom_index(self):
    """
    Return the list of the node indexes whose element is "L".
    The list is empty when the molecule has no L atom.
    """
    l_atoms = []
    for node in self.nodes:
        if self.nodes[node]["element"] == "L":
            l_atoms.append(node)
    return l_atoms

`get_bond_changes(mol)`

This function is fault of Jan Estrada

Parameters:

Name	Type	Description	Default
`mol`	`Molecule`	the molecule which has bond changes with respect to self	required

Returns:

Name	Type	Description
`BondChanges`	`BondChanges`	the bond changes between self and mol

Source code in retropaths/molecules/molecule.py

def get_bond_changes(self, mol: Molecule) -> BondChanges:
    """Compute which bonds form and which break when going from self to mol.

    Edges are compared regardless of the order of their two nodes.

    Args:
        mol (Molecule): the molecule which has bond changes with respect to self

    Returns:
        BondChanges: the bond changes between self and mol. Forming bonds are
            the edges of mol missing from self, breaking bonds are the edges
            of self missing from mol.
    """

    def undirected(edge):
        # (a, b) and (b, a) are the same bond.
        return tuple(sorted(edge))

    # Normalise each edge list once, instead of once per edge tested.
    own_edges = {undirected(edge) for edge in self.edges}
    other_edges = {undirected(edge) for edge in mol.edges}

    forming = [edge for edge in mol.edges if undirected(edge) not in own_edges]
    breaking = [edge for edge in self.edges if undirected(edge) not in other_edges]
    return BondChanges(forming, breaking)

`get_bond_order(first_atom, second_atom)`

Get the bond order between two atom indexes.

Source code in retropaths/molecules/molecule.py

def get_bond_order(self, first_atom, second_atom):
    """Return the 'bond_order' attribute of the edge between the two indexes."""
    bond = self.edges[first_atom, second_atom]
    return bond["bond_order"]

`get_element(index_atom)`

returns the element at one index

Source code in retropaths/molecules/molecule.py

def get_element(self, index_atom):
    """Return the 'element' attribute of the node at index_atom."""
    atom = self.nodes[index_atom]
    return atom["element"]

`get_neighbors_of_node(ind)`

get neighbors of molecule atom with index ind

Source code in retropaths/molecules/molecule.py

def get_neighbors_of_node(self, ind):
    """
    Return the list of the nodes bonded to the atom with index ind.
    """
    neighbor_iterator = nx.neighbors(self, ind)
    return list(neighbor_iterator)

`get_subgraph_isomorphisms_of(target, verbosity=0)`

a.get_subgraph_isomorphisms_of(b) gives the isomorphic map of A being a subgraph of B self.get_subgraph_isomorphisms_of(target) tells if the template molecule SELF is a subgraph of TARGET molecule

Source code in retropaths/molecules/molecule.py

def get_subgraph_isomorphisms_of(self, target, verbosity=0) -> list[IsomorphismMappings]:
    """
    a.get_subgraph_isomorphisms_of(b) gives the isomorphic maps of A being a subgraph of B,
    that is, the ways the template molecule SELF is found inside the TARGET molecule.

    SELF is matched after its R groups have been removed (remove_r_groups).
    Every map found is returned wrapped in an IsomorphismMappings.
    """
    matcher = SubGraphMatcher(target, verbosity=verbosity)
    template = self.remove_r_groups()
    raw_mappings = matcher.get_subgraph_isomorphisms(template)
    return [IsomorphismMappings(mapping) for mapping in raw_mappings]

`graph_difference(subtracted_molecule)`

This removes from self the nodes of the first subgraph found that matches the subtracted molecule.

Parameters:

Name	Type	Description	Default
`subtracted_molecule`	`Molecule`	a molecule graph	required

Returns:

Name	Type	Description
`Molecule`	`Molecule`	the self molecule graph without subtracted_molecule

Source code in retropaths/molecules/molecule.py

def graph_difference(self, subtracted_molecule: Molecule) -> Molecule:
    """Remove from a copy of self the nodes of one match of subtracted_molecule.

    Only the first isomorphism found is used: repeated occurrences of
    subtracted_molecule need repeated calls.

    Args:
        subtracted_molecule (Molecule): a molecule graph, which must be found in self

    Returns:
        Molecule: the self molecule graph without subtracted_molecule
    """
    target = self.copy()
    matches = subtracted_molecule.get_subgraph_isomorphisms_of(target)

    assert len(matches) > 0, f"The graph difference between {self.force_smiles()} and {subtracted_molecule.force_smiles()} needs a second look."

    first_match = matches[0]
    remaining = target.copy()

    # The values of the reverse mapping are the matched nodes to take out.
    for _, matched_node in first_match.reverse_mapping.items():
        remaining.remove_node(matched_node)

    return remaining

`graph_difference_with_list_and_duplicates(mols)`

it is a graph difference between a graph and a list of molecules Every instance of each molecule in the list is removed from the self molecule.

Source code in retropaths/molecules/molecule.py

def graph_difference_with_list_and_duplicates(self, mols: list[Molecule]) -> Molecule:
    """
    Graph difference between this graph and a list of molecules.
    For each molecule of the list, graph_difference is applied again and again
    until the molecule is no longer found in what is left, so every instance
    of it is removed.
    """
    remainder = self.copy()
    for unwanted in mols:
        while unwanted.is_subgraph_isomorphic_to(remainder):
            remainder = remainder.graph_difference(unwanted)
    return remainder

`is_empty()`

method to check if the molecule graph is empty or not

Source code in retropaths/molecules/molecule.py

def is_empty(self):
    """Return True when the molecule graph has no nodes, False otherwise."""
    node_count = len(self.nodes)
    return node_count == 0

`is_lower_than_number_of_element(n, element='C')`

returns True if the molecule contains less or equal number of "element" atoms

Source code in retropaths/molecules/molecule.py

def is_lower_than_number_of_element(self, n: int, element: str = 'C') -> bool:
    """
    Return True if the molecule contains at most n atoms of type "element"
    (carbon by default), False if it contains more.
    """
    matching_atoms = sum(1 for node in self.nodes if self.nodes[node]['element'] == element)
    return matching_atoms <= n

`is_subgraph_isomorphic_to(mol, timeout_seconds=10)`

Compares self to mol and returns True if self is isomorphic to a subgraph of mol. a.is_subgraph_isomorphic_to(b) means that a is a subgraph of b.

Source code in retropaths/molecules/molecule.py

def is_subgraph_isomorphic_to(self, mol, timeout_seconds=10):
    """
    Compare self to mol and return True if self is isomorphic to a subgraph of mol:
    a.is_subgraph_isomorphic_to(b) means that a is a subgraph of b.

    self is matched after its R groups have been removed (remove_r_groups).
    timeout_seconds is handed to the SubGraphMatcher.
    """
    matcher = SubGraphMatcher(mol, timeout_seconds=timeout_seconds)
    template = self.remove_r_groups()
    return matcher.is_subgraph_isomorphic(template)

`list_of_elements()`

returns a list of elements

Source code in retropaths/molecules/molecule.py

def list_of_elements(self):
    """Return the 'element' of every node, in node order."""
    elements = []
    for node in self.nodes:
        elements.append(self.nodes[node]["element"])
    return elements

`neutralize()`

This is nothing less than a wrapper for neutralizing cations and anions

Source code in retropaths/molecules/molecule.py

def neutralize(self):
    """
    Wrapper that neutralizes the amine cations first and then
    each anion piece with H.
    """
    without_amine_cations = self.neutralize_amine_cations()
    return without_amine_cations.neutralize_each_anion_piece_with_H()

`neutralize_anions_with_H()`

We cannot blindly neutralize anions, because of groups like NO2, where O is -1 and N is +1. So we check if a neighbor of the anion atom is +1, in which case we DO NOT neutralize it.

Other groups like N=N=N made me rework the whole function. I now go to each atom and search for atoms that are not in equilibrium, for instance a +1 atom attached to 2 different -1 atoms. This creates a net -1 that needs to be corrected.

Source code in retropaths/molecules/molecule.py

def neutralize_anions_with_H(self):
    """
    Neutralize a negatively charged molecule by adding hydrogens to its anions.

    We cannot blindly neutralize anions, because of groups like NO2, where O is -1 and N is +1.
    Other groups like N=N=N made me rework the whole function: I now go to each cation and look at
    the charged atoms bonded to it. When their charges sum to less than zero, one of them is paired
    with the cation (on a scratch copy) and so does not receive a hydrogen.
    Every negative charge left after this pairing gets one hydrogen.

    Nothing is changed when the total charge of the molecule is not negative.

    Returns:
        A new molecule, with an empty cached smiles and the neighbors set again.

    Raises:
        NeutralizationError: when the molecule is still charged after the hydrogens were added.
    """
    new_mol = self.copy()
    new_mol._smiles = ""  # the graph is about to change, the stored smiles would be stale
    # Scratch copy: charges are cancelled here only to decide where the hydrogens go.
    neutralizable = self.copy()

    counter = new_mol.give_me_free_index()
    if new_mol.charge < 0:
        for cation_index in [x for x in neutralizable.nodes if neutralizable.nodes[x]["charge"] > 0]:
            anion_indexes = [x for x in list(neutralizable.neighbors(cation_index)) if neutralizable.nodes[x]["charge"] != 0]
            sum_neigh_charges = sum(neutralizable.nodes[x]["charge"] for x in anion_indexes)
            if sum_neigh_charges < 0:  # AV this zero is controversial. Good luck with it.
                # The cation has more negative than positive charge around it: pair it with one neighbor.
                neutralizable.nodes[anion_indexes[0]]["charge"] += 1
                neutralizable.nodes[cation_index]["charge"] -= 1

        for anion_index in [x for x in neutralizable.nodes if neutralizable.nodes[x]["charge"] < 0]:
            number_of_negative_charges = -(neutralizable.nodes[anion_index]["charge"])  # we know it is negative
            for _ in range(number_of_negative_charges):
                index = next(counter)
                new_mol.add_node(index, neighbors=0, element="H", charge=0)  # I am adding 0 neighbors because I want to ensure set_neighbors to be launched later
                new_mol.add_edge(index, anion_index, bond_order="single")
                new_mol.nodes[anion_index]["charge"] += 1
        if new_mol.charge != 0:
            # The exception used to be built and thrown away; it has to be raised to be seen.
            raise NeutralizationError(f"something went wrong in neutralize_anions_with_H with molecule {self.smiles}, with charge {new_mol.charge}")
    new_mol.set_neighbors()
    return new_mol

`qify(number=True)`

transform an R molecule into a Q molecule. this is used: in the apply permute, to make every R molecule different (with different substitution) it is also used in the database search, when we wanted to substitute R for something else (not R but also not C or H)

Source code in retropaths/molecules/molecule.py

def qify(self, number=True):
    """
    Transform an R molecule into a Q molecule.

    Every node whose element starts with "R" is renamed: with number=True the
    letter R is replaced by Q (R3 becomes Q3), with number=False the element
    becomes a bare "Q".

    This is used in the apply permute, to make every R molecule different (with different
    substitution), and in the database search, when we wanted to substitute R for something
    else (not R but also not C or H).
    """
    converted = self.copy()
    for _, attributes in converted.nodes(data=True):
        label = attributes["element"]
        if label[0] != "R":
            continue
        attributes["element"] = label.replace("R", "Q") if number else "Q"
    return converted

`remap_element_names(mapping)`

renames the element name given a dictionary of rename mapping

Source code in retropaths/molecules/molecule.py

def remap_element_names(self, mapping):
    """
    Rename the elements following the mapping dictionary {old name: new name}.
    The molecule is modified in place and returned.
    """
    for node in self.nodes:
        attributes = self.nodes[node]
        current_name = attributes["element"]
        if current_name in mapping:
            attributes["element"] = mapping[current_name]
    return self

`remap_element_names_reverse(mapping)`

renames the element name given a dictionary of rename mapping This is the version with inverted mapping, due to double naming mapping can be a dictionary like this.

{'R1':'R1', 'R2':'R2', 'R3':'R17', 'R4':'R17'}

Source code in retropaths/molecules/molecule.py

def remap_element_names_reverse(self, mapping):
    """
    Rename the elements following a mapping read in reverse: {new name: current name}.
    This version exists because of double naming, where several new names
    point to the same current name, like this:

    {'R1':'R1',
     'R2':'R2',
     'R3':'R17',
     'R4':'R17'}

    Returns a renamed copy; self is not modified.
    """
    renamed = self.copy()
    for new_name, current_name in mapping.items():
        # Only the first node still carrying current_name is renamed, whatever
        # its index is. With the example above the first R17 becomes R3, so the
        # next entry finds one R17 less and turns the remaining one into R4.
        still_named = [node for node in renamed.nodes if renamed.nodes[node]["element"] == current_name]
        first_match = still_named[0]
        renamed.nodes[first_match]["element"] = new_name
    return renamed

`remove_Hs()`

Remove Hydrogens

Source code in retropaths/molecules/molecule.py

def remove_Hs(self):
    """
    Return a copy of the molecule without its hydrogens.
    """
    stripped = self.copy()
    # NOTE(review): the test is "H" in v; if the values of atom_types are
    # element strings this also matches any other label containing an
    # uppercase H. Confirm that this is intended.
    hydrogen_nodes = [node for node, label in self.atom_types.items() if "H" in label]
    for node in hydrogen_nodes:
        stripped.remove_node(node)
    return stripped

`remove_fragments(remove)`

Remove connected components, molecular fragments, which contain the nodes listed in 'remove' remove - an integer or a list of integers

Source code in retropaths/molecules/molecule.py

def remove_fragments(self, remove):
    """
    Remove connected components, molecular fragments, which contain the nodes listed in 'remove'.

    Args:
        remove: an integer or a list of integers (node indexes of self). Several
            indexes may belong to the same fragment. Any other kind of value
            removes nothing.

    Returns:
        A copy of the molecule without those fragments; self is not modified.
    """
    # copy() takes (reindex, start): a plain copy keeps the node labels, so the
    # indexes in 'remove' keep pointing at the atoms the caller meant.
    new_graph = self.copy()

    if hasattr(remove, "__iter__"):
        targets = list(remove)
    elif isinstance(remove, int):
        targets = [remove]
    else:
        targets = []

    # Collect every fragment on the intact graph before deleting anything, so
    # that two targets in the same fragment do not trip over each other.
    nodes_to_drop = set()
    for target in targets:
        nodes_to_drop.update(nx.node_connected_component(new_graph, target))
    new_graph.remove_nodes_from(nodes_to_drop)

    return new_graph

`remove_hydrogens(list_of_hydrogen_to_remove)`

This will create anions given a list of acid hydrogens

Source code in retropaths/molecules/molecule.py

def remove_hydrogens(self, list_of_hydrogen_to_remove):
    """
    Create anions from a list of acid hydrogens: each listed hydrogen is removed
    and the charge of the atom it was bonded to is lowered by one.

    Returns a new molecule; self is not modified.
    """
    anion = self.copy()
    for hydrogen in list_of_hydrogen_to_remove:
        bonded_atoms = list(anion.neighbors(hydrogen))
        assert len(bonded_atoms) == 1, f"Watch out, an hydrogen is linked to more than atom in {anion}"
        (heavy_atom,) = bonded_atoms[:1]
        anion.nodes[heavy_atom]["charge"] -= 1
        anion.remove_node(hydrogen)
    return anion

`remove_r_groups()`

If this molecule is a template, remove the R-groups and return a valid molecule fragment

Source code in retropaths/molecules/molecule.py

def remove_r_groups(self):
    """
    If this molecule is a template, remove the R-groups and return a valid molecule fragment.

    The nodes removed are those whose entry in atom_types contains "R" or "X".
    A molecule that is not a template is returned as a plain copy.
    """
    fragment = self.copy()
    if not self.is_template:
        return fragment

    placeholder_nodes = [node for node, label in self.atom_types.items() if "R" in label or "X" in label]
    for node in placeholder_nodes:
        fragment.remove_node(node)
    return fragment

`renumber_indexes(swaps)`

Takes a molecule and a mapping {2:3, 3:2, 5:4, 4:6, 6:5} and returns the new molecule that has the VALUES of the mapping where the KEYS were

Source code in retropaths/molecules/molecule.py

def renumber_indexes(self, swaps):
    """
    Take a mapping like {2:3, 3:2, 5:4, 4:6, 6:5} and return the new molecule
    that has the VALUES of the mapping where the KEYS were.
    The relabelling is done by nx.relabel_nodes.
    """
    return nx.relabel_nodes(self, swaps)

`replace_A_with_element_matching()`

returns a new molecule with A atoms replaced by one of its contained atoms

Returns:

Name	Type	Description
`Molecule`	`Molecule`	a molecule object

Source code in retropaths/molecules/molecule.py

def replace_A_with_element_matching(self) -> Molecule:
    """Return a copy in which every "A" atom takes the first of its candidate elements.

    For each node whose element is "A", the copy receives the first entry of
    that node's "element_matching" attribute as its element.

    Returns:
        Molecule: a molecule object
    """
    resolved = self.copy()
    for index in self.nodes:
        attributes = self.nodes[index]
        if attributes["element"] != "A":
            continue
        resolved.nodes[index]["element"] = attributes["element_matching"][0]
    return resolved

`replace_L_with_R()`

This needs to be refactored, CRA used it for polymer generation.

Source code in retropaths/molecules/molecule.py

def replace_L_with_R(self):
    """
    Return a copy in which every node labelled "L2" is relabelled "R1".

    This needs to be refactored, CRA used it for polymer generation.
    """
    relabelled = self.copy()
    for index in relabelled.nodes():
        attributes = relabelled.nodes[index]
        if attributes["element"] == "L2":
            attributes["element"] = "R1"
    return relabelled

`replace_R_groups_from_rules(rules, MG, seed=42)`

this will replace the R groups with matching groups from rules

Source code in retropaths/molecules/molecule.py

def replace_R_groups_from_rules(self, rules, MG, seed=42):
    """
    Replace each R group of this template with a randomly chosen matching group.

    The candidates for every R label are read from rules.enforce.dictionary.
    A label missing from that dictionary is first registered through
    rules.add_new_R_pattern_to_enforce_rule with the pattern ["HL"].

    Args:
        rules: object exposing enforce.dictionary and add_new_R_pattern_to_enforce_rule
        MG: matching-group collection forwarded to replace_R_groups_with_matching_group
        seed: value used to seed the module-level random generator

    Returns:
        the result of replace_R_groups_with_matching_group for the chosen groups
    """
    assert self.is_template, "The molecule must be a template molecule"

    # NOTE: this reseeds the global random module, not a private generator
    random.seed(seed)

    chosen_groups = []
    for r_label in self.which_r_groups:
        if r_label not in rules.enforce.dictionary:
            rules.add_new_R_pattern_to_enforce_rule(r_label, ["HL"], MG)
        candidates = rules.enforce.dictionary[r_label]
        chosen_groups.append(random.choice(candidates))

    return self.replace_R_groups_with_matching_group(MG, chosen_groups)

`replace_R_groups_with_L_molecule(L_molecules)`

This needs to be refactored. CRA used it for polymer generation. this will replace the R groups with matching groups

Source code in retropaths/molecules/molecule.py

def replace_R_groups_with_L_molecule(self, L_molecules):
    """
    Attach the given L-terminated molecules in place of this molecule's R groups.

    This needs to be refactored. CRA used it for polymer generation.

    R labels and L_molecules are paired positionally with zip, so surplus
    entries on either side are silently ignored. For each pair, the graph is
    merged into the working copy, the R node and the first "L" node found are
    removed, and their former neighbours are joined by a single bond.

    Args:
        L_molecules: graphs to attach, one per R label

    Returns:
        the copy of the molecule with the groups attached
    """
    r_labels = self.which_r_groups
    mol = self.copy()

    for r_label, l_graph in zip(r_labels, L_molecules):

        # merge the incoming graph into the working molecule
        mol += l_graph

        r_nodes = [n for n in mol.nodes if mol.nodes[n]["element"] == r_label]
        assert len(r_nodes) == 1, f"{r_label} Something strange with duplicate R values (or missing)."
        r_node = r_nodes[0]

        r_partners = list(mol.neighbors(r_node))
        assert len(r_partners) == 1, "To use this functionality you need to substitute an hydrogen or something that only has one neighbor."
        molecule_side = r_partners[0]

        # only the first "L" node is used; several may be present
        l_nodes = [n for n in mol.nodes if mol.nodes[n]["element"] == "L"]
        l_node = l_nodes[0]
        group_side = list(mol.neighbors(l_node))[0]

        mol.remove_node(l_node)
        mol.remove_node(r_node)
        mol.add_edge(molecule_side, group_side, bond_order="single")
        mol.set_neighbors()
    return mol

`replace_R_groups_with_matching_group(MG, MG_list)`

this will replace the R groups with matching groups

Source code in retropaths/molecules/molecule.py

def replace_R_groups_with_matching_group(self, MG, MG_list):
    """
    Attach one matching group in place of each R group of this molecule.

    R labels and matching groups are paired positionally and must be equal in
    number. For each pair, the group's graph (after its replace_A() step) is
    merged into the working copy, the R node and the group's single "L" node
    are removed, and their former neighbours are joined by a single bond.

    Args:
        MG: collection whose data[symbol] entries provide the matching groups
        MG_list: matching groups to use, each exposing a symbol attribute

    Returns:
        the copy of the molecule with the groups attached
    """
    labels = self.which_r_groups
    mg_label_list = [group.symbol for group in MG_list]
    assert len(labels) == len(mg_label_list), f"labels {len(labels)} and mg_label_list {len(mg_label_list)} must be of equal length"
    mol = self.copy()

    for r_label, symbol in zip(labels, mg_label_list):

        # resolve any A atoms in the matching group, then merge its graph in
        mol += MG.data[symbol].replace_A().graph

        r_nodes = [n for n in mol.nodes if mol.nodes[n]["element"] == r_label]
        assert len(r_nodes) == 1, f"{r_label} Something strange with duplicate R values (or missing)."
        r_node = r_nodes[0]

        r_partners = list(mol.neighbors(r_node))
        assert len(r_partners) == 1, "To use this functionality you need to substitute an hydrogen or something that only has one neighbor."
        molecule_side = r_partners[0]

        l_nodes = [n for n in mol.nodes if mol.nodes[n]["element"] == "L"]
        assert len(l_nodes) == 1, "To use this functionality you need to select a Matching group with only one L group."
        l_node = l_nodes[0]
        group_side = list(mol.neighbors(l_node))[0]

        mol.remove_node(l_node)
        mol.remove_node(r_node)
        mol.add_edge(molecule_side, group_side, bond_order="single")
        mol.set_neighbors()
    return mol

`replace_r_with_h()`

transform an R molecule into a normal molecule with hydrogen atoms. this is used in order to make the simplest template case possible. We need to take care, that this will also replace an R that has rules that exclude H

Source code in retropaths/molecules/molecule.py

def replace_r_with_h(self):
    """
    Turn a template into a plain molecule by relabelling its R nodes as hydrogens.

    This is used to build the simplest possible instance of a template.
    Be careful: an R whose rules exclude H is replaced as well.

    NOTE(review): the test is on the first character of the label only, so any
    element whose label starts with "R" is relabelled too.

    Returns:
        the relabelled copy of the molecule
    """
    hydrogenated = self.copy()
    for index in hydrogenated.nodes():
        attributes = hydrogenated.nodes[index]
        if attributes["element"][0] == "R":
            attributes["element"] = "H"
    return hydrogenated

`replace_r_with_methyl()`

This can eventually become a function where you pass the actual group you substitute

Source code in retropaths/molecules/molecule.py

def replace_r_with_methyl(self):
    """
    Return a copy in which every R node is replaced by a methyl group.

    This can eventually become a function where you pass the actual group you substitute.

    One L-terminated methyl fragment is merged in per R node. Each R node is
    then paired positionally with an "L" node, their neighbours are bonded
    together, and both placeholder nodes are removed.
    """
    mol2 = self.copy()
    mol2 += Molecule()  # hack for renumbering.
    methyl = Molecule.from_smiles("C").substitute_group(4, "L")

    # R nodes are those whose label starts with "R"
    r_nodes = [n for n in mol2.nodes if mol2.nodes[n]["element"][0] == "R"]

    # add one methyl fragment per R node
    for _ in r_nodes:
        mol2 += methyl

    l_nodes = [n for n in mol2.nodes if mol2.nodes[n]["element"] == "L"]
    assert len(r_nodes) == len(l_nodes), "this should never happen"

    for r_node, l_node in zip(r_nodes, l_nodes):
        molecule_side = list(mol2.neighbors(r_node))[0]
        methyl_side = list(mol2.neighbors(l_node))[0]
        mol2.add_edge(molecule_side, methyl_side, bond_order="single")
        mol2.remove_node(l_node)
        mol2.remove_node(r_node)
    return mol2

`replace_r_with_rf()`

This function serves the purpose of tricking rdkit, making the template molecule a little heavier.

Source code in retropaths/molecules/molecule.py

def replace_r_with_rf(self):
    """
    Return a copy in which every node whose label starts with "R" is relabelled "Rf".

    This serves the purpose of tricking rdkit, making the template molecule a little heavier.
    """
    heavier = self.copy()
    for index in heavier.nodes:
        attributes = heavier.nodes[index]
        if attributes["element"][0] == "R":
            attributes["element"] = "Rf"
    return heavier

`satisfy_bradt_good_enough(cycles=None, verbosity=0)`

warning, this cannot possibly catch all the bridgeheads nor all cases where the bridge is bigger than 7 carbons. This is checking all nx basis cycles, keeping the nodes that are only present in one cycle.

Source code in retropaths/molecules/molecule.py

def satisfy_bradt_good_enough(self, cycles=None, verbosity=0) -> bool:
    '''
    Heuristic check that no detected bridgehead atom carries a non-single bond.

    Warning: this cannot possibly catch all the bridgeheads nor all cases where
    the bridge is bigger than 7 carbons.
    It inspects every cycle of the networkx cycle basis and looks at the nodes
    that are present in that cycle only.

    Args:
        cycles: list of cycles (each a list of node indexes); computed with
            nx.cycle_basis(self) when None
        verbosity: any value above 0 prints the intermediate state

    Returns:
        True when there is at most one cycle, or when every retained bridgehead
        has only 'single' bonds on its incident edges; False otherwise.
    '''
    if verbosity > 0:
        print(self.smiles)
    if cycles is None:
        cycles = nx.cycle_basis(self)
    # zero or one cycle: nothing can be bridged, accept immediately
    if len(cycles) <= 1:
        return True

    if verbosity > 0:
        print(f'{cycles=}\n')
    # candidates found through a missing direct edge (see except branch below)
    bridge_heads = set()
    # nodes excluded from the final check because a direct edge links the pair
    banish = set()
    for list_index in cycles:
        # every node that appears in any cycle different from the current one
        all_others = set()
        for xs in cycles:
            if xs != list_index:
                for x in xs:
                    all_others.add(x)

        if verbosity > 0:
            print(f'  {all_others=}\n  {list_index=}\n')
        prev_real = None
        nexT_real = None
        for i, index in enumerate(list_index):
            # only nodes exclusive to this cycle are used as probes
            if index not in all_others:
                # i - 1 is -1 for the first element, i.e. it wraps to the last node
                prev = list_index[i-1]
                if prev in all_others:
                    prev_real = prev
                nexT = list_index[(i + 1) % len(list_index)]
                if nexT in all_others:
                    nexT_real = nexT
        # NOTE: prev_real / nexT_real keep only the last shared neighbour found
        if verbosity > 0:
            print(f'  {prev_real=}\n  {nexT_real=}\n')
        try:
            if prev_real is not None and nexT_real is not None:
                # the lookup is done only for its KeyError when the edge is absent
                self.edges[(prev_real, nexT_real)]
                banish.add(prev_real)
                banish.add(nexT_real)
        except KeyError:
            # no direct edge between the two shared nodes: record both as bridgeheads
            bridge_heads.add(prev_real)
            bridge_heads.add(nexT_real)

    bridge_heads_filtered = [x for x in bridge_heads if x not in banish]
    if verbosity > 0:
        print(f'{bridge_heads=}\n{bridge_heads_filtered=}')
    # each retained bridgehead must have 'single' bond_order on all incident edges
    booleans = []
    for index in bridge_heads_filtered:
        this_bool = all([self.edges[edge]['bond_order'] == 'single' for edge in self.edges if index in edge])
        booleans.append(this_bool)
    return all(booleans)

`separate_graph_in_pieces()`

this function returns a list of single connected graphs.

Source code in retropaths/molecules/molecule.py

def separate_graph_in_pieces(self) -> list[Molecule]:
    """
    Split this graph into its connected components.

    Returns:
        a list with one copied subgraph per connected component
    """
    return [self.subgraph(component).copy() for component in nx.connected_components(self)]

`set_neighbors()`

given a molecule, this function will set the neighbors attribute

Source code in retropaths/molecules/molecule.py

def set_neighbors(self):
    """
    Store on every node, under the "neighbors" attribute, how many neighbours it has.

    Returns:
        this same molecule, updated in place
    """
    for index in self.nodes:
        neighbor_count = sum(1 for _ in self.neighbors(index))
        self.nodes[index]["neighbors"] = neighbor_count
    return self

`smiles_from_multiple_molecules(mol)`

this method is used to have a unique smile for each graph even when the graph contains multiple molecules. It is used in the pot to have uniqueness.

Source code in retropaths/molecules/molecule.py

def smiles_from_multiple_molecules(mol):
    """
    Build one smiles string for a graph that may contain several molecules.

    Each connected piece is converted with force_smiles(); the pieces are
    sorted and joined with "." so the result does not depend on piece order.
    It is used in the pot to have uniqueness.
    """
    fragment_smiles = sorted(piece.force_smiles() for piece in mol.separate_graph_in_pieces())
    return ".".join(fragment_smiles)

`substitute_bond(i, j, bond_order='single', copy=True)`

This will change the bond order of the edge between i and j. copy :: Bool <- will it create a new graph or not?

Source code in retropaths/molecules/molecule.py

def substitute_bond(self, i, j, bond_order="single", copy=True):
    """
    Replace the edge between i and j with one of the given bond order.

    The existing edge is removed and a new one is added, so the new edge
    carries the bond_order attribute only.

    Args:
        i: first node of the edge
        j: second node of the edge
        bond_order: bond order stored on the new edge
        copy: when True work on a copy, otherwise modify this molecule in place

    Returns:
        the molecule that was modified
    """
    target = self.copy() if copy else self

    target.remove_edge(i, j)
    target.add_edge(i, j, bond_order=bond_order)

    return target

`substitute_group(i, label, cut=None, copy=True, force_bond_order='single', charge=0, element_matching=None)`

It will insert into the molecule a group labeled "label" at node index i, disconnecting it from "exit" i :: int <- the node to be replaced. cut :: (int,int) <- This selects an edge to be cut in case there is an ambiguity label :: Str <- the label of new node. copy :: Bool <- will it create a new graph or not?

Source code in retropaths/molecules/molecule.py

def substitute_group(self, i, label, cut=None, copy=True, force_bond_order="single", charge=0, element_matching=None):
    """
    Put a node labelled `label` in place of node i.

    Without `cut`, node i is simply relabelled, provided it takes part in at
    most one edge; otherwise a warning is printed and nothing is changed.
    In this mode `charge` and `force_bond_order` are not used.

    With `cut`, a new node is created and bonded to the other end of the cut
    edge, the cut edge is removed, and only the connected component that
    contains that other end is returned (as a subgraph of the working graph).

    i :: int <- the node to be replaced.
    label :: Str <- the label of new node.
    cut :: (int,int) <- This selects an edge to be cut in case there is an ambiguity
    copy :: Bool <- will it create a new graph or not?
    force_bond_order :: Str <- bond order of the edge to the new node (cut mode).
    charge :: int <- charge stored on the new node (cut mode).
    element_matching <- stored on the node; required when label is "A".
    """
    mol = self.copy() if copy else self

    if label == "A":
        assert element_matching is not None, "An A atom must be added with element_matching flag."

    new_index = next(mol.give_me_free_index())

    if cut is not None:
        # the user may give the edge in either order: pick the end that is not i
        attach_node = cut[1] if i == cut[0] else cut[0]

        mol.add_node(new_index, neighbors=0, element=f"{label}", charge=charge, element_matching=element_matching)
        mol.add_edge(new_index, attach_node, bond_order=force_bond_order)
        mol.remove_edge(attach_node, i)

        # keep only the piece that still contains the attachment point
        kept_nodes = nx.node_connected_component(mol, attach_node)
        mol = mol.subgraph(kept_nodes)
        mol.set_neighbors()
        return mol

    how_many_edges_connected = sum(i in edge for edge in mol.edges)
    if how_many_edges_connected > 1:
        print(f"Removing link {i} is ambiguous on {self}, has it has {how_many_edges_connected} edges. You should define the cut=(x,y) keyword\n\n")
        return mol

    # renaming node only
    node_data = mol.nodes[i]
    node_data["element"] = f"{label}"
    if element_matching:
        node_data["element_matching"] = element_matching
    return mol

`substitute_groups(list_of_simple_substitutions, element_matching=None)`

Applies, in order, the changes in list_of_simple_substitutions. Each change is a pair (i, s) which causes node i to be replaced with a node with label 's'.

Source code in retropaths/molecules/molecule.py

def substitute_groups(self, list_of_simple_substitutions, element_matching=None):
    """
    Apply a sequence of simple substitutions, one after the other.

    Each entry is a pair (i, s): node i is replaced with a node labelled 's'
    through substitute_group, and the next entry is applied to that result.
    With an empty list this very molecule is returned.
    """
    current = self
    for index, label in list_of_simple_substitutions:
        current = current.substitute_group(index, label, element_matching=element_matching)
    return current

`update_edges_and_charges(changes)`

Apply changes, a dictionary with keys in {delete, single, double, aromatic, triple} and values lists of tuple pairs (i, j) which define edges to be removed or added.

Parameters:

Name	Type	Description	Default
`changes`	`Changes`	it is a retropath changes object	required

Returns:

Name	Type	Description
`Molecule`	`Molecule`	a molecule with changes applied

Source code in retropaths/molecules/molecule.py

def update_edges_and_charges(self, changes: Changes) -> Molecule:
    """Return a copy of this molecule with bond and charge changes applied.

    The bond changes are read from changes.bonds.dict(), a dictionary with keys
    in {delete, single, double, aromatic, triple} whose values are lists of
    pairs (i, j). Edges under "delete" are removed first; the remaining keys
    add an edge with the corresponding bond order. Charge changes are deltas
    added to the current charge of each atom.

    Args:
        changes (Changes): it is a retropath changes object

    Returns:
        Molecule: a molecule with changes applied
    """
    updated = self.copy()
    bond_changes = changes.bonds.dict()

    # deletions go first so an edge can be removed and re-added in one pass
    for first, second in bond_changes.get("delete", ()):
        updated.remove_edge(first, second)

    for order in ("single", "double", "aromatic", "triple"):
        for first, second in bond_changes.get(order, ()):
            updated.add_edge(first, second, bond_order=order)

    updated.set_neighbors()

    # each entry is (atom index, delta to add to its charge)
    for atom_index, delta in changes.charges.charges:
        updated.nodes[atom_index]["charge"] += delta

    return updated

`which_atoms_are_in()`

Returns the set of unique element labels present in the molecule.

Source code in retropaths/molecules/molecule.py

def which_atoms_are_in(self):
    """returns the set of distinct element labels found on the nodes"""
    elements = set()
    for index in self.nodes:
        elements.add(self.nodes[index]["element"])
    return elements

Molecule

Molecule

empirical_formula property

is_template property

molecular_formula: MolecularFormula property

r_groups property

smiles property

which_r_groups property

__add__(mol2)

__eq__(other_molecule)

__init__(name='', smi='')

__mul__(other)

__str__()

add_list_of_molecules(list_molec) classmethod

are_R_labels_equivalent(first_label, second_label)

change_element_name(lab1, lab2)

collapse_nodes(list_of_collapses)

copy(reindex=False, start=0)

deduplicate_single_graph()

delete_duplicates(graph_list, verbosity=0) staticmethod

deqify()

draw(mode='rdkit', size=None, string_mode=False, node_index=True, percentage=None, force=None, fixed_bond_length=None, fixedScale=None, fontSize=None, lineWidth=None, charges=True, neighbors=False)

draw_list(molecule_list, names=[], mode='rdkit', title='', charges=False, size=(650, 650), width=100, columns=5, fixed_bond_length=None, string_mode=False, node_index=True, neighbors=False, arrows=False, borders=False, display_title=True, arrow_size=2) staticmethod

expand_functional_groups(FG, verbosity=0)

expand_nodes()

force_smiles()

from_debug_string(nodes, edges) classmethod

from_list_of_smiles_to_single_graph(list_of_smiles) classmethod

from_smiles(smi, name='') classmethod

get_L_atom_index()

get_bond_changes(mol)

get_bond_order(first_atom, second_atom)

get_element(index_atom)

get_neighbors_of_node(ind)

get_subgraph_isomorphisms_of(target, verbosity=0)

graph_difference(subtracted_molecule)

graph_difference_with_list_and_duplicates(mols)

is_empty()

is_lower_than_number_of_element(n, element='C')

is_subgraph_isomorphic_to(mol, timeout_seconds=10)

list_of_elements()

neutralize()

neutralize_anions_with_H()

qify(number=True)

remap_element_names(mapping)

remap_element_names_reverse(mapping)

remove_Hs()

remove_fragments(remove)

remove_hydrogens(list_of_hydrogen_to_remove)

remove_r_groups()

renumber_indexes(swaps)

replace_A_with_element_matching()

replace_L_with_R()

replace_R_groups_from_rules(rules, MG, seed=42)

replace_R_groups_with_L_molecule(L_molecules)

replace_R_groups_with_matching_group(MG, MG_list)

replace_r_with_h()

replace_r_with_methyl()

replace_r_with_rf()

satisfy_bradt_good_enough(cycles=None, verbosity=0)

separate_graph_in_pieces()

set_neighbors()

smiles_from_multiple_molecules(mol)

substitute_bond(i, j, bond_order='single', copy=True)

substitute_group(i, label, cut=None, copy=True, force_bond_order='single', charge=0, element_matching=None)

substitute_groups(list_of_simple_substitutions, element_matching=None)

update_edges_and_charges(changes)

which_atoms_are_in()

`Molecule`

`empirical_formula` `property`

`is_template` `property`

`molecular_formula: MolecularFormula` `property`

`r_groups` `property`

`smiles` `property`

`which_r_groups` `property`

`add(mol2)`

`eq(other_molecule)`

`init(name='', smi='')`

`mul(other)`

`str()`

`add_list_of_molecules(list_molec)` `classmethod`

`are_R_labels_equivalent(first_label, second_label)`

`change_element_name(lab1, lab2)`

`collapse_nodes(list_of_collapses)`

`copy(reindex=False, start=0)`

`deduplicate_single_graph()`

`delete_duplicates(graph_list, verbosity=0)` `staticmethod`

`deqify()`

`draw(mode='rdkit', size=None, string_mode=False, node_index=True, percentage=None, force=None, fixed_bond_length=None, fixedScale=None, fontSize=None, lineWidth=None, charges=True, neighbors=False)`

`draw_list(molecule_list, names=[], mode='rdkit', title='', charges=False, size=(650, 650), width=100, columns=5, fixed_bond_length=None, string_mode=False, node_index=True, neighbors=False, arrows=False, borders=False, display_title=True, arrow_size=2)` `staticmethod`

`expand_functional_groups(FG, verbosity=0)`

`expand_nodes()`

`force_smiles()`

`from_debug_string(nodes, edges)` `classmethod`

`from_list_of_smiles_to_single_graph(list_of_smiles)` `classmethod`

`from_smiles(smi, name='')` `classmethod`

`get_L_atom_index()`

`get_bond_changes(mol)`

`get_bond_order(first_atom, second_atom)`

`get_element(index_atom)`

`get_neighbors_of_node(ind)`

`get_subgraph_isomorphisms_of(target, verbosity=0)`

`graph_difference(subtracted_molecule)`

`graph_difference_with_list_and_duplicates(mols)`

`is_empty()`

`is_lower_than_number_of_element(n, element='C')`

`is_subgraph_isomorphic_to(mol, timeout_seconds=10)`

`list_of_elements()`

`neutralize()`

`neutralize_anions_with_H()`

`qify(number=True)`

`remap_element_names(mapping)`

`remap_element_names_reverse(mapping)`

`remove_Hs()`

`remove_fragments(remove)`

`remove_hydrogens(list_of_hydrogen_to_remove)`

`remove_r_groups()`

`renumber_indexes(swaps)`

`replace_A_with_element_matching()`

`replace_L_with_R()`

`replace_R_groups_from_rules(rules, MG, seed=42)`

`replace_R_groups_with_L_molecule(L_molecules)`

`replace_R_groups_with_matching_group(MG, MG_list)`

`replace_r_with_h()`

`replace_r_with_methyl()`

`replace_r_with_rf()`

`satisfy_bradt_good_enough(cycles=None, verbosity=0)`

`separate_graph_in_pieces()`

`set_neighbors()`

`smiles_from_multiple_molecules(mol)`

`substitute_bond(i, j, bond_order='single', copy=True)`

`substitute_group(i, label, cut=None, copy=True, force_bond_order='single', charge=0, element_matching=None)`

`substitute_groups(list_of_simple_substitutions, element_matching=None)`

`update_edges_and_charges(changes)`

`which_atoms_are_in()`