# Fingerprint Generators

## Creating and using a fingerprint generator

Fingerprint generators can be created by using the functions that return the type of generator desired.

In [7]:
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator

# Parse the example molecule; `mol` is reused by the cells that follow.
mol = Chem.MolFromSmiles('CC(O)C(O)(O)C')
# Create an atom-pair fingerprint generator with no explicit parameters.
generator = rdFingerprintGenerator.GetAtomPairGenerator()
# Generate the molecule's fingerprint in sparse count form.
fingerprint = generator.GetSparseCountFingerprint(mol)
# Collect the non-zero entries; the output printed below is a dict of
# integer keys to integer counts.
non_zero = fingerprint.GetNonzeroElements()

print(non_zero)

{541731: 1, 574497: 1, 574498: 1, 590881: 1, 590882: 1, 590945: 1, 1590306: 3, 1590307: 3, 1590369: 1, 1590370: 2, 1590401: 2, 1590402: 1, 1592354: 1, 1592355: 2}


We can set the parameters for the fingerprint while creating the generator for it.

In [8]:
# Fingerprint parameters are supplied when the generator is created.
# This configured `generator` is also the one used by the next cell.
generator = rdFingerprintGenerator.GetAtomPairGenerator(
    minDistance=1,
    maxDistance=2,
    includeChirality=False,
)
fingerprint = generator.GetSparseCountFingerprint(mol)
non_zero = fingerprint.GetNonzeroElements()

print(non_zero)

{574497: 1, 574498: 1, 590881: 1, 590882: 1, 590945: 1, 1590306: 3, 1590369: 1, 1590370: 2, 1590401: 2, 1590402: 1, 1592354: 1}


Molecule-dependent arguments are provided when the fingerprint is generated, rather than when the generator is created.

In [9]:
# Molecule-dependent arguments are passed to the fingerprint call itself;
# `generator` is the one configured in the previous cell.

# First call: pass a list of atom indices as `fromAtoms`.
fingerprint = generator.GetSparseCountFingerprint(mol, fromAtoms=[1])
non_zero = fingerprint.GetNonzeroElements()

print(non_zero)

# Second call: pass a list of atom indices as `ignoreAtoms`.
fingerprint = generator.GetSparseCountFingerprint(mol, ignoreAtoms=[1, 5])
non_zero = fingerprint.GetNonzeroElements()

print(non_zero)

{574497: 1, 574498: 1, 590945: 1, 1590369: 1, 1590370: 2}
{590881: 1, 590882: 1, 1590306: 2, 1590401: 1, 1590402: 1}


## Types of fingerprint generators

Currently, four fingerprint types are supported by fingerprint generators: atom pair, Morgan, RDKit, and topological torsion.

In [10]:
# One (label, factory, keyword arguments) row per supported fingerprint type.
# The factory is called inside the loop so that each generator is still
# created immediately before it is used.
fingerprint_types = (
    ("Atom pair", rdFingerprintGenerator.GetAtomPairGenerator, {}),
    ("Morgan", rdFingerprintGenerator.GetMorganGenerator, {"radius": 3}),
    ("RDKitFingerprint", rdFingerprintGenerator.GetRDKitFPGenerator, {}),
    (
        "TopologicalTorsion",
        rdFingerprintGenerator.GetTopologicalTorsionGenerator,
        {},
    ),
)

for label, make_generator, generator_kwargs in fingerprint_types:
    generator = make_generator(**generator_kwargs)
    fingerprint = generator.GetSparseCountFingerprint(mol)
    non_zero = fingerprint.GetNonzeroElements()

    print(label, non_zero)

Atom pair {541731: 1, 574497: 1, 574498: 1, 590881: 1, 590882: 1, 590945: 1, 1590306: 3, 1590307: 3, 1590369: 1, 1590370: 2, 1590401: 2, 1590402: 1, 1592354: 1, 1592355: 2}
Morgan {864662311: 3, 1542631284: 2, 1542633699: 1, 1741045729: 1, 2245273601: 1, 2245277810: 1, 2246728737: 2, 2782665878: 1, 2927183216: 1, 3537119515: 1, 3537123720: 1}
RDKitFingerprint {398441839: 4, 561308092: 2, 623990427: 1, 1524090560: 6, 1606685044: 2, 1636471275: 3, 1753257252: 1, 1940446997: 2, 2332326087: 1, 2880661462: 1, 2911990635: 1, 3060973103: 1, 3083228099: 1, 3473416248: 3, 3743603664: 1, 3768818763: 1, 3977409745: 3, 4274652475: 3, 4275705116: 3, 4279989780: 2}
TopologicalTorsion {4303897120: 1, 12893570080: 1, 12893831712: 2, 12893831776: 2}


## Invariant generators

It is possible to use custom invariant generators while creating fingerprints. Invariant generators provide values to be used as invariants for each atom or bond in the molecule, and these values affect the generated fingerprint.

In [11]:
# A smaller molecule keeps the printed fingerprints short; `simpleMol` is
# reused later in the notebook.
simpleMol = Chem.MolFromSmiles("CCC")

# Baseline: RDKit fingerprint generator created without an explicit
# invariant generator.
generator = rdFingerprintGenerator.GetRDKitFPGenerator()
fingerprint = generator.GetSparseCountFingerprint(simpleMol)
non_zero = fingerprint.GetNonzeroElements()

print("RDKitFingerprint", non_zero)

# Same fingerprint type, now taking its atom invariants from the atom-pair
# invariant generator; the printed elements differ from the baseline.
atomInvariantsGen = rdFingerprintGenerator.GetAtomPairAtomInvGen()
generator = rdFingerprintGenerator.GetRDKitFPGenerator(
    atomInvariantsGenerator=atomInvariantsGen
)
fingerprint = generator.GetSparseCountFingerprint(simpleMol)
non_zero = fingerprint.GetNonzeroElements()

print("RDKitFingerprint", non_zero)

RDKitFingerprint {1940446997: 1, 4275705116: 2}
RDKitFingerprint {578931652: 1, 2298572045: 2}


The currently available invariant generators are:

In [12]:
# Atom invariant generators, each paired with the label printed for it.
# Every one is plugged into a Morgan generator of radius 3 in turn.
atom_invariant_factories = (
    (
        "Morgan with AtomPairAtomInvGen",
        rdFingerprintGenerator.GetAtomPairAtomInvGen,
    ),
    # Default for Morgan FP
    (
        "Morgan with MorganAtomInvGen",
        rdFingerprintGenerator.GetMorganAtomInvGen,
    ),
    (
        "Morgan with MorganFeatureAtomInvGen",
        rdFingerprintGenerator.GetMorganFeatureAtomInvGen,
    ),
    (
        "Morgan with RDKitAtomInvGen",
        rdFingerprintGenerator.GetRDKitAtomInvGen,
    ),
)

for label, make_atom_invariants in atom_invariant_factories:
    atomInvariantsGen = make_atom_invariants()
    generator = rdFingerprintGenerator.GetMorganGenerator(
        radius=3,
        atomInvariantsGenerator=atomInvariantsGen,
    )
    fingerprint = generator.GetSparseCountFingerprint(mol)
    non_zero = fingerprint.GetNonzeroElements()

    print(label, non_zero)

# Bond invariant generators are passed through their own keyword argument.
# Default for Morgan FP
bondInvariantsGen = rdFingerprintGenerator.GetMorganBondInvGen()
generator = rdFingerprintGenerator.GetMorganGenerator(
    radius=3,
    bondInvariantsGenerator=bondInvariantsGen,
)
fingerprint = generator.GetSparseCountFingerprint(mol)
non_zero = fingerprint.GetNonzeroElements()

print("Morgan with MorganBondInvGen", non_zero)

Morgan with AtomPairAtomInvGen {33: 2, 35: 1, 36: 1, 97: 3, 523835848: 1, 618975071: 1, 2343097318: 1, 3205489706: 2, 3205489717: 1, 3205494725: 1, 3205494778: 1}
Morgan with MorganAtomInvGen {864662311: 3, 1542631284: 2, 1542633699: 1, 1741045729: 1, 2245273601: 1, 2245277810: 1, 2246728737: 2, 2782665878: 1, 2927183216: 1, 3537119515: 1, 3537123720: 1}
Morgan with MorganFeatureAtomInvGen {0: 4, 3: 3, 614176407: 1, 792807483: 1, 3205495869: 2, 3205496825: 3, 3208860345: 1}
Morgan with RDKitAtomInvGen {12: 4, 16: 3, 165450225: 1, 608338133: 1, 2705297134: 1, 3205492925: 3, 3205493174: 2}
Morgan with MorganBondInvGen {864662311: 3, 1542631284: 2, 1542633699: 1, 1741045729: 1, 2245273601: 1, 2245277810: 1, 2246728737: 2, 2782665878: 1, 2927183216: 1, 3537119515: 1, 3537123720: 1}


## Custom Invariants

It is also possible to provide custom invariants directly instead of using an invariant generator.

In [13]:

# Baseline: atom-pair fingerprint of `simpleMol` with no custom invariants.
generator = rdFingerprintGenerator.GetAtomPairGenerator()
fingerprint = generator.GetSparseCountFingerprint(simpleMol)
non_zero = fingerprint.GetNonzeroElements()

print(non_zero)

# Same generator, but with the atom invariants given explicitly in the
# fingerprint call (three values here; presumably one per atom of
# `simpleMol` -- confirm against the RDKit documentation).
customAtomInvariants = [1, 1, 1]
fingerprint = generator.GetSparseCountFingerprint(
    simpleMol,
    customAtomInvariants=customAtomInvariants,
)
non_zero = fingerprint.GetNonzeroElements()

print(non_zero)


{541730: 1, 558113: 2}
{16417: 2, 16418: 1}


## Convenience functions

## Bulk fingerprint