Skip to content

Utilities

qcio.utils

Utility functions for working with qcio objects.

json_dumps

json_dumps(obj: Union[BaseModel, List[BaseModel]]) -> str

Serialization helper for lists of pydantic objects.

Source code in qcio/utils.py
31
32
33
34
35
def json_dumps(obj: Union[BaseModel, List[BaseModel]]) -> str:
    """Serialization helper for lists of pydantic objects."""
    if isinstance(obj, list):
        return json.dumps([o.model_dump() for o in obj])
    return obj.model_dump_json()

rmsd

rmsd(
    struct1: Structure,
    struct2: Structure,
    align: bool = True,
    numthreads: int = 1,
) -> float

Calculate the root mean square deviation between two structures in Angstrom.

Parameters:

Name Type Description Default
struct1 Structure

The first structure.

required
struct2 Structure

The second structure.

required
align bool

Whether to align the structures before calculating the RMSD including atom renumbering. If True, rdkit will compute the optimal alignment of the two structures before calculating the RMSD. If False, the RMSD will be calculated without alignment.

True
numthreads int

The number of threads to use for the RMSD calculation. Applies only to the alignment step if align=True.

1

Returns:

Type Description
float

The RMSD between the two structures in Angstroms.

Source code in qcio/utils.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def rmsd(
    struct1: Structure, struct2: Structure, align: bool = True, numthreads: int = 1
) -> float:
    """
    Calculate the root mean square deviation between two structures in Angstrom.

    Args:
        struct1: The first structure.
        struct2: The second structure.
        align: Whether to align the structures before calculating the RMSD including
            atom renumbering. If True, rdkit will compute the optimal alignment of the
            two structures before calculating the RMSD. If False, the RMSD will be
            calculated without alignment.
        numthreads: The number of threads to use for the RMSD calculation. Applies only
            to the alignment step if `align=True`.


    Returns:
        The RMSD between the two structures in Angstroms.
    """
    _assert_module_installed("rdkit")
    from rdkit.Chem import rdDetermineBonds, rdMolAlign

    # Create RDKit molecules
    mol1 = _rdkit_mol_from_structure(struct1)
    mol2 = _rdkit_mol_from_structure(struct2)

    # Determine connectivity
    rdDetermineBonds.DetermineConnectivity(mol1, charge=struct1.charge)
    rdDetermineBonds.DetermineConnectivity(mol2, charge=struct2.charge)

    # Compute RMSD
    if align:
        rmsd = rdMolAlign.GetBestRMS(mol2, mol1, numThreads=numthreads)
    else:
        rmsd = rdMolAlign.CalcRMS(mol2, mol1)

    return rmsd

align

align(
    struct: Structure,
    refstruct: Structure,
    reorder_atoms: bool = True,
) -> Tuple[Structure, float]

Return a new structure that is optimally aligned to the reference structure.

Parameters:

Name Type Description Default
struct Structure

The structure to align.

required
refstruct Structure

The reference structure.

required
reorder_atoms bool

Reorder the atoms to match the reference structure. If False, the atoms will be aligned without changing their order.

True

Returns:

Type Description
Tuple[Structure, float]

Tuple of the aligned structure and the RMSD in Angstroms.

Source code in qcio/utils.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def align(
    struct: Structure, refstruct: Structure, reorder_atoms: bool = True
) -> Tuple[Structure, float]:
    """Return a new structure that is optimally aligned to the reference structure.

    Args:
        struct: The structure to align.
        refstruct: The reference structure.
        reorder_atoms: Reorder the atoms to match the reference structure. If False,
            the atoms will be aligned without changing their order.

    Returns:
        Tuple of the aligned structure and the RMSD in Angstroms.
    """
    _assert_module_installed("rdkit")
    from rdkit.Chem import rdDetermineBonds, rdMolAlign

    # Create RDKit molecules
    mol = _rdkit_mol_from_structure(struct)
    refmol = _rdkit_mol_from_structure(refstruct)

    # Determine connectivity
    rdDetermineBonds.DetermineConnectivity(mol, charge=struct.charge)
    rdDetermineBonds.DetermineConnectivity(refmol, charge=refstruct.charge)

    # Compute RMSD and align mol to refmol
    if reorder_atoms:
        rmsd, trnsfm_matrix, atm_map = rdMolAlign.GetBestAlignmentTransform(mol, refmol)  # type: ignore # noqa: E501
    else:
        rmsd, trnsfm_matrix = rdMolAlign.GetAlignmentTransform(mol, refmol)

    # Convert to homogeneous coordinates in Angstroms
    coords_homogeneous = np.hstack(
        [struct.geometry_angstrom, np.ones((struct.geometry.shape[0], 1))]
    )

    # Apply the transformation matrix
    transformed_coords_homogeneous = coords_homogeneous @ trnsfm_matrix.T

    # Extract the transformed 3D coordinates and convert to Bohr
    transformed_coords = transformed_coords_homogeneous[:, :3] * ANGSTROM_TO_BOHR

    # Reorder the atoms to match the reference structure
    if reorder_atoms:
        if Counter(struct.symbols) != Counter(refstruct.symbols):
            raise ValueError(
                "Structures must have the same number and type of atoms for "
                "`reorder_atoms=True` at this time. Pass "
                "`reorder_atoms=False` to align structures with different atom "
                "counts."
            )
        symbols = refstruct.symbols
        geometry = np.zeros((len(atm_map), 3))

        for probe_idx, ref_idx in atm_map:
            geometry[ref_idx] = transformed_coords[probe_idx]  # works

    # Otherwise, keep the original atom order
    else:
        symbols = struct.symbols
        geometry = transformed_coords

    return (
        Structure(
            symbols=symbols,
            geometry=geometry,
            charge=struct.charge,
            multiplicity=struct.multiplicity,
            connectivity=struct.connectivity,
            identifiers=struct.identifiers,
        ),
        rmsd,
    )