Skip to content

Utilities

qcio.json_dumps

json_dumps(obj: Union[BaseModel, list[BaseModel]]) -> str

Serialization helper for lists of pydantic objects.

Source code in qcio/utils.py
35
36
37
38
39
def json_dumps(obj: Union[BaseModel, list[BaseModel]]) -> str:
    """Serialization helper for lists of pydantic objects."""
    if isinstance(obj, list):
        return json.dumps([o.model_dump() for o in obj])
    return obj.model_dump_json()

qcio.rmsd

rmsd(struct1: Structure, struct2: Structure, best: bool = True, numthreads: int = 1, use_hueckel: bool = True, use_vdw: bool = False, cov_factor: float = 1.3) -> float

Calculate the root mean square deviation between two structures in Angstrom.

Parameters:

Name Type Description Default
struct1 Structure

The first structure.

required
struct2 Structure

The second structure.

required
best bool

Whether to consider structure symmetries and align the structures before calculating the RMSD, including atom renumbering. This relies on the RDKit DetermineConnectivity and GetBestRMS functions. If False, the RMSD is calculated without alignment or atom renumbering, i.e., naively assuming the atom indices are already correctly indexed and possibly aligned.

True
numthreads int

The number of threads to use for the RMSD calculation. Applies only to the alignment step if best=True.

1
use_hueckel bool

Whether to use Hueckel method when determining connectivity. Applies only to best=True.

True
use_vdw bool

Whether to use Van der Waals radii when determining connectivity. Applies only to best=True.

False
cov_factor float

The scaling factor for the covalent radii when determining connectivity. Applies only to best=True.

1.3

Returns:

Type Description
float

The RMSD between the two structures in Angstroms.

Source code in qcio/models/utils.py
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
def rmsd(
    struct1: "Structure",
    struct2: "Structure",
    best: bool = True,
    numthreads: int = 1,
    use_hueckel: bool = True,
    use_vdw: bool = False,
    cov_factor: float = 1.3,
) -> float:
    """
    Calculate the root mean square deviation between two structures in Angstrom.

    Args:
        struct1: The first structure.
        struct2: The second structure.
        best: Whether to consider structure symmetries and align the structures before
            calculating the RMSD, including atom renumbering. This relies on the RDKit
            `DetermineConnectivity` and `GetBestRMS` functions. If False, the RMSD is
            calculated without alignment or atom renumbering, i.e., naively assuming the
            atom indices are already correctly indexed and possibly aligned.
        numthreads: The number of threads to use for the RMSD calculation. Applies only
            to the alignment step if `best=True`.
        use_hueckel: Whether to use Hueckel method when determining connectivity.
            Applies only to `best=True`.
        use_vdw: Whether to use Van der Waals radii when determining connectivity.
            Applies only to `best=True`.
        cov_factor: The scaling factor for the covalent radii when determining
            connectivity. Applies only to `best=True`.

    Returns:
        The RMSD between the two structures in Angstroms.
    """
    _assert_module_installed("rdkit")
    from rdkit.Chem import rdMolAlign

    # Create RDKit molecules
    mol1 = _rdkit_mol_from_structure(struct1)
    mol2 = _rdkit_mol_from_structure(struct2)

    # Compute RMSD
    if best:
        # Determine connectivity
        _rdkit_determine_connectivity(
            mol1,
            charge=struct1.charge,
            use_hueckel=use_hueckel,
            use_vdw=use_vdw,
            cov_factor=cov_factor,
        )

        _rdkit_determine_connectivity(
            mol2,
            charge=struct2.charge,
            use_hueckel=use_hueckel,
            use_vdw=use_vdw,
            cov_factor=cov_factor,
        )
        # Take symmetry into account, align the two molecules, compute RMSD
        try:
            rmsd = rdMolAlign.GetBestRMS(mol2, mol1, numThreads=numthreads)
        except RuntimeError as e:  # Possible failure to make substructure match
            try:  # Swap the order of the molecules and try again.
                rmsd = rdMolAlign.GetBestRMS(mol1, mol2, numThreads=numthreads)
            except RuntimeError:  # If it fails again, raise the original error
                raise e

    else:  # Do not take symmetry into account. Structs aligned by atom index.
        rmsd, _ = rdMolAlign.GetAlignmentTransform(mol2, mol1)

    return rmsd

qcio.align

align(struct: Structure, refstruct: Structure, reorder_atoms: bool = True, use_hueckel: bool = True, use_vdw: bool = False, cov_factor: float = 1.3) -> tuple[Structure, float]

Return a new structure that is optimally aligned to the reference structure.

Parameters:

Name Type Description Default
struct Structure

The structure to align.

required
refstruct Structure

The reference structure.

required
reorder_atoms bool

Reorder the atoms to match the reference structure. If False, the atoms will be aligned without changing their order.

True
use_hueckel bool

Whether to use Hueckel method when determining connectivity. Applies only to best=True.

True
use_vdw bool

Whether to use Van der Waals radii when determining connectivity. Applies only to best=True.

False
cov_factor float

The scaling factor for the covalent radii when determining connectivity. Applies only to best=True.

1.3

Returns:

Type Description
tuple[Structure, float]

Tuple of the aligned structure and the RMSD in Angstroms.

Source code in qcio/utils.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def align(
    struct: Structure,
    refstruct: Structure,
    reorder_atoms: bool = True,
    use_hueckel: bool = True,
    use_vdw: bool = False,
    cov_factor: float = 1.3,
) -> tuple[Structure, float]:
    """Return a new structure that is optimally aligned to the reference structure.

    Args:
        struct: The structure to align.
        refstruct: The reference structure.
        reorder_atoms: Reorder the atoms to match the reference structure. If False,
            the atoms will be aligned without changing their order.
        use_hueckel: Whether to use Hueckel method when determining connectivity.
            Applies only to `best=True`.
        use_vdw: Whether to use Van der Waals radii when determining connectivity.
            Applies only to `best=True`.
        cov_factor: The scaling factor for the covalent radii when determining
            connectivity. Applies only to `best=True`.

    Returns:
        Tuple of the aligned structure and the RMSD in Angstroms.
    """
    _assert_module_installed("rdkit")
    from rdkit.Chem import rdMolAlign

    # Create RDKit molecules
    mol = _rdkit_mol_from_structure(struct)
    refmol = _rdkit_mol_from_structure(refstruct)

    # Determine connectivity
    _rdkit_determine_connectivity(
        mol,
        charge=struct.charge,
        use_hueckel=use_hueckel,
        use_vdw=use_vdw,
        cov_factor=cov_factor,
    )
    _rdkit_determine_connectivity(
        refmol,
        charge=refstruct.charge,
        use_hueckel=use_hueckel,
        use_vdw=use_vdw,
        cov_factor=cov_factor,
    )

    # Compute RMSD and align mol to refmol
    if reorder_atoms:
        rmsd_val, trnsfm_matrix, atm_map = rdMolAlign.GetBestAlignmentTransform(mol, refmol)  # type: ignore # noqa: E501
    else:
        rmsd_val, trnsfm_matrix = rdMolAlign.GetAlignmentTransform(mol, refmol)

    # Convert to homogeneous coordinates in Angstroms
    coords_homogeneous = np.hstack(
        [struct.geometry_angstrom, np.ones((struct.geometry.shape[0], 1))]
    )

    # Apply the transformation matrix
    transformed_coords_homogeneous = coords_homogeneous @ trnsfm_matrix.T

    # Extract the transformed 3D coordinates and convert to Bohr
    transformed_coords = transformed_coords_homogeneous[:, :3] * ANGSTROM_TO_BOHR

    # Reorder the atoms to match the reference structure
    if reorder_atoms:
        if Counter(struct.symbols) != Counter(refstruct.symbols):
            raise ValueError(
                "Structures must have the same number and type of atoms for "
                "`reorder_atoms=True` at this time. Pass "
                "`reorder_atoms=False` to align structures with different atom "
                "counts."
            )
        symbols = refstruct.symbols
        geometry = np.zeros((len(atm_map), 3))

        for probe_idx, ref_idx in atm_map:
            geometry[ref_idx] = transformed_coords[probe_idx]  # works

    # Otherwise, keep the original atom order
    else:
        symbols = struct.symbols
        geometry = transformed_coords

    return (
        Structure(
            symbols=symbols,
            geometry=geometry,
            charge=struct.charge,
            multiplicity=struct.multiplicity,
            connectivity=struct.connectivity,
            identifiers=struct.identifiers,
        ),
        rmsd_val,
    )