Pipeline Functions

pipes

run_ts_per_lig

run_ts_per_lig(ligand_smiles_df: DataFrame, ts_guess_xyz: str, *, n_confs: int | None = None, n_cores: int = 4, mem_gb: int = 20, debug: bool = False, top_n: int = 10, out_dir: str | None = None, output_parquet: str | None = None, save_output_dir: bool = True, DFT: bool = False)

Run the TS workflow for each ligand in a ligand table.

The function expands each ligand into TS structures using the transition-state guess geometry, generates conformers, runs the XTB pre-screening steps followed by a single-point ORCA calculation (`DFT-pre-SP`), and optionally performs the remaining ORCA DFT refinement stages.

Parameters:

Name	Type	Description	Default
`ligand_smiles_df`	`DataFrame`	Input table containing at least a `smiles` column with ligand SMILES strings.	required
`ts_guess_xyz`	`str`	Path to the XYZ file containing the transition-state guess geometry. The TS type is inferred from this file.	required
`n_confs`	`int or None`	Number of conformers to generate per TS structure. If `None`, the embedder default is used.	`None`
`n_cores`	`int`	Number of CPU cores to use for downstream calculations.	`4`
`mem_gb`	`int`	Memory limit in gigabytes passed to `frust.stepper.Stepper`.	`20`
`debug`	`bool`	If `True`, disables conformer optimization and keeps the workflow lighter for debugging.	`False`
`top_n`	`int`	Number of lowest-energy structures retained after the XTB screening stage.	`10`
`out_dir`	`str or None`	Base output directory for calculation artifacts.	`None`
`output_parquet`	`str or None`	If provided, write the resulting dataframe to this Parquet file.	`None`
`save_output_dir`	`bool`	Whether to keep the output directory structure created by the stepper.	`True`
`DFT`	`bool`	If `True`, continue into the DFT refinement stages (`DFT-pre-Opt`, `DFT`, `DFT-SP`). If `False`, return after the XTB screening and the `DFT-pre-SP` single-point step, which is always run.	`False`

Returns:

Type	Description
`DataFrame`	DataFrame containing the screened or DFT-refined TS candidates.

Source code in frust/pipes.py

def run_ts_per_lig(
    ligand_smiles_df: pd.DataFrame,
    ts_guess_xyz: str,
    *,
    n_confs: int | None = None,
    n_cores: int = 4,
    mem_gb: int = 20,
    debug: bool = False,
    top_n: int = 10,
    out_dir: str | None = None,
    output_parquet: str | None = None,
    save_output_dir: bool = True,
    DFT: bool = False,
) -> pd.DataFrame:
    """Run the TS workflow for each ligand in a ligand table.

    The function builds TS structures from the ligand table and the
    transition-state guess geometry (``create_ts_per_rpos``), embeds
    conformers (``embed_ts``), and runs three XTB steps (``xtb_preopt``,
    ``xtb_sp``, ``xtb_opt``) followed by a single-point ORCA calculation
    (``DFT-pre-SP``). That ORCA single point is executed regardless of the
    ``DFT`` flag. When ``DFT`` is ``True`` the workflow continues with the
    ``DFT-pre-Opt``, ``DFT`` and ``DFT-SP`` ORCA steps.

    Parameters
    ----------
    ligand_smiles_df : pandas.DataFrame
        Input table containing at least a ``smiles`` column with ligand
        SMILES strings.
    ts_guess_xyz : str
        Path to the XYZ file containing the transition-state guess geometry.
        The TS type is read from this file with ``read_ts_type_from_xyz``.
    n_confs : int or None, optional
        Number of conformers to generate per TS structure. The value is
        forwarded unchanged to ``embed_ts``, so ``None`` leaves the choice to
        the embedder.
    n_cores : int, optional
        Number of CPU cores, passed to ``Stepper`` as ``n_cores``.
    mem_gb : int, optional
        Memory limit in gigabytes, passed to ``Stepper`` as ``memory_gb``.
    debug : bool, optional
        If ``True``, conformers are embedded with ``optimize=False`` and
        ``debug=True`` is passed to ``Stepper``.
    top_n : int, optional
        Passed as ``lowest`` to the ``xtb_opt`` step; presumably the number
        of lowest-energy structures kept by that step (confirm against
        ``Stepper.xtb``).
    out_dir : str or None, optional
        Base output directory, passed to ``Stepper`` as ``output_base``.
    output_parquet : str or None, optional
        If truthy, the resulting dataframe is written to this Parquet file
        before returning.
    save_output_dir : bool, optional
        Passed to ``Stepper`` as ``save_output_dir``.
    DFT : bool, optional
        If ``True``, continue into the ``DFT-pre-Opt``, ``DFT`` and
        ``DFT-SP`` ORCA steps. If ``False``, return after the XTB steps and
        the ``DFT-pre-SP`` single point.

    Returns
    -------
    pandas.DataFrame
        DataFrame produced by the last executed step.

    """
    ts_structs = create_ts_per_rpos(
        ligand_smiles_df,
        ts_guess_xyz,
        return_format="dict",
    )
    ts_type = read_ts_type_from_xyz(ts_guess_xyz)

    embedded = embed_ts(
        ts_structs,
        ts_type=ts_type,
        n_confs=n_confs,
        optimize=not debug,
    )

    step = Stepper(
        step_type=ts_type,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_output_dir=save_output_dir,
    )

    # XTB screening: GFN-FF pre-optimisation, GFN2 single point, then a GFN2
    # optimisation that is handed ``top_n`` as its ``lowest`` argument.
    df = step.build_initial_df(embedded)
    df = step.xtb(
        df,
        name="xtb_preopt",
        options={"gfnff": None, "opt": None},
        constraint=True,
    )
    df = step.xtb(df, name="xtb_sp", options={"gfn": 2})
    df = step.xtb(
        df,
        name="xtb_opt",
        options={"gfn": 2, "opt": None},
        constraint=True,
        lowest=top_n,
    )

    functional = "wB97X-D3"  # alternatives: wB97X-D3, wB97M-V
    basisset = "6-31G**"  # alternatives: 6-31G**, def2-TZVPD

    # NOTE: this ORCA single point runs even when ``DFT`` is False.
    df = step.orca(
        df,
        name="DFT-pre-SP",
        options={
            functional: None,
            basisset: None,
            "TightSCF": None,
            "SP": None,
            "NoSym": None,
        },
        lowest=1,
    )

    if DFT:
        basisset_solv = "6-31+G**"  # alternatives: 6-31+G**, def2-TZVPD
        freq = "Freq"  # alternatives: NumFreq, Freq

        df = step.orca(
            df,
            name="DFT-pre-Opt",
            options={
                functional: None,
                basisset: None,
                "TightSCF": None,
                "SlowConv": None,
                "Opt": None,
                "NoSym": None,
            },
            constraint=True,
            lowest=1,
        )

        # INT3 gets a plain optimisation keyword; every other type gets the
        # transition-state optimisation keyword.
        opt = "Opt" if ts_type.upper() == "INT3" else "OptTS"

        df = step.orca(
            df,
            name="DFT",
            options={
                functional: None,
                basisset: None,
                "TightSCF": None,
                "SlowConv": None,
                opt: None,
                freq: None,
                "NoSym": None,
            },
            lowest=1,
        )

        # Final single point with the larger basis set and an SMD/chloroform
        # block appended to the ORCA input.
        df = step.orca(
            df,
            name="DFT-SP",
            options={
                functional: None,
                basisset_solv: None,
                "TightSCF": None,
                "SP": None,
                "NoSym": None,
            },
            xtra_inp_str="""%CPCM\nSMD TRUE\nSMDSOLVENT "chloroform"\nend""",
        )

    if output_parquet:
        df.to_parquet(output_parquet)
    return df