Skip to content

Pipeline Functions

pipes

run_ts_per_lig

run_ts_per_lig(ligand_smiles_df: DataFrame, ts_guess_xyz: str, *, n_confs: int | None = None, n_cores: int = 4, mem_gb: int = 20, debug: bool = False, top_n: int = 10, out_dir: str | None = None, output_parquet: str | None = None, save_output_dir: bool = True, DFT: bool = False)

Run the TS workflow for each ligand in a ligand table.

The function expands each ligand into TS structures using the transition-state guess geometry, generates conformers, runs the XTB pre-screening steps, and optionally performs ORCA DFT refinement.

Parameters:

Name Type Description Default
ligand_smiles_df DataFrame

Input table containing at least a smiles column with ligand SMILES strings.

required
ts_guess_xyz str

Path to the XYZ file containing the transition-state guess geometry. The TS type is inferred from this file.

required
n_confs int or None

Number of conformers to generate per TS structure. If None, the embedder default is used.

None
n_cores int

Number of CPU cores to use for downstream calculations.

4
mem_gb int

Memory limit in gigabytes passed to `frust.stepper.Stepper`.

20
debug bool

If True, disables conformer optimization and keeps the workflow lighter for debugging.

False
top_n int

Number of lowest-energy structures retained after the XTB screening stage.

10
out_dir str or None

Base output directory for calculation artifacts.

None
output_parquet str or None

If provided, write the resulting dataframe to this Parquet file.

None
save_output_dir bool

Whether to keep the output directory structure created by the stepper.

True
DFT bool

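If True, continue from the screening stage into the DFT refinement stages (constrained pre-optimization, optimization with frequencies, and a final single point with SMD chloroform solvation). If False, return after the pre-screening workflow; note that the ORCA single-point step named DFT-pre-SP is part of that pre-screening and runs in both cases.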

False

Returns:

Type Description
DataFrame

DataFrame containing the screened or DFT-refined TS candidates.

Source code in frust/pipes.py
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
def run_ts_per_lig(
    ligand_smiles_df: pd.DataFrame,
    ts_guess_xyz: str,
    *,
    n_confs: int | None = None,
    n_cores: int = 4,
    mem_gb: int = 20,
    debug: bool = False,
    top_n: int = 10,
    out_dir: str | None = None,
    output_parquet: str | None = None,
    save_output_dir: bool = True,
    DFT: bool = False,
):
    """Screen, and optionally DFT-refine, TS structures for every ligand.

    TS structures are built from the ligand table and the guess geometry
    with ``create_ts_per_rpos``, embedded as conformers with ``embed_ts``,
    and pushed through three XTB steps (``xtb_preopt``, ``xtb_sp``,
    ``xtb_opt``) followed by an ORCA single point (``DFT-pre-SP``). That
    ORCA single point is executed regardless of the ``DFT`` flag. When
    ``DFT`` is true, three further ORCA steps follow (``DFT-pre-Opt``,
    ``DFT``, ``DFT-SP``).

    Parameters
    ----------
    ligand_smiles_df : pandas.DataFrame
        Ligand table, forwarded unchanged to ``create_ts_per_rpos``.
        Expected to contain at least a ``smiles`` column with ligand SMILES
        strings.
    ts_guess_xyz : str
        Path to the XYZ file holding the transition-state guess geometry.
        The TS type is read from this file via ``read_ts_type_from_xyz``.
    n_confs : int or None, optional
        Number of conformers requested from ``embed_ts``. ``None`` is
        forwarded as-is, leaving the choice to the embedder.
    n_cores : int, optional
        Core count handed to :class:`frust.stepper.Stepper`.
    mem_gb : int, optional
        Memory in gigabytes handed to :class:`frust.stepper.Stepper` as
        ``memory_gb``.
    debug : bool, optional
        Forwarded to the stepper; additionally, ``embed_ts`` is called with
        ``optimize=not debug``, so conformer optimization is skipped when
        this is ``True``.
    top_n : int, optional
        Passed as ``lowest`` to the ``xtb_opt`` step, i.e. how many
        lowest-energy structures that step keeps.
    out_dir : str or None, optional
        Forwarded to the stepper as ``output_base``.
    output_parquet : str or None, optional
        If truthy, the final dataframe is written to this path with
        ``DataFrame.to_parquet`` before it is returned.
    save_output_dir : bool, optional
        Forwarded to the stepper under the same name.
    DFT : bool, optional
        If ``True``, run the ORCA refinement steps after ``DFT-pre-SP``.
        If ``False``, stop right after ``DFT-pre-SP``.

    Returns
    -------
    pandas.DataFrame
        The dataframe produced by the last step that was executed.

    """
    # The structures are built before the TS type is read; keep that order so
    # a bad input fails in the same place as before.
    structures = create_ts_per_rpos(
        ligand_smiles_df, ts_guess_xyz, return_format="dict"
    )
    ts_type = read_ts_type_from_xyz(ts_guess_xyz)

    conformers = embed_ts(
        structures,
        ts_type=ts_type,
        n_confs=n_confs,
        optimize=not debug,
    )

    stepper = Stepper(
        step_type=ts_type,
        n_cores=n_cores,
        memory_gb=mem_gb,
        debug=debug,
        output_base=out_dir,
        save_output_dir=save_output_dir,
    )

    # --- XTB stage: force-field pre-opt, GFN2 single point, GFN2 opt. ---
    results = stepper.build_initial_df(conformers)
    results = stepper.xtb(
        results,
        name="xtb_preopt",
        options={"gfnff": None, "opt": None},
        constraint=True,
    )
    results = stepper.xtb(results, name="xtb_sp", options={"gfn": 2})
    results = stepper.xtb(
        results,
        name="xtb_opt",
        options={"gfn": 2, "opt": None},
        constraint=True,
        lowest=top_n,
    )

    # ORCA level of theory. Alternatives noted by the author: wB97M-V for the
    # functional, def2-TZVPD for the basis set.
    functional = "wB97X-D3"
    basisset = "6-31G**"

    # ORCA keywords are passed as dict keys mapped to None; dict.fromkeys
    # builds exactly that mapping while preserving the keyword order.
    results = stepper.orca(
        results,
        name="DFT-pre-SP",
        options=dict.fromkeys(
            (functional, basisset, "TightSCF", "SP", "NoSym")
        ),
        lowest=1,
    )

    if DFT:
        # Alternatives noted by the author: def2-TZVPD for the solvated
        # basis set, NumFreq for the frequency keyword.
        basisset_solv = "6-31+G**"
        freq = "Freq"

        results = stepper.orca(
            results,
            name="DFT-pre-Opt",
            options=dict.fromkeys(
                (functional, basisset, "TightSCF", "SlowConv", "Opt", "NoSym")
            ),
            constraint=True,
            lowest=1,
        )

        # INT3 gets a plain optimization keyword; every other type gets the
        # transition-state optimization keyword.
        opt_keyword = "Opt" if ts_type.upper() == "INT3" else "OptTS"

        results = stepper.orca(
            results,
            name="DFT",
            options=dict.fromkeys(
                (
                    functional,
                    basisset,
                    "TightSCF",
                    "SlowConv",
                    opt_keyword,
                    freq,
                    "NoSym",
                )
            ),
            lowest=1,
        )

        # Final single point with the larger basis and an SMD chloroform
        # solvation block appended to the ORCA input.
        results = stepper.orca(
            results,
            name="DFT-SP",
            options=dict.fromkeys(
                (functional, basisset_solv, "TightSCF", "SP", "NoSym")
            ),
            xtra_inp_str="""%CPCM\nSMD TRUE\nSMDSOLVENT "chloroform"\nend""",
        )

    # Single exit point: persist if requested, then hand the frame back.
    if output_parquet:
        results.to_parquet(output_parquet)
    return results