Skip to content

Stepper

Stepper

Chain xTB and ORCA calculations over dataframe-based conformer tables.

Stepper is the low-level workflow layer used after structures have already been generated and embedded. It operates on pandas DataFrames, adds calculation outputs as new columns, and optionally manages run directories and saved engine files.

The class does not generate molecules itself. For higher-level workflows that create TS or ground-state structures from ligand inputs, prefer the pipeline functions in the `frust.pipes` module.

Source code in frust/stepper.py
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
class Stepper:
    """Chain xTB and ORCA calculations over dataframe-based conformer tables.

    `Stepper` is the low-level workflow layer used after structures have
    already been generated and embedded. It operates on pandas DataFrames,
    adds calculation outputs as new columns, and optionally manages run
    directories and saved engine files.

    The class does not generate molecules itself. For higher-level workflows
    that create TS or ground-state structures from ligand inputs, prefer the
    pipeline functions in :mod:`frust.pipes`.
    """

    def __init__(
        self,
        step_type: str | None = None,
        output_base: Path | str | None = None,
        job_id: int | None = None,
        debug: bool = False,
        dump_each_step: bool = False,
        n_cores: int = 8,
        memory_gb: float = 20.0,
        save_calc_dirs: bool = False,
        save_output_dir: bool = True,
        work_dir: str | None = None,
        **kwargs
    ):
        """Configure a Stepper instance.

        Parameters
        ----------
        step_type : str or None, optional
            Workflow label used for built-in constrained TS/INT calculations.
            Supported constrained values are ``TS1``, ``TS2``, ``TS3``,
            ``TS4``, and ``INT3``. If ``None``, unconstrained workflows still
            work, but ``constraint=True`` in :meth:`xtb` or :meth:`orca` will
            raise an error.
        output_base : Path or str or None, optional
            Base directory under which saved outputs are created. The final run
            directory is created lazily on first save request, not at
            construction time.
        job_id : int or None, optional
            Optional job identifier used in output-directory naming and logger
            naming. If omitted, :func:`frust.utils.slurm.detect_job_id` is used
            to infer one when possible.
        debug : bool, optional
            If ``True``, use mock xTB and ORCA backends instead of the real
            engines and enable debug logging.
        dump_each_step : bool, optional
            Reserved for dataframe dumping workflows. Stored on the instance
            but not currently consumed by :class:`Stepper` itself.
        n_cores : int, optional
            Default core count passed to engine calls unless overridden on an
            individual :meth:`xtb` or :meth:`orca` call.
        memory_gb : float, optional
            Default memory setting in gigabytes used for ORCA calls.
        save_calc_dirs : bool, optional
            If ``True``, preserve full calculation directories for each row
            when output saving is enabled.
        save_output_dir : bool, optional
            If ``True``, enable output-directory creation and file saving.
            When ``False``, calculations still run but no output tree is
            created unless a caller explicitly requests per-step saving.
        work_dir : str or None, optional
            Scratch or working directory passed to the engine wrappers. If
            omitted, ``$SCRATCH`` is used when available, otherwise the current
            directory.

        Raises
        ------
        TypeError
            If unexpected keyword arguments are supplied.
        """
        # Fail loudly on typos instead of silently ignoring unknown options.
        if kwargs:
            unknown = ", ".join(sorted(kwargs))
            raise TypeError(f"Unexpected Stepper keyword arguments: {unknown}")

        self.step_type      = step_type.upper() if step_type is not None else None
        self.debug          = debug
        job_id              = detect_job_id(job_id, True)
        self.job_id         = job_id
        self.logger         = _make_logger(_logger_name(self.step_type, self.job_id), debug)
        self.dump_each_step = dump_each_step
        self.n_cores        = n_cores
        self.memory_gb      = memory_gb
        self.save_calc_dirs = save_calc_dirs
        self.save_output    = save_output_dir
        self.output_base    = Path(output_base) if output_base is not None else None

        # Placeholder until `_ensure_base_dir` lazily materializes the real
        # run directory on the first save request.
        self.base_dir = self.output_base if self.output_base is not None else Path(".")

        # Select mock or real engine backends once, at construction time.
        if self.debug:
            from tooltoad.xtb import mock_xtb_calculate
            from tooltoad.orca import mock_orca_calculate
            from tooltoad.gxtb import mock_gxtb_calculate
            self.xtb_fn  = mock_xtb_calculate
            self.orca_fn = mock_orca_calculate
            self.gxtb_fn = mock_gxtb_calculate
        else:
            from tooltoad.xtb import xtb_calculate
            from tooltoad.orca import orca_calculate
            from tooltoad.gxtb import gxtb_calculate
            self.xtb_fn  = xtb_calculate
            self.orca_fn = orca_calculate
            self.gxtb_fn = gxtb_calculate

        # Resolution order for the scratch directory: explicit argument,
        # then $SCRATCH (ignored when unset or empty), then the CWD.
        self.work_dir = work_dir or os.environ.get("SCRATCH") or "."
        os.makedirs(self.work_dir, exist_ok=True)
        self.logger.info(f"Working dir: {self.work_dir}")

    def _ensure_base_dir(self) -> Path:
        """Create the output directory lazily on first use."""
        if self.save_output and not getattr(self, "_base_dir_prepared", False):
            self.base_dir = prepare_base_dir(self.output_base, self.job_id)
            self._base_dir_prepared = True
        return self.base_dir

    @staticmethod
    def _coord_columns(df: pd.DataFrame) -> list[str]:
        preferred = []
        if "coords_embedded" in df.columns:
            preferred.append("coords_embedded")
        preferred.extend(
            [
                c
                for c in df.columns
                if "coords" in c and c not in preferred and not c.endswith("-opt_coords")
            ]
        )
        preferred.extend([c for c in df.columns if c.endswith("-opt_coords") or c.endswith("-oc")])
        return preferred

    @classmethod
    def _last_coord_col(cls, df: pd.DataFrame) -> str:
        """Return the most specific available coordinate column."""
        cols = cls._coord_columns(df)
        if not cols:
            raise ValueError(
                "DataFrame must contain coordinates in 'coords_embedded' or a '*-oc'/'*-opt_coords' column"
            )
        return cols[-1]

    def _step_type_upper(self) -> str:
        """Normalize step_type for callers that only need TS dispatch."""
        return (self.step_type or "").upper()

    def _validate_constraint_request(self, df: pd.DataFrame) -> str:
        """Validate that constraint mode is fully specified."""
        step_type = self._step_type_upper()
        if not step_type:
            raise ValueError(
                "`constraint=True` requires `Stepper(step_type=...)` so the correct TS/INT constraints can be selected"
            )
        if step_type not in {"TS1", "TS2", "TS3", "TS4", "INT3"}:
            raise ValueError(
                f"`constraint=True` is only supported for TS1/TS2/TS3/TS4/INT3, got {self.step_type!r}"
            )
        if "constraint_atoms" not in df.columns:
            raise ValueError(
                "`constraint=True` requires a 'constraint_atoms' column in the input DataFrame"
            )
        return step_type

    @staticmethod
    def _validate_required_columns(
        df: pd.DataFrame,
        *,
        needs_grouping: bool = False,
        needs_hessian: bool = False,
    ) -> None:
        missing = [col for col in ("atoms",) if col not in df.columns]
        if missing:
            raise ValueError(
                f"Input DataFrame is missing required columns: {', '.join(missing)}"
            )

        Stepper._last_coord_col(df)

        if needs_grouping and "substrate_name" not in df.columns:
            raise ValueError(
                "`lowest=` requires a 'substrate_name' column so conformers can be grouped before filtering"
            )

        if needs_hessian and not any(col.endswith(".hess") for col in df.columns):
            raise ValueError(
                "`use_last_hess=True` requires a prior '*.hess' column in the input DataFrame"
            )

    @staticmethod
    def _row_name(row: Series, index: object) -> str:
        """Pick a stable human-readable row label from available metadata."""
        for key in ("custom_name", "substrate_name", "moltype"):
            value = row.get(key)
            if value is not None and not pd.isna(value):
                return str(value)
        return f"row_{index}"

    @staticmethod
    def _row_conf_id(row: Series, index: object) -> str:
        """Pick a stable conformer/run identifier when cid is unavailable."""
        value = row.get("cid")
        if value is not None and not pd.isna(value):
            return str(value)
        return str(index)

    @staticmethod
    def _constraint_atoms(row: Series, min_size: int = 6) -> list[int]:
        """Validate and return constraint atoms for TS/INT workflows."""
        atoms = row.get("constraint_atoms")
        if atoms is None:
            raise ValueError("Missing 'constraint_atoms' for a constrained row")
        if not isinstance(atoms, (list, tuple, np.ndarray)):
            if pd.isna(atoms):
                raise ValueError("Missing 'constraint_atoms' for a constrained row")
            raise ValueError("'constraint_atoms' must be a sequence of atom indices")
        atoms = list(atoms)
        if len(atoms) < min_size:
            raise ValueError(
                f"'constraint_atoms' must contain at least {min_size} entries for constrained workflows"
            )
        return atoms

    def build_initial_df(self, embedded_dict: dict) -> pd.DataFrame:
        """
        Turn a dictionary of embedded‐conformer data into a tidy DataFrame.

        The dictionary keys can be either:
          1) TS names of the form 'TS(..._rpos(N))' (with a 4‐ or 5‐tuple value)
          2) plain molecule names (without '_rpos(...)') and a 2‐tuple value

        For TS entries:
            key = 'TS(molname_rpos(N))'
            value = (Mol_with_H, cids, keep_idxs, smiles[, energies])

        For plain‐mol entries:
            key = 'some_name'
            value = (Mol, cids)

        In legacy-input cases, structure metadata is parsed from the key as a
        fallback. New generated records should carry metadata directly.

        Returns
        -------
        pd.DataFrame
            One row per conformer with columns
              - structure_id        (stable structure identifier)
              - custom_name         (the original display/file key)
              - substrate_name      (parsed substrate identity)
              - structure_type      (MOL, TS1, TS2, TS3, TS4, INT3)
              - molecule_role       (ts, ligand, int2, mol2, ...)
              - rpos                (int or None)
              - constraint_atoms    (list[int] or NA)
              - cid                 (conformer ID)
              - smiles              (str or None)
              - atoms               (list of atomic symbols)
              - coords_embedded     (list of (x,y,z) tuples)
              - energy_uff          (float or None)
        """
        rows: list[dict] = []

        for name, val in embedded_dict.items():
            if len(val) == 2:
                mol, cids = val
                keep_idxs = None
                smi = None
                energies: list[tuple[float,int]] = []
                metadata = None
            elif len(val) == 3 and isinstance(val[2], dict):
                mol, cids, metadata = val
                keep_idxs = None
                smi = metadata.get("smiles") or metadata.get("input_smiles")
                energies = []
            elif len(val) == 4:
                mol, cids, keep_idxs, smi = val
                energies = []
                metadata = None
            elif len(val) == 5:
                mol, cids, keep_idxs, smi, energies = val
                metadata = None
            else:
                raise ValueError(f"Bad tuple length for {name}")

            meta = metadata_from_mapping(metadata, fallback_name=name, smiles=smi)

            e_map: dict[int,float] = {cid_val: e_val for (e_val, cid_val) in energies} if energies else {}

            atom_syms = [atom.GetSymbol() for atom in mol.GetAtoms()]

            for cid in cids:
                conf = mol.GetConformer(cid)
                coords = [tuple(conf.GetAtomPosition(i)) for i in range(mol.GetNumAtoms())]

                rows.append({
                    "structure_id":     meta.structure_id,
                    "custom_name":      meta.custom_name,
                    "substrate_name":   meta.substrate_name,
                    "structure_type":   meta.structure_type,
                    "molecule_role":    meta.molecule_role,
                    "rpos":             meta.rpos,
                    "constraint_atoms": keep_idxs,
                    "cid":              cid,
                    "smiles":           meta.smiles or smi,
                    "input_smiles":     meta.input_smiles or smi,
                    "atoms":            atom_syms,
                    "coords_embedded":  coords,
                    "energy_uff":       e_map.get(cid, None)
                })

        return pd.DataFrame(rows)

    def _run_engine(
            self,
            df: pd.DataFrame,
            engine_fn: Callable[..., dict],
            prefix: str,
            build_inputs: Callable[[Series], dict],
            save_step: bool,
            lowest: int | None,
            save_files: list[str] | None = None,
            use_last_hess: bool = False,
        ) -> pd.DataFrame:
        """
        Generic runner for xTB or ORCA or any other engine.

        Parameters
        ----------
        df : pandas.DataFrame
            Input dataframe containing at least ``atoms`` and one coordinate
            column. Depending on the options used, additional columns may be
            required, such as ``substrate_name`` for ``lowest=`` filtering or a
            ``*.hess`` column for Hessian reuse.
        engine_fn : callable
            Backend calculation function such as the wrapped xTB or ORCA
            driver.
        prefix : str
            Prefix used to name output columns for this calculation stage.
        build_inputs : callable
            Row-wise callback that returns backend-specific keyword arguments.
            These inputs are merged with the generic engine inputs assembled by
            :class:`Stepper`.
        save_step : bool
            If ``True``, preserve the full saved directory for each processed
            row.
        lowest : int or None
            If set, keep only the lowest-energy rows per structure grouping before
            passing data to the engine.
        save_files : list of str or None, optional
            Specific files to save from the engine output when partial saving is
            requested.
        use_last_hess : bool, optional
            If ``True``, reuse the most recent ``*.hess`` column from the
            dataframe by passing it back into the engine as an input file.

        Returns
        -------
        pandas.DataFrame
            A copy of the input dataframe with new stage-prefixed output
            columns added.

        Notes
        -----
        - Rows with missing coordinates are skipped and receive
          ``*-NT=False`` and ``*-EE=NaN``.
        - Engine exceptions are caught, logged, and stored as a stage-specific
          ``*-error`` column instead of aborting the whole dataframe run.
        - Output directories are created lazily and only when saving is
          requested.
        """
        import pandas as _pd

        df_out    = normalize_dataframe(df)
        # Preserve workflow metadata already attached to the input frame.
        df_out.attrs.update(getattr(df, "attrs", {}))
        self._validate_required_columns(
            df_out,
            needs_grouping=lowest is not None,
            needs_hessian=use_last_hess,
        )
        coord_col = self._last_coord_col(df_out)
        all_row_data: list[dict[str, object]] = []

        if lowest is not None and lowest < 1:
            self.logger.warning(f"ignoring lowest={lowest!r}, must be ≥1")
            lowest = None

        if lowest:
            e_cols = energy_columns(df_out)
            if not e_cols:
                raise ValueError("cannot apply `lowest=` filter: no energy column found")
            last_energy = e_cols[-1]

            group_keys = infer_group_columns(df_out)
            if not group_keys:
                raise ValueError("cannot apply `lowest=` filter: no structure identity columns found")

            # Keep the `lowest` best rows (by the most recent energy column)
            # within each structure grouping.
            sort_keys = group_keys + [last_energy]
            df_out = (
                df_out
                .sort_values(sort_keys, na_position="last")
                .groupby(group_keys, dropna=False)
                .head(lowest)
            )

        for i, row in df_out.iterrows():
            coords = row[coord_col]
            # Skip any row with no coords; it still receives failure defaults
            # so the output frame stays rectangular.
            if coords is None or (_pd.isna(coords) if not isinstance(coords, (list, tuple, np.ndarray)) else False):
                all_row_data.append({
                    output_column(prefix, "normal_termination"): False,
                    output_column(prefix, "electronic_energy"):  np.nan
                })
                continue

            row_save_files = save_files
            save_full_calc = (self.save_calc_dirs and self.save_output) or save_step
            save_partial   = row_save_files is not None and not save_step

            # Output directories are created lazily, only when some form of
            # saving was actually requested for this row.
            if save_full_calc or save_partial:
                self._ensure_base_dir()
                row_name = self._row_name(row, i)
                row_conf_id = self._row_conf_id(row, i)
                dir_name = f"{row_name}_{row_conf_id}"
                engine_base = self.base_dir / prefix
                engine_base.mkdir(parents=True, exist_ok=True)
                created_dir = str(make_step_dir(engine_base, dir_name))
            else:
                created_dir = None

            if save_full_calc:
                calc_dir   = created_dir
                save_dir   = created_dir
                row_save_files = None  # full save supersedes a file whitelist
            elif save_partial:
                calc_dir   = None
                save_dir   = created_dir
            else:
                calc_dir   = None
                save_dir   = None

            if use_last_hess:
                # Reuse the most recent Hessian from an earlier step. Look on
                # df_out (the normalized frame the row came from), so the
                # column is guaranteed to exist on `row`.
                last_hess_col = [col for col in df_out.columns if col.endswith(".hess")][-1]
                last_hess = {"private_input.hess": row[last_hess_col]}

            base_args = {
                "atoms":  row["atoms"],
                "coords": [list(c) for c in coords],
                "n_cores": self.n_cores,
                "scr": self.work_dir,
                "data2file": last_hess if use_last_hess else None
            }

            if calc_dir is not None:
                base_args["calc_dir"] = calc_dir
            if save_dir is not None:
                base_args["save_dir"] = save_dir
            if row_save_files is not None:
                base_args["save_files"] = row_save_files

            inputs = {**base_args, **build_inputs(row)}

            # Filter inputs to only those accepted by engine_fn (avoids unexpected kwargs)
            try:
                allowed = set(signature(engine_fn).parameters.keys())
                filtered = {k: v for k, v in inputs.items() if k in allowed}
                if self.debug:
                    dropped = sorted(set(inputs) - set(filtered))
                    if dropped:
                        self.logger.debug(f"[{prefix}] dropped unsupported kwargs: {dropped}")
                inputs = filtered
            except Exception:
                # If introspection fails for any reason, fall back to original inputs
                pass

            # Run the engine; a per-row failure becomes an `*-error` column
            # rather than aborting the whole dataframe run.
            row_name = self._row_name(row, i)
            self.logger.info(f"[{prefix}] row {i} ({row_name})…")
            try:
                out = engine_fn(**inputs) or {}
            except Exception as e:
                self.logger.exception(f"[{prefix}] row {i} ({row_name}) failed: {e}")
                out = {
                    "normal_termination": False,
                    "error": f"{type(e).__name__}: {e}",
                }
            finally:
                if save_step and save_dir:
                    # Initialized up-front so the error path below can always
                    # reference it, even if locating the source dir fails.
                    src = None
                    try:
                        import shutil
                        from pathlib import Path

                        row_conf_id = self._row_conf_id(row, i)
                        dir_name = f"{row_name}_{row_conf_id}"

                        # The engine may have run in a scratch subtree; pick
                        # the deepest directory matching this row's name.
                        candidates = list(Path(self.work_dir).rglob(dir_name))
                        src = max(candidates, key=lambda p: len(p.parts)) if candidates else None

                        if src is None:
                            self.logger.warning(f"No calc dir named '{dir_name}' found under {self.work_dir}")
                        else:
                            save_path = Path(save_dir)
                            if src.resolve() != save_path.resolve():
                                for p in src.iterdir():
                                    dst = save_path / p.name
                                    if p.is_dir():
                                        shutil.copytree(p, dst, dirs_exist_ok=True)
                                    else:
                                        shutil.copy2(p, dst)
                    except Exception as e:
                        self.logger.error(f"Failed to stage files from '{src}' to '{save_dir}': {e}")

            # Enforce defaults so downstream column assembly can rely on them.
            out.setdefault("normal_termination", False)
            if out["normal_termination"]:
                out.setdefault("electronic_energy", np.nan)

            # Pick only the keys we know how to handle, plus file-like keys.
            allowed = {"normal_termination", "electronic_energy", "opt_coords", "vibs", "error"}
            def _filelike(k: str) -> bool:
                # Accept only filename-ish keys (no spaces), or our private_* stash
                if not isinstance(k, str) or " " in k:
                    return False
                if k.startswith("private_"):
                    return True
                return bool(re.search(
                    r"\.(hess|xyz|inp|out|log|gbw|molden|wfn|txt|json)$", k
                ))

            for k in out.keys():
                if _filelike(k):
                    allowed.add(k)

            # Gibbs energy is only meaningful alongside frequency output.
            if "vibs" in out and "gibbs_energy" in out:
                allowed.add("gibbs_energy")

            if self.debug:
                ignored = sorted([k for k in out.keys()
                                if '.' in k and k not in allowed])
                if ignored:
                    self.logger.debug(f"[{prefix}] ignoring non-file dot-keys: {ignored}")

            row_data: dict[str, object] = {}
            for key in allowed:
                if key in out:
                    col = output_column(prefix, key)
                    row_data[col] = out[key]

            all_row_data.append(row_data)

        # Assemble the per-row dicts back into stage-prefixed columns,
        # filling sensible defaults for rows that skipped or failed early.
        all_cols = sorted({c for rd in all_row_data for c in rd})
        column_arrays = {col: [] for col in all_cols}

        for rd in all_row_data:
            for col in all_cols:
                if col in rd:
                    column_arrays[col].append(rd[col])
                else:
                    # fill defaults for skipped rows
                    if col.endswith("-NT") or col.endswith("-normal_termination"):
                        column_arrays[col].append(False)
                    elif col.endswith("-EE") or col.endswith("-GE") or col.endswith("-electronic_energy") or col.endswith("-gibbs_energy"):
                        column_arrays[col].append(np.nan)
                    elif col.endswith("-error"):
                        column_arrays[col].append(None)
                    else:
                        column_arrays[col].append(None)

        for col, vals in column_arrays.items():
            df_out[col] = vals

        # Record this stage in the frame's workflow-step metadata.
        steps = dict(df_out.attrs.get("frust_steps", {}))
        steps[prefix] = {"engine": prefix.split("-", 1)[0], "columns": sorted(column_arrays)}
        df_out.attrs["frust_steps"] = steps

        return df_out

    def xtb(
        self,
        df: pd.DataFrame,
        name: str = "xtb",
        options: dict | None = None,
        detailed_inp_str: str = "",
        constraint: bool = False,
        save_step: bool = False,
        lowest: int | None = None,
        n_cores: int | None = None,
    ) -> pd.DataFrame:
        """Run xTB calculations over already-embedded conformers and optionally
        optimize and/or compute frequencies.

        Note: this method does not generate or embed structures itself; it
        expects a coordinate column produced by an earlier step.

        Args:
            df (pd.DataFrame): A DataFrame containing conformers. Required
                columns:
                - ``atoms``: list of atomic symbols
                - one coordinate column, typically ``coords_embedded`` or a
                  prior ``*-oc`` column
                - ``constraint_atoms`` when ``constraint=True``
                - ``substrate_name`` when ``lowest`` is used
            name (str): Base name for the xTB step, used to prefix result
                columns.
            options (dict, optional): xTB driver options, e.g. ``{'gfn': 2,
                'opt': None}``. Defaults to ``{'gfn': 0}``.
            detailed_inp_str (str, optional): Additional xTB input block
                (cards) to include. Defaults to ``""``.
            constraint (bool, optional): If ``True``, applies predefined
                distance and angle constraints for supported ``step_type``
                values. Requires ``Stepper(step_type=...)`` with one of
                ``TS1``, ``TS2``, ``TS3``, ``TS4``, or ``INT3``. Defaults to
                ``False``.
            save_step (bool, optional): If ``True``, saves calculation
                directories for each conformer. Defaults to ``False``.
            lowest (int or None, optional): If set, retains only the
                lowest-energy ``N`` conformers per structure group. Defaults
                to ``None``.
            n_cores (int or None, optional): If set, overrides the Stepper's
                default core count for this xTB call only. Defaults to
                ``None``.

        Returns:
            pd.DataFrame: The input DataFrame augmented with stage-prefixed xTB
            result columns, typically including:
                - ``{prefix}-NT``
                - ``{prefix}-EE``
                - ``{prefix}-oc`` for optimization jobs
                - ``{prefix}-vibs`` and ``{prefix}-GE`` for
                  frequency jobs
                - ``{prefix}-error`` when a row-level engine failure occurs
                - saved file-content columns when the backend returns them
        """
        # Default to GFN0 when no options are given. NOTE(review): an
        # explicitly passed empty dict is falsy and also falls back to the
        # default here — presumably intentional, but worth confirming.
        opts = dict(options) if options else {"gfn": 0}
        if constraint:
            self._validate_constraint_request(df)
        keys = list(opts)
        # Column prefix: either the caller's custom name verbatim, or an
        # auto-derived "xtb-<level>[-opt|-ohess]" tag. The first option key
        # is assumed to be the GFN level (as in {'gfn': 2, 'opt': None}).
        if name != "xtb":
            prefix = name
        else:
            level = keys[0]
            opt_flag = next((k for k in keys[1:] if k in ("opt", "ohess")), None)
            prefix = f"{name}-{level}" + (f"-{opt_flag}" if opt_flag else "")

        def build_xtb(row: pd.Series) -> dict:
            # Build the per-row engine input dict: options, optional core
            # override, and the detailed-input card (user text plus any
            # step-type-specific $constrain block).
            inp: dict[str, object] = {"options": opts}
            step_type = self._step_type_upper()

            # Per-call override: only affects xTB, not ORCA.
            if n_cores is not None:
                inp["n_cores"] = int(n_cores)

            # Only add the user-provided card if it is non-empty.
            base_str = detailed_inp_str.strip()
            if base_str:
                inp["detailed_input_str"] = base_str

            block = None

            # Each step type below maps named positions in the row's
            # ``constraint_atoms`` list to xTB constraint cards. xTB uses
            # 1-based atom indices, hence the +1 shift. Distances are in
            # Angstrom, angles in degrees; the target values are fixed
            # reference geometries for each TS/intermediate.
            if step_type == "TS1" and constraint:
                B, N, H, C = 0, 1, 4, 5
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[B]}, {atom[H]}, 2.07696
                distance: {atom[N]}, {atom[H]}, 1.5127 
                distance: {atom[H]}, {atom[C]}, 1.29095
                distance: {atom[B]}, {atom[C]}, 1.68461
                distance: {atom[B]}, {atom[N]}, 3.06223
                angle: {atom[N]}, {atom[H]}, {atom[C]}, 170.1342
                angle: {atom[H]}, {atom[C]}, {atom[B]}, 87.4870
                $end
                """).strip()

            if step_type == "TS2" and constraint:
                BCat10, N17, H40, H41, C46 = 0, 1, 4, 3, 5  # noqa: F841
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[BCat10]}, {atom[H41]}, 1.656
                distance: {atom[N17]}, {atom[H40]}, 1.961
                distance: {atom[BCat10]}, {atom[N17]}, 3.080
                angle: {atom[BCat10]}, {atom[H41]}, {atom[N17]}, 86.58
                $end
                """).strip()

            if step_type == "TS3" and constraint:
                BCat10, H11, BPin22, H21, C = 0, 2, 3, 4, 5  # noqa: F841
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[H21]}, {atom[BCat10]}, 1.376
                distance: {atom[H21]}, {atom[BPin22]}, 1.264
                distance: {atom[H21]}, {atom[C]}, 2.477
                distance: {atom[BCat10]}, {atom[C]}, 1.616
                distance: {atom[BPin22]}, {atom[C]}, 2.180
                distance: {atom[BPin22]}, {atom[BCat10]}, 2.007
                angle: {atom[BCat10]}, {atom[H21]}, {atom[BPin22]}, 98.89
                angle: {atom[BCat10]}, {atom[C]}, {atom[BPin22]}, 61.75
                $end
                """).strip()

            if step_type == "TS4" and constraint:
                BCat11, H12, H13, BPin37, C = 0, 2, 3, 4, 5  # noqa: F841
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[BCat11]}, {atom[BPin37]}, 2.219
                distance: {atom[BPin37]}, {atom[H13]}, 1.868
                distance: {atom[C]}, {atom[H13]}, 2.489
                distance: {atom[BCat11]}, {atom[H13]}, 1.216
                distance: {atom[BCat11]}, {atom[C]}, 1.946
                distance: {atom[BPin37]}, {atom[C]}, 1.585
                angle: {atom[BCat11]}, {atom[H13]}, {atom[BPin37]}, 89.48
                angle: {atom[BCat11]}, {atom[C]}, {atom[BPin37]}, 77.13
                $end
                """).strip()

            if step_type == "INT3" and constraint:
                BCat10, BPin42, H11, C = 0, 3, 4, 5
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[BCat10]}, {atom[H11]}, 1.279
                distance: {atom[BCat10]}, {atom[C]}, 1.688
                distance: {atom[BPin42]}, {atom[H11]}, 1.378
                distance: {atom[BPin42]}, {atom[C]}, 1.749
                angle: {atom[BCat10]}, {atom[H11]}, {atom[BPin42]}, 89.85
                angle: {atom[BCat10]}, {atom[C]}, {atom[BPin42]}, 66.22
                $end
                """).strip()

            # Append the constraint card after any user-provided card so
            # both survive in the final detailed input.
            if block:
                if "detailed_input_str" in inp:
                    inp["detailed_input_str"] += "\n\n" + block
                else:
                    inp["detailed_input_str"] = block

            return inp

        result = self._run_engine(
            df, self.xtb_fn, prefix, build_xtb, save_step, lowest
        )
        # Record step provenance on the dataframe so downstream tooling can
        # reconstruct which engine/options produced each column prefix.
        result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
            {"engine": "xtb", "options": opts}
        )
        return result

    def gxtb(
        self,
        df: pd.DataFrame,
        name: str = "gxtb",
        options: dict | None = None,
        detailed_inp_str: str = "",
        constraint: bool = False,
        save_step: bool = False,
        lowest: int | None = None,
        n_cores: int | None = None,
    ) -> pd.DataFrame:
        """Run g-xTB v2 calculations through Tooltoad's g-xTB calculator.

        Mirrors :meth:`xtb` (same required columns, same constraint handling,
        same stage-prefixed output columns) but dispatches to ``self.gxtb_fn``
        and defaults to empty options instead of ``{'gfn': 0}``.

        Args:
            df (pd.DataFrame): Conformer table; see :meth:`xtb` for the
                required columns.
            name (str): Base name for the step, used to prefix result columns.
            options (dict, optional): g-xTB driver options. Defaults to ``{}``.
            detailed_inp_str (str, optional): Additional input card text.
            constraint (bool, optional): Apply the predefined per-``step_type``
                constraints (``TS1``..``TS4``, ``INT3``). Defaults to ``False``.
            save_step (bool, optional): Save per-row calculation directories.
            lowest (int or None, optional): Keep only the lowest-energy ``N``
                conformers per structure group.
            n_cores (int or None, optional): Per-call core-count override.

        Returns:
            pd.DataFrame: Input DataFrame augmented with ``{prefix}-*`` result
            columns (see :meth:`xtb`).
        """
        # Unlike xtb(), there is no default method level here; an empty
        # options dict is passed through as-is.
        opts = dict(options) if options else {}
        if constraint:
            self._validate_constraint_request(df)
        keys = list(opts)
        # Column prefix: caller's custom name, or "gxtb[-opt|-ohess]".
        if name != "gxtb":
            prefix = name
        else:
            opt_flag = next((k for k in keys if k in ("opt", "ohess")), None)
            prefix = f"{name}" + (f"-{opt_flag}" if opt_flag else "")

        def build_gxtb(row: pd.Series) -> dict:
            # Per-row engine input: options, optional core override, and the
            # detailed-input card (user text plus any $constrain block).
            inp: dict[str, object] = {"options": opts}
            step_type = self._step_type_upper()

            if n_cores is not None:
                inp["n_cores"] = int(n_cores)

            base_str = detailed_inp_str.strip()
            if base_str:
                inp["detailed_input_str"] = base_str

            block = None

            # The constraint cards below duplicate those in build_xtb():
            # named positions into ``constraint_atoms`` shifted to 1-based
            # indices, with fixed reference distances (Angstrom) and angles
            # (degrees) per step type.
            if step_type == "TS1" and constraint:
                B, N, H, C = 0, 1, 4, 5
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[B]}, {atom[H]}, 2.07696
                distance: {atom[N]}, {atom[H]}, 1.5127
                distance: {atom[H]}, {atom[C]}, 1.29095
                distance: {atom[B]}, {atom[C]}, 1.68461
                distance: {atom[B]}, {atom[N]}, 3.06223
                angle: {atom[N]}, {atom[H]}, {atom[C]}, 170.1342
                angle: {atom[H]}, {atom[C]}, {atom[B]}, 87.4870
                $end
                """).strip()

            if step_type == "TS2" and constraint:
                BCat10, N17, H40, H41, C46 = 0, 1, 4, 3, 5  # noqa: F841
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[BCat10]}, {atom[H41]}, 1.656
                distance: {atom[N17]}, {atom[H40]}, 1.961
                distance: {atom[BCat10]}, {atom[N17]}, 3.080
                angle: {atom[BCat10]}, {atom[H41]}, {atom[N17]}, 86.58
                $end
                """).strip()

            if step_type == "TS3" and constraint:
                BCat10, H11, BPin22, H21, C = 0, 2, 3, 4, 5  # noqa: F841
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[H21]}, {atom[BCat10]}, 1.376
                distance: {atom[H21]}, {atom[BPin22]}, 1.264
                distance: {atom[H21]}, {atom[C]}, 2.477
                distance: {atom[BCat10]}, {atom[C]}, 1.616
                distance: {atom[BPin22]}, {atom[C]}, 2.180
                distance: {atom[BPin22]}, {atom[BCat10]}, 2.007
                angle: {atom[BCat10]}, {atom[H21]}, {atom[BPin22]}, 98.89
                angle: {atom[BCat10]}, {atom[C]}, {atom[BPin22]}, 61.75
                $end
                """).strip()

            if step_type == "TS4" and constraint:
                BCat11, H12, H13, BPin37, C = 0, 2, 3, 4, 5  # noqa: F841
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[BCat11]}, {atom[BPin37]}, 2.219
                distance: {atom[BPin37]}, {atom[H13]}, 1.868
                distance: {atom[C]}, {atom[H13]}, 2.489
                distance: {atom[BCat11]}, {atom[H13]}, 1.216
                distance: {atom[BCat11]}, {atom[C]}, 1.946
                distance: {atom[BPin37]}, {atom[C]}, 1.585
                angle: {atom[BCat11]}, {atom[H13]}, {atom[BPin37]}, 89.48
                angle: {atom[BCat11]}, {atom[C]}, {atom[BPin37]}, 77.13
                $end
                """).strip()

            if step_type == "INT3" and constraint:
                BCat10, BPin42, H11, C = 0, 3, 4, 5
                atom = [x + 1 for x in self._constraint_atoms(row)]
                block = textwrap.dedent(f"""
                $constrain
                force constant=50
                distance: {atom[BCat10]}, {atom[H11]}, 1.279
                distance: {atom[BCat10]}, {atom[C]}, 1.688
                distance: {atom[BPin42]}, {atom[H11]}, 1.378
                distance: {atom[BPin42]}, {atom[C]}, 1.749
                angle: {atom[BCat10]}, {atom[H11]}, {atom[BPin42]}, 89.85
                angle: {atom[BCat10]}, {atom[C]}, {atom[BPin42]}, 66.22
                $end
                """).strip()

            # Append the constraint card after any user-provided card.
            if block:
                if "detailed_input_str" in inp:
                    inp["detailed_input_str"] += "\n\n" + block
                else:
                    inp["detailed_input_str"] = block

            return inp

        result = self._run_engine(
            df, self.gxtb_fn, prefix, build_gxtb, save_step, lowest
        )
        # Record step provenance for downstream tooling.
        result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
            {"engine": "gxtb", "options": opts}
        )
        return result


    def orca(
        self,
        df: pd.DataFrame,
        name: str = "orca",
        options: dict | None = None,
        xtra_inp_str: str = "",
        constraint: bool = False,
        save_step: bool = False,
        save_files: list[str] | None = None,
        lowest: int | None = None,
        uma: str | None = None,
        read_files: list | None = None,
        use_last_hess: bool = False,
        n_cores: int | None = None,
        uma_server: bool = True,
        uma_device: str = "cpu",
        uma_cache_dir: str | None = None,
        uma_offline: bool = False,
        uma_server_cores: int | None = None,
        uma_memory_per_thread_mib: int = 500,
        uma_keep_logs: bool | str = "on_failure",
        uma_log_dir: str | None = None,
        gxtb: bool = False,
        gxtb_exe: str | None = None,
        gxtb_ext_params: str | None = None,
        **kw
    ) -> pd.DataFrame:
        """Run ORCA calculations and attach results to the DataFrame.

        Args:
            df (pd.DataFrame): DataFrame of conformers to compute. Must include:
                - ``atoms``: list of atomic symbols
                - one coordinate column, typically ``coords_embedded`` or a
                  prior ``*-oc`` column
                - ``constraint_atoms`` when ``constraint=True``
                - ``substrate_name`` when ``lowest`` is used
                - a prior ``*.hess`` column when ``use_last_hess=True``
            name (str): Base name for the ORCA step, used to prefix result
                columns.
            options (dict): ORCA input keywords, e.g.
                ``{'wB97X-D3': None, '6-31G**': None, 'OptTS': None,
                'Freq': None}``.
            xtra_inp_str (str, optional): Additional ORCA input block such as
                CPCM settings or custom geometry directives. Defaults to
                ``""``.
            constraint (bool, optional): If ``True``, applies predefined
                distance and angle constraints for supported ``step_type``
                values. Requires ``Stepper(step_type=...)`` with one of
                ``TS1``, ``TS2``, ``TS3``, ``TS4``, or ``INT3``. Defaults to
                ``False``.
            save_step (bool, optional): If ``True``, saves ORCA run
                directories for inspection. Defaults to ``False``.
            save_files (list[str] or None, optional): Specific ORCA output
                files to retain when partial saving is enabled. If omitted and
                instance-level output saving is enabled, defaults to
                ``["orca.out"]``.
            lowest (int or None, optional): If set, keeps only the
                lowest-energy conformer per structure group. Defaults to
                ``None``.
            uma (str or None, optional): Optional UMA task/profile identifier
                used to inject ORCA external optimization settings. Defaults to
                ``None``.
            read_files (list or None, optional): Deposit contents from files in
                the work directory directly into the dataframe, e.g.
                ``["input.hess"]``.
            use_last_hess (bool, optional): If ``True``, scan the dataframe for
                the most recent ``*.hess`` column and feed it back to ORCA as
                ``private_input.hess``. Defaults to ``False``.
            n_cores (int or None, optional): If set, overrides the Stepper's
                default core count for this ORCA call only. Defaults to
                ``None``.
            uma_server (bool, optional): If ``True``, runs UMA through OET's
                server/client mode. If ``False``, uses standalone ``oet_uma``.
                Defaults to ``True``.
            uma_device (str, optional): OET UMA device argument, typically
                ``"cpu"`` or ``"cuda"``. Defaults to ``"cpu"``.
            uma_cache_dir (str or None, optional): Optional FairChem cache
                directory passed to OET UMA. Defaults to ``None``.
            uma_offline (bool, optional): If ``True``, asks OET UMA to use
                offline mode. Defaults to ``False``.
            uma_server_cores (int or None, optional): Total core budget passed
                to ``oet_server --nthreads``. Defaults to this ORCA call's
                core count.
            uma_memory_per_thread_mib (int, optional): Memory budget passed to
                ``oet_server --memory-per-thread``. Defaults to ``500``.
            uma_keep_logs (bool or str, optional): Server-log retention policy:
                ``"on_failure"`` preserves logs only when the UMA-backed ORCA
                step fails, ``True``/``"always"`` keeps logs, and
                ``False``/``"never"`` removes them. Defaults to
                ``"on_failure"``.
            uma_log_dir (str or None, optional): Directory for preserved UMA
                server logs. If omitted, transient logs are written to a temp
                directory and preserved failures are copied to ``UMA-logs``.
            gxtb (bool, optional): If ``True``, runs ORCA with OET g-xTB v2 as
                an external method provider. ORCA still owns ``Opt``,
                ``OptTS``, ``NEB-TS``, and related run types. Defaults to
                ``False``.
            gxtb_exe (str or None, optional): Optional path to the g-xTB v2
                ``xtb`` executable. If omitted, ``GXTB_EXE`` is used.
            gxtb_ext_params (str or None, optional): Extra parameters appended
                to OET ``oet_gxtb`` through ORCA ``Ext_Params``.

        Returns:
            pd.DataFrame: The input DataFrame extended with stage-prefixed ORCA
            output columns, typically including:
                - ``{prefix}-NT``
                - ``{prefix}-EE``
                - ``{prefix}-oc`` for optimization jobs
                - ``{prefix}-vibs`` and ``{prefix}-GE`` for
                  frequency jobs
                - ``{prefix}-error`` when a row-level engine failure occurs
                - saved file-content columns when ORCA returns them
        """
        opts = dict(options or {})
        # Reject typos early: **kw exists only so misspelled keywords fail
        # loudly instead of being silently ignored.
        if kw:
            unknown = ", ".join(sorted(kw))
            raise TypeError(f"Unexpected orca() keyword arguments: {unknown}")
        if uma is not None and gxtb:
            raise ValueError("orca() cannot use both UMA and g-xTB external methods")
        if gxtb and "Freq" in opts:
            raise ValueError(
                "ORCA Freq is not compatible with g-xTB ExtOpt. "
                "Use NumFreq for finite-difference frequencies with external g-xTB gradients."
            )
        if gxtb and "calc_hess" in (xtra_inp_str or "").lower():
            raise ValueError(
                "ORCA %geom Calc_Hess is not compatible with g-xTB ExtOpt. "
                "Use OptTS with the approximate Hessian, and add NumFreq only when you need "
                "a post-optimization numerical frequency check."
            )
        # ExtOpt must appear in the keyword line for external-method runs;
        # prepend it so user-supplied keys keep their relative order.
        if gxtb and "ExtOpt" not in opts:
            opts = {"ExtOpt": None, **opts}
        if constraint:
            self._validate_constraint_request(df)
        if save_files is None and self.save_output:
            save_files = ["orca.out"]
        keys = list(opts)
        if len(keys) < 1:
            raise ValueError("`options` must include at least one ORCA method key")

        # Column prefix: caller's custom name, or an auto-derived
        # "orca-<func>[-<basis>[-OptTS|-Freq|-NumFreq|-NoSym]]" tag. The
        # first two option keys are assumed to be functional and basis.
        if name != "orca":
            prefix = name
        elif len(keys) == 1:
            prefix = f"{name}-{keys[0]}"
        else:
            func, basis = keys[0], keys[1]
            opt_flag = next((k for k in keys[2:] if k in ("OptTS", "Freq", "NumFreq", "NoSym")), None)
            prefix = f"{name}-{func}-{basis}" + (f"-{opt_flag}" if opt_flag else "")

        def build_orca(row: Series) -> dict:
            # Assemble the per-row ORCA input: keyword options plus a
            # cumulative extra-input string built from (in order) the
            # caller's block, Freq/Hessian directives, and the step-type
            # constraint block.
            step_type = self._step_type_upper()
            inp = {
                "options": opts,
                "xtra_inp_str": xtra_inp_str.strip(),
                "memory": self.memory_gb,
                "n_cores": int(n_cores) if n_cores is not None else self.n_cores,
                "read_files": read_files,
            }

            # Freq jobs request an analytic Hessian up front.
            # NOTE(review): with use_last_hess=True this also emits the
            # inhess Read block below — confirm ORCA resolves that
            # combination the way callers expect.
            if "Freq" in opts:
                block = textwrap.dedent("""
                    %geom
                      Calc_Hess true
                    end
                """).strip()
                inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

            # Reuse the previous step's Hessian; _run_engine stages the
            # file as "private_input.hess" when use_last_hess is forwarded.
            if use_last_hess:
                block = textwrap.dedent(
                    """
                    %geom
                      inhess Read
                      InHessName "private_input.hess"
                    end
                    """
                ).strip()
                inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

            # Step-type constraint blocks: ORCA %geom Constraints use
            # 0-based atom indices (unlike the xTB cards, which shift +1).
            if constraint and step_type == "TS1":
                atom = self._constraint_atoms(row)
                B, N, H, C = atom[0], atom[1], atom[4], atom[5]
                block = textwrap.dedent(f"""
                    %geom Constraints
                      {{B {B} {H} 2.07696 C}}
                      {{B {N} {H} 1.5127 C}}
                      {{B {H} {C} 1.29095 C}}
                      {{B {B} {C} 1.68461 C}}
                      {{B {B} {N} 3.06223 C}}
                      {{A {N} {H} {C} C}}
                      {{A {H} {C} {B} 87.4870 C}}
                    end
                    end
                """).strip() # {{A {N} {H} {C} 170.1342 C}}
                inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

            if constraint and step_type == "TS2":
                atom = self._constraint_atoms(row)
                BCat, N17, H40, H41 = atom[0], atom[1], atom[4], atom[3]
                block = textwrap.dedent(f"""
                    %geom Constraints
                      {{B {BCat} {H41} 1.656 C}}
                      {{B {N17} {H40} 1.961 C}}
                      {{B {BCat} {N17} 3.080 C}}
                      {{A {BCat} {H41} {N17} 86.58 C}}
                    end
                    end
                """).strip()
                inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

            if constraint and step_type == "TS3":
                atom = self._constraint_atoms(row)
                BCat, H11, BPin, H21, C = atom[0], atom[2], atom[3], atom[4], atom[5]  # noqa: F841
                block = textwrap.dedent(f"""
                    %geom Constraints
                      {{B {H21} {BCat} 1.376 C}}
                      {{B {H21} {BPin} 1.264 C}}
                      {{B {H21} {C} 2.477 C}}
                      {{B {BCat} {C} 1.616 C}}
                      {{B {BPin} {C} 2.180 C}}
                      {{B {BPin} {BCat} 2.007 C}}
                      {{A {BCat} {H21} {BPin} 98.89 C}}
                      {{A {BCat} {C} {BPin} 61.75 C}}
                    end
                    end
                """).strip()
                inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

            if constraint and step_type == "TS4":
                atom = self._constraint_atoms(row)
                BCat, H12, H13, BPin, C = atom[0], atom[2], atom[3], atom[4], atom[5]  # noqa: F841
                block = textwrap.dedent(f"""
                    %geom Constraints
                      {{B {BCat} {BPin} 2.219 C}}
                      {{B {BPin} {H13} 1.868 C}}
                      {{B {C} {H13} 2.489 C}}
                      {{B {BCat} {H13} 1.216 C}}
                      {{B {BCat} {C} 1.946 C}}
                      {{B {BPin} {C} 1.585 C}}
                      {{A {BCat} {H13} {BPin} 89.48 C}}
                      {{A {BCat} {C} {BPin} 77.13 C}}
                    end
                    end
                """).strip()
                inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

            if constraint and step_type == "INT3":
                atom = self._constraint_atoms(row)
                BCat, BPin, H11, C = atom[0], atom[3], atom[4], atom[5]
                block = textwrap.dedent(f"""
                    %geom Constraints
                      {{B {BCat} {H11} 1.279 C}}
                      {{B {BCat} {C} 1.688 C}}
                      {{B {BPin} {H11} 1.378 C}}
                      {{B {BPin} {C} 1.749 C}}
                      {{A {BCat} {H11} {BPin} 89.85 C}}
                      {{A {BCat} {C} {BPin} 66.22 C}}
                    end
                    end
                """).strip()
                inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

            return inp

        # Plain ORCA path: no external method provider.
        if uma is None and not gxtb:
            result = self._run_engine(df, self.orca_fn, prefix, build_orca, save_step, lowest, save_files, use_last_hess)
            result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
                {"engine": "orca", "options": opts}
            )
            return result

        # g-xTB path: append the ExtOpt client block to every row's input.
        if gxtb:
            from frust.utils.gxtb import gxtb_orca_block

            client_block = gxtb_orca_block(gxtb_exe=gxtb_exe, ext_params=gxtb_ext_params)

            def build_orca_gxtb(row: Series) -> dict:
                inp = build_orca(row)
                xin = inp.get("xtra_inp_str", "")
                inp["xtra_inp_str"] = (xin + "\n\n" + client_block).strip() if xin else client_block
                return inp

            result = self._run_engine(
                df,
                self.orca_fn,
                prefix,
                build_orca_gxtb,
                save_step,
                lowest,
                save_files,
                use_last_hess,
            )
            result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
                {
                    "engine": "orca",
                    "options": opts,
                    "gxtb": True,
                    "gxtb_exe": gxtb_exe,
                }
            )
            return result

        # UMA path.
        from frust.utils.uma import parse_uma_spec, uma_orca_block, uma_server as run_uma_server

        spec = parse_uma_spec(
            uma,
            device=uma_device,
            cache_dir=uma_cache_dir,
            offline=uma_offline,
        )

        def run_with_uma_block(client_block: str) -> pd.DataFrame:
            orig_build = build_orca

            def build_orca_uma(row: Series) -> dict:
                inp = orig_build(row)
                xin = inp.get("xtra_inp_str", "")
                inp["xtra_inp_str"] = (xin + "\n\n" + client_block).strip() if xin else client_block
                return inp

            # Bug fix: forward use_last_hess here as the plain and g-xTB
            # paths do. Previously it was dropped, so UMA runs with
            # use_last_hess=True emitted the inhess Read block without the
            # staged "private_input.hess" file.
            result = self._run_engine(
                df,
                self.orca_fn,
                prefix,
                build_orca_uma,
                save_step,
                lowest,
                save_files,
                use_last_hess,
            )
            result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
                {
                    "engine": "orca",
                    "options": opts,
                    "uma": uma,
                    "uma_task": spec.task,
                    "uma_model": spec.model,
                    "uma_server": uma_server,
                }
            )
            return result

        def uma_result_failed(result: pd.DataFrame) -> bool:
            # A UMA step "failed" when any row terminated abnormally or
            # recorded a row-level error.
            nt_col = output_column(prefix, "normal_termination")
            err_col = output_column(prefix, "error")
            if nt_col in result and result[nt_col].eq(False).any():
                return True
            if err_col in result and result[err_col].notna().any():
                return True
            return False

        if not uma_server:
            # Standalone oet_uma: no server lifecycle to manage.
            client_block = uma_orca_block(spec, server=False)
            return run_with_uma_block(client_block)

        effective_cores = int(n_cores) if n_cores is not None else self.n_cores
        server_cores = uma_server_cores if uma_server_cores is not None else effective_cores
        # Server mode: the OET UMA server lives for the duration of this
        # step; logs are preserved on failure per uma_keep_logs.
        with run_uma_server(
            log_dir=uma_log_dir,
            keep_logs=uma_keep_logs,
            use_gpu=uma_device == "cuda",
            server_cores=server_cores,
            memory_per_thread_mib=uma_memory_per_thread_mib,
        ) as server_handle:
            client_block = uma_orca_block(spec, server=True, bind=server_handle.bind)
            result = run_with_uma_block(client_block)
            if uma_keep_logs == "on_failure" and uma_result_failed(result):
                server_handle.preserve()
            return result

__init__

__init__(step_type: str | None = None, output_base: Path | str | None = None, job_id: int | None = None, debug: bool = False, dump_each_step: bool = False, n_cores: int = 8, memory_gb: float = 20.0, save_calc_dirs: bool = False, save_output_dir: bool = True, work_dir: str | None = None, **kwargs)

Configure a Stepper instance.

Parameters:

Name Type Description Default
step_type str or None

Workflow label used for built-in constrained TS/INT calculations. Supported constrained values are TS1, TS2, TS3, TS4, and INT3. If None, unconstrained workflows still work, but constraint=True in :meth:xtb or :meth:orca will raise an error.

None
output_base Path or str or None

Base directory under which saved outputs are created. The final run directory is created lazily on first save request, not at construction time.

None
job_id int or None

Optional job identifier used in output-directory naming and logger naming. If omitted, :func:frust.utils.slurm.detect_job_id is used to infer one when possible.

None
debug bool

If True, use mock xTB and ORCA backends instead of the real engines and enable debug logging.

False
dump_each_step bool

Reserved for dataframe dumping workflows. Stored on the instance but not currently consumed by :class:Stepper itself.

False
n_cores int

Default core count passed to engine calls unless overridden on an individual :meth:xtb or :meth:orca call.

8
memory_gb float

Default memory setting in gigabytes used for ORCA calls.

20.0
save_calc_dirs bool

If True, preserve full calculation directories for each row when output saving is enabled.

False
save_output_dir bool

If True, enable output-directory creation and file saving. When False, calculations still run but no output tree is created unless a caller explicitly requests per-step saving.

True
work_dir str or None

Scratch or working directory passed to the engine wrappers. If omitted, $SCRATCH is used when available, otherwise the current directory.

None

Raises:

Type Description
TypeError

If unexpected keyword arguments are supplied.

Source code in frust/stepper.py
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def __init__(
    self,
    step_type: str | None = None,
    output_base: Path | str | None = None,
    job_id: int | None = None,
    debug: bool = False,
    dump_each_step: bool = False,
    n_cores: int = 8,
    memory_gb: float = 20.0,
    save_calc_dirs: bool = False,
    save_output_dir: bool = True,
    work_dir: str | None = None,
    **kwargs
):
    """Configure a Stepper instance.

    Parameters
    ----------
    step_type : str or None, optional
        Workflow label used for built-in constrained TS/INT calculations.
        Supported constrained values are ``TS1``, ``TS2``, ``TS3``,
        ``TS4``, and ``INT3``. If ``None``, unconstrained workflows still
        work, but ``constraint=True`` in :meth:`xtb` or :meth:`orca` will
        raise an error.
    output_base : Path or str or None, optional
        Base directory under which saved outputs are created. The final run
        directory is created lazily on first save request, not at
        construction time.
    job_id : int or None, optional
        Optional job identifier used in output-directory naming and logger
        naming. If omitted, :func:`frust.utils.slurm.detect_job_id` is used
        to infer one when possible.
    debug : bool, optional
        If ``True``, use mock xTB and ORCA backends instead of the real
        engines and enable debug logging.
    dump_each_step : bool, optional
        Reserved for dataframe dumping workflows. Stored on the instance
        but not currently consumed by :class:`Stepper` itself.
    n_cores : int, optional
        Default core count passed to engine calls unless overridden on an
        individual :meth:`xtb` or :meth:`orca` call.
    memory_gb : float, optional
        Default memory setting in gigabytes used for ORCA calls.
    save_calc_dirs : bool, optional
        If ``True``, preserve full calculation directories for each row
        when output saving is enabled.
    save_output_dir : bool, optional
        If ``True``, enable output-directory creation and file saving.
        When ``False``, calculations still run but no output tree is
        created unless a caller explicitly requests per-step saving.
    work_dir : str or None, optional
        Scratch or working directory passed to the engine wrappers. If
        omitted, ``$SCRATCH`` is used when available, otherwise the current
        directory.

    Raises
    ------
    TypeError
        If unexpected keyword arguments are supplied.
    """
    # Reject typos/unknown kwargs loudly instead of silently ignoring them.
    if kwargs:
        unknown = ", ".join(sorted(kwargs))
        raise TypeError(f"Unexpected Stepper keyword arguments: {unknown}")

    self.step_type      = step_type.upper() if step_type is not None else None
    self.debug          = debug
    self.job_id         = detect_job_id(job_id, True)
    self.logger         = _make_logger(_logger_name(self.step_type, self.job_id), debug)
    self.dump_each_step = dump_each_step
    self.n_cores        = n_cores
    self.memory_gb      = memory_gb
    self.save_calc_dirs = save_calc_dirs
    self.save_output    = save_output_dir
    self.output_base    = Path(output_base) if output_base is not None else None

    # Fall back to the current directory so downstream path joins stay valid
    # even when no output_base was supplied.
    self.base_dir = self.output_base if self.output_base is not None else Path(".")

    # Select real or mock engine backends once, at construction time.
    if self.debug:
        from tooltoad.xtb import mock_xtb_calculate
        from tooltoad.orca import mock_orca_calculate
        from tooltoad.gxtb import mock_gxtb_calculate
        self.xtb_fn  = mock_xtb_calculate
        self.orca_fn = mock_orca_calculate
        self.gxtb_fn = mock_gxtb_calculate
    else:
        from tooltoad.xtb import xtb_calculate
        from tooltoad.orca import orca_calculate
        from tooltoad.gxtb import gxtb_calculate
        self.xtb_fn  = xtb_calculate
        self.orca_fn = orca_calculate
        self.gxtb_fn = gxtb_calculate

    # Working-directory preference: explicit argument, then $SCRATCH, then
    # the current directory. os.environ.get replaces the previous broad
    # try/except around a direct environment lookup, and the makedirs/log
    # lines are no longer duplicated across both branches.
    self.work_dir = work_dir if work_dir else os.environ.get("SCRATCH", ".")
    os.makedirs(self.work_dir, exist_ok=True)
    self.logger.info(f"Working dir: {self.work_dir}")

build_initial_df

build_initial_df(embedded_dict: dict) -> pd.DataFrame

Turn a dictionary of embedded-conformer data into a tidy DataFrame.

The dictionary keys can be either: 1) TS names of the form 'TS(..._rpos(N))', paired with a 4- or 5-tuple value, or 2) plain molecule names (without '_rpos(...)'), paired with a 2-tuple value.

For TS entries: key = 'TS(molname_rpos(N))' value = (Mol_with_H, cids, keep_idxs, smiles[, energies])

For plain‐mol entries: key = 'some_name' value = (Mol, cids)

In legacy-input cases, structure metadata is parsed from the key as a fallback. New generated records should carry metadata directly.

Returns:

Type Description
DataFrame

One row per conformer with columns - structure_id (stable structure identifier) - custom_name (the original display/file key) - substrate_name (parsed substrate identity) - structure_type (MOL, TS1, TS2, TS3, TS4, INT3) - molecule_role (ts, ligand, int2, mol2, ...) - rpos (int or None) - constraint_atoms (list[int] or NA) - cid (conformer ID) - smiles (str or None) - atoms (list of atomic symbols) - coords_embedded (list of (x,y,z) tuples) - energy_uff (float or None)

Source code in frust/stepper.py
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
def build_initial_df(self, embedded_dict: dict) -> pd.DataFrame:
    """Flatten a mapping of embedded conformers into one row per conformer.

    Accepted value shapes per key:
      * ``(Mol, cids)`` -- plain molecule entry.
      * ``(Mol, cids, metadata_dict)`` -- molecule plus a metadata mapping.
      * ``(Mol_with_H, cids, keep_idxs, smiles)`` -- legacy TS entry of the
        form ``'TS(molname_rpos(N))'``.
      * ``(Mol_with_H, cids, keep_idxs, smiles, energies)`` -- legacy TS
        entry carrying ``(energy, cid)`` pairs.

    For legacy inputs, structure metadata is parsed from the key as a
    fallback; newly generated records should carry metadata directly.

    Returns
    -------
    pd.DataFrame
        One row per conformer with identity columns (``structure_id``,
        ``custom_name``, ``substrate_name``, ``structure_type``,
        ``molecule_role``, ``rpos``) and per-conformer columns
        (``constraint_atoms``, ``cid``, ``smiles``, ``input_smiles``,
        ``atoms``, ``coords_embedded``, ``energy_uff``).
    """
    records: list[dict] = []

    for name, payload in embedded_dict.items():
        # Defaults for the optional trailing parts of each payload tuple.
        keep_idxs = None
        smi = None
        energies: list[tuple[float, int]] = []
        metadata = None

        size = len(payload)
        if size == 2:
            mol, cids = payload
        elif size == 3 and isinstance(payload[2], dict):
            mol, cids, metadata = payload
            smi = metadata.get("smiles") or metadata.get("input_smiles")
        elif size == 4:
            mol, cids, keep_idxs, smi = payload
        elif size == 5:
            mol, cids, keep_idxs, smi, energies = payload
        else:
            raise ValueError(f"Bad tuple length for {name}")

        meta = metadata_from_mapping(metadata, fallback_name=name, smiles=smi)

        # Conformer id -> UFF energy, for constant-time lookup per cid.
        energy_by_cid: dict[int, float] = {}
        for e_val, cid_val in energies:
            energy_by_cid[cid_val] = e_val

        symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
        n_atoms = mol.GetNumAtoms()

        for cid in cids:
            conformer = mol.GetConformer(cid)
            xyz = [tuple(conformer.GetAtomPosition(i)) for i in range(n_atoms)]

            records.append({
                "structure_id":     meta.structure_id,
                "custom_name":      meta.custom_name,
                "substrate_name":   meta.substrate_name,
                "structure_type":   meta.structure_type,
                "molecule_role":    meta.molecule_role,
                "rpos":             meta.rpos,
                "constraint_atoms": keep_idxs,
                "cid":              cid,
                "smiles":           meta.smiles or smi,
                "input_smiles":     meta.input_smiles or smi,
                "atoms":            symbols,
                "coords_embedded":  xyz,
                "energy_uff":       energy_by_cid.get(cid),
            })

    return pd.DataFrame(records)

xtb

xtb(df: DataFrame, name: str = 'xtb', options: dict | None = None, detailed_inp_str: str = '', constraint: bool = False, save_step: bool = False, lowest: int | None = None, n_cores: int | None = None) -> pd.DataFrame

Embed multiple conformers with xTB and optionally optimize and/or compute frequencies.

Args: df (pd.DataFrame): A DataFrame containing conformers. Required columns: - atoms: list of atomic symbols - one coordinate column, typically coords_embedded or a prior *-oc column - constraint_atoms when constraint=True - substrate_name when lowest is used name (str): Base name for the xTB step, used to prefix result columns. options (dict, optional): xTB driver options, e.g. {'gfn': 2, 'opt': None}. Defaults to {'gfn': 0}. detailed_inp_str (str, optional): Additional xTB input block (cards) to include. Defaults to "". constraint (bool, optional): If True, applies predefined distance and angle constraints for supported step_type values. Requires Stepper(step_type=...) with one of TS1, TS2, TS3, TS4, or INT3. Defaults to False. save_step (bool, optional): If True, saves calculation directories for each conformer. Defaults to False. lowest (int or None, optional): If set, retains only the lowest-energy N conformers per structure group. Defaults to None. n_cores (int or None, optional): If set, overrides the Stepper’s default core count for this xTB call only. Defaults to None.

Returns: pd.DataFrame: The input DataFrame augmented with stage-prefixed xTB result columns, typically including: - {prefix}-NT - {prefix}-EE - {prefix}-oc for optimization jobs - {prefix}-vibs and {prefix}-GE for frequency jobs - {prefix}-error when a row-level engine failure occurs - saved file-content columns when the backend returns them

Source code in frust/stepper.py
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
def xtb(
    self,
    df: pd.DataFrame,
    name: str = "xtb",
    options: dict | None = None,
    detailed_inp_str: str = "",
    constraint: bool = False,
    save_step: bool = False,
    lowest: int | None = None,
    n_cores: int | None = None,
) -> pd.DataFrame:
    """Embed multiple conformers with xTB and optionally optimize and/or
    compute frequencies.

    Args:
        df (pd.DataFrame): A DataFrame containing conformers. Required
            columns:
            - ``atoms``: list of atomic symbols
            - one coordinate column, typically ``coords_embedded`` or a
              prior ``*-oc`` column
            - ``constraint_atoms`` when ``constraint=True``
            - ``substrate_name`` when ``lowest`` is used
        name (str): Base name for the xTB step, used to prefix result
            columns.
        options (dict, optional): xTB driver options, e.g. ``{'gfn': 2,
            'opt': None}``. Defaults to ``{'gfn': 0}``.
        detailed_inp_str (str, optional): Additional xTB input block
            (cards) to include. Defaults to ``""``.
        constraint (bool, optional): If ``True``, applies predefined
            distance and angle constraints for supported ``step_type``
            values. Requires ``Stepper(step_type=...)`` with one of
            ``TS1``, ``TS2``, ``TS3``, ``TS4``, or ``INT3``. Defaults to
            ``False``.
        save_step (bool, optional): If ``True``, saves calculation
            directories for each conformer. Defaults to ``False``.
        lowest (int or None, optional): If set, retains only the
            lowest-energy ``N`` conformers per structure group. Defaults
            to ``None``.
        n_cores (int or None, optional): If set, overrides the Stepper’s
            default core count for this xTB call only. Defaults to
            ``None``.

    Returns:
        pd.DataFrame: The input DataFrame augmented with stage-prefixed xTB
        result columns, typically including:
            - ``{prefix}-NT``
            - ``{prefix}-EE``
            - ``{prefix}-oc`` for optimization jobs
            - ``{prefix}-vibs`` and ``{prefix}-GE`` for
              frequency jobs
            - ``{prefix}-error`` when a row-level engine failure occurs
            - saved file-content columns when the backend returns them
    """
    # Defensive copy of caller options; fall back to GFN0 when none given.
    opts = dict(options) if options else {"gfn": 0}
    if constraint:
        self._validate_constraint_request(df)
    keys = list(opts)
    # Column prefix: a custom name wins outright; otherwise derive
    # "xtb-<level>[-opt|-ohess]" from the options dict.
    # NOTE(review): this assumes the FIRST options key names the xTB level
    # (e.g. "gfn") — confirm callers order their options accordingly.
    if name != "xtb":
        prefix = name
    else:
        level = keys[0]
        opt_flag = next((k for k in keys[1:] if k in ("opt", "ohess")), None)
        prefix = f"{name}-{level}" + (f"-{opt_flag}" if opt_flag else "")

    def build_xtb(row: pd.Series) -> dict:
        # Build the per-row keyword dict handed to the xTB backend.
        inp: dict[str, object] = {"options": opts}
        step_type = self._step_type_upper()

        # Per-call override: only affects xTB, not ORCA.
        if n_cores is not None:
            inp["n_cores"] = int(n_cores)

        # Only add the user‐provided card if it's non‐empty
        base_str = detailed_inp_str.strip()
        if base_str:
            inp["detailed_input_str"] = base_str

        block = None

        # Each supported step_type emits a fixed $constrain card. The named
        # indices (B, N, H, C, ...) are positions within the row's
        # constraint_atoms list; +1 converts to xTB's 1-based atom numbers.
        # Numeric targets are hard-coded reference geometry values —
        # presumably distances in Å and angles in degrees (xTB defaults);
        # confirm against the original TS reference structures.
        if step_type == "TS1" and constraint:
            B, N, H, C = 0, 1, 4, 5
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[B]}, {atom[H]}, 2.07696
            distance: {atom[N]}, {atom[H]}, 1.5127 
            distance: {atom[H]}, {atom[C]}, 1.29095
            distance: {atom[B]}, {atom[C]}, 1.68461
            distance: {atom[B]}, {atom[N]}, 3.06223
            angle: {atom[N]}, {atom[H]}, {atom[C]}, 170.1342
            angle: {atom[H]}, {atom[C]}, {atom[B]}, 87.4870
            $end
            """).strip()

        if step_type == "TS2" and constraint:
            BCat10, N17, H40, H41, C46 = 0, 1, 4, 3, 5  # noqa: F841
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[BCat10]}, {atom[H41]}, 1.656
            distance: {atom[N17]}, {atom[H40]}, 1.961
            distance: {atom[BCat10]}, {atom[N17]}, 3.080
            angle: {atom[BCat10]}, {atom[H41]}, {atom[N17]}, 86.58
            $end
            """).strip()

        if step_type == "TS3" and constraint:
            BCat10, H11, BPin22, H21, C = 0, 2, 3, 4, 5
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[H21]}, {atom[BCat10]}, 1.376
            distance: {atom[H21]}, {atom[BPin22]}, 1.264
            distance: {atom[H21]}, {atom[C]}, 2.477
            distance: {atom[BCat10]}, {atom[C]}, 1.616
            distance: {atom[BPin22]}, {atom[C]}, 2.180
            distance: {atom[BPin22]}, {atom[BCat10]}, 2.007
            angle: {atom[BCat10]}, {atom[H21]}, {atom[BPin22]}, 98.89
            angle: {atom[BCat10]}, {atom[C]}, {atom[BPin22]}, 61.75
            $end
            """).strip()

        if step_type == "TS4" and constraint:
            BCat11, H12, H13, BPin37, C = 0, 2, 3, 4, 5 # noqa: F841
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[BCat11]}, {atom[BPin37]}, 2.219
            distance: {atom[BPin37]}, {atom[H13]}, 1.868
            distance: {atom[C]}, {atom[H13]}, 2.489
            distance: {atom[BCat11]}, {atom[H13]}, 1.216
            distance: {atom[BCat11]}, {atom[C]}, 1.946
            distance: {atom[BPin37]}, {atom[C]}, 1.585
            angle: {atom[BCat11]}, {atom[H13]}, {atom[BPin37]}, 89.48
            angle: {atom[BCat11]}, {atom[C]}, {atom[BPin37]}, 77.13
            $end
            """).strip()

        if step_type == "INT3" and constraint:
            BCat10, BPin42, H11, C = 0, 3, 4, 5
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[BCat10]}, {atom[H11]}, 1.279
            distance: {atom[BCat10]}, {atom[C]}, 1.688
            distance: {atom[BPin42]}, {atom[H11]}, 1.378
            distance: {atom[BPin42]}, {atom[C]}, 1.749
            angle: {atom[BCat10]}, {atom[H11]}, {atom[BPin42]}, 89.85
            angle: {atom[BCat10]}, {atom[C]}, {atom[BPin42]}, 66.22
            $end
            """).strip()

        # Append the constraint card after any user-provided detailed input.
        if block:
            if "detailed_input_str" in inp:
                inp["detailed_input_str"] += "\n\n" + block
            else:
                inp["detailed_input_str"] = block

        return inp

    result = self._run_engine(
        df, self.xtb_fn, prefix, build_xtb, save_step, lowest
    )
    # Record step provenance on the frame so later stages can inspect which
    # engine/options produced each column prefix.
    result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
        {"engine": "xtb", "options": opts}
    )
    return result

gxtb

gxtb(df: DataFrame, name: str = 'gxtb', options: dict | None = None, detailed_inp_str: str = '', constraint: bool = False, save_step: bool = False, lowest: int | None = None, n_cores: int | None = None) -> pd.DataFrame

Run g-xTB v2 calculations through Tooltoad's g-xTB calculator.

Source code in frust/stepper.py
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
def gxtb(
    self,
    df: pd.DataFrame,
    name: str = "gxtb",
    options: dict | None = None,
    detailed_inp_str: str = "",
    constraint: bool = False,
    save_step: bool = False,
    lowest: int | None = None,
    n_cores: int | None = None,
) -> pd.DataFrame:
    """Run g-xTB v2 calculations through Tooltoad's g-xTB calculator.

    Mirrors :meth:`xtb`: same dataframe contract, constraint handling, and
    stage-prefixed result columns, but dispatches to the g-xTB backend
    (``self.gxtb_fn``). Unlike :meth:`xtb`, the auto-generated column
    prefix carries no level segment: it is ``"gxtb"`` plus ``-opt`` or
    ``-ohess`` when such an option key is present.

    Args:
        df (pd.DataFrame): Conformers to compute. Requires ``atoms`` and a
            coordinate column, plus ``constraint_atoms`` when
            ``constraint=True`` and ``substrate_name`` when ``lowest`` is
            used.
        name (str): Base name for the step; a non-default value is used
            verbatim as the column prefix.
        options (dict, optional): g-xTB driver options. Defaults to ``{}``.
        detailed_inp_str (str, optional): Additional input card text.
        constraint (bool, optional): If ``True``, applies the predefined
            constraint card for the configured ``step_type`` (``TS1``,
            ``TS2``, ``TS3``, ``TS4``, or ``INT3``).
        save_step (bool, optional): If ``True``, saves calculation
            directories for each conformer.
        lowest (int or None, optional): Keep only the lowest-energy ``N``
            conformers per structure group.
        n_cores (int or None, optional): Per-call core-count override.

    Returns:
        pd.DataFrame: The input frame augmented with ``{prefix}-...``
        result columns from the g-xTB engine.
    """
    # Defensive copy of caller options; empty dict when none are given.
    opts = dict(options) if options else {}
    if constraint:
        self._validate_constraint_request(df)
    keys = list(opts)
    # Column prefix: a custom name wins; otherwise "gxtb[-opt|-ohess]".
    if name != "gxtb":
        prefix = name
    else:
        opt_flag = next((k for k in keys if k in ("opt", "ohess")), None)
        prefix = f"{name}" + (f"-{opt_flag}" if opt_flag else "")

    def build_gxtb(row: pd.Series) -> dict:
        # Build the per-row keyword dict handed to the g-xTB backend.
        inp: dict[str, object] = {"options": opts}
        step_type = self._step_type_upper()

        # Per-call override of the default core count.
        if n_cores is not None:
            inp["n_cores"] = int(n_cores)

        # Only add the user-provided card if it is non-empty.
        base_str = detailed_inp_str.strip()
        if base_str:
            inp["detailed_input_str"] = base_str

        block = None

        # Constraint cards duplicate those in :meth:`xtb`: named indices
        # are positions within the row's constraint_atoms list, +1 converts
        # to 1-based atom numbering, and the numeric targets are hard-coded
        # reference geometry values — presumably Å and degrees (xTB
        # defaults); confirm against the original TS reference structures.
        if step_type == "TS1" and constraint:
            B, N, H, C = 0, 1, 4, 5
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[B]}, {atom[H]}, 2.07696
            distance: {atom[N]}, {atom[H]}, 1.5127
            distance: {atom[H]}, {atom[C]}, 1.29095
            distance: {atom[B]}, {atom[C]}, 1.68461
            distance: {atom[B]}, {atom[N]}, 3.06223
            angle: {atom[N]}, {atom[H]}, {atom[C]}, 170.1342
            angle: {atom[H]}, {atom[C]}, {atom[B]}, 87.4870
            $end
            """).strip()

        if step_type == "TS2" and constraint:
            BCat10, N17, H40, H41, C46 = 0, 1, 4, 3, 5  # noqa: F841
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[BCat10]}, {atom[H41]}, 1.656
            distance: {atom[N17]}, {atom[H40]}, 1.961
            distance: {atom[BCat10]}, {atom[N17]}, 3.080
            angle: {atom[BCat10]}, {atom[H41]}, {atom[N17]}, 86.58
            $end
            """).strip()

        if step_type == "TS3" and constraint:
            BCat10, H11, BPin22, H21, C = 0, 2, 3, 4, 5
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[H21]}, {atom[BCat10]}, 1.376
            distance: {atom[H21]}, {atom[BPin22]}, 1.264
            distance: {atom[H21]}, {atom[C]}, 2.477
            distance: {atom[BCat10]}, {atom[C]}, 1.616
            distance: {atom[BPin22]}, {atom[C]}, 2.180
            distance: {atom[BPin22]}, {atom[BCat10]}, 2.007
            angle: {atom[BCat10]}, {atom[H21]}, {atom[BPin22]}, 98.89
            angle: {atom[BCat10]}, {atom[C]}, {atom[BPin22]}, 61.75
            $end
            """).strip()

        if step_type == "TS4" and constraint:
            BCat11, H12, H13, BPin37, C = 0, 2, 3, 4, 5  # noqa: F841
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[BCat11]}, {atom[BPin37]}, 2.219
            distance: {atom[BPin37]}, {atom[H13]}, 1.868
            distance: {atom[C]}, {atom[H13]}, 2.489
            distance: {atom[BCat11]}, {atom[H13]}, 1.216
            distance: {atom[BCat11]}, {atom[C]}, 1.946
            distance: {atom[BPin37]}, {atom[C]}, 1.585
            angle: {atom[BCat11]}, {atom[H13]}, {atom[BPin37]}, 89.48
            angle: {atom[BCat11]}, {atom[C]}, {atom[BPin37]}, 77.13
            $end
            """).strip()

        if step_type == "INT3" and constraint:
            BCat10, BPin42, H11, C = 0, 3, 4, 5
            atom = [x + 1 for x in self._constraint_atoms(row)]
            block = textwrap.dedent(f"""
            $constrain
            force constant=50
            distance: {atom[BCat10]}, {atom[H11]}, 1.279
            distance: {atom[BCat10]}, {atom[C]}, 1.688
            distance: {atom[BPin42]}, {atom[H11]}, 1.378
            distance: {atom[BPin42]}, {atom[C]}, 1.749
            angle: {atom[BCat10]}, {atom[H11]}, {atom[BPin42]}, 89.85
            angle: {atom[BCat10]}, {atom[C]}, {atom[BPin42]}, 66.22
            $end
            """).strip()

        # Append the constraint card after any user-provided detailed input.
        if block:
            if "detailed_input_str" in inp:
                inp["detailed_input_str"] += "\n\n" + block
            else:
                inp["detailed_input_str"] = block

        return inp

    result = self._run_engine(
        df, self.gxtb_fn, prefix, build_gxtb, save_step, lowest
    )
    # Record step provenance on the frame so later stages can inspect which
    # engine/options produced each column prefix.
    result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
        {"engine": "gxtb", "options": opts}
    )
    return result

orca

orca(df: DataFrame, name: str = 'orca', options: dict | None = None, xtra_inp_str: str = '', constraint: bool = False, save_step: bool = False, save_files: list[str] | None = None, lowest: int | None = None, uma: str | None = None, read_files: list | None = None, use_last_hess: bool = False, n_cores: int | None = None, uma_server: bool = True, uma_device: str = 'cpu', uma_cache_dir: str | None = None, uma_offline: bool = False, uma_server_cores: int | None = None, uma_memory_per_thread_mib: int = 500, uma_keep_logs: bool | str = 'on_failure', uma_log_dir: str | None = None, gxtb: bool = False, gxtb_exe: str | None = None, gxtb_ext_params: str | None = None, **kw) -> pd.DataFrame

Run ORCA calculations and attach results to the DataFrame.

Args: df (pd.DataFrame): DataFrame of conformers to compute. Must include: - atoms: list of atomic symbols - one coordinate column, typically coords_embedded or a prior *-oc column - constraint_atoms when constraint=True - substrate_name when lowest is used - a prior *.hess column when use_last_hess=True name (str): Base name for the ORCA step, used to prefix result columns. options (dict): ORCA input keywords, e.g. {'wB97X-D3': None, '6-31G**': None, 'OptTS': None, 'Freq': None}. xtra_inp_str (str, optional): Additional ORCA input block such as CPCM settings or custom geometry directives. Defaults to "". constraint (bool, optional): If True, applies predefined distance and angle constraints for supported step_type values. Requires Stepper(step_type=...) with one of TS1, TS2, TS3, TS4, or INT3. Defaults to False. save_step (bool, optional): If True, saves ORCA run directories for inspection. Defaults to False. save_files (list[str] or None, optional): Specific ORCA output files to retain when partial saving is enabled. If omitted and instance-level output saving is enabled, defaults to ["orca.out"]. lowest (int or None, optional): If set, keeps only the lowest-energy conformer per structure group. Defaults to None. uma (str or None, optional): Optional UMA task/profile identifier used to inject ORCA external optimization settings. Defaults to None. read_files (list or None, optional): Deposit contents from files in the work directory directly into the dataframe, e.g. ["input.hess"]. use_last_hess (bool, optional): If True, scan the dataframe for the most recent *.hess column and feed it back to ORCA as private_input.hess. Defaults to False. n_cores (int or None, optional): If set, overrides the Stepper's default core count for this ORCA call only. Defaults to None. uma_server (bool, optional): If True, runs UMA through OET's server/client mode. If False, uses standalone oet_uma. Defaults to True. 
uma_device (str, optional): OET UMA device argument, typically "cpu" or "cuda". Defaults to "cpu". uma_cache_dir (str or None, optional): Optional FairChem cache directory passed to OET UMA. Defaults to None. uma_offline (bool, optional): If True, asks OET UMA to use offline mode. Defaults to False. uma_server_cores (int or None, optional): Total core budget passed to oet_server --nthreads. Defaults to this ORCA call's core count. uma_memory_per_thread_mib (int, optional): Memory budget passed to oet_server --memory-per-thread. Defaults to 500. uma_keep_logs (bool or str, optional): Server-log retention policy: "on_failure" preserves logs only when the UMA-backed ORCA step fails, True/"always" keeps logs, and False/"never" removes them. Defaults to "on_failure". uma_log_dir (str or None, optional): Directory for preserved UMA server logs. If omitted, transient logs are written to a temp directory and preserved failures are copied to UMA-logs. gxtb (bool, optional): If True, runs ORCA with OET g-xTB v2 as an external method provider. ORCA still owns Opt, OptTS, NEB-TS, and related run types. Defaults to False. gxtb_exe (str or None, optional): Optional path to the g-xTB v2 xtb executable. If omitted, GXTB_EXE is used. gxtb_ext_params (str or None, optional): Extra parameters appended to OET oet_gxtb through ORCA Ext_Params.

Returns: pd.DataFrame: The input DataFrame extended with stage-prefixed ORCA output columns, typically including: - {prefix}-NT - {prefix}-EE - {prefix}-oc for optimization jobs - {prefix}-vibs and {prefix}-GE for frequency jobs - {prefix}-error when a row-level engine failure occurs - saved file-content columns when ORCA returns them

Source code in frust/stepper.py
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
def orca(
    self,
    df: pd.DataFrame,
    name: str = "orca",
    options: dict | None = None,
    xtra_inp_str: str = "",
    constraint: bool = False,
    save_step: bool = False,
    save_files: list[str] | None = None,
    lowest: int | None = None,
    uma: str | None = None,
    read_files: list | None = None,
    use_last_hess: bool = False,
    n_cores: int | None = None,
    uma_server: bool = True,
    uma_device: str = "cpu",
    uma_cache_dir: str | None = None,
    uma_offline: bool = False,
    uma_server_cores: int | None = None,
    uma_memory_per_thread_mib: int = 500,
    uma_keep_logs: bool | str = "on_failure",
    uma_log_dir: str | None = None,
    gxtb: bool = False,
    gxtb_exe: str | None = None,
    gxtb_ext_params: str | None = None,
    **kw
) -> pd.DataFrame:
    """Run ORCA calculations and attach results to the DataFrame.

    Args:
        df (pd.DataFrame): DataFrame of conformers to compute. Must include:
            - ``atoms``: list of atomic symbols
            - one coordinate column, typically ``coords_embedded`` or a
              prior ``*-oc`` column
            - ``constraint_atoms`` when ``constraint=True``
            - ``substrate_name`` when ``lowest`` is used
            - a prior ``*.hess`` column when ``use_last_hess=True``
        name (str): Base name for the ORCA step, used to prefix result
            columns.
        options (dict): ORCA input keywords, e.g.
            ``{'wB97X-D3': None, '6-31G**': None, 'OptTS': None,
            'Freq': None}``.
        xtra_inp_str (str, optional): Additional ORCA input block such as
            CPCM settings or custom geometry directives. Defaults to
            ``""``.
        constraint (bool, optional): If ``True``, applies predefined
            distance and angle constraints for supported ``step_type``
            values. Requires ``Stepper(step_type=...)`` with one of
            ``TS1``, ``TS2``, ``TS3``, ``TS4``, or ``INT3``. Defaults to
            ``False``.
        save_step (bool, optional): If ``True``, saves ORCA run
            directories for inspection. Defaults to ``False``.
        save_files (list[str] or None, optional): Specific ORCA output
            files to retain when partial saving is enabled. If omitted and
            instance-level output saving is enabled, defaults to
            ``["orca.out"]``.
        lowest (int or None, optional): If set, keeps only the
            lowest-energy conformer per structure group. Defaults to
            ``None``.
        uma (str or None, optional): Optional UMA task/profile identifier
            used to inject ORCA external optimization settings. Defaults to
            ``None``.
        read_files (list or None, optional): Deposit contents from files in
            the work directory directly into the dataframe, e.g.
            ``["input.hess"]``.
        use_last_hess (bool, optional): If ``True``, scan the dataframe for
            the most recent ``*.hess`` column and feed it back to ORCA as
            ``private_input.hess``. Defaults to ``False``.
        n_cores (int or None, optional): If set, overrides the Stepper's
            default core count for this ORCA call only. Defaults to
            ``None``.
        uma_server (bool, optional): If ``True``, runs UMA through OET's
            server/client mode. If ``False``, uses standalone ``oet_uma``.
            Defaults to ``True``.
        uma_device (str, optional): OET UMA device argument, typically
            ``"cpu"`` or ``"cuda"``. Defaults to ``"cpu"``.
        uma_cache_dir (str or None, optional): Optional FairChem cache
            directory passed to OET UMA. Defaults to ``None``.
        uma_offline (bool, optional): If ``True``, asks OET UMA to use
            offline mode. Defaults to ``False``.
        uma_server_cores (int or None, optional): Total core budget passed
            to ``oet_server --nthreads``. Defaults to this ORCA call's
            core count.
        uma_memory_per_thread_mib (int, optional): Memory budget passed to
            ``oet_server --memory-per-thread``. Defaults to ``500``.
        uma_keep_logs (bool or str, optional): Server-log retention policy:
            ``"on_failure"`` preserves logs only when the UMA-backed ORCA
            step fails, ``True``/``"always"`` keeps logs, and
            ``False``/``"never"`` removes them. Defaults to
            ``"on_failure"``.
        uma_log_dir (str or None, optional): Directory for preserved UMA
            server logs. If omitted, transient logs are written to a temp
            directory and preserved failures are copied to ``UMA-logs``.
        gxtb (bool, optional): If ``True``, runs ORCA with OET g-xTB v2 as
            an external method provider. ORCA still owns ``Opt``,
            ``OptTS``, ``NEB-TS``, and related run types. Defaults to
            ``False``.
        gxtb_exe (str or None, optional): Optional path to the g-xTB v2
            ``xtb`` executable. If omitted, ``GXTB_EXE`` is used.
        gxtb_ext_params (str or None, optional): Extra parameters appended
            to OET ``oet_gxtb`` through ORCA ``Ext_Params``.

    Returns:
        pd.DataFrame: The input DataFrame extended with stage-prefixed ORCA
        output columns, typically including:
            - ``{prefix}-NT``
            - ``{prefix}-EE``
            - ``{prefix}-oc`` for optimization jobs
            - ``{prefix}-vibs`` and ``{prefix}-GE`` for
              frequency jobs
            - ``{prefix}-error`` when a row-level engine failure occurs
            - saved file-content columns when ORCA returns them

    Raises:
        TypeError: If unexpected keyword arguments are passed.
        ValueError: If ``options`` is empty, or if mutually incompatible
            external-method settings are combined (``uma`` + ``gxtb``,
            ``gxtb`` + ``Freq``, ``gxtb`` + ``Calc_Hess``).
    """
    opts = dict(options or {})
    # Reject typos in keyword arguments early instead of silently ignoring them.
    if kw:
        unknown = ", ".join(sorted(kw))
        raise TypeError(f"Unexpected orca() keyword arguments: {unknown}")
    if uma is not None and gxtb:
        raise ValueError("orca() cannot use both UMA and g-xTB external methods")
    if gxtb and "Freq" in opts:
        raise ValueError(
            "ORCA Freq is not compatible with g-xTB ExtOpt. "
            "Use NumFreq for finite-difference frequencies with external g-xTB gradients."
        )
    if gxtb and "calc_hess" in (xtra_inp_str or "").lower():
        raise ValueError(
            "ORCA %geom Calc_Hess is not compatible with g-xTB ExtOpt. "
            "Use OptTS with the approximate Hessian, and add NumFreq only when you need "
            "a post-optimization numerical frequency check."
        )
    if gxtb and "ExtOpt" not in opts:
        opts = {"ExtOpt": None, **opts}
    if constraint:
        self._validate_constraint_request(df)
    if save_files is None and self.save_output:
        save_files = ["orca.out"]
    keys = list(opts)
    if len(keys) < 1:
        raise ValueError("`options` must include at least one ORCA method key")

    # Derive the column prefix from the ORCA method keys unless the caller
    # supplied an explicit stage name.
    if name != "orca":
        prefix = name
    elif len(keys) == 1:
        prefix = f"{name}-{keys[0]}"
    else:
        func, basis = keys[0], keys[1]
        opt_flag = next((k for k in keys[2:] if k in ("OptTS", "Freq", "NumFreq", "NoSym")), None)
        prefix = f"{name}-{func}-{basis}" + (f"-{opt_flag}" if opt_flag else "")

    def build_orca(row: Series) -> dict:
        """Build the per-row ORCA input dict, appending %geom blocks as needed."""
        step_type = self._step_type_upper()
        inp = {
            "options": opts,
            "xtra_inp_str": xtra_inp_str.strip(),
            "memory": self.memory_gb,
            "n_cores": int(n_cores) if n_cores is not None else self.n_cores,
            "read_files": read_files,
        }

        if "Freq" in opts:
            block = textwrap.dedent("""
                %geom
                  Calc_Hess true
                end
            """).strip()
            inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

        if use_last_hess:
            block = textwrap.dedent(
                """
                %geom
                  inhess Read
                  InHessName "private_input.hess"
                end
                """
            ).strip()
            inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

        if constraint and step_type == "TS1":
            atom = self._constraint_atoms(row)
            B, N, H, C = atom[0], atom[1], atom[4], atom[5]
            block = textwrap.dedent(f"""
                %geom Constraints
                  {{B {B} {H} 2.07696 C}}
                  {{B {N} {H} 1.5127 C}}
                  {{B {H} {C} 1.29095 C}}
                  {{B {B} {C} 1.68461 C}}
                  {{B {B} {N} 3.06223 C}}
                  {{A {N} {H} {C} C}}
                  {{A {H} {C} {B} 87.4870 C}}
                end
                end
            """).strip() # {{A {N} {H} {C} 170.1342 C}}
            inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

        if constraint and step_type == "TS2":
            atom = self._constraint_atoms(row)
            BCat, N17, H40, H41 = atom[0], atom[1], atom[4], atom[3]
            block = textwrap.dedent(f"""
                %geom Constraints
                  {{B {BCat} {H41} 1.656 C}}
                  {{B {N17} {H40} 1.961 C}}
                  {{B {BCat} {N17} 3.080 C}}
                  {{A {BCat} {H41} {N17} 86.58 C}}
                end
                end
            """).strip()
            inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

        if constraint and step_type == "TS3":
            atom = self._constraint_atoms(row)
            BCat, H11, BPin, H21, C = atom[0], atom[2], atom[3], atom[4], atom[5]
            block = textwrap.dedent(f"""
                %geom Constraints
                  {{B {H21} {BCat} 1.376 C}}
                  {{B {H21} {BPin} 1.264 C}}
                  {{B {H21} {C} 2.477 C}}
                  {{B {BCat} {C} 1.616 C}}
                  {{B {BPin} {C} 2.180 C}}
                  {{B {BPin} {BCat} 2.007 C}}
                  {{A {BCat} {H21} {BPin} 98.89 C}}
                  {{A {BCat} {C} {BPin} 61.75 C}}
                end
                end
            """).strip()
            inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

        if constraint and step_type == "TS4":
            atom = self._constraint_atoms(row)
            BCat, H12, H13, BPin, C = atom[0], atom[2], atom[3], atom[4], atom[5]  # noqa: F841
            block = textwrap.dedent(f"""
                %geom Constraints
                  {{B {BCat} {BPin} 2.219 C}}
                  {{B {BPin} {H13} 1.868 C}}
                  {{B {C} {H13} 2.489 C}}
                  {{B {BCat} {H13} 1.216 C}}
                  {{B {BCat} {C} 1.946 C}}
                  {{B {BPin} {C} 1.585 C}}
                  {{A {BCat} {H13} {BPin} 89.48 C}}
                  {{A {BCat} {C} {BPin} 77.13 C}}
                end
                end
            """).strip()
            inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

        if constraint and step_type == "INT3":
            atom = self._constraint_atoms(row)
            BCat, BPin, H11, C = atom[0], atom[3], atom[4], atom[5]
            block = textwrap.dedent(f"""
                %geom Constraints
                  {{B {BCat} {H11} 1.279 C}}
                  {{B {BCat} {C} 1.688 C}}
                  {{B {BPin} {H11} 1.378 C}}
                  {{B {BPin} {C} 1.749 C}}
                  {{A {BCat} {H11} {BPin} 89.85 C}}
                  {{A {BCat} {C} {BPin} 66.22 C}}
                end
                end
            """).strip()
            inp["xtra_inp_str"] += ("\n\n" + block) if inp["xtra_inp_str"] else block

        return inp

    # --- Plain ORCA path (no external method provider) ----------------
    if uma is None and not gxtb:
        result = self._run_engine(df, self.orca_fn, prefix, build_orca, save_step, lowest, save_files, use_last_hess)
        result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
            {"engine": "orca", "options": opts}
        )
        return result

    # --- g-xTB external-method path ------------------------------------
    if gxtb:
        from frust.utils.gxtb import gxtb_orca_block

        client_block = gxtb_orca_block(gxtb_exe=gxtb_exe, ext_params=gxtb_ext_params)

        def build_orca_gxtb(row: Series) -> dict:
            inp = build_orca(row)
            xin = inp.get("xtra_inp_str", "")
            inp["xtra_inp_str"] = (xin + "\n\n" + client_block).strip() if xin else client_block
            return inp

        result = self._run_engine(
            df,
            self.orca_fn,
            prefix,
            build_orca_gxtb,
            save_step,
            lowest,
            save_files,
            use_last_hess,
        )
        result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
            {
                "engine": "orca",
                "options": opts,
                "gxtb": True,
                "gxtb_exe": gxtb_exe,
            }
        )
        return result

    # --- UMA external-method path --------------------------------------
    from frust.utils.uma import parse_uma_spec, uma_orca_block, uma_server as run_uma_server

    spec = parse_uma_spec(
        uma,
        device=uma_device,
        cache_dir=uma_cache_dir,
        offline=uma_offline,
    )

    def run_with_uma_block(client_block: str) -> pd.DataFrame:
        """Run the engine with the UMA client block appended to each input."""
        orig_build = build_orca

        def build_orca_uma(row: Series) -> dict:
            inp = orig_build(row)
            xin = inp.get("xtra_inp_str", "")
            inp["xtra_inp_str"] = (xin + "\n\n" + client_block).strip() if xin else client_block
            return inp

        # BUGFIX: `use_last_hess` must be forwarded here just like in the
        # plain-ORCA and g-xTB paths above. Without it, the %geom inhess
        # block is written into the input but the engine never stages the
        # previous .hess file as private_input.hess, so ORCA cannot read it.
        result = self._run_engine(
            df, self.orca_fn, prefix, build_orca_uma, save_step, lowest, save_files, use_last_hess
        )
        result.attrs.setdefault("frust_steps", {}).setdefault(prefix, {}).update(
            {
                "engine": "orca",
                "options": opts,
                "uma": uma,
                "uma_task": spec.task,
                "uma_model": spec.model,
                "uma_server": uma_server,
            }
        )
        return result

    def uma_result_failed(result: pd.DataFrame) -> bool:
        """Return True when any row failed to terminate normally or errored."""
        nt_col = output_column(prefix, "normal_termination")
        err_col = output_column(prefix, "error")
        if nt_col in result and result[nt_col].eq(False).any():
            return True
        if err_col in result and result[err_col].notna().any():
            return True
        return False

    if not uma_server:
        client_block = uma_orca_block(spec, server=False)
        return run_with_uma_block(client_block)

    # Server mode: start a managed UMA server, point ORCA at it, and decide
    # log retention based on the keep-logs policy and run outcome.
    effective_cores = int(n_cores) if n_cores is not None else self.n_cores
    server_cores = uma_server_cores if uma_server_cores is not None else effective_cores
    with run_uma_server(
        log_dir=uma_log_dir,
        keep_logs=uma_keep_logs,
        use_gpu=uma_device == "cuda",
        server_cores=server_cores,
        memory_per_thread_mib=uma_memory_per_thread_mib,
    ) as server_handle:
        client_block = uma_orca_block(spec, server=True, bind=server_handle.bind)
        result = run_with_uma_block(client_block)
        if uma_keep_logs == "on_failure" and uma_result_failed(result):
            server_handle.preserve()
        return result