Skip to content

Utility Modules

g-xTB

gxtb

get_gxtb_exe

get_gxtb_exe(gxtb_exe: str | None = None) -> Path

Return the g-xTB v2 xtb executable path.

Source code in frust/utils/gxtb.py
 8
 9
10
11
12
13
14
15
16
17
18
def get_gxtb_exe(gxtb_exe: str | None = None) -> Path:
    """Return the g-xTB v2 xtb executable path."""
    value = gxtb_exe or os.environ.get("GXTB_EXE")
    if not value:
        raise RuntimeError("Set GXTB_EXE to the g-xTB v2 xtb executable.")
    path = Path(value).expanduser()
    if not path.exists():
        raise RuntimeError(f"g-xTB executable does not exist: {path}")
    if not os.access(path, os.X_OK):
        raise RuntimeError(f"g-xTB executable is not executable: {path}")
    return path

gxtb_ext_params

gxtb_ext_params(*, gxtb_exe: str | None = None, extra_params: str | None = None) -> str

Build Ext_Params for OET's g-xTB v2 wrapper.

Source code in frust/utils/gxtb.py
30
31
32
33
34
35
36
37
38
39
def gxtb_ext_params(
    *,
    gxtb_exe: str | None = None,
    extra_params: str | None = None,
) -> str:
    """Assemble the ``Ext_Params`` string for OET's g-xTB v2 wrapper.

    The result always begins with ``--exe <path>``, where the path is
    resolved through ``get_gxtb_exe``. A non-empty ``extra_params`` string is
    tokenised with shell rules and its tokens are appended; the full argument
    list is then re-joined with shell quoting.
    """
    exe_path = str(get_gxtb_exe(gxtb_exe))
    extra_tokens = shlex.split(extra_params) if extra_params else []
    return shlex.join(["--exe", exe_path, *extra_tokens])

gxtb_orca_block

gxtb_orca_block(*, gxtb_exe: str | None = None, ext_params: str | None = None, tools: Path | None = None) -> str

Build the ORCA method block for OET g-xTB v2 external calculations.

Source code in frust/utils/gxtb.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
def gxtb_orca_block(
    *,
    gxtb_exe: str | None = None,
    ext_params: str | None = None,
    tools: Path | None = None,
) -> str:
    """Render the ORCA input blocks for an OET g-xTB v2 external calculation.

    The ``%method`` block points ``ProgExt`` at the wrapper returned by
    ``oet_gxtb_bin`` and passes the string built by ``gxtb_ext_params`` as
    ``Ext_Params``; the ``%output`` block sets ``Print[P_EXT_OUT]`` and
    ``Print[P_EXT_GRAD]`` to 1. The text has no leading or trailing newline.
    """
    wrapper = oet_gxtb_bin(tools=tools)
    wrapper_args = gxtb_ext_params(gxtb_exe=gxtb_exe, extra_params=ext_params)
    block_lines = (
        "%method",
        f'ProgExt "{wrapper}"',
        f'Ext_Params "{wrapper_args}"',
        "end",
        "%output",
        "Print[P_EXT_OUT] 1",
        "Print[P_EXT_GRAD] 1",
        "end",
    )
    return "\n".join(block_lines)

oet_gxtb_bin

oet_gxtb_bin(*, tools: Path | None = None) -> Path

Return the OET g-xTB wrapper executable.

Source code in frust/utils/gxtb.py
21
22
23
24
25
26
27
def oet_gxtb_bin(*, tools: Path | None = None) -> Path:
    """Return the OET g-xTB wrapper executable."""
    root = tools or get_oet_tools()
    exe = root / "bin" / "oet_gxtb"
    if not exe.exists():
        raise RuntimeError(f"Expected OET g-xTB executable not found: {exe}")
    return exe

UMA

uma

UmaServerHandle dataclass

Source code in frust/utils/uma.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
@dataclass
class UmaServerHandle:
    bind: str
    log_path: str
    _preserve_dir: Path
    _preserved_path: str | None = None

    def __iter__(self):
        yield self.bind
        yield self.log_path

    def preserve(self) -> str:
        """Copy the transient server log to the configured preserved-log directory."""
        if self._preserved_path is not None:
            return self._preserved_path
        src = Path(self.log_path)
        self._preserve_dir.mkdir(parents=True, exist_ok=True)
        dest = self._preserve_dir / src.name
        if src.exists() and src.resolve() != dest.resolve():
            shutil.copy2(src, dest)
        self._preserved_path = str(dest)
        return self._preserved_path

preserve

preserve() -> str

Copy the transient server log to the configured preserved-log directory.

Source code in frust/utils/uma.py
40
41
42
43
44
45
46
47
48
49
50
def preserve(self) -> str:
    """Copy the server log into the preserved-log directory (once).

    The destination keeps the log's file name. The copy is skipped when the
    log does not exist or already resolves to the destination. The
    destination path is cached and returned on every later call.
    """
    if self._preserved_path is None:
        source = Path(self.log_path)
        self._preserve_dir.mkdir(parents=True, exist_ok=True)
        target = self._preserve_dir / source.name
        needs_copy = source.exists() and source.resolve() != target.resolve()
        if needs_copy:
            shutil.copy2(source, target)
        self._preserved_path = str(target)
    return self._preserved_path

oet_bin

oet_bin(name: str, *, tools: Path | None = None) -> Path

Return an OET 2 executable path and validate it exists.

Source code in frust/utils/uma.py
 95
 96
 97
 98
 99
100
101
def oet_bin(name: str, *, tools: Path | None = None) -> Path:
    """Return an OET 2 executable path and validate it exists."""
    root = tools or get_oet_tools()
    exe = root / "bin" / name
    if not exe.exists():
        raise RuntimeError(f"Expected OET executable not found: {exe}")
    return exe

parse_uma_spec

parse_uma_spec(uma: str, *, device: str = 'cpu', cache_dir: str | None = None, offline: bool = False) -> UmaSpec

Parse FRUST's task or task@model UMA shorthand.

Source code in frust/utils/uma.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def parse_uma_spec(
    uma: str,
    *,
    device: str = "cpu",
    cache_dir: str | None = None,
    offline: bool = False,
) -> UmaSpec:
    """Parse FRUST's ``task`` or ``task@model`` UMA shorthand.

    The spec is stripped of surrounding whitespace. With an ``@`` the text is
    split at the first ``@`` into task and model (each stripped); without one
    the whole spec is the task and ``DEFAULT_UMA_MODEL`` is the model.
    ``device``, ``cache_dir`` and ``offline`` are forwarded unchanged.

    Raises:
        ValueError: If ``uma`` is not a non-empty string, or the task or
            model part ends up empty.
    """
    spec = uma.strip() if isinstance(uma, str) else ""
    if not spec:
        raise ValueError("UMA spec must be a non-empty string")

    task_part, separator, model_part = spec.partition("@")
    if separator:
        task = task_part.strip()
        model = model_part.strip()
    else:
        task = spec
        model = DEFAULT_UMA_MODEL

    if not task:
        raise ValueError(f"UMA spec {uma!r} is missing a task before '@'")
    if not model:
        raise ValueError(f"UMA spec {uma!r} is missing a model after '@'")

    return UmaSpec(
        task=task,
        model=model,
        device=device,
        cache_dir=cache_dir,
        offline=offline,
    )

uma_server

uma_server(*, log_dir: str | None = None, keep_logs: bool | str = 'on_failure', use_gpu: bool = False, server_cores: int | None = None, memory_per_thread_mib: int = 500, port: int | None = None)

Run an OET 2 UMA server bound to localhost for the current process.

Source code in frust/utils/uma.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
@contextmanager
def uma_server(
    *,
    log_dir: str | None = None,
    keep_logs: bool | str = "on_failure",
    use_gpu: bool = False,
    server_cores: int | None = None,
    memory_per_thread_mib: int = 500,
    port: int | None = None,
):
    """Run an OET 2 UMA server bound to localhost for the current process.

    Starts ``oet_server uma`` in its own session, polls until it reports
    ready, yields a handle, and terminates the server on exit.

    Args:
        log_dir: Directory in which logs are preserved (``"UMA-logs"`` when
            omitted). When omitted and the log policy is not ``"always"``,
            the live log is written to a temporary directory that is removed
            on exit.
        keep_logs: Log retention policy, normalised by
            ``_normalize_log_policy`` to ``"always"``, ``"never"`` or
            ``"on_failure"``.
        use_gpu: Forwarded to ``_server_env`` when building the server
            environment.
        server_cores: Value for ``--nthreads``. Defaults to
            ``SLURM_CPUS_PER_TASK`` from the server environment, else
            ``os.cpu_count()``; always at least 1.
        memory_per_thread_mib: Value for ``--memory-per-thread``.
        port: Port to bind; a free local port is chosen when falsy.

    Yields:
        UmaServerHandle: Unpackable as ``(bind, log_path)``.

    Raises:
        RuntimeError: If the server process exits or does not become ready
            within 120 polls; the message names the preserved log.
    """
    log_policy = _normalize_log_policy(keep_logs)
    preserve_dir = Path(log_dir or "UMA-logs")
    temp_log_dir = log_dir is None and log_policy != "always"

    port = port or _free_local_port()
    bind = f"{LOCAL_BIND_HOST}:{port}"
    env = _server_env(use_gpu=use_gpu)

    if server_cores is None:
        server_cores = int(env.get("SLURM_CPUS_PER_TASK") or (os.cpu_count() or 1))
    server_cores = max(1, int(server_cores))

    # Build the command (which validates the executable) before creating any
    # directory or log file, so a missing OET install leaves nothing behind.
    cmd = [
        str(oet_bin("oet_server")),
        "uma",
        "--bind",
        bind,
        "--nthreads",
        str(server_cores),
        "--memory-per-thread",
        str(int(memory_per_thread_mib)),
    ]

    active_log_dir = Path(tempfile.mkdtemp(prefix="frust-uma-")) if temp_log_dir else preserve_dir
    active_log_dir.mkdir(parents=True, exist_ok=True)
    log_path = active_log_dir / f"oet_uma_server_{port}.log"
    logf = open(log_path, "wb")

    try:
        header = (
            f"[launcher] bind={bind} server_cores={server_cores} "
            f"memory_per_thread_mib={memory_per_thread_mib} "
            f"slurm_job_id={env.get('SLURM_JOB_ID', '')} "
            f"slurm_job_nodelist={env.get('SLURM_JOB_NODELIST', '')} "
            f"cmd={shlex.join(cmd)}\n"
        )
        logf.write(header.encode())
        logf.flush()

        p = subprocess.Popen(
            cmd,
            stdout=logf,
            stderr=subprocess.STDOUT,
            env=env,
            close_fds=True,
            start_new_session=True,
        )
    except BaseException:
        # The server never started: release the log handle and temp dir.
        logf.close()
        if temp_log_dir:
            shutil.rmtree(active_log_dir, ignore_errors=True)
        raise

    ready = False
    for _ in range(120):
        if p.poll() is not None:
            break
        try:
            ready = bool(_healthz_ready(bind))
        except Exception:
            ready = False
        if ready:
            break
        # Pause after every unsuccessful probe, not only after an exception;
        # otherwise a falsy health check burns all attempts immediately.
        time.sleep(1)

    if not ready:
        try:
            os.killpg(p.pid, signal.SIGTERM)
        except Exception:
            p.terminate()
        logf.close()
        handle = UmaServerHandle(bind=bind, log_path=str(log_path), _preserve_dir=preserve_dir)
        # Always keep the log of a failed start so the error can point at it.
        preserved = handle.preserve()
        if temp_log_dir:
            shutil.rmtree(active_log_dir, ignore_errors=True)
        raise RuntimeError(f"OET UMA server failed to start. See log: {preserved}")

    handle = UmaServerHandle(bind=bind, log_path=str(log_path), _preserve_dir=preserve_dir)
    failed = True
    try:
        yield handle
        failed = False
    except Exception:
        if log_policy == "on_failure":
            handle.preserve()
        raise
    finally:
        # Ask the whole process group to stop; escalate to SIGKILL if it does
        # not exit within 10 seconds or the group signal fails.
        try:
            os.killpg(p.pid, signal.SIGTERM)
            p.wait(timeout=10)
        except Exception:
            try:
                os.killpg(p.pid, signal.SIGKILL)
            except Exception:
                p.kill()
        logf.flush()
        logf.close()
        if log_policy == "always":
            handle.preserve()
        elif log_policy == "never" and not temp_log_dir:
            try:
                Path(handle.log_path).unlink()
            except FileNotFoundError:
                pass
        elif log_policy == "on_failure" and not failed and handle._preserved_path is None:
            try:
                Path(handle.log_path).unlink()
            except FileNotFoundError:
                pass
        elif failed and log_policy == "on_failure":
            # Covers exits not caught by ``except Exception`` above.
            handle.preserve()
        if temp_log_dir:
            shutil.rmtree(active_log_dir, ignore_errors=True)

IO

io

dump_df

dump_df(df: DataFrame, step: str, base_dir: Path) -> Path

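Writes the DataFrame to base_dir/{step}.csv (creating base_dir if needed) and returns the written path. The function itself does not check dump_each_step; callers decide whether to call it.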

Source code in frust/utils/io.py
12
13
14
15
16
17
18
19
def dump_df(df: pd.DataFrame, step: str, base_dir: Path) -> Path:
    """
    Write `df` to `base_dir/{step}.csv` without the index and return that path.

    `base_dir` (including missing parents) is created if needed. The function
    writes unconditionally; any `dump_each_step` check is left to the caller.
    """
    base_dir.mkdir(parents=True, exist_ok=True)
    csv_path = base_dir.joinpath(f"{step}.csv")
    df.to_csv(csv_path, index=False)
    return csv_path

read_ts_type_from_xyz

read_ts_type_from_xyz(xyz_file: str)

Reads the structure type label (a 'TS' or 'INT' label) from the comment line of an XYZ file.

Args: xyz_file (str): Path to the XYZ file. The label must appear in the second line as 'TS' or 'INT' followed by a number (e.g., 'TS1 guess', 'TS2', 'INT3'); matching is case-insensitive.

Returns: str: The structure type label in uppercase (e.g., 'TS1', 'INT3').

Source code in frust/utils/io.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def read_ts_type_from_xyz(xyz_file: str) -> str:
    """
    Reads the structure type label (TS or INT) from the comment line of an XYZ file.

    Args:
        xyz_file (str): Path to the XYZ file. The second line must contain 'TS' or
            'INT' immediately followed by a number, as a standalone word
            (e.g., 'TS1 guess', 'TS2', 'INT3'). Matching is case-insensitive.

    Returns:
        str: The structure type label in uppercase (e.g., 'TS1', 'INT3').

    Raises:
        ValueError: If the file has fewer than two lines, or the second line
            contains no TS/INT label.
        OSError: If the file cannot be opened or read (for example
            FileNotFoundError or PermissionError). A message is printed
            before the exception is re-raised.
    """
    try:
        with open(xyz_file, 'r') as file:
            file.readline()  # Skip first line
            comment = file.readline()  # Read second line
    except FileNotFoundError:
        print(f"Error: Transition state structure file not found: {xyz_file}")
        raise
    except PermissionError:
        print(f"Error: Permission denied when accessing file: {xyz_file}")
        raise
    except IOError as e:
        print(f"Error: Failed to read transition state structure file {xyz_file}: {e}")
        raise
    except Exception as e:
        print(f"Unexpected error loading transition state structure from {xyz_file}: {e}")
        raise

    # Validate outside the try block so this expected ValueError is not
    # reported by the catch-all above as an "Unexpected error".
    if not comment:
        raise ValueError("XYZ file must have at least 2 lines with a comment on the second line")

    match = re.search(r'\b(?:TS|INT)\d+\b', comment, re.IGNORECASE)
    if match:
        return match.group().upper()
    raise ValueError(
        "XYZ file must specify a structure type in the comments. "
        "Please include TSX or INTX in the comment (e.g. TS1, INT3)."
    )

write_xyz_structures

write_xyz_structures(df: DataFrame, path: Path | str, coord_options: Mapping[str, str], name_col: str = 'custom_name', atoms_col: str = 'atoms', show_mols: bool = False, **molto3d_kwargs) -> None

Write XYZ structure files from coordinate columns in a dataframe.

Args: df: Dataframe containing atoms, names, and coordinate columns. path: Base directory where the XYZ folders should be created. coord_options: Mapping from output folder/suffix to coordinate column. The key is used both as the subfolder name and filename suffix. name_col: Column containing the base structure name. atoms_col: Column containing atomic symbols. show_mols: Whether to display the written structures. **molto3d_kwargs: Additional keyword arguments passed to MolTo3DGrid.

Source code in frust/utils/io.py
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def write_xyz_structures(
    df: pd.DataFrame,
    path: Path | str,
    coord_options: Mapping[str, str],
    name_col: str = "custom_name",
    atoms_col: str = "atoms",
    show_mols: bool = False,
    **molto3d_kwargs,
) -> None:
    """Write XYZ structure files from coordinate columns in a dataframe.

    Args:
        df: Dataframe containing atoms, names, and coordinate columns.
        path: Base directory where the XYZ folders should be created.
        coord_options: Mapping from output folder/suffix to coordinate column.
            The key is used both as the subfolder name and filename suffix.
        name_col: Column containing the base structure name.
        atoms_col: Column containing atomic symbols.
        show_mols: Whether to display the written structures.
        **molto3d_kwargs: Additional keyword arguments passed to MolTo3DGrid.
    """
    path = Path(path)

    for option in coord_options:
        (path / option).mkdir(parents=True, exist_ok=True)

    mols = []
    legends = []

    for _, row in df.iterrows():
        name = row[name_col]
        atoms = row[atoms_col]

        for option, coord_col in coord_options.items():
            coords = row[coord_col]
            xyz_str = ac2xyz(atoms, coords)

            xyz_path = path / option / f"{name}_{option}.xyz"

            with open(xyz_path, "w") as f:
                f.write(xyz_str)

            if show_mols:
                mol = Chem.MolFromXYZBlock(xyz_str)
                if mol is not None:
                    rdDetermineBonds.DetermineConnectivity(mol)
                    mols.append(mol)
                    legends.append(f"{name}_{option}")

    if show_mols and mols:
        MolTo3DGrid(mols, legends=legends, **molto3d_kwargs)