# Cluster Submission

## cluster

### ChainPreset

Bases: `StrEnum`

Named dependent-stage submission presets bundled with FRUST.

Source code in `frust/cluster/config.py`:

```python
class ChainPreset(StrEnum):
    """Named dependent-stage submission presets bundled with FRUST."""

    TS_PER_RPOS = "ts_per_rpos"
    INT3_PER_RPOS = "int3_per_rpos"
```
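
Because `ChainPreset` subclasses `StrEnum`, each member is itself a string, so a preset can be passed anywhere a plain preset string is accepted. A minimal sketch, assuming the class is importable from `frust.cluster.config` as shown above:

```python
from frust.cluster.config import ChainPreset

# StrEnum members compare equal to their string values.
assert ChainPreset.TS_PER_RPOS == "ts_per_rpos"

# Round-trip from a raw string, e.g. a value read from a CLI flag.
preset = ChainPreset("int3_per_rpos")
print(preset.name)   # INT3_PER_RPOS
print(str(preset))   # int3_per_rpos
```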

### ClusterConfig (dataclass)

Cluster and executor settings shared across submitted jobs.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `backend` | `{"slurm", "local"}` | Execution backend. Use `"slurm"` for cluster submission through `submitit` or `"local"` for local testing. | `"slurm"` |
| `partition` | `str` or `None` | Slurm partition name. Ignored for the local backend. | `None` |
| `log_dir` | `str` or `Path` | Directory in which submitit writes executor logs. | `"logs"` |
| `work_dir` | `str` or `Path` or `None` | Optional scratch or work directory forwarded to FRUST pipelines when they accept a `work_dir` argument. | `None` |
| `extra_slurm_parameters` | `dict[str, str]` or `None` | Additional scheduler parameters forwarded as `slurm_additional_parameters`. | `None` |
Source code in `frust/cluster/config.py`:

```python
@dataclass(frozen=True)
class ClusterConfig:
    """Cluster and executor settings shared across submitted jobs.

    Parameters
    ----------
    backend : {"slurm", "local"}, optional
        Execution backend. Use ``"slurm"`` for cluster submission through
        :mod:`submitit` or ``"local"`` for local testing. Defaults to
        ``"slurm"``.
    partition : str or None, optional
        Slurm partition name. Ignored for the local backend.
    log_dir : str or pathlib.Path, optional
        Directory in which submitit writes executor logs.
    work_dir : str or pathlib.Path or None, optional
        Optional scratch or work directory forwarded to FRUST pipelines when
        they accept a ``work_dir`` argument.
    extra_slurm_parameters : dict[str, str] or None, optional
        Additional scheduler parameters forwarded as
        ``slurm_additional_parameters``.
    """

    backend: str = "slurm"
    partition: str | None = None
    log_dir: str | Path = "logs"
    work_dir: str | Path | None = None
    extra_slurm_parameters: dict[str, str] | None = None
```
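
A construction sketch. The partition, directories, and extra Slurm parameters below are illustrative site-specific values, not FRUST defaults:

```python
from frust.cluster.config import ClusterConfig

# Hypothetical Slurm setup; partition and qos are placeholders for your site.
cfg = ClusterConfig(
    backend="slurm",
    partition="compute",
    log_dir="logs/submitit",
    work_dir="/scratch/frust",  # forwarded to pipelines that accept work_dir
    extra_slurm_parameters={"qos": "normal"},  # sent as slurm_additional_parameters
)

# For local testing, only the backend needs to change; partition is ignored.
local_cfg = ClusterConfig(backend="local")
```

Since the dataclass is frozen, per-run variations are made by constructing new instances (or via `dataclasses.replace`) rather than by mutation.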

### JobSubmissionResult (dataclass)

Summary information returned after submission.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `job_ids` | `list[str or int]` | Scheduler or executor job identifiers in submission order. | required |
| `tags` | `list[str]` | Sanitized job tags used for naming and logging. | required |
| `save_dirs` | `list[str]` | Output directories associated with the submitted jobs. | required |
| `mode` | `str` | Submitted workflow mode, such as a pipeline name or chain preset. | required |
| `backend` | `str` | Backend used for submission, typically `"slurm"` or `"local"`. | required |
Source code in `frust/cluster/config.py`:

```python
@dataclass(frozen=True)
class JobSubmissionResult:
    """Summary information returned after submission.

    Parameters
    ----------
    job_ids : list[str or int]
        Scheduler or executor job identifiers in submission order.
    tags : list[str]
        Sanitized job tags used for naming and logging.
    save_dirs : list[str]
        Output directories associated with the submitted jobs.
    mode : str
        Submitted workflow mode, such as a pipeline name or chain preset.
    backend : str
        Backend used for submission, typically ``"slurm"`` or ``"local"``.
    """

    job_ids: list[str | int]
    tags: list[str]
    save_dirs: list[str]
    mode: str
    backend: str
```
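
The three list fields are aligned by index: `job_ids[i]`, `tags[i]`, and `save_dirs[i]` describe the same submitted job. A small sketch with hand-built values; real instances come back from `submit_jobs` or `submit_chain`:

```python
from frust.cluster.config import JobSubmissionResult

result = JobSubmissionResult(
    job_ids=["1234567", "1234568"],  # illustrative scheduler ids
    tags=["mol_a", "mol_b"],
    save_dirs=["runs/mols", "runs/mols"],
    mode="run_mols",
    backend="slurm",
)

# Index i across the three lists describes one job.
for job_id, tag, save_dir in zip(result.job_ids, result.tags, result.save_dirs):
    print(f"[{result.backend}/{result.mode}] {tag}: job {job_id} -> {save_dir}")
```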

### Resources (dataclass)

Execution resources for a single submitted job.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `cpus` | `int` | Number of CPU cores requested for the job. | required |
| `mem_gb` | `int` or `float` | Memory requested for the job in gigabytes. | required |
| `timeout_min` | `int` | Wall-clock timeout in minutes. | required |
Source code in `frust/cluster/config.py`:

```python
@dataclass(frozen=True)
class Resources:
    """Execution resources for a single submitted job.

    Parameters
    ----------
    cpus : int
        Number of CPU cores requested for the job.
    mem_gb : int or float
        Memory requested for the job in gigabytes.
    timeout_min : int
        Wall-clock timeout in minutes.
    """

    cpus: int
    mem_gb: int | float
    timeout_min: int
```
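
A sketch of typical requests; the numbers are placeholders rather than recommended values:

```python
from frust.cluster.config import Resources

# Illustrative single-job request: 8 cores, 16 GB, 12-hour wall clock.
res = Resources(cpus=8, mem_gb=16, timeout_min=12 * 60)

# Per-stage overrides for submit_chain; the stage name here is hypothetical.
stage_resources = {
    "dft_sp": Resources(cpus=16, mem_gb=32, timeout_min=24 * 60),
}
```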

### submit_chain

```python
submit_chain(
    *,
    csv_path: str | Path,
    preset: str | None = None,
    module_path: str | None = None,
    stage_order: list[str] | None = None,
    ts_xyz: str | Path,
    out_dir: str | Path,
    cluster: ClusterConfig,
    stage_resources: dict[str, Resources] | None = None,
    debug: bool = False,
    production: bool = True,
    n_confs: int | None = None,
    functional: str | None = None,
    basisset: str | None = None,
    basisset_solv: str | None = None,
    save_output_dir: bool = True,
    work_dir: str | Path | None = None,
) -> JobSubmissionResult
```

Submit a dependent stage chain from a CSV input file.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `csv_path` | `str` or `Path` | Path to a CSV file containing at least a `smiles` column. | required |
| `preset` | `str` or `None` | Built-in FRUST chain preset, such as `"ts_per_rpos"` or `"int3_per_rpos"`. Use either `preset` or the custom `module_path`/`stage_order` combination. | `None` |
| `module_path` | `str` or `None` | Custom Python module containing stage functions for advanced use. | `None` |
| `stage_order` | `list[str]` or `None` | Explicit stage order for custom chains. | `None` |
| `ts_xyz` | `str` or `Path` | TS template XYZ file used to prepare dependent chain inputs. | required |
| `out_dir` | `str` or `Path` | Root directory under which per-tag stage outputs are written. | required |
| `cluster` | `ClusterConfig` | Shared cluster or local-executor configuration. | required |
| `stage_resources` | `dict[str, Resources]` or `None` | Optional per-stage resource overrides. | `None` |
| `debug` | `bool` | Forwarded to the stage functions when supported. | `False` |
| `production` | `bool` | If `True` and `n_confs` is `None`, preserve the stage default conformer behavior. | `True` |
| `n_confs` | `int` or `None` | Conformer count forwarded to initialization stages when supported. | `None` |
| `functional` | `str` or `None` | ORCA functional override for preset stage modules. If omitted, the preset module defaults are used. | `None` |
| `basisset` | `str` or `None` | ORCA basis set override for gas-phase preset stages. If omitted, the preset module defaults are used. | `None` |
| `basisset_solv` | `str` or `None` | ORCA basis set override for solvent single-point preset stages. If omitted, the preset module defaults are used. | `None` |
| `save_output_dir` | `bool` | Forwarded to initialization stages when supported. | `True` |
| `work_dir` | `str` or `Path` or `None` | Optional work directory override. If omitted, `cluster.work_dir` is used. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `JobSubmissionResult` | Summary of the submitted chain jobs. |

Source code in `frust/cluster/facade.py`:

```python
def submit_chain(
    *,
    csv_path: str | Path,
    preset: str | None = None,
    module_path: str | None = None,
    stage_order: list[str] | None = None,
    ts_xyz: str | Path,
    out_dir: str | Path,
    cluster: ClusterConfig,
    stage_resources: dict[str, Resources] | None = None,
    debug: bool = False,
    production: bool = True,
    n_confs: int | None = None,
    functional: str | None = None,
    basisset: str | None = None,
    basisset_solv: str | None = None,
    save_output_dir: bool = True,
    work_dir: str | Path | None = None,
) -> JobSubmissionResult:
    """Submit a dependent stage chain from a CSV input file.

    Parameters
    ----------
    csv_path : str or pathlib.Path
        Path to a CSV file containing at least a ``smiles`` column.
    preset : str or None, optional
        Built-in FRUST chain preset, such as ``"ts_per_rpos"`` or
        ``"int3_per_rpos"``. Use either ``preset`` or the custom
        ``module_path``/``stage_order`` combination.
    module_path : str or None, optional
        Custom Python module containing stage functions for advanced use.
    stage_order : list[str] or None, optional
        Explicit stage order for custom chains.
    ts_xyz : str or pathlib.Path
        TS template XYZ file used to prepare dependent chain inputs.
    out_dir : str or pathlib.Path
        Root directory under which per-tag stage outputs are written.
    cluster : frust.cluster.config.ClusterConfig
        Shared cluster or local-executor configuration.
    stage_resources : dict[str, Resources] or None, optional
        Optional per-stage resource overrides.
    debug : bool, optional
        Forwarded to the stage functions when supported.
    production : bool, optional
        If ``True`` and ``n_confs`` is ``None``, preserve the stage default
        conformer behavior.
    n_confs : int or None, optional
        Conformer count forwarded to initialization stages when supported.
    functional : str or None, optional
        ORCA functional override for preset stage modules. If omitted, the
        preset module defaults are used.
    basisset : str or None, optional
        ORCA basis set override for gas-phase preset stages. If omitted, the
        preset module defaults are used.
    basisset_solv : str or None, optional
        ORCA basis set override for solvent single-point preset stages. If
        omitted, the preset module defaults are used.
    save_output_dir : bool, optional
        Forwarded to initialization stages when supported.
    work_dir : str or pathlib.Path or None, optional
        Optional work directory override. If omitted, ``cluster.work_dir`` is
        used.

    Returns
    -------
    frust.cluster.config.JobSubmissionResult
        Summary of the submitted chain jobs.
    """
    return submit_chain_jobs(
        csv_path=csv_path,
        preset=preset,
        module_path=module_path,
        stage_order=stage_order,
        ts_xyz=ts_xyz,
        out_dir=out_dir,
        cluster=cluster,
        stage_resources=stage_resources,
        debug=debug,
        production=production,
        n_confs=n_confs,
        functional=functional,
        basisset=basisset,
        basisset_solv=basisset_solv,
        save_output_dir=save_output_dir,
        work_dir=work_dir,
    )
```
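
A hedged end-to-end sketch of a preset chain submission. The file paths and resource numbers are placeholders, and importing `submit_chain` from the `frust.cluster` package assumes the re-export implied by this page:

```python
from frust.cluster import submit_chain
from frust.cluster.config import ChainPreset, ClusterConfig, Resources

result = submit_chain(
    csv_path="ligands.csv",          # must contain at least a `smiles` column
    preset=ChainPreset.TS_PER_RPOS,  # StrEnum member, equivalent to "ts_per_rpos"
    ts_xyz="ts_template.xyz",
    out_dir="runs/ts_chain",
    cluster=ClusterConfig(backend="slurm", partition="compute"),
    stage_resources={
        # Hypothetical stage name; stages without an override keep their defaults.
        "dft_sp": Resources(cpus=16, mem_gb=32, timeout_min=24 * 60),
    },
)
print(result.job_ids)
```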

### submit_jobs

```python
submit_jobs(
    *,
    csv_path: str | Path,
    pipeline: str,
    out_dir: str | Path,
    cluster: ClusterConfig,
    resources: Resources,
    ts_xyz: str | Path | None = None,
    debug: bool = False,
    production: bool = True,
    n_confs: int | None = None,
    save_output_dir: bool = True,
    dft: bool = False,
    select_mols: str | list[str] = "all",
    work_dir: str | Path | None = None,
) -> JobSubmissionResult
```

Submit independent FRUST workflow jobs from a CSV input file.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `csv_path` | `str` or `Path` | Path to a CSV file containing at least a `smiles` column. | required |
| `pipeline` | `str` | High-level pipeline name from `frust.pipes`. | required |
| `out_dir` | `str` or `Path` | Output directory under which parquet files and run outputs are written. | required |
| `cluster` | `ClusterConfig` | Shared cluster or local-executor configuration. | required |
| `resources` | `Resources` | CPU, memory, and timeout settings applied to every job in this call. | required |
| `ts_xyz` | `str` or `Path` or `None` | TS template XYZ file required by TS-dependent pipelines. | `None` |
| `debug` | `bool` | Forwarded to the selected FRUST pipeline. | `False` |
| `production` | `bool` | If `True` and `n_confs` is `None`, preserve the pipeline default conformer behavior. | `True` |
| `n_confs` | `int` or `None` | Conformer count forwarded to the selected pipeline when supported. | `None` |
| `save_output_dir` | `bool` | Forwarded to the selected FRUST pipeline. | `True` |
| `dft` | `bool` | Forwarded to the selected FRUST pipeline as `DFT` when supported. | `False` |
| `select_mols` | `str` or `list[str]` | Molecule selection forwarded to `run_mols` when supported. | `"all"` |
| `work_dir` | `str` or `Path` or `None` | Optional work directory override. If omitted, `cluster.work_dir` is used. | `None` |

Returns:

| Type | Description |
| --- | --- |
| `JobSubmissionResult` | Summary of the submitted jobs, including scheduler ids and tags. |

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | If the pipeline name is unsupported, if required TS inputs are missing, or if the CSV input is invalid. |

Source code in `frust/cluster/facade.py`:

```python
def submit_jobs(
    *,
    csv_path: str | Path,
    pipeline: str,
    out_dir: str | Path,
    cluster: ClusterConfig,
    resources: Resources,
    ts_xyz: str | Path | None = None,
    debug: bool = False,
    production: bool = True,
    n_confs: int | None = None,
    save_output_dir: bool = True,
    dft: bool = False,
    select_mols: str | list[str] = "all",
    work_dir: str | Path | None = None,
) -> JobSubmissionResult:
    """Submit independent FRUST workflow jobs from a CSV input file.

    Parameters
    ----------
    csv_path : str or pathlib.Path
        Path to a CSV file containing at least a ``smiles`` column.
    pipeline : str
        High-level pipeline name from :mod:`frust.pipes`.
    out_dir : str or pathlib.Path
        Output directory under which parquet files and run outputs are written.
    cluster : frust.cluster.config.ClusterConfig
        Shared cluster or local-executor configuration.
    resources : frust.cluster.config.Resources
        CPU, memory, and timeout settings for every submitted job in this
        submission call.
    ts_xyz : str or pathlib.Path or None, optional
        TS template XYZ file required by TS-dependent pipelines.
    debug : bool, optional
        Forwarded to the selected FRUST pipeline.
    production : bool, optional
        If ``True`` and ``n_confs`` is ``None``, preserve the pipeline default
        conformer behavior.
    n_confs : int or None, optional
        Conformer count forwarded to the selected pipeline when supported.
    save_output_dir : bool, optional
        Forwarded to the selected FRUST pipeline.
    dft : bool, optional
        Forwarded to the selected FRUST pipeline as ``DFT`` when supported.
    select_mols : str or list[str], optional
        Molecule selection forwarded to ``run_mols`` when supported.
    work_dir : str or pathlib.Path or None, optional
        Optional work directory override. If omitted, ``cluster.work_dir`` is
        used.

    Returns
    -------
    frust.cluster.config.JobSubmissionResult
        Summary of the submitted jobs, including scheduler ids and tags.

    Raises
    ------
    ValueError
        If the pipeline name is unsupported, if required TS inputs are
        missing, or if the CSV input is invalid.
    """
    prepared = prepare_pipeline_inputs(csv_path, pipeline, ts_xyz=ts_xyz, select_mols=select_mols)
    pipeline_fn = load_pipeline(pipeline)
    sig = inspect.signature(pipeline_fn)

    out_path = Path(out_dir)
    out_path.mkdir(parents=True, exist_ok=True)
    executor = create_executor(cluster)
    job_ids: list[str | int] = []
    tags: list[str] = []
    save_dirs: list[str] = []

    for payload, raw_tag in zip(prepared["payloads"], prepared["tags"]):
        tag = sanitize_tag(raw_tag)
        update_executor(executor, cluster, resources, job_name=f"{sanitize_tag(pipeline)}_{tag}")
        output_parquet = pipeline_output_parquet(out_path, pipeline, tag)

        kwargs = {
            "n_confs": None if production and n_confs is None else n_confs,
            "n_cores": resources.cpus,
            "mem_gb": resources.mem_gb,
            "debug": debug,
            "out_dir": str(out_path),
            "output_parquet": output_parquet,
            "save_output_dir": save_output_dir,
            "DFT": dft,
            "select_mols": select_mols,
            "work_dir": work_dir or cluster.work_dir,
        }

        if pipeline == "run_mols":
            kwargs["ligand_smiles_df"] = payload
        elif pipeline == "run_ts_per_lig":
            kwargs["ligand_smiles_df"] = payload
            kwargs["ts_guess_xyz"] = str(ts_xyz)
        elif pipeline in {"run_ts_per_rpos", "run_ts_per_rpos_UMA", "run_ts_per_rpos_UMA_short", "run_orca_smoke_test"}:
            kwargs["ts_struct"] = payload
        else:
            raise ValueError(f"Unsupported pipeline {pipeline!r}")

        call_kwargs = {k: v for k, v in kwargs.items() if k in sig.parameters}
        job = executor.submit(pipeline_fn, **call_kwargs)
        job_ids.append(getattr(job, "job_id", f"{pipeline}_{tag}"))
        tags.append(tag)
        save_dirs.append(str(out_path))

    print("Submitted job IDs:", job_ids)
    return JobSubmissionResult(
        job_ids=job_ids,
        tags=tags,
        save_dirs=save_dirs,
        mode=pipeline,
        backend=cluster.backend,
    )
```
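
A usage sketch mirroring the source above: `"run_mols"` is one of the dispatched pipeline names, the paths are placeholders, and the top-level import again assumes the package re-export:

```python
from frust.cluster import submit_jobs
from frust.cluster.config import ClusterConfig, Resources

# Local smoke test: one executor job per prepared payload, no scheduler needed.
result = submit_jobs(
    csv_path="ligands.csv",   # must contain at least a `smiles` column
    pipeline="run_mols",
    out_dir="runs/mols",
    cluster=ClusterConfig(backend="local"),
    resources=Resources(cpus=4, mem_gb=8, timeout_min=120),
    production=False,
    n_confs=5,                # forwarded as-is because production=False
)
for job_id, tag in zip(result.job_ids, result.tags):
    print(tag, job_id)
```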
