"""Module for DataIO class.
The metadata spec is documented as a JSON schema, stored under schema/.
"""
from __future__ import annotations
import warnings
from dataclasses import dataclass, field, fields
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Final, Literal
from fmu.datamodels.fmu_results.global_configuration import GlobalConfiguration
from ._export import ExportConfig, export_with_metadata, export_without_metadata
from ._export.deprecations import _check_vertical_domain_dict
from ._logging import null_logger
from ._metadata import generate_metadata
from ._utils import read_metadata_from_file
from .exceptions import ValidationError
from .preprocessed import ExportPreprocessedData
if TYPE_CHECKING:
from . import types
logger: Final = null_logger(__name__)
def _future_warning_preprocessed() -> None:
    """Warn that re-exporting preprocessed data through ExportData is going away.

    Issues a single ``FutureWarning`` that directs users to the dedicated
    ``ExportPreprocessedData`` class.
    """
    message = (
        "Using the ExportData class for re-exporting preprocessed data is no "
        "longer supported. Use the dedicated ExportPreprocessedData class "
        "instead. In a deprecation period the ExportPreprocessedData is used "
        "under the hood when a filepath is input to ExportData. "
        "Please update your script, as this will be discontinued in the future."
    )
    warnings.warn(message, category=FutureWarning)
# ======================================================================================
# Public function to read/load associated metadata given a file (e.g. a map file)
# ======================================================================================
# ======================================================================================
# ExportData, public class
# ======================================================================================
@dataclass
class ExportData:
"""This class provides context for the metadata generated when data is exported.
Here is a complete example of how it is used:
.. code-block:: python
for name in ["TopOne", "TopTwo", "TopThree"]:
poly = xtgeo.polygons_from_roxar(project, name, POL_FOLDER)
ed = dataio.ExportData(
config=CFG,
content="depth",
unit="m",
vertical_domain="depth",
domain_reference="msl",
timedata=None,
is_observation=False,
tagname="faultlines",
workflow="rms structural model",
name=name
)
out = ed.export(poly)
In general, fmu-dataio tries to take care of exporting data automatically to
conventional and standard locations. In the documentation below you might find
references to the following terms.
``pwd``
The present working directory. This is the directory a script or application is
started from.
``rootpath``
The directory from which relative file names are relative to. This is
auto-detected by fmu-dataio.
``casepath``
The path where the FMU case originates from (is started from). This should be
equivalent to the ``rootpath`` in most circumstances.
Examples:
.. code-block:: shell
/project/foo/resmod/ff/2022.1.0/rms/model # pwd
/project/foo/resmod/ff/2022.1.0/ # rootpath
A file:
.. code-block:: shell
/project/foo/resmod/ff/2022.1.0/share/results/maps/xx.gri # example absolute
share/results/maps/xx.gri # example relative
When running an Ert forward job using a normal Ert job (e.g. a script):
.. code-block:: shell
/scratch/nn/case/realization-44/iter-2 # pwd
/scratch/nn/case # rootpath
A file:
.. code-block:: shell
/scratch/nn/case/realization-44/iter-2/share/results/maps/xx.gri # absolute
realization-44/iter-2/share/results/maps/xx.gri # relative
When running an Ert forward job but here executed from RMS:
.. code-block:: shell
/scratch/nn/case/realization-44/iter-2/rms/model # pwd
/scratch/nn/case # rootpath
A file:
.. code-block:: shell
/scratch/nn/case/realization-44/iter-2/share/results/maps/xx.gri # absolute
realization-44/iter-2/share/results/maps/xx.gri # relative
"""
# ----------------------------------------------------------------------------------
#
# The role of this class is to be:
# - public (end user) interface
# - collect the full settings from global config, user keys and class variables
# - process and validate these settings
# - establish PWD and rootpath
#
# Then other classes will further do the detailed metadata processing, cf _MetaData
# and subsequent classes called by _MetaData
#
# ----------------------------------------------------------------------------------
# ##################################################################################
# ----------------------------------------------------------------------------------
#
# Required input values to create metadata. These should be ordered from Required
# parameters first, and in order of importance, as they will be rendered in the
# documentation in the order listed here.
#
# ----------------------------------------------------------------------------------
config: dict[str, Any] | GlobalConfiguration = field(default_factory=dict)
"""Required in order to produce valid metadata.
This global config must be provided either as an input value here or through an
environment variable.
This value should be a dictionary with static settings. In the standard case
this is read from FMU global variables produced by ``fmuconfig``. The dictionary
must contain some predefined main level keys to work with fmu-dataio.
.. note::
If missing or empty, an :meth:`export` may still be done, but without any
metadata produced.
"""
content: str | dict | None = None
"""A required string describing the content of the data, e.g. ``"volumes"``.
.. warning::
Using the ``content`` argument as a ``dict`` to set both the content and the
content metadata will be deprecated. Set the ``content`` argument to a valid
content string, and provide the extra information through the
:attr:`content_metadata` argument instead.
Some content types, like ``"seismic"``, require additional information. This should
be provided through the :attr:`content_metadata` argument described below.
The list of content types that can be provided is controlled and input values are
validated against a current list of them. In the following enumeration you would use
**only** the string values of the content type.
.. autoclass:: fmu.datamodels.fmu_results.enums.Content
:members:
:exclude-members: __new__, parameters
:no-index:
:no-special-members:
"""
# ^ parameters is specially excluded to discourage users from attempting this
# It is handled automatically.
content_metadata: dict | None = None
"""Optional. Dictionary with additional information about the provided content. Only
required for some :attr:`content` types, e.g. ``"seismic"``.
Example:
.. code-block:: python
content_metadata={"attribute": "amplitude", "calculation": "mean"},
"""
classification: str | None = None
"""Optional. Security classification level of the data object.
If present it will override the default found in the config.
The list of classification types that can be provided is controlled and input values
are validated against a current list of them. In the following enumeration you would
use **only** the string values of the classification type.
.. autoclass:: fmu.datamodels.common.enums.Classification
:members:
:exclude-members: __new__
:no-index:
:no-special-members:
"""
domain_reference: str = "msl"
"""Optional. Reference to the vertical scale of the data.
The list of domain reference types that can be provided is controlled and input
values are validated against a current list of them. In the following enumeration
you would use **only** the string values of the domain reference type.
.. autoclass:: fmu.datamodels.fmu_results.enums.DomainReference
:members:
:exclude-members: __new__
:no-index:
:no-special-members:
.. note:: Use the :attr:`vertical_domain` key to set the domain (depth or time).
"""
vertical_domain: str | dict = "depth"
"""Optional. The vertical domain of the data.
The list of vertical domain types that can be provided is controlled and input
values are validated against a current list of them. In the following enumeration
you would use **only** the string values of the vertical domain type.
.. autoclass:: fmu.datamodels.fmu_results.enums.VerticalDomain
:members:
:exclude-members: __new__
:no-index:
:no-special-members:
A reference for the vertical scale can be provided with the
:attr:`domain_reference` value.
.. note::
If the :attr:`content` is ``"depth"`` or ``"time"`` this value will be set
accordingly.
.. warning::
Providing a dictionary as a value is deprecated.
"""
geometry: str | None = None
"""Optional. For grid properties **only** which need a reference to the 3D grid
geometry object.
The value must point to an existing file which has already been exported with
fmu-dataio, and hence has an associated metadata file. The grid name will be derived
from the grid metadata, if present, and applied as part of the grid property file
name.
.. note::
This value may replace the usage of both the :attr:`parent` value and the
``grid_model`` value in the near future.
"""
is_observation: bool = False
"""If ``True`` then data will be exported to the ``share/observations/`` directory.
By default this is ``False`` which will export results to the ``share/results/``
directory.
However, if :attr:`preprocessed` is ``True``, then the export directory will be set
to ``share/preprocessed/`` irrespective of the value of :attr:`is_observation`.
"""
is_prediction: bool = True
"""Indicates if the exported data is model prediction data."""
timedata: list[str] | list[list[str]] | None = None
"""Optional. List of dates, where the dates are strings on form ``"YYYYMMDD"``.
.. code-block:: python
timedata=["20200101"],
.. code-block:: python
timedata=["20200101", "20180101"],
A maximum of two dates can be input. The oldest date will be set as ``t0`` in the
metadata and the latest date will be ``t1``.
.. note::
It is also possible to provide a label to each date by using a list of lists,
e.g. ``[["20200101", "monitor"], ["20180101", "base"]]``.
"""
unit: str | None = ""
"""Optional. The measurement unit relevant to the exported data.
For example, ``"m"`` would be set if the measurement unit is meters.
.. caution::
This value is not currently controlled by a known list but will be in the future.
"""
table_index: list[str] | None = None
"""Optional. A list of strings indicating the index columns for tabular data.
This value should be set for tabular data like Pandas data frames **only**.
Example:
.. code-block::
table_index=["ZONE", "REGION"],
This can also be applied to points or polygons objects that are exported in table
format to specify attributes that should act as index columns.
.. tip::
Index columns in tabular data refer to one or more columns that uniquely identify
each row in the dataset. They serve as a reference point for data retrieval and
manipulation, enabling simple and efficient access to specific rows.
"""
preprocessed: bool = False
"""If True, data is exported to the ``"share/preprocessed/"`` directory.
This metadata can be partially re-used in an Ert model run using the
``ExportPreprocessedData`` class.
.. note::
Most data are not preprocessed data, and as such this key shouldn't often be
used. An example of preprocessed data is seismic data.
"""
description: str | list[str] = ""
"""Optional. A multi-line description of the data either as a string or a list of
strings.
.. tip::
You do not need to set this.
"""
display_name: str | None = None
"""Optional. Set a display name for clients to use when visualizing.
.. tip::
You do not need to set this.
"""
name: str = ""
"""Optional. The name of the data object being exported.
If not set, fmu-dataio infers it from object data type. If the name is found in the
``stratigraphy`` static metadata list, the official stratigraphic name will be used.
For example, if ``"TopValysar"`` is the model name and the actual name is ``"Valysar
Top Fm."``, the latter name will be used.
.. tip::
You do not need to set this.
"""
tagname: str = ""
"""Optional. A short tag description which will be a part of the file name.
As an example, if exporting a fault polygon from a horizon named ``"TopVolantis"``,
.. code-block:: python
tagname="faultlines",
The exported filename will be ``volantis_gp_top--faultlines.csv``
.. tip::
You do not need to set this, but it may be useful for local workflows.
"""
workflow: str | dict[str, str] | None = None
"""Optional. Short string description of workflow.
.. warning::
Providing a dictionary as a value is deprecated.
.. tip::
You do not need to set this.
"""
forcefolder: str = ""
"""Optional. This value allows exporting to a non-standard directory relative to
the casepath/rootpath.
.. warning::
Using this option is generally not recommended.
This option is dependent upon the FMU context (case or realization) and the
:attr:`is_observation` boolean value.
Example:
.. code-block:: python
forcefolder="seismic",
This will replace the ``cubes/`` standard directory for ``xtgeo.Cube`` output with
``seismic/``.
.. caution::
Use with care and avoid if possible!
"""
parent: str = ""
"""Optional. This value is required for datatype ``xtgeo.GridProperty``, unless the
:attr:`geometry` value is given.
"Parent" refers to the name of the grid geometry. It will only be added in the
filename, and not as genuine metadata entry.
.. warning::
This value is a candidate for deprecation. Use :attr:`geometry` instead.
If both :attr:`parent` and :attr:`geometry` are given, the grid name derived from
the :attr:`geometry` object will have precedence.
"""
casepath: str | Path | None = None
"""Optional. Path to a case directory that contains valid case metadata
``fmu_case.yml`` in folder ``<CASE_DIR>/share/metadata/``.
.. tip::
You typically do not need to set this.
"""
# ----------------------------------------------------------------------------------
#
# Undocumented members.
#
# These are not yet deprecated, but are not encouraged for use. Convert the doc
# string to a comment to prevent it from rendering in the documentation.
#
# ----------------------------------------------------------------------------------
aggregation: bool = False
# Does not appear to have been used for anything.
fmu_context: str | None = None
# Optional string with value ``realization`` or ``case``.
#
# .. tip::
# You most likely do not need to set this. fmu-dataio infers this by itself.
#
# If not explicitly given it will be inferred based on the presence of Ert
# environment variables.
#
# If ``fmu_context="realization"`` fmu-dataio will export data per realization, and
# should be used in normal Ert forward models.
#
# If ``fmu_context="case"`` fmu-dataio will export data relative to the case
# directory. When specifying the case context the ``casepath`` must be provided
# through the ``casepath`` value.
rep_include: bool | None = None
# Optional. If True then the data object will be available in REP.
subfolder: str = ""
# Set subfolders for file output. The input should be a string representing a
# relative path of at least one additional directory.
undef_is_zero: bool = False
# Flags that NaNs should be considered as zero in aggregations.
# ----------------------------------------------------------------------------------
#
# Class variables
#
# ----------------------------------------------------------------------------------
case_folder: ClassVar[str] = "share/metadata"
polygons_fformat: ClassVar[str] = "csv"
points_fformat: ClassVar[str] = "csv"
table_fformat: ClassVar[str] = "csv"
# ----------------------------------------------------------------------------------
#
# Deprecated members.
#
# Convert the doc string to a comment, or remove it, to prevent it from rendering in
# the documentation.
#
# ----------------------------------------------------------------------------------
access_ssdl: dict = field(default_factory=dict) # deprecated
depth_reference: str | None = None # deprecated
realization: int | None = None # deprecated
reuse_metadata_rule: str | None = None # deprecated
runpath: str | Path | None = None # Deprecated. Issues warning.
verbosity: str = "DEPRECATED" # remove in version 2
grid_model: str | None = None
allow_forcefolder_absolute: ClassVar[bool] = False # deprecated
arrow_fformat: ClassVar[str | None] = None # deprecated and no effect
createfolder: ClassVar[bool] = True # deprecated
cube_fformat: ClassVar[str | None] = None # deprecated and no effect
filename_timedata_reverse: ClassVar[bool] = False # reverse order output file name
grid_fformat: ClassVar[str | None] = None # deprecated and no effect
include_ertjobs: ClassVar[bool] = False # deprecated
legacy_time_format: ClassVar[bool] = False # deprecated
meta_format: ClassVar[Literal["yaml", "json"] | None] = None # deprecated
surface_fformat: ClassVar[str | None] = None # deprecated and no effect
dict_fformat: ClassVar[str | None] = None # deprecated and no effect
table_include_index: ClassVar[bool] = False # deprecated
verifyfolder: ClassVar[bool] = True # deprecated
# ----------------------------------------------------------------------------------
#
# Stateful members.
#
# Need to store these temporarily in variables until we stop updating state of the
# class also on export and generate_metadata
#
# ----------------------------------------------------------------------------------
_initialized: bool = field(default=False, init=False, repr=False)
_cached_export_config: ExportConfig | None = field(
default=None, init=False, repr=False
)
def __post_init__(self) -> None:
    """Resolve and cache the export configuration after field assignment.

    Builds an ``ExportConfig`` from this instance, stores it as the cached
    configuration, and finally raises the ``_initialized`` flag that
    ``__setattr__`` consults before warning about attribute mutation.
    """
    logger.info("Running __post_init__ ExportData")

    resolved = ExportConfig.from_export_data(self)
    self._cached_export_config = resolved

    # Flip the flag last: every assignment above happens while the instance
    # still counts as "not initialized".
    object.__setattr__(self, "_initialized", True)

    logger.info("Ran __post_init__")
@property
def _export_config(self) -> ExportConfig:
"""Get or create the immutable ExportConfig.
Returns:
An immutable ExportConfig with all resolved values.
"""
if self._cached_export_config is None:
self._cached_export_config = ExportConfig.from_export_data(self)
return self._cached_export_config
def __setattr__(self, name: str, value: Any) -> None:
"""Catch attribute mutations and warn."""
is_initialized = getattr(self, "_initialized", False)
if is_initialized and not name.startswith("_") and name != "config":
warnings.warn(
f"Mutating ExportData.{name} after initialization is deprecated "
"and will be removed in a future version. Create a new ExportData "
"instance with the desired values instead.",
FutureWarning,
)
# Invalidate cached config when public properties change. It needs to be
# re-created with the new values.
object.__setattr__(self, "_cached_export_config", None)
object.__setattr__(self, name, value)
if name == "vertical_domain":
maybe_warnings = _check_vertical_domain_dict(value)
for warning, category in maybe_warnings:
warnings.warn(warning, category)
def _apply_deprecated_kwargs(self, kwargs: dict[str, Any]) -> None:
"""Deprecated. Updates attributes from kwargs."""
if not kwargs:
return
warnings.warn(
"In the future it will not be possible to enter following arguments "
f"inside the export() / generate_metadata() methods: {list(kwargs)}. "
"Please move them up to initialization of the ExportData instance.",
FutureWarning,
)
if "config" in kwargs:
raise ValueError("Cannot have 'config' outside instance initialization")
known_attrs = [field.name for field in fields(ExportData)]
for key, value in kwargs.items():
if key not in known_attrs:
raise ValidationError(
f"Cannot update attribute '{key}'. Not a valid attribute."
)
setattr(self, key, value)
logger.debug(f"Set attribute {key}={value}")
# Values have changed, so we need a new configuration.
self._cached_export_config = ExportConfig.from_export_data(self)
# ==================================================================================
# Public methods:
# ==================================================================================
[docs]
def export(
self,
obj: types.ExportableData,
**kwargs: Any,
) -> str:
"""Export supported data objects with metadata.
This function exports data without changing the *content* of the data. The *file
format* of the data may be determined by values set in the class.
A file containing metadata will be exported next to it. It will have the same
name as the data, but will be prefixed with a `.`. This causes the metadata to
not be visible by a standard `ls` command. The metadata is stored in a YAML
file.
.. code-block:: shell
top_volantis--depth.gri
.top_volantis--depth.gri.yml
Args:
obj: An xtgeo object, Pandas dataframe, or other supported object. A full
list of supported data types can be found in the documentation.
Returns:
str: The full path to the exported item.
Note:
Providing ``**kwargs`` is deprecated and will be removed in a later version.
"""
if "return_symlink" in kwargs:
warnings.warn(
"The return_symlink option is deprecated and can safely be removed."
)
if isinstance(obj, str | Path):
self._apply_deprecated_kwargs(kwargs)
if self._export_config.casepath is None:
raise TypeError("No 'casepath' argument provided")
_future_warning_preprocessed()
return ExportPreprocessedData(
casepath=self._export_config.casepath,
is_observation=self._export_config.is_observation,
).export(obj)
logger.info("Object type is: %s", type(obj))
self._apply_deprecated_kwargs(kwargs)
return str(
export_without_metadata(self._export_config, obj)
if self._export_config.config is None
else export_with_metadata(self._export_config, obj)
)