Schemas

mt5cli.schemas

Canonical DataFrame schemas for MT5 market and account datasets.

DEDUP_KEYS module-attribute

# Candidate key-column sets per dataset kind (judging by the name, used for
# de-duplication). Each value is a tuple of column-name tuples. Only rates,
# ticks, history_orders, and history_deals have entries; orders and positions
# are not listed.
# NOTE(review): the ordering looks like "most specific key first, fallback
# after" — confirm against the code that consumes this mapping.
DEDUP_KEYS: dict[DataKind, tuple[tuple[str, ...], ...]] = {
    rates: (
        ("symbol", "timeframe", "time"),
        ("symbol", "time"),
    ),
    ticks: (("symbol", "time_msc"), ("symbol", "time")),
    history_orders: (
        ("ticket",),
        ("symbol", "time", "type"),
    ),
    history_deals: (
        ("ticket",),
        ("symbol", "time", "type", "entry"),
    ),
}

KNOWN_MT5_TIME_COLUMNS module-attribute

# Column names recognised as MT5 timestamp columns. The normalize_time_columns
# docstring states that numeric epochs are seconds for time, time_setup, and
# time_done, and milliseconds for the *_msc columns.
# NOTE(review): the functions on this page test membership against
# _TIME_COLUMN_NAMES rather than this constant — confirm the two stay in sync.
KNOWN_MT5_TIME_COLUMNS: Final[frozenset[str]] = frozenset({
    "time",
    "time_setup",
    "time_setup_msc",
    "time_done",
    "time_done_msc",
    "time_msc",
})

REQUIRED_COLUMNS module-attribute

# Columns that must be present for each dataset kind. validate_schema raises
# Mt5SchemaError when any of these are missing (unless the frame has no rows
# and no columns at all), and schema_columns returns these sets directly.
# Every DataKind member has an entry here.
REQUIRED_COLUMNS: dict[DataKind, frozenset[str]] = {
    rates: frozenset({
        "time",
        "open",
        "high",
        "low",
        "close",
        "tick_volume",
        "spread",
        "real_volume",
    }),
    ticks: frozenset({
        "time",
        "bid",
        "ask",
        "last",
        "volume",
        "time_msc",
        "flags",
        "volume_real",
    }),
    orders: frozenset({
        "ticket",
        "time_setup",
        "type",
        "state",
        "symbol",
        "volume_current",
        "price_open",
    }),
    positions: frozenset({
        "ticket",
        "time",
        "type",
        "symbol",
        "volume",
        "price_open",
        "price_current",
        "profit",
    }),
    history_orders: frozenset({
        "ticket",
        "time_setup",
        "type",
        "state",
        "symbol",
        "volume_initial",
        "price_open",
    }),
    history_deals: frozenset({
        "ticket",
        "order",
        "time",
        "type",
        "entry",
        "symbol",
        "volume",
        "price",
        "profit",
    }),
}

TIME_COLUMNS module-attribute

# Time columns per dataset kind, built for every DataKind member. Because `&`
# binds tighter than `|`, each value is
# (required columns that are also in _TIME_COLUMN_NAMES) unioned with whatever
# the `get(kind, frozenset())` lookup returns (an empty frozenset by default).
# NOTE(review): the receiver of `get(...)` is not shown in this rendering —
# presumably a private per-kind mapping of extra time columns; confirm in the
# module source.
TIME_COLUMNS: dict[DataKind, frozenset[str]] = {
    kind: (
        REQUIRED_COLUMNS[kind] & _TIME_COLUMN_NAMES
        | get(kind, frozenset())
    )
    for kind in DataKind
}

__all__ module-attribute

# Public names exported by the module.
# NOTE(review): ensure_utc_columns is documented on this page but is not
# listed here — confirm whether the omission is intentional.
__all__ = [
    "DEDUP_KEYS",
    "KNOWN_MT5_TIME_COLUMNS",
    "REQUIRED_COLUMNS",
    "TIME_COLUMNS",
    "DataKind",
    "normalize_dataframe",
    "normalize_time_columns",
    "schema_columns",
    "validate_schema",
]

DataKind

Bases: StrEnum

Supported MT5 dataset kinds with canonical column contracts.

history_deals class-attribute instance-attribute

history_deals = 'history_deals'

history_orders class-attribute instance-attribute

history_orders = 'history_orders'

orders class-attribute instance-attribute

orders = 'orders'

positions class-attribute instance-attribute

positions = 'positions'

rates class-attribute instance-attribute

rates = 'rates'

ticks class-attribute instance-attribute

ticks = 'ticks'

ensure_utc_columns

ensure_utc_columns(
    frame: DataFrame, columns: Iterable[str]
) -> DataFrame

Return a copy with selected columns coerced to UTC datetimes.

Parameters:

Name Type Description Default
frame DataFrame

Source DataFrame.

required
columns Iterable[str]

Column names to coerce.

required

Returns:

Type Description
DataFrame

DataFrame copy with UTC-aware datetime columns.

Source code in mt5cli/schemas.py
def ensure_utc_columns(frame: pd.DataFrame, columns: Iterable[str]) -> pd.DataFrame:
    """Coerce the requested columns of a DataFrame copy to UTC datetimes.

    Requested names that are absent from ``frame`` are skipped. Names found in
    ``_TIME_COLUMN_NAMES`` are converted with ``_coerce_mt5_time_column``;
    every other column is passed through ``pd.to_datetime`` with ``utc=True``
    and ``errors="coerce"``.

    Args:
        frame: Input DataFrame; it is copied, never modified in place.
        columns: Names of the columns to convert.

    Returns:
        A copy of ``frame`` in which the selected columns have been converted.
    """
    result = frame.copy()
    for name in columns:
        if name in result.columns:
            values = result[name]
            if name in _TIME_COLUMN_NAMES:
                # Known MT5 time columns get the dedicated MT5 coercion.
                converted = _coerce_mt5_time_column(values, name)
            else:
                converted = pd.to_datetime(values, utc=True, errors="coerce")
            result[name] = converted
    return result

normalize_dataframe

normalize_dataframe(
    frame: DataFrame,
    kind: DataKind,
    *,
    symbol: str | None = None,
    timeframe: int | str | None = None,
    sort: bool = True,
) -> DataFrame

Normalize MT5 DataFrame columns, timestamps, and storage metadata.

Ensures UTC timestamps, optionally injects symbol / timeframe for storage-oriented datasets, validates the required columns for kind, and sorts chronologically by time (or by time_msc when time is absent).

Parameters:

Name Type Description Default
frame DataFrame

Source DataFrame from MT5 or pdmt5.

required
kind DataKind

Dataset kind guiding normalization rules.

required
symbol str | None

Optional symbol to inject when missing.

None
timeframe int | str | None

Optional timeframe integer or name to inject for rates.

None
sort bool

Whether to sort by time or time_msc when present.

True

Returns:

Type Description
DataFrame

Normalized DataFrame copy.

Source code in mt5cli/schemas.py
def normalize_dataframe(
    frame: pd.DataFrame,
    kind: DataKind,
    *,
    symbol: str | None = None,
    timeframe: int | str | None = None,
    sort: bool = True,
) -> pd.DataFrame:
    """Produce a normalized copy of an MT5 DataFrame for the given kind.

    Steps, in order: time columns are normalized via
    ``normalize_time_columns``; a ``symbol`` column is inserted at position 0
    when ``symbol`` is given and the column is absent; for
    ``DataKind.rates`` a ``timeframe`` column is inserted (after ``symbol``
    when that column exists) when ``timeframe`` is given and the column is
    absent; the result is checked with ``validate_schema``; finally the rows
    are optionally sorted and the index is reset.

    A frame with no rows and no columns is returned as a plain copy without
    any of the steps above.

    Args:
        frame: Source DataFrame from MT5 or pdmt5.
        kind: Dataset kind guiding normalization rules.
        symbol: Optional symbol to inject when the column is missing.
        timeframe: Optional timeframe integer or name to inject for rates.
            It is passed to ``parse_timeframe`` even when the column already
            exists.
        sort: When true, stable-sort by ``time`` (or by ``time_msc`` if
            ``time`` is absent) and reset the index; the index is reset even
            if neither column exists.

    Returns:
        Normalized DataFrame copy.
    """
    has_no_columns = len(frame.columns) == 0
    if frame.empty and has_no_columns:
        return frame.copy()

    result = normalize_time_columns(frame, kind)

    if symbol is not None and "symbol" not in result.columns:
        result.insert(0, "symbol", normalize_symbol(symbol))

    if kind is DataKind.rates and timeframe is not None:
        parsed_timeframe = parse_timeframe(timeframe)
        if "timeframe" not in result.columns:
            # Keep ``symbol`` first when it is present.
            position = 0
            if "symbol" in result.columns:
                position = 1
            result.insert(position, "timeframe", parsed_timeframe)

    validate_schema(result, kind)

    if not sort:
        return result

    # Prefer ``time``; fall back to ``time_msc`` only when ``time`` is absent.
    sort_column = next(
        (name for name in ("time", "time_msc") if name in result.columns),
        None,
    )
    if sort_column is not None:
        result = result.sort_values(sort_column, kind="stable")
    return result.reset_index(drop=True)

normalize_time_columns

normalize_time_columns(
    frame: DataFrame, kind: DataKind
) -> DataFrame

Coerce dataset time columns to UTC-aware datetimes when present.

Any column in KNOWN_MT5_TIME_COLUMNS that is present in frame is normalized. Numeric MT5 epoch values are interpreted as seconds for time, time_setup, and time_done, and as milliseconds for the *_msc columns.

Parameters:

Name Type Description Default
frame DataFrame

Source DataFrame from MT5 or pdmt5.

required
kind DataKind

Dataset kind (retained for API compatibility).

required

Returns:

Type Description
DataFrame

DataFrame copy with normalized time columns.

Source code in mt5cli/schemas.py
def normalize_time_columns(frame: pd.DataFrame, kind: DataKind) -> pd.DataFrame:
    """Return a copy of ``frame`` with its MT5 time columns coerced.

    Every column whose name appears in ``_TIME_COLUMN_NAMES`` is replaced by
    the result of ``_coerce_mt5_time_column``. As documented for this API,
    numeric MT5 epoch values are treated as seconds for ``time``,
    ``time_setup``, and ``time_done``, and as milliseconds for ``*_msc``
    columns, yielding UTC-aware datetimes.

    Args:
        frame: Source DataFrame from MT5 or pdmt5.
        kind: Dataset kind; accepted for API compatibility and not used.

    Returns:
        DataFrame copy with normalized time columns.
    """
    del kind
    result = frame.copy()
    # Reassigning existing columns does not change the column labels, so the
    # names to convert can be collected before any conversion happens.
    time_columns = [name for name in result.columns if name in _TIME_COLUMN_NAMES]
    for name in time_columns:
        result[name] = _coerce_mt5_time_column(result[name], name)
    return result

schema_columns

schema_columns(kind: DataKind) -> frozenset[str]

Return required column names for a dataset kind.

Parameters:

Name Type Description Default
kind DataKind

Dataset kind.

required

Returns:

Type Description
frozenset[str]

Required column names for kind.

Source code in mt5cli/schemas.py
def schema_columns(kind: DataKind) -> frozenset[str]:
    """Look up the required column names for a dataset kind.

    Args:
        kind: Dataset kind to look up.

    Returns:
        The ``REQUIRED_COLUMNS`` entry for ``kind``.
    """
    required = REQUIRED_COLUMNS[kind]
    return required

validate_schema

validate_schema(
    frame: DataFrame,
    kind: DataKind,
    *,
    extra_required: Iterable[str] | None = None,
) -> None

Validate that a DataFrame includes required columns for a dataset kind.

Parameters:

Name Type Description Default
frame DataFrame

DataFrame to validate.

required
kind DataKind

Expected dataset kind.

required
extra_required Iterable[str] | None

Additional columns that must be present (for example symbol and timeframe on stored rate history).

None

Raises:

Type Description
Mt5SchemaError

If required columns are missing.

Source code in mt5cli/schemas.py
def validate_schema(
    frame: pd.DataFrame,
    kind: DataKind,
    *,
    extra_required: Iterable[str] | None = None,
) -> None:
    """Check that ``frame`` carries every column required for ``kind``.

    A frame with no rows and no columns is accepted without any check.

    Args:
        frame: DataFrame to validate.
        kind: Expected dataset kind; selects the ``REQUIRED_COLUMNS`` entry.
        extra_required: Additional columns that must be present (for example
            ``symbol`` and ``timeframe`` on stored rate history).

    Raises:
        Mt5SchemaError: If required columns are missing. The message lists
            the missing names in sorted order.
    """
    if frame.empty and len(frame.columns) == 0:
        return

    extras = () if extra_required is None else extra_required
    expected = {*REQUIRED_COLUMNS[kind], *extras}
    absent = expected.difference(frame.columns)
    if not absent:
        return

    listing = ", ".join(sorted(absent))
    msg = f"{kind.value} schema is missing required columns: {listing}."
    raise Mt5SchemaError(msg)