Source code for data_juicer_agents.tools.plan.validate_dataset_spec.logic

# -*- coding: utf-8 -*-
"""Pure logic for validate_dataset_spec."""

from __future__ import annotations

from typing import Any, Dict

from .._shared.schema import DatasetSpec
from .._shared.dataset_spec import validate_dataset_spec_payload


def validate_dataset_spec(
    *,
    dataset_spec: Dict[str, Any],
    dataset_profile: Dict[str, Any] | None = None,
) -> Dict[str, Any]:
    """Validate a raw dataset spec and report the outcome as a plain dict.

    The raw mapping is first converted with ``DatasetSpec.from_dict`` and the
    resulting object is checked by ``validate_dataset_spec_payload``.

    Args:
        dataset_spec: Raw dataset spec mapping to parse and validate.
        dataset_profile: Optional profile mapping, forwarded unchanged to
            ``validate_dataset_spec_payload``.

    Returns:
        A dict with the keys:

        * ``"ok"`` -- ``True`` when the validator reported no errors.
        * ``"dataset_spec"`` -- the parsed spec serialized back via
          ``to_dict()``.
        * ``"validation_errors"`` -- the errors returned by the validator.
        * ``"warnings"`` -- the warnings returned by the validator.
        * ``"message"`` -- a fixed human-readable summary of the outcome.
    """
    parsed_spec = DatasetSpec.from_dict(dataset_spec)
    found_errors, found_warnings = validate_dataset_spec_payload(
        parsed_spec,
        dataset_profile=dataset_profile,
    )

    passed = len(found_errors) == 0
    spec_as_dict = parsed_spec.to_dict()
    if found_errors:
        summary = "dataset spec validation failed"
    else:
        summary = "dataset spec is valid"

    return {
        "ok": passed,
        "dataset_spec": spec_as_dict,
        "validation_errors": found_errors,
        "warnings": found_warnings,
        "message": summary,
    }
# Public names of this module: the function defined here plus
# ``validate_dataset_spec_payload``, which is imported above and re-exported.
__all__ = ["validate_dataset_spec", "validate_dataset_spec_payload"]