Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions openml/_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from openml._api.runtime.core import APIContext


def set_api_version(version: str, *, strict: bool = False) -> None:
    """Select the API version used by the module-level ``api_context``.

    Parameters
    ----------
    version : str
        Version identifier forwarded to ``api_context.set_version``.
    strict : bool, optional
        Forwarded unchanged to ``api_context.set_version`` (default False).
    """
    api_context.set_version(version, strict=strict)


# Module-level context object; ``set_api_version`` reconfigures this instance.
api_context = APIContext()
5 changes: 5 additions & 0 deletions openml/_api/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from __future__ import annotations

# Base URL of the v1 (XML) API.
API_V1_SERVER = "https://www.openml.org/api/v1/xml"
# NOTE(review): loopback address over plain HTTP; looks like a local
# development server. Confirm before this ships as a default.
API_V2_SERVER = "http://127.0.0.1:8001"
# NOTE(review): placeholder value; presumably to be supplied by real
# configuration. Confirm.
API_KEY = "..."
3 changes: 3 additions & 0 deletions openml/_api/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from openml._api.http.client import HTTPClient

# Names re-exported by this package.
__all__ = ["HTTPClient"]
39 changes: 39 additions & 0 deletions openml/_api/http/client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any, Mapping

import requests
from requests import Response

from openml.__version__ import __version__


class HTTPClient:
    """Minimal HTTP client bound to a single API server.

    Parameters
    ----------
    base_url : str
        Root URL of the server; request paths are appended to it.
    timeout : float, optional
        Per-request timeout in seconds passed to ``requests`` (default 10).
    """

    def __init__(self, base_url: str, *, timeout: float = 10) -> None:
        self.base_url = base_url
        self.timeout = timeout
        # Identifies this library and its version in the User-Agent header.
        self.headers: dict[str, str] = {"user-agent": f"openml-python/{__version__}"}

    def _build_url(self, path: str) -> str:
        """Join ``base_url`` and ``path`` with exactly one slash between them.

        A trailing slash on the base URL or a leading slash on the path would
        otherwise produce a double slash in the request URL.
        """
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    def get(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a GET request to ``path`` relative to the base URL.

        Parameters
        ----------
        path : str
            Resource path appended to ``base_url``.
        params : Mapping[str, Any], optional
            Query-string parameters.

        Returns
        -------
        Response
            The response object as returned by ``requests``; the status code
            is not checked here.
        """
        return requests.get(
            self._build_url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )

    def post(
        self,
        path: str,
        data: Mapping[str, Any] | None = None,
        files: Any = None,
    ) -> Response:
        """Send a POST request to ``path`` relative to the base URL.

        Parameters
        ----------
        path : str
            Resource path appended to ``base_url``.
        data : Mapping[str, Any], optional
            Form fields for the request body.
        files : Any, optional
            File payload forwarded unchanged to ``requests.post``.

        Returns
        -------
        Response
            The response object as returned by ``requests``; the status code
            is not checked here.
        """
        return requests.post(
            self._build_url(path),
            data=data,
            files=files,
            headers=self.headers,
            timeout=self.timeout,
        )

    def delete(
        self,
        path: str,
        params: Mapping[str, Any] | None = None,
    ) -> Response:
        """Send a DELETE request to ``path`` relative to the base URL.

        Parameters
        ----------
        path : str
            Resource path appended to ``base_url``.
        params : Mapping[str, Any], optional
            Query-string parameters.

        Returns
        -------
        Response
            The response object as returned by ``requests``; the status code
            is not checked here.
        """
        return requests.delete(
            self._build_url(path),
            params=params,
            headers=self.headers,
            timeout=self.timeout,
        )
Empty file added openml/_api/http/utils.py
Empty file.
4 changes: 4 additions & 0 deletions openml/_api/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from openml._api.resources.datasets import DatasetsV1, DatasetsV2
from openml._api.resources.tasks import TasksV1, TasksV2

# Names re-exported by this package: the v1 and v2 resource implementations.
__all__ = ["DatasetsV1", "DatasetsV2", "TasksV1", "TasksV2"]
31 changes: 31 additions & 0 deletions openml/_api/resources/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from requests import Response

from openml._api.http import HTTPClient
from openml.datasets.dataset import OpenMLDataset
from openml.tasks.task import OpenMLTask


class ResourceAPI:
    """Base class for API resources; stores the HTTP client they use.

    Parameters
    ----------
    http : HTTPClient
        Client kept on the instance as ``_http``.
    """

    def __init__(self, http: HTTPClient):
        # Private by convention; resource implementations send requests through it.
        self._http = http


class DatasetsAPI(ResourceAPI, ABC):
    """Abstract interface that dataset resource implementations must provide."""

    @abstractmethod
    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Return the dataset identified by ``dataset_id``.

        NOTE(review): the annotation admits a ``(dataset, response)`` tuple but,
        unlike ``TasksAPI.get``, no ``return_response`` flag selects it; confirm
        the intended signature.
        """


class TasksAPI(ResourceAPI, ABC):
    """Abstract interface that task resource implementations must provide."""

    @abstractmethod
    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Return the task identified by ``task_id``.

        Parameters
        ----------
        task_id : int
            Identifier of the task to retrieve.
        return_response : bool, optional
            Per the return annotation, implementations may return a
            ``(task, response)`` tuple instead of the bare task.

        Returns
        -------
        OpenMLTask or tuple of (OpenMLTask, Response)
        """
20 changes: 20 additions & 0 deletions openml/_api/resources/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.resources.base import DatasetsAPI

if TYPE_CHECKING:
    # Annotation-only imports. ``Response`` must be the ``requests`` response
    # type (``responses`` is an unrelated HTTP mocking package), in line with
    # the annotations used by the abstract resource interfaces.
    from requests import Response

    from openml.datasets.dataset import OpenMLDataset


class DatasetsV1(DatasetsAPI):
    """Dataset resource for the v1 API; retrieval is not implemented."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Not implemented: always raises ``NotImplementedError``."""
        raise NotImplementedError()


class DatasetsV2(DatasetsAPI):
    """Dataset resource for the v2 API; retrieval is not implemented."""

    def get(self, dataset_id: int) -> OpenMLDataset | tuple[OpenMLDataset, Response]:
        """Not implemented: always raises ``NotImplementedError``."""
        raise NotImplementedError()
128 changes: 128 additions & 0 deletions openml/_api/resources/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import xmltodict

from openml._api.resources.base import TasksAPI
from openml.tasks.task import (
OpenMLClassificationTask,
OpenMLClusteringTask,
OpenMLLearningCurveTask,
OpenMLRegressionTask,
OpenMLTask,
TaskType,
)

if TYPE_CHECKING:
from requests import Response


class TasksV1(TasksAPI):
    """Task resource that reads task descriptions as XML from the v1 API."""

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Fetch a task description and parse it into a task object.

        Parameters
        ----------
        task_id : int
            Identifier of the task; requested at ``task/{task_id}``.
        return_response : bool, optional
            If True, return ``(task, response)`` so the caller can also use
            the raw HTTP response (default False).

        Returns
        -------
        OpenMLTask or tuple of (OpenMLTask, Response)
        """
        # NOTE(review): the HTTP status is not inspected here, so a body
        # without an ``oml:task`` root fails inside the XML parsing step.
        # Confirm where server errors are meant to be handled.
        response = self._http.get(f"task/{task_id}")
        task = self._create_task_from_xml(response.text)

        if return_response:
            return task, response
        return task

    def _create_task_from_xml(self, xml: str) -> OpenMLTask:
        """Create a task given a xml string.

        Parameters
        ----------
        xml : string
            Task xml representation.

        Returns
        -------
        OpenMLTask

        Raises
        ------
        NotImplementedError
            If the task type has no corresponding task class.
        """
        dic = xmltodict.parse(xml)["oml:task"]

        # xmltodict yields a list for repeated elements but a single mapping
        # when an element occurs once, so normalise before indexing by name.
        raw_inputs = dic["oml:input"]
        if isinstance(raw_inputs, dict):
            raw_inputs = [raw_inputs]
        elif not isinstance(raw_inputs, list):
            raw_inputs = []
        inputs = {input_["@name"]: input_ for input_ in raw_inputs}

        evaluation_measures = None
        if "evaluation_measures" in inputs:
            evaluation_measures = inputs["evaluation_measures"]["oml:evaluation_measures"][
                "oml:evaluation_measure"
            ]

        task_type = TaskType(int(dic["oml:task_type_id"]))
        common_kwargs = {
            "task_id": dic["oml:task_id"],
            "task_type": dic["oml:task_type"],
            "task_type_id": task_type,
            "data_set_id": inputs["source_data"]["oml:data_set"]["oml:data_set_id"],
            "evaluation_measure": evaluation_measures,
        }

        # Clustering tasks are built from the common arguments only; the task
        # types below additionally carry an estimation procedure and a target.
        if task_type in (
            TaskType.SUPERVISED_CLASSIFICATION,
            TaskType.SUPERVISED_REGRESSION,
            TaskType.LEARNING_CURVE,
        ):
            procedure = inputs["estimation_procedure"]["oml:estimation_procedure"]

            # Same list-vs-mapping normalisation as for the inputs: a single
            # ``oml:parameter`` element is parsed as one mapping, and iterating
            # it directly would walk its keys instead of the parameter.
            parameters = procedure["oml:parameter"]
            if isinstance(parameters, dict):
                parameters = [parameters]
            estimation_parameters = {
                parameter["@name"]: parameter.get("#text", "") for parameter in parameters
            }

            common_kwargs["estimation_procedure_type"] = procedure["oml:type"]
            common_kwargs["estimation_procedure_id"] = int(procedure["oml:id"])
            common_kwargs["estimation_parameters"] = estimation_parameters
            common_kwargs["target_name"] = inputs["source_data"]["oml:data_set"][
                "oml:target_feature"
            ]
            common_kwargs["data_splits_url"] = procedure["oml:data_splits_url"]

        cls = {
            TaskType.SUPERVISED_CLASSIFICATION: OpenMLClassificationTask,
            TaskType.SUPERVISED_REGRESSION: OpenMLRegressionTask,
            TaskType.CLUSTERING: OpenMLClusteringTask,
            TaskType.LEARNING_CURVE: OpenMLLearningCurveTask,
        }.get(task_type)
        if cls is None:
            raise NotImplementedError(f"Task type {common_kwargs['task_type']} not supported.")
        return cls(**common_kwargs)  # type: ignore


class TasksV2(TasksAPI):
    """Task resource for the v2 API; retrieval is not implemented."""

    def get(
        self,
        task_id: int,
        *,
        return_response: bool = False,
    ) -> OpenMLTask | tuple[OpenMLTask, Response]:
        """Not implemented: always raises ``NotImplementedError``."""
        raise NotImplementedError()
Empty file added openml/_api/runtime/__init__.py
Empty file.
59 changes: 59 additions & 0 deletions openml/_api/runtime/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from openml._api.config import (
API_V1_SERVER,
API_V2_SERVER,
)
from openml._api.http.client import HTTPClient
from openml._api.resources import (
DatasetsV1,
DatasetsV2,
TasksV1,
TasksV2,
)

if TYPE_CHECKING:
from openml._api.resources.base import DatasetsAPI, TasksAPI


class APIBackend:
    """Bundle of the resource implementations that make up one backend.

    Parameters
    ----------
    datasets : DatasetsAPI
        Dataset resource, exposed as ``datasets``.
    tasks : TasksAPI
        Task resource, exposed as ``tasks``.
    """

    def __init__(self, *, datasets: DatasetsAPI, tasks: TasksAPI):
        self.tasks = tasks
        self.datasets = datasets


def build_backend(version: str, *, strict: bool) -> APIBackend:
    """Build the resource backend for the requested API version.

    Parameters
    ----------
    version : str
        Either ``"v1"`` or ``"v2"``.
    strict : bool
        Only consulted for ``"v2"``: when True the v2 resources are returned,
        otherwise the v1 resources are returned.

    Returns
    -------
    APIBackend

    Raises
    ------
    ValueError
        If ``version`` is not a supported API version.
    """
    # Reject typos up front instead of silently selecting some backend.
    if version not in ("v1", "v2"):
        raise ValueError(f"Unsupported API version {version!r}; expected 'v1' or 'v2'.")

    if version == "v2" and strict:
        v2_http = HTTPClient(API_V2_SERVER)
        return APIBackend(
            datasets=DatasetsV2(v2_http),
            tasks=TasksV2(v2_http),
        )

    # Both "v1" and non-strict "v2" resolve to the v1 resources.
    # NOTE(review): non-strict "v2" presumably should combine v2 with a v1
    # fallback once that is implemented. Confirm.
    v1_http = HTTPClient(API_V1_SERVER)
    return APIBackend(
        datasets=DatasetsV1(v1_http),
        tasks=TasksV1(v1_http),
    )


class APIContext:
    """Holder of the currently active API backend."""

    def __init__(self) -> None:
        # Start on the v1 backend in non-strict mode.
        self._backend = build_backend(version="v1", strict=False)

    def set_version(self, version: str, *, strict: bool = False) -> None:
        """Replace the active backend with one built for ``version``.

        Parameters
        ----------
        version : str
            Version identifier forwarded to ``build_backend``.
        strict : bool, optional
            Forwarded unchanged to ``build_backend`` (default False).
        """
        self._backend = build_backend(version, strict=strict)

    @property
    def backend(self) -> APIBackend:
        """The backend produced by the most recent version selection."""
        return self._backend
12 changes: 12 additions & 0 deletions openml/_api/runtime/fallback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from openml._api.resources.base import ResourceAPI


class FallbackProxy:
    """Pair of a primary resource and a fallback resource.

    Only stores the two resources; no delegation logic is defined here.

    Parameters
    ----------
    primary : ResourceAPI
        Stored as ``_primary``.
    fallback : ResourceAPI
        Stored as ``_fallback``.
    """

    def __init__(self, primary: ResourceAPI, fallback: ResourceAPI):
        self._primary, self._fallback = primary, fallback
14 changes: 10 additions & 4 deletions openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import openml._api_calls
import openml.utils
from openml._api import api_context
from openml.datasets import get_dataset
from openml.exceptions import OpenMLCacheException

Expand Down Expand Up @@ -444,11 +445,16 @@ def _get_task_description(task_id: int) -> OpenMLTask:
except OpenMLCacheException:
_cache_dir = openml.utils._create_cache_directory_for_id(TASKS_CACHE_DIR_NAME, task_id)
xml_file = _cache_dir / "task.xml"
task_xml = openml._api_calls._perform_api_call("task/%d" % task_id, "get")
result = api_context.backend.tasks.get(task_id, return_response=True)

with xml_file.open("w", encoding="utf8") as fh:
fh.write(task_xml)
return _create_task_from_xml(task_xml)
if isinstance(result, tuple):
task, response = result
with xml_file.open("w", encoding="utf8") as fh:
fh.write(response.text)
else:
task = result

return task


def _create_task_from_xml(xml: str) -> OpenMLTask:
Expand Down