Skip to content

Commit 24582ef

Browse files
committed
use cache, new endpoints
1 parent 5080752 commit 24582ef

3 files changed

Lines changed: 39 additions & 46 deletions

File tree

openml/_api/resources/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,12 @@ def add_topic(self, data_id: int, topic: str) -> int: ...
124124
@abstractmethod
125125
def delete_topic(self, data_id: int, topic: str) -> int: ...
126126

127+
@abstractmethod
128+
def get_online_dataset_format(self, dataset_id: int) -> str: ...
129+
130+
@abstractmethod
131+
def get_online_dataset_arff(self, dataset_id: int) -> str | None: ...
132+
127133

128134
class TasksAPI(ResourceAPI, ABC):
129135
@abstractmethod

openml/_api/resources/datasets.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828
if TYPE_CHECKING:
2929
from requests import Response
3030

31-
import openml
32-
3331

3432
import pandas as pd
3533
import xmltodict
@@ -158,6 +156,7 @@ def delete(self, dataset_id: int) -> bool:
158156
bool
159157
True if the deletion was successful. False otherwise.
160158
"""
159+
# TODO will be updated later from the utils
161160
url_suffix = f"data/{dataset_id}"
162161
try:
163162
result_xml = self._http.delete(url_suffix)
@@ -267,7 +266,7 @@ def edit(
267266
Dataset id
268267
"""
269268
# compose data edit parameters as xml
270-
form_data = {"data_id": dataset_id} # type: openml._api_calls.DATA_TYPE
269+
form_data = {"data_id": dataset_id} # type: dict[str, str | int]
271270
xml = OrderedDict() # type: 'OrderedDict[str, OrderedDict]'
272271
xml["oml:data_edit_parameters"] = OrderedDict()
273272
xml["oml:data_edit_parameters"]["@xmlns:oml"] = "http://openml.org/openml"
@@ -290,7 +289,7 @@ def edit(
290289

291290
file_elements = {
292291
"edit_parameters": ("description.xml", xmltodict.unparse(xml)),
293-
} # type: openml._api_calls.FILE_ELEMENTS_TYPE
292+
} # type: dict[str, str | tuple[str, str]]
294293
result_xml = self._http.post("data/edit", data=form_data, files=file_elements).text
295294
result = xmltodict.parse(result_xml)
296295
dataset_id = result["oml:data_edit"]["oml:id"]
@@ -352,7 +351,7 @@ def status_update(self, dataset_id: int, status: Literal["active", "deactivated"
352351
if status not in legal_status:
353352
raise ValueError(f"Illegal status value. Legal values: {legal_status}")
354353

355-
data: openml._api_calls.DATA_TYPE = {"data_id": dataset_id, "status": status}
354+
data: dict[str, str | int] = {"data_id": dataset_id, "status": status}
356355
result_xml = self._http.post("data/status/update", data=data).text
357356
result = xmltodict.parse(result_xml)
358357
server_data_id = result["oml:data_status_update"]["oml:id"]
@@ -692,19 +691,30 @@ def download_dataset_arff(
692691
return output_file_path
693692

694693
def add_topic(self, data_id: int, topic: str) -> int:
695-
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
694+
form_data = {"data_id": data_id, "topic": topic} # type: dict[str, str | int]
696695
result_xml = self._http.post("data/topicadd", data=form_data)
697696
result = xmltodict.parse(result_xml)
698697
data_id = result["oml:data_topic"]["oml:id"]
699698
return int(data_id)
700699

701700
def delete_topic(self, data_id: int, topic: str) -> int:
702-
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
701+
form_data = {"data_id": data_id, "topic": topic} # type: dict[str, str | int]
703702
result_xml = self._http.post("data/topicdelete", data=form_data)
704703
result = xmltodict.parse(result_xml)
705704
data_id = result["oml:data_topic"]["oml:id"]
706705
return int(data_id)
707706

707+
def get_online_dataset_format(self, dataset_id: int) -> str:
708+
dataset_xml = self._http.get(f"data/{dataset_id}")
709+
# build a dict from the xml and get the format from the dataset description
710+
return xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:format"].lower() # type: ignore
711+
712+
def get_online_dataset_arff(self, dataset_id: int) -> str | None:
713+
dataset_xml = self._http.get(f"data/{dataset_id}")
714+
# build a dict from the xml.
715+
# use the url from the dataset description and return the ARFF string
716+
return str(self.download_dataset_arff(xmltodict.parse(dataset_xml)))
717+
708718

709719
class DatasetsV2(DatasetsAPI):
710720
def get(
@@ -717,7 +727,7 @@ def get(
717727
download_all_files: bool = False, # noqa: FBT002
718728
) -> OpenMLDataset:
719729
path = f"datasets/{dataset_id}"
720-
response = self._http.get(path)
730+
response = self._http.get(path, use_cache=True)
721731
json_content = response.json()
722732

723733
try:
@@ -852,7 +862,7 @@ def status_update(self, dataset_id: int, status: Literal["active", "deactivated"
852862
if status not in legal_status:
853863
raise ValueError(f"Illegal status value. Legal values: {legal_status}")
854864

855-
data: openml._api_calls.DATA_TYPE = {"dataset_id": dataset_id, "status": status}
865+
data: dict[str, str | int] = {"dataset_id": dataset_id, "status": status}
856866
result = self._http.post("datasets/status/update", json=data).json()
857867
server_data_id = result["dataset_id"]
858868
server_status = result["status"]
@@ -1114,3 +1124,14 @@ def add_topic(self, data_id: int, topic: str) -> int:
11141124

11151125
def delete_topic(self, data_id: int, topic: str) -> int:
11161126
raise NotImplementedError()
1127+
1128+
def get_online_dataset_format(self, dataset_id: int) -> str:
1129+
dataset_json = self._http.get(f"datasets/{dataset_id}").text
1130+
# build a dict from the xml and get the format from the dataset description
1131+
return dataset_json["data_set_description"]["format"].lower() # type: ignore
1132+
1133+
def get_online_dataset_arff(self, dataset_id: int) -> str | None:
1134+
dataset_json = self._http.get(f"datasets/{dataset_id}").json()
1135+
# build a dict from the xml.
1136+
# use the url from the dataset description and return the ARFF string
1137+
return str(self.download_dataset_arff(dataset_json))

openml/datasets/functions.py

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import openml._api_calls
1919
import openml.utils
20+
from openml._api import api_context
2021
from openml.exceptions import (
2122
OpenMLServerError,
2223
)
@@ -56,8 +57,6 @@ def list_qualities() -> list[str]:
5657
-------
5758
list
5859
"""
59-
from openml._api import api_context
60-
6160
return api_context.backend.datasets.list_qualities()
6261

6362

@@ -111,8 +110,6 @@ def list_datasets(
111110
If qualities are calculated for the dataset, some of
112111
these are also included as columns.
113112
"""
114-
from openml._api import api_context
115-
116113
listing_call = partial(
117114
api_context.backend.datasets.list,
118115
data_id=data_id,
@@ -375,8 +372,6 @@ def get_dataset(
375372
if did_cache_dir.exists():
376373
_remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME, did_cache_dir)
377374

378-
from openml._api import api_context
379-
380375
return api_context.backend.datasets.get(
381376
dataset_id,
382377
download_data,
@@ -659,8 +654,6 @@ def status_update(data_id: int, status: Literal["active", "deactivated"]) -> Non
659654
status : str,
660655
'active' or 'deactivated'
661656
"""
662-
from openml._api import api_context
663-
664657
legal_status = {"active", "deactivated"}
665658
if status not in legal_status:
666659
raise ValueError(f"Illegal status value. Legal values: {legal_status}")
@@ -739,8 +732,6 @@ def edit_dataset(
739732
-------
740733
Dataset id
741734
"""
742-
from openml._api import api_context
743-
744735
if not isinstance(data_id, int):
745736
raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")
746737

@@ -789,8 +780,6 @@ def fork_dataset(data_id: int) -> int:
789780
Dataset id of the forked dataset
790781
791782
"""
792-
from openml._api import api_context
793-
794783
return api_context.backend.datasets.fork(dataset_id=data_id)
795784

796785

@@ -815,8 +804,6 @@ def data_feature_add_ontology(data_id: int, index: int, ontology: str) -> bool:
815804
-------
816805
True or throws an OpenML server exception
817806
"""
818-
from openml._api import api_context
819-
820807
return api_context.backend.datasets.feature_add_ontology(data_id, index, ontology)
821808

822809

@@ -840,8 +827,6 @@ def data_feature_remove_ontology(data_id: int, index: int, ontology: str) -> boo
840827
-------
841828
True or throws an OpenML server exception
842829
"""
843-
from openml._api import api_context
844-
845830
return api_context.backend.datasets.feature_remove_ontology(data_id, index, ontology)
846831

847832

@@ -862,8 +847,6 @@ def _topic_add_dataset(data_id: int, topic: str) -> int:
862847
-------
863848
Dataset id
864849
"""
865-
from openml._api import api_context
866-
867850
return api_context.backend.datasets.add_topic(data_id, topic)
868851

869852

@@ -884,8 +867,6 @@ def _topic_delete_dataset(data_id: int, topic: str) -> int:
884867
-------
885868
Dataset id
886869
"""
887-
from openml._api import api_context
888-
889870
return api_context.backend.datasets.delete_topic(data_id, topic)
890871

891872

@@ -966,8 +947,6 @@ def _get_dataset_parquet(
966947
output_filename : Path, optional
967948
Location of the Parquet file if successfully downloaded, None otherwise.
968949
"""
969-
from openml._api import api_context
970-
971950
return api_context.backend.datasets.download_dataset_parquet(description, download_all_files)
972951

973952

@@ -998,8 +977,6 @@ def _get_dataset_arff(
998977
output_filename : Path
999978
Location of ARFF file.
1000979
"""
1001-
from openml._api import api_context
1002-
1003980
return api_context.backend.datasets.download_dataset_arff(description)
1004981

1005982

@@ -1029,7 +1006,6 @@ def _get_dataset_features_file(
10291006
Path of the cached dataset feature file
10301007
"""
10311008
# cache directory not used here anymore
1032-
from openml._api import api_context
10331009

10341010
return api_context.backend.datasets.download_features_file(dataset_id)
10351011

@@ -1060,7 +1036,6 @@ def _get_dataset_qualities_file(
10601036
Path of the cached qualities file
10611037
"""
10621038
# cache directory not used here anymore
1063-
from openml._api import api_context
10641039

10651040
return api_context.backend.datasets.download_qualities_file(dataset_id)
10661041

@@ -1080,12 +1055,7 @@ def _get_online_dataset_arff(dataset_id: int) -> str | None:
10801055
str or None
10811056
A string representation of an ARFF file. Or None if file already exists.
10821057
"""
1083-
dataset_xml = openml._api_calls._perform_api_call(f"data/{dataset_id}", "get")
1084-
# build a dict from the xml.
1085-
# use the url from the dataset description and return the ARFF string
1086-
return openml._api_calls._download_text_file(
1087-
xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:url"],
1088-
)
1058+
return api_context.backend.datasets.get_online_dataset_arff(dataset_id)
10891059

10901060

10911061
# TODO used only in tests
@@ -1102,9 +1072,7 @@ def _get_online_dataset_format(dataset_id: int) -> str:
11021072
str
11031073
Dataset format.
11041074
"""
1105-
dataset_xml = openml._api_calls._perform_api_call(f"data/{dataset_id}", "get")
1106-
# build a dict from the xml and get the format from the dataset description
1107-
return xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:format"].lower() # type: ignore
1075+
return api_context.backend.datasets.get_online_dataset_format(dataset_id)
11081076

11091077

11101078
def delete_dataset(dataset_id: int) -> bool:
@@ -1123,6 +1091,4 @@ def delete_dataset(dataset_id: int) -> bool:
11231091
bool
11241092
True if the deletion was successful. False otherwise.
11251093
"""
1126-
from openml._api import api_context
1127-
11281094
return api_context.backend.datasets.delete(dataset_id)

0 commit comments

Comments (0)