Skip to content

Commit 24582ef

Browse files
committed
use cache, new endpoints
1 parent 5080752 commit 24582ef

3 files changed

Lines changed: 39 additions & 46 deletions

File tree

openml/_api/resources/base.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,12 @@ def add_topic(self, data_id: int, topic: str) -> int: ...
124124
@abstractmethod
125125
def delete_topic(self, data_id: int, topic: str) -> int: ...
126126

127+
@abstractmethod
128+
def get_online_dataset_format(self, dataset_id: int) -> str: ...
129+
130+
@abstractmethod
131+
def get_online_dataset_arff(self, dataset_id: int) -> str | None: ...
132+
127133

128134
class TasksAPI(ResourceAPI, ABC):
129135
@abstractmethod

openml/_api/resources/datasets.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@
2828
if TYPE_CHECKING:
2929
from requests import Response
3030

31-
import openml
32-
3331

3432
import pandas as pd
3533
import xmltodict
@@ -158,6 +156,7 @@ def delete(self, dataset_id: int) -> bool:
158156
bool
159157
True if the deletion was successful. False otherwise.
160158
"""
159+
# TODO will be updated later from the utils
161160
url_suffix = f"data/{dataset_id}"
162161
try:
163162
result_xml = self._http.delete(url_suffix)
@@ -267,7 +266,7 @@ def edit(
267266
Dataset id
268267
"""
269268
# compose data edit parameters as xml
270-
form_data = {"data_id": dataset_id} # type: openml._api_calls.DATA_TYPE
269+
form_data = {"data_id": dataset_id} # type: dict[str, str | int]
271270
xml = OrderedDict() # type: 'OrderedDict[str, OrderedDict]'
272271
xml["oml:data_edit_parameters"] = OrderedDict()
273272
xml["oml:data_edit_parameters"]["@xmlns:oml"] = "http://openml.org/openml"
@@ -290,7 +289,7 @@ def edit(
290289

291290
file_elements = {
292291
"edit_parameters": ("description.xml", xmltodict.unparse(xml)),
293-
} # type: openml._api_calls.FILE_ELEMENTS_TYPE
292+
} # type: dict[str, str | tuple[str, str]]
294293
result_xml = self._http.post("data/edit", data=form_data, files=file_elements).text
295294
result = xmltodict.parse(result_xml)
296295
dataset_id = result["oml:data_edit"]["oml:id"]
@@ -352,7 +351,7 @@ def status_update(self, dataset_id: int, status: Literal["active", "deactivated"
352351
if status not in legal_status:
353352
raise ValueError(f"Illegal status value. Legal values: {legal_status}")
354353

355-
data: openml._api_calls.DATA_TYPE = {"data_id": dataset_id, "status": status}
354+
data: dict[str, str | int] = {"data_id": dataset_id, "status": status}
356355
result_xml = self._http.post("data/status/update", data=data).text
357356
result = xmltodict.parse(result_xml)
358357
server_data_id = result["oml:data_status_update"]["oml:id"]
@@ -692,19 +691,30 @@ def download_dataset_arff(
692691
return output_file_path
693692

694693
def add_topic(self, data_id: int, topic: str) -> int:
695-
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
694+
form_data = {"data_id": data_id, "topic": topic} # type: dict[str, str | int]
696695
result_xml = self._http.post("data/topicadd", data=form_data)
697696
result = xmltodict.parse(result_xml)
698697
data_id = result["oml:data_topic"]["oml:id"]
699698
return int(data_id)
700699

701700
def delete_topic(self, data_id: int, topic: str) -> int:
702-
form_data = {"data_id": data_id, "topic": topic} # type: openml._api_calls.DATA_TYPE
701+
form_data = {"data_id": data_id, "topic": topic} # type: dict[str, str | int]
703702
result_xml = self._http.post("data/topicdelete", data=form_data)
704703
result = xmltodict.parse(result_xml)
705704
data_id = result["oml:data_topic"]["oml:id"]
706705
return int(data_id)
707706

707+
def get_online_dataset_format(self, dataset_id: int) -> str:
708+
dataset_xml = self._http.get(f"data/{dataset_id}")
709+
# build a dict from the xml and get the format from the dataset description
710+
return xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:format"].lower() # type: ignore
711+
712+
def get_online_dataset_arff(self, dataset_id: int) -> str | None:
713+
dataset_xml = self._http.get(f"data/{dataset_id}")
714+
# build a dict from the xml.
715+
# use the url from the dataset description and return the ARFF string
716+
return str(self.download_dataset_arff(xmltodict.parse(dataset_xml)))
717+
708718

709719
class DatasetsV2(DatasetsAPI):
710720
def get(
@@ -717,7 +727,7 @@ def get(
717727
download_all_files: bool = False, # noqa: FBT002
718728
) -> OpenMLDataset:
719729
path = f"datasets/{dataset_id}"
720-
response = self._http.get(path)
730+
response = self._http.get(path, use_cache=True)
721731
json_content = response.json()
722732

723733
try:
@@ -852,7 +862,7 @@ def status_update(self, dataset_id: int, status: Literal["active", "deactivated"
852862
if status not in legal_status:
853863
raise ValueError(f"Illegal status value. Legal values: {legal_status}")
854864

855-
data: openml._api_calls.DATA_TYPE = {"dataset_id": dataset_id, "status": status}
865+
data: dict[str, str | int] = {"dataset_id": dataset_id, "status": status}
856866
result = self._http.post("datasets/status/update", json=data).json()
857867
server_data_id = result["dataset_id"]
858868
server_status = result["status"]
@@ -1114,3 +1124,14 @@ def add_topic(self, data_id: int, topic: str) -> int:
11141124

11151125
def delete_topic(self, data_id: int, topic: str) -> int:
11161126
raise NotImplementedError()
1127+
1128+
def get_online_dataset_format(self, dataset_id: int) -> str:
1129+
dataset_json = self._http.get(f"datasets/{dataset_id}").text
1130+
# build a dict from the xml and get the format from the dataset description
1131+
return dataset_json["data_set_description"]["format"].lower() # type: ignore
1132+
1133+
def get_online_dataset_arff(self, dataset_id: int) -> str | None:
1134+
dataset_json = self._http.get(f"datasets/{dataset_id}").json()
1135+
# build a dict from the xml.
1136+
# use the url from the dataset description and return the ARFF string
1137+
return str(self.download_dataset_arff(dataset_json))

openml/datasets/functions.py

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import openml._api_calls
1919
import openml.utils
20+
from openml._api import api_context
2021
from openml.exceptions import (
2122
OpenMLServerError,
2223
)
@@ -56,8 +57,6 @@ def list_qualities() -> list[str]:
5657
-------
5758
list
5859
"""
59-
from openml._api import api_context
60-
6160
return api_context.backend.datasets.list_qualities()
6261

6362

@@ -111,8 +110,6 @@ def list_datasets(
111110
If qualities are calculated for the dataset, some of
112111
these are also included as columns.
113112
"""
114-
from openml._api import api_context
115-
116113
listing_call = partial(
117114
api_context.backend.datasets.list,
118115
data_id=data_id,
@@ -375,8 +372,6 @@ def get_dataset(
375372
if did_cache_dir.exists():
376373
_remove_cache_dir_for_id(DATASETS_CACHE_DIR_NAME, did_cache_dir)
377374

378-
from openml._api import api_context
379-
380375
return api_context.backend.datasets.get(
381376
dataset_id,
382377
download_data,
@@ -659,8 +654,6 @@ def status_update(data_id: int, status: Literal["active", "deactivated"]) -> Non
659654
status : str,
660655
'active' or 'deactivated'
661656
"""
662-
from openml._api import api_context
663-
664657
legal_status = {"active", "deactivated"}
665658
if status not in legal_status:
666659
raise ValueError(f"Illegal status value. Legal values: {legal_status}")
@@ -739,8 +732,6 @@ def edit_dataset(
739732
-------
740733
Dataset id
741734
"""
742-
from openml._api import api_context
743-
744735
if not isinstance(data_id, int):
745736
raise TypeError(f"`data_id` must be of type `int`, not {type(data_id)}.")
746737

@@ -789,8 +780,6 @@ def fork_dataset(data_id: int) -> int:
789780
Dataset id of the forked dataset
790781
791782
"""
792-
from openml._api import api_context
793-
794783
return api_context.backend.datasets.fork(dataset_id=data_id)
795784

796785

@@ -815,8 +804,6 @@ def data_feature_add_ontology(data_id: int, index: int, ontology: str) -> bool:
815804
-------
816805
True or throws an OpenML server exception
817806
"""
818-
from openml._api import api_context
819-
820807
return api_context.backend.datasets.feature_add_ontology(data_id, index, ontology)
821808

822809

@@ -840,8 +827,6 @@ def data_feature_remove_ontology(data_id: int, index: int, ontology: str) -> boo
840827
-------
841828
True or throws an OpenML server exception
842829
"""
843-
from openml._api import api_context
844-
845830
return api_context.backend.datasets.feature_remove_ontology(data_id, index, ontology)
846831

847832

@@ -862,8 +847,6 @@ def _topic_add_dataset(data_id: int, topic: str) -> int:
862847
-------
863848
Dataset id
864849
"""
865-
from openml._api import api_context
866-
867850
return api_context.backend.datasets.add_topic(data_id, topic)
868851

869852

@@ -884,8 +867,6 @@ def _topic_delete_dataset(data_id: int, topic: str) -> int:
884867
-------
885868
Dataset id
886869
"""
887-
from openml._api import api_context
888-
889870
return api_context.backend.datasets.delete_topic(data_id, topic)
890871

891872

@@ -966,8 +947,6 @@ def _get_dataset_parquet(
966947
output_filename : Path, optional
967948
Location of the Parquet file if successfully downloaded, None otherwise.
968949
"""
969-
from openml._api import api_context
970-
971950
return api_context.backend.datasets.download_dataset_parquet(description, download_all_files)
972951

973952

@@ -998,8 +977,6 @@ def _get_dataset_arff(
998977
output_filename : Path
999978
Location of ARFF file.
1000979
"""
1001-
from openml._api import api_context
1002-
1003980
return api_context.backend.datasets.download_dataset_arff(description)
1004981

1005982

@@ -1029,7 +1006,6 @@ def _get_dataset_features_file(
10291006
Path of the cached dataset feature file
10301007
"""
10311008
# cache directory not used here anymore
1032-
from openml._api import api_context
10331009

10341010
return api_context.backend.datasets.download_features_file(dataset_id)
10351011

@@ -1060,7 +1036,6 @@ def _get_dataset_qualities_file(
10601036
Path of the cached qualities file
10611037
"""
10621038
# cache directory not used here anymore
1063-
from openml._api import api_context
10641039

10651040
return api_context.backend.datasets.download_qualities_file(dataset_id)
10661041

@@ -1080,12 +1055,7 @@ def _get_online_dataset_arff(dataset_id: int) -> str | None:
10801055
str or None
10811056
A string representation of an ARFF file. Or None if file already exists.
10821057
"""
1083-
dataset_xml = openml._api_calls._perform_api_call(f"data/{dataset_id}", "get")
1084-
# build a dict from the xml.
1085-
# use the url from the dataset description and return the ARFF string
1086-
return openml._api_calls._download_text_file(
1087-
xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:url"],
1088-
)
1058+
return api_context.backend.datasets.get_online_dataset_arff(dataset_id)
10891059

10901060

10911061
# TODO used only in tests
@@ -1102,9 +1072,7 @@ def _get_online_dataset_format(dataset_id: int) -> str:
11021072
str
11031073
Dataset format.
11041074
"""
1105-
dataset_xml = openml._api_calls._perform_api_call(f"data/{dataset_id}", "get")
1106-
# build a dict from the xml and get the format from the dataset description
1107-
return xmltodict.parse(dataset_xml)["oml:data_set_description"]["oml:format"].lower() # type: ignore
1075+
return api_context.backend.datasets.get_online_dataset_format(dataset_id)
11081076

11091077

11101078
def delete_dataset(dataset_id: int) -> bool:
@@ -1123,6 +1091,4 @@ def delete_dataset(dataset_id: int) -> bool:
11231091
bool
11241092
True if the deletion was successful. False otherwise.
11251093
"""
1126-
from openml._api import api_context
1127-
11281094
return api_context.backend.datasets.delete(dataset_id)

0 commit comments

Comments (0)