Skip to content

Commit e5939f0

Browse files
authored
Merge pull request #499 from iiasa/enh/newclimate
Add `.tools.newclimate`
2 parents 5974ba8 + 5d56c4b commit e5939f0

9 files changed

Lines changed: 756 additions & 5 deletions

File tree

.github/CODEOWNERS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,6 @@
7474
/doc/project/sparccle.rst @adrivinca
7575
/doc/project/ssp.rst @OFR-IIASA
7676
/doc/project/uptake.rst @ywpratama
77+
78+
/message_ix_models/tools/newclimate @khaeru
79+
/message_ix_models/tests/tools/test_newclimate.py @khaeru

doc/api/data-sources.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,11 @@ These files were characterized by:
180180
.. [1] the column is sometimes labelled "UNIT", but the contents appear to be the same.
181181
182182
This source is discontinued and will not publish subsequent editions of the data.
183+
184+
.. _tools-newclimate:
185+
186+
NewClimate Institute (:mod:`.tools.newclimate`)
187+
===============================================
188+
189+
.. automodule:: message_ix_models.tools.newclimate
190+
:members:

doc/api/tools.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,8 @@ Policies (:mod:`.tools.policy`)
119119
.. automodule:: message_ix_models.tools.policy
120120
:members:
121121

122+
See also :ref:`tools-newclimate`.
123+
122124
.. _tools-wb:
123125

124126
World Bank structures (:mod:`.tools.wb`)
@@ -127,7 +129,6 @@ World Bank structures (:mod:`.tools.wb`)
127129
.. automodule:: message_ix_models.tools.wb
128130
:members:
129131

130-
131132
Tools for scenario manipulation
132133
===============================
133134

doc/whatsnew.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Next release
55
============
66

77
- Add IAMC code list :class:`~.iamc.structure.CL_SCENARIO_DIAGNOSTIC` (:pull:`501`).
8+
- New module :ref:`tools-newclimate` (:pull:`499`).
89
- Add :doc:`/api/model-bmt` (:pull:`433`).
910

1011
- Add
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import pytest
2+
3+
from message_ix_models.testing import KEY as STASH_KEY
4+
from message_ix_models.tools.newclimate import SECTOR, fetch, get, read
5+
from message_ix_models.tools.newclimate.structure import STRINGENCY
6+
7+
8+
class TestSTRINGENCY:
    """Checks on item lookup for the :class:`.STRINGENCY` enumeration."""

    def test_int(self) -> None:
        """A :class:`str` key made up only of digits resolves to a member."""
        member = STRINGENCY["1"]

        assert member == STRINGENCY._1
12+
13+
14+
@pytest.mark.parametrize(
    "version", ("2024", "2023", "2022", "2021", "2020", "2019")
)
def test_fetch(version: str) -> None:
    """:func:`fetch` returns a path to an existing file for each `version`."""
    assert fetch(version).exists()
23+
24+
25+
@pytest.mark.parametrize(
    "version, N_total, N_transport",
    (
        ("2024", 6507, 1298),
        ("2023", 6273, 1246),
        ("2022", 5883, 1203),
        # Older editions: get() is expected to raise NotImplementedError
        *(
            pytest.param(v, 1, 1, marks=pytest.mark.xfail(raises=NotImplementedError))
            for v in ("2021", "2020", "2019")
        ),
    ),
)
def test_get(version: str, N_total: int, N_transport: int) -> None:
    """:func:`get` returns the expected number of records for `version`."""
    # Data can be fetched and read
    policies = get(version)

    # Expected number of records
    assert N_total == len(policies)

    # Objects can be filtered using enumerations; keys are unique, so counting the
    # matching values is the same as counting the matching items
    transport = [p for p in policies.values() if SECTOR.Transport in p.sector]

    assert N_transport == len(transport)
49+
50+
51+
def test_read0() -> None:
    """Spot-check the parsed fields of a single record in the 2024 edition."""
    # Retrieve one entry
    policy = get("2024")["211000001"]

    # Enumerated field is parsed to a list of enum items
    expected_sector = [SECTOR.Electricity_and_heat, SECTOR.Renewables]
    assert expected_sector == policy.sector

    # Geo field contains a single entry…
    assert 1 == len(policy.geo)

    # …and the country is an object whose fields can be accessed as needed
    country = policy.country
    assert "ITA" == country.alpha_3
    assert "Italy" == country.name
65+
66+
67+
@pytest.mark.parametrize(
    "filename, N_total",
    (
        ("Canada_edits_additions0.csv", 18),
        ("Canada_edits_additions1.csv", 1),
        ("climate_policy_database_policies_2025.csv", 6507),
    ),
)
def test_read_local_data(
    pytestconfig: pytest.Config, filename: str, N_total: int
) -> None:
    """Test files in user's local data path.

    These files are not distributed with the package. When `filename` is not present
    the test is reported as *skipped*, rather than passing without checking anything.
    """
    path = pytestconfig.stash[STASH_KEY["user-local-data"]].joinpath(
        "newclimate", filename
    )

    if not path.exists():
        pytest.skip(f"Local data file not available: {path}")

    # Function runs
    result = read(path)

    # Expected number of records
    assert N_total == len(result)
Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
"""Handle data from the NewClimate Institute's Climate Policy Database (CPDB).
2+
3+
This module provides:
4+
5+
- :class:`.NewClimatePolicy`, a concrete subclass of the abstract/generic
6+
:class:`.Policy`, that reflects the data model appearing in the CPDB.
7+
8+
- Enumerations that reflect values appearing in fields of the database which appear to
9+
be enumerated (as opposed to free text):
10+
:class:`HIGH_IMPACT`,
11+
:class:`JURISDICTION`,
12+
:class:`OBJECTIVE`,
13+
:class:`SECTOR`,
14+
:class:`STATUS`,
15+
:class:`STRINGENCY`,
16+
:class:`TYPE`, and
17+
:class:`UPDATE`.
18+
19+
- A method :meth:`.NewClimatePolicy.from_csv_dict` that interprets the CSV data
20+
format in which the database is expressed.
21+
22+
- Functions to :func:`fetch` versions of the database from Zenodo, :func:`read` into
23+
collections of Python objects, or do both (:func:`get`).
24+
25+
These enable programmatic use of the information in the database. For example:
26+
27+
.. code-block:: python
28+
29+
from message_ix_models.tools.newclimate import SECTOR, get
30+
from pycountry import countries
31+
32+
# Fetch and parse the 2024 edition of the database
33+
policies = get("2024")
34+
print(len(policies)) # 6507 objects
35+
36+
# Filter the dict to a list of policy objects matching a certain sector
37+
p_transport = list(filter(lambda p: SECTOR.Transport in p.sector, policies.values()))
38+
print(len(p_transport)) # 1298 objects
39+
40+
# Filter for any policies concerning the country of Austria, or the EU
41+
match = {countries.lookup("Austria"), "European Union"}
42+
p_AUT = list(filter(lambda p: set(p.geo) & match, policies.values()))
43+
print(len(p_AUT))  # 259 objects
44+
45+
.. todo:: Extend the module:
46+
47+
- Serialize :class:`.NewClimatePolicy` objects in 1 or more formats, preferably
48+
standards-based.
49+
- :func:`fetch` versions of the database more recent than the latest Zenodo record,
50+
using the `cpdb_api package
51+
<https://github.com/https-github-com-NewClimateInstitute/CPDB-API>`_ or other code.
52+
- Convert to/from other data models.
53+
"""
54+
55+
import csv
56+
import logging
57+
from functools import cache
58+
from typing import TYPE_CHECKING
59+
60+
from .structure import (
61+
HIGH_IMPACT,
62+
JURISDICTION,
63+
OBJECTIVE,
64+
SECTOR,
65+
STATUS,
66+
STRINGENCY,
67+
TYPE,
68+
UPDATE,
69+
NewClimatePolicy,
70+
)
71+
72+
if TYPE_CHECKING:
73+
from pathlib import Path
74+
75+
__all__ = [
76+
"HIGH_IMPACT",
77+
"JURISDICTION",
78+
"NewClimatePolicy",
79+
"OBJECTIVE",
80+
"SECTOR",
81+
"STATUS",
82+
"STRINGENCY",
83+
"TYPE",
84+
"UPDATE",
85+
"read",
86+
"get",
87+
"fetch",
88+
]
89+
90+
log = logging.getLogger(__name__)
91+
92+
#: Pooch information for fetching files from the static version of the database.
93+
SOURCE = { # noqa: E501
94+
"newclimate-2024": dict(
95+
pooch_args=dict(
96+
base_url="doi:10.5281/zenodo.15432946",
97+
registry={
98+
"ClimatePolicyDatabase_v2024.csv": (
99+
"sha256:e893745bc26d225d8e91d063eb1fdbcbb5da4a51ce05d28ce5b9f51f6ef4408f"
100+
),
101+
},
102+
),
103+
),
104+
"newclimate-2023": dict(
105+
pooch_args=dict(
106+
base_url="doi:10.5281/zenodo.10869734",
107+
registry={
108+
"ClimatePolicyDatabase_v2023.xlsx": (
109+
"sha256:bdce700c6b0c2eeb7fa06584cb8523793b64ec5799d91ae65818209aaf9de682"
110+
),
111+
},
112+
),
113+
),
114+
"newclimate-2022": dict(
115+
pooch_args=dict(
116+
base_url="doi:10.5281/zenodo.7774473",
117+
registry={
118+
"ClimatePolicyDatabase_v2022.csv": (
119+
"sha256:fe431e41c4c2fb8513d6718fba6ba3bc0a1fd2c5b9016256a106b998f5f48946"
120+
),
121+
},
122+
),
123+
),
124+
"newclimate-2021": dict(
125+
pooch_args=dict(
126+
base_url="doi:10.5281/zenodo.7774471",
127+
registry={
128+
"ClimatePolicyDatabase_v2021.xlsx": (
129+
"sha256:d880c2c94c7d8da84bb9cf8d315faf7230e4965cbc679ac1783222ecfe84062a"
130+
),
131+
},
132+
),
133+
),
134+
"newclimate-2020": dict(
135+
pooch_args=dict(
136+
base_url="doi:10.5281/zenodo.7774462",
137+
registry={
138+
"ClimatePolicyDatabase_v2020.xlsx": (
139+
"sha256:08818156401200ec094985c34250ef65cea6ff5246cbbeb1d0ade317f8fdaa0c"
140+
),
141+
},
142+
),
143+
),
144+
"newclimate-2019": dict(
145+
pooch_args=dict(
146+
base_url="doi:10.5281/zenodo.7774110",
147+
registry={
148+
"ClimatePolicyDatabase _v2019.xlsx": (
149+
"sha256:c28cdd613496d503ae00bacf637fc052128e04361580110829843b4bf0235368"
150+
),
151+
},
152+
)
153+
),
154+
}
155+
156+
157+
def fetch(version: str) -> "Path":
    """Retrieve data for `version` of the Climate Policy Database from Zenodo.

    The entries of :data:`SOURCE` are merged into the shared ``pooch.SOURCE`` mapping
    on every call, then the entry keyed ``newclimate-{version}`` is looked up and
    fetched. The first item returned by ``pooch.fetch`` is the result.
    """
    from message_ix_models.util import pooch

    # Ensure sources for this module are registered
    pooch.SOURCE.update(SOURCE)

    # Look up the arguments for the requested version
    source_args = pooch.SOURCE[f"newclimate-{version}"]

    fetched = pooch.fetch(**source_args, extra_cache_path="newclimate")
    return fetched[0]
168+
169+
170+
def get(version: str) -> dict[str, NewClimatePolicy]:
    """:func:`fetch` and then :func:`read` data for `version` of the database.

    Editions distributed as Excel files are converted once to a CSV file stored next
    to the fetched file; later calls re-use that CSV file.

    Raises
    ------
    NotImplementedError
        if reading fails for `version` "2021", "2020", or "2019".
    """
    f_source = fetch(version)

    if f_source.suffix != ".xlsx":
        # Already a CSV file; read it directly
        f_read = f_source
    else:
        # Convert Excel to CSV
        import pandas as pd

        f_read = f_source.with_suffix(".csv")
        if not f_read.exists():
            log.info(f"Unpack {f_source} to {f_read}")
            pd.read_excel(f_source).to_csv(f_read, index=False)

    # - Force use of UTF-8 on macOS and Windows.
    # - The 2022 CSV file is not in UTF-8 format; use a different encoding.
    encoding = "latin-1" if version == "2022" else "utf-8"

    try:
        return read(f_read, encoding=encoding)
    except Exception as e:
        if version not in ("2021", "2020", "2019"):  # pragma: no cover
            raise
        raise NotImplementedError("Read 2021 and earlier data format") from e
196+
197+
198+
@cache
def read(path: "Path", **kwargs) -> dict[str, NewClimatePolicy]:
    """Read a CSV file into a :class:`dict` of Policy objects.

    The function is wrapped with :func:`functools.cache`: `path` and the values of
    `kwargs` must be hashable, and a repeated call with the same arguments returns the
    *same* :class:`dict` object without reading the file again.

    Parameters
    ----------
    path :
        Location of the CSV file.
    kwargs :
        Passed to :func:`open`, for instance `encoding`.

    Returns
    -------
    dict
        Keys are :attr:`.NewClimatePolicy.id`. If the file contains records with the
        same IDs, only the last appears, and a warning is logged.
    """
    with open(path, **kwargs) as f:
        policies = [NewClimatePolicy.from_csv_dict(row) for row in csv.DictReader(f)]

    # Later records replace earlier ones with the same ID
    result = {p.id: p for p in policies}

    n_duplicate = len(policies) - len(result)
    if n_duplicate:
        # Name the file in the message, so the warning can be acted on
        log.warning(f"{n_duplicate} duplicate IDs in {path}")

    return result

0 commit comments

Comments
 (0)