Skip to content

Commit 89ad5bb

Browse files
yusun-nlp and Myhs-phz authored
[Datasets] add molecular_iq evaluation (#2431)
* add molecular_iq evaluation * fix molecular_core * fix molecular_core * update * Delete opencompass/datasets/moleculariq/__pycache__ directory * Delete opencompass/datasets/moleculariq/moleculariq_core/__pycache__ directory * Delete opencompass/datasets/moleculariq/moleculariq_core/_data/__pycache__ directory * Delete opencompass/datasets/moleculariq/moleculariq_core/_dynamic/__pycache__ directory * Delete opencompass/datasets/moleculariq/moleculariq_core/_nlp/__pycache__ directory * Delete opencompass/datasets/moleculariq/moleculariq_core/_pools/__pycache__ directory * Delete opencompass/datasets/moleculariq/moleculariq_core/rewards/__pycache__ directory * Delete opencompass/datasets/moleculariq/moleculariq_core/solver/__pycache__ directory * Add error handling for RDKit imports Wrap RDKit imports in a try-except block to handle potential import errors. * Add error handling for RDKit import Handle import error for RDKit library gracefully. * Update template_based_reaction_solver.py * Refactor SymbolicSolver class and bond patterns * Refactor RDKit import exception handling Moved exception handling for RDKit imports to the end and added initialization for SymbolicSolver class. * Add error handling for RDKit imports Handle import errors for RDKit library gracefully. --------- Co-authored-by: Myhs_phz <demarcia2014@126.com>
1 parent c7c022a commit 89ad5bb

31 files changed

Lines changed: 9930 additions & 0 deletions

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ exclude: |
1212
opencompass/datasets/matbench/|
1313
opencompass/datasets/teval/|
1414
opencompass/datasets/NPHardEval/|
15+
opencompass/datasets/moleculariq/moleculariq_core/|
1516
opencompass/datasets/TheoremQA|
1617
opencompass/datasets/subjective/mtbench101.py|
1718
docs/zh_cn/advanced_guides/compassbench_intro.md |
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
from opencompass.openicl.icl_prompt_template import PromptTemplate
2+
from opencompass.openicl.icl_retriever import ZeroRetriever
3+
from opencompass.openicl.icl_inferencer import GenInferencer
4+
from opencompass.datasets.moleculariq import MoleculariqDataset
5+
from opencompass.datasets.moleculariq import (
6+
MoleculariqCountEvaluator,
7+
MoleculariqIndexEvaluator,
8+
MoleculariqGenerationEvaluator,
9+
)
10+
11+
# Dataset reader settings: 'prompt' is the only input column (it is the
# placeholder substituted into the '{prompt}' template) and 'ground_truth'
# is the output column.
moleculariq_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='ground_truth',
)
15+
16+
# System prompt used for Table 1 of the paper (taken from Appendix B.2.2 of the paper)
17+
system_prompt = """You are an expert chemist. Answer molecular property, understanding, structural analysis and molecular generation questions precisely and accurately.
18+
19+
CRITICAL: Only content within<answer></answer> tags will be extracted. ALWAYS return JSON format.
20+
21+
KEY REQUIREMENT: Use EXACT key names from the question. Never modify or invent keys.
22+
23+
INDEXING: Atoms are indexed from 0 to the end of the SMILES string from left to right. Only heavy atoms (skip [H], include [2H]/[3H]).
24+
Examples:
25+
- "CCO": C(0), C(1), O(2)
26+
- "CC(C)O": C(0), C(1), C(2), O(3)
27+
- "CC(=O)N": C(0), C(1), O(2), N(3)
28+
29+
ABSENT FEATURES: Use 0 for counts, [] for indices. Never null or omit.
30+
31+
ALWAYS USE JSON with EXACT keys from the question:
32+
33+
Single count (key from question: "alcohol count"):<answer>"alcohol count": 2</answer>
34+
<answer>"alcohol count": 0</answer> (if absent)
35+
36+
Single index (key from question: "ketone indices"):<answer>"ketone indices": [5]</answer>
37+
<answer>"ketone indices": []</answer> (if absent)
38+
39+
Multiple properties (keys from question: "ring count", "halogen indices"):<answer>"ring count": 2, "halogen indices": [3, 7]</answer>
40+
<answer>"ring count": 0, "halogen indices": []</answer> (if all absent)
41+
42+
Constraint generation:<answer>"smiles": "CC(O)C"</answer>
43+
44+
Include ALL requested properties. Never null or omit."""
45+
46+
# questions.py: with_key_hints
47+
system_prompt_with_key_hints = """You are an expert chemist specializing in molecular understanding, property calculations, structural analysis and molecular generation.
48+
49+
CRITICAL: Only content within <answer></answer> tags will be extracted as your response. Everything outside these tags is ignored.
50+
51+
KEY REQUIREMENT: Always use the EXACT key names provided in the question. Do not modify or create your own keys.
52+
53+
IMPORTANT: If a requested feature is not present in the molecule, you MUST return 0 for counts or [] for indices. Never null or omit.
54+
55+
INDEXING RULES:
56+
- Atom indices are 0-based
57+
- Atoms are numbered from 0 in the order they appear in the SMILES string from left to right
58+
- Regular hydrogens (implicit or explicit [H]) are NOT indexed
59+
- Isotopes ([2H], [3H]) ARE indexed as they appear
60+
- Examples:
61+
- "CCO": C(0), C(1), O(2)
62+
- "CC(C)O": C(0), C(1), C(2), O(3)
63+
- "CC(=O)N": C(0), C(1), O(2), N(3)
64+
65+
For SINGLE COUNT tasks:
66+
- Return a JSON object with the EXACT key from the question
67+
- Return 0 if the feature is absent
68+
- Examples: <answer>{"alcohol_group_count": 2}</answer>
69+
- For absent features: <answer>{"alcohol_group_count": 0}</answer>
70+
71+
For SINGLE INDEX tasks:
72+
- Return a JSON object with the EXACT key from the question
73+
- Return empty list [] if the feature is absent
74+
- Examples: <answer>{"alcohol_group_indices": [3, 7]}</answer>
75+
- For absent features: <answer>{"alcohol_group_indices": []}</answer>
76+
77+
For MULTIPLE COUNT tasks with key hints:
78+
- Return a JSON object using the EXACT keys provided
79+
- Each key maps to an integer count (0 if absent)
80+
- Example: <answer>{"alcohol_group_count": 2, "ketone_group_count": 0}</answer>
81+
82+
For MULTIPLE INDEX tasks with key hints:
83+
- Return a JSON object using the EXACT keys provided
84+
- Each key maps to a list of indices (empty list [] if absent)
85+
- Example: <answer>{"alcohol_group_indices": [3, 7], "ketone_group_indices": []}</answer>
86+
87+
For CONSTRAINT GENERATION tasks:
88+
- Return a JSON object with "smiles" as the key
89+
- Example: <answer>{"smiles": "CC(=O)CC(O)C"}</answer>"""
90+
91+
# questions.py: concise
92+
system_prompt_concise = """You are an expert chemist. Answer molecular property, understanding, structural analysis and molecular generation questions precisely and accurately.
93+
94+
CRITICAL: Only content within <answer></answer> tags will be extracted. ALWAYS return JSON format.
95+
96+
KEY REQUIREMENT: Use EXACT key names from the question. Never modify or invent keys.
97+
98+
INDEXING: Atoms are indexed from 0 to the end of the SMILES string from left to right. Only heavy atoms (skip [H], include [2H]/[3H]).
99+
Examples:
100+
- "CCO": C(0), C(1), O(2)
101+
- "CC(C)O": C(0), C(1), C(2), O(3)
102+
- "CC(=O)N": C(0), C(1), O(2), N(3)
103+
104+
ABSENT FEATURES: Use 0 for counts, [] for indices. Never null or omit.
105+
106+
ALWAYS USE JSON with EXACT keys from the question:
107+
108+
Single count (key from question: "alcohol_count"):
109+
<answer>{"alcohol_count": 2}</answer>
110+
<answer>{"alcohol_count": 0}</answer> (if absent)
111+
112+
Single index (key from question: "ketone_indices"):
113+
<answer>{"ketone_indices": [5]}</answer>
114+
<answer>{"ketone_indices": []}</answer> (if absent)
115+
116+
Multiple properties (keys from question: "ring_count", "halogen_indices"):
117+
<answer>{"ring_count": 2, "halogen_indices": [3, 7]}</answer>
118+
<answer>{"ring_count": 0, "halogen_indices": []}</answer> (if all absent)
119+
120+
Constraint generation:
121+
<answer>{"smiles": "CC(O)C"}</answer>
122+
123+
Include ALL requested properties. Never null or omit."""
124+
125+
# Maps each MolecularIQ subset name to the evaluator class used to score it.
# Insertion order (count, index, generation) is also the order in which the
# dataset configs are appended to `moleculariq_datasets` below.
_evaluator_map = {
    'count': MoleculariqCountEvaluator,
    'index': MoleculariqIndexEvaluator,
    'generation': MoleculariqGenerationEvaluator,
}

moleculariq_datasets = []
# Iterate the map's own keys rather than a second hard-coded list, so a subset
# cannot be added in one place and forgotten in the other.
for _name in _evaluator_map:
    # Built inside the loop on purpose: every dataset entry gets its own
    # infer-config dict instead of sharing one mutable object.
    moleculariq_infer_cfg = dict(
        prompt_template=dict(
            type=PromptTemplate,
            template=dict(
                round=[
                    dict(role='SYSTEM', prompt=system_prompt),
                    dict(role='HUMAN', prompt='{prompt}'),
                ]
            ),
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )
    # The evaluator is the only per-subset difference in the eval config.
    moleculariq_eval_cfg = dict(
        evaluator=dict(type=_evaluator_map[_name]),
    )

    moleculariq_datasets.append(
        dict(
            abbr=f'MolecularIQ-{_name}',
            type=MoleculariqDataset,
            name=_name,
            path='opencompass/MolecularIQ',
            reader_cfg=moleculariq_reader_cfg,
            infer_cfg=moleculariq_infer_cfg,
            eval_cfg=moleculariq_eval_cfg,
        )
    )
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
from opencompass.openicl.icl_raw_prompt_template import RawPromptTemplate
2+
from opencompass.openicl.icl_retriever import ZeroRetriever
3+
from opencompass.openicl.icl_inferencer import GenInferencer
4+
from opencompass.datasets.moleculariq import MoleculariqDataset
5+
from opencompass.datasets.moleculariq import (
6+
MoleculariqCountEvaluator,
7+
MoleculariqIndexEvaluator,
8+
MoleculariqGenerationEvaluator,
9+
)
10+
11+
# Dataset reader settings: 'prompt' is the only input column (it is the
# placeholder substituted into the '{prompt}' user message) and
# 'ground_truth' is the output column.
moleculariq_reader_cfg = dict(
    input_columns=['prompt'],
    output_column='ground_truth',
)
15+
16+
# System prompt used for Table 1 of the paper (taken from Appendix B.2.2 of the paper)
17+
system_prompt = """You are an expert chemist. Answer molecular property, understanding, structural analysis and molecular generation questions precisely and accurately.
18+
19+
CRITICAL: Only content within<answer></answer> tags will be extracted. ALWAYS return JSON format.
20+
21+
KEY REQUIREMENT: Use EXACT key names from the question. Never modify or invent keys.
22+
23+
INDEXING: Atoms are indexed from 0 to the end of the SMILES string from left to right. Only heavy atoms (skip [H], include [2H]/[3H]).
24+
Examples:
25+
- "CCO": C(0), C(1), O(2)
26+
- "CC(C)O": C(0), C(1), C(2), O(3)
27+
- "CC(=O)N": C(0), C(1), O(2), N(3)
28+
29+
ABSENT FEATURES: Use 0 for counts, [] for indices. Never null or omit.
30+
31+
ALWAYS USE JSON with EXACT keys from the question:
32+
33+
Single count (key from question: "alcohol count"):<answer>"alcohol count": 2</answer>
34+
<answer>"alcohol count": 0</answer> (if absent)
35+
36+
Single index (key from question: "ketone indices"):<answer>"ketone indices": [5]</answer>
37+
<answer>"ketone indices": []</answer> (if absent)
38+
39+
Multiple properties (keys from question: "ring count", "halogen indices"):<answer>"ring count": 2, "halogen indices": [3, 7]</answer>
40+
<answer>"ring count": 0, "halogen indices": []</answer> (if all absent)
41+
42+
Constraint generation:<answer>"smiles": "CC(O)C"</answer>
43+
44+
Include ALL requested properties. Never null or omit."""
45+
46+
# questions.py: with_key_hints
47+
system_prompt_with_key_hints = """You are an expert chemist specializing in molecular understanding, property calculations, structural analysis and molecular generation.
48+
49+
CRITICAL: Only content within <answer></answer> tags will be extracted as your response. Everything outside these tags is ignored.
50+
51+
KEY REQUIREMENT: Always use the EXACT key names provided in the question. Do not modify or create your own keys.
52+
53+
IMPORTANT: If a requested feature is not present in the molecule, you MUST return 0 for counts or [] for indices. Never null or omit.
54+
55+
INDEXING RULES:
56+
- Atom indices are 0-based
57+
- Atoms are numbered from 0 in the order they appear in the SMILES string from left to right
58+
- Regular hydrogens (implicit or explicit [H]) are NOT indexed
59+
- Isotopes ([2H], [3H]) ARE indexed as they appear
60+
- Examples:
61+
- "CCO": C(0), C(1), O(2)
62+
- "CC(C)O": C(0), C(1), C(2), O(3)
63+
- "CC(=O)N": C(0), C(1), O(2), N(3)
64+
65+
For SINGLE COUNT tasks:
66+
- Return a JSON object with the EXACT key from the question
67+
- Return 0 if the feature is absent
68+
- Examples: <answer>{"alcohol_group_count": 2}</answer>
69+
- For absent features: <answer>{"alcohol_group_count": 0}</answer>
70+
71+
For SINGLE INDEX tasks:
72+
- Return a JSON object with the EXACT key from the question
73+
- Return empty list [] if the feature is absent
74+
- Examples: <answer>{"alcohol_group_indices": [3, 7]}</answer>
75+
- For absent features: <answer>{"alcohol_group_indices": []}</answer>
76+
77+
For MULTIPLE COUNT tasks with key hints:
78+
- Return a JSON object using the EXACT keys provided
79+
- Each key maps to an integer count (0 if absent)
80+
- Example: <answer>{"alcohol_group_count": 2, "ketone_group_count": 0}</answer>
81+
82+
For MULTIPLE INDEX tasks with key hints:
83+
- Return a JSON object using the EXACT keys provided
84+
- Each key maps to a list of indices (empty list [] if absent)
85+
- Example: <answer>{"alcohol_group_indices": [3, 7], "ketone_group_indices": []}</answer>
86+
87+
For CONSTRAINT GENERATION tasks:
88+
- Return a JSON object with "smiles" as the key
89+
- Example: <answer>{"smiles": "CC(=O)CC(O)C"}</answer>"""
90+
91+
# questions.py: concise
92+
system_prompt_concise = """You are an expert chemist. Answer molecular property, understanding, structural analysis and molecular generation questions precisely and accurately.
93+
94+
CRITICAL: Only content within <answer></answer> tags will be extracted. ALWAYS return JSON format.
95+
96+
KEY REQUIREMENT: Use EXACT key names from the question. Never modify or invent keys.
97+
98+
INDEXING: Atoms are indexed from 0 to the end of the SMILES string from left to right. Only heavy atoms (skip [H], include [2H]/[3H]).
99+
Examples:
100+
- "CCO": C(0), C(1), O(2)
101+
- "CC(C)O": C(0), C(1), C(2), O(3)
102+
- "CC(=O)N": C(0), C(1), O(2), N(3)
103+
104+
ABSENT FEATURES: Use 0 for counts, [] for indices. Never null or omit.
105+
106+
ALWAYS USE JSON with EXACT keys from the question:
107+
108+
Single count (key from question: "alcohol_count"):
109+
<answer>{"alcohol_count": 2}</answer>
110+
<answer>{"alcohol_count": 0}</answer> (if absent)
111+
112+
Single index (key from question: "ketone_indices"):
113+
<answer>{"ketone_indices": [5]}</answer>
114+
<answer>{"ketone_indices": []}</answer> (if absent)
115+
116+
Multiple properties (keys from question: "ring_count", "halogen_indices"):
117+
<answer>{"ring_count": 2, "halogen_indices": [3, 7]}</answer>
118+
<answer>{"ring_count": 0, "halogen_indices": []}</answer> (if all absent)
119+
120+
Constraint generation:
121+
<answer>{"smiles": "CC(O)C"}</answer>
122+
123+
Include ALL requested properties. Never null or omit."""
124+
125+
# Maps each MolecularIQ subset name to the evaluator class used to score it.
# Insertion order (count, index, generation) is also the order in which the
# dataset configs are appended to `moleculariq_datasets` below.
_evaluator_map = {
    'count': MoleculariqCountEvaluator,
    'index': MoleculariqIndexEvaluator,
    'generation': MoleculariqGenerationEvaluator,
}

moleculariq_datasets = []
# Iterate the map's own keys rather than a second hard-coded list, so a subset
# cannot be added in one place and forgotten in the other.
for _name in _evaluator_map:
    # Built inside the loop on purpose: every dataset entry gets its own
    # infer-config dict instead of sharing one mutable object.
    moleculariq_infer_cfg = dict(
        prompt_template=dict(
            type=RawPromptTemplate,
            # Chat-style message list: a system message followed by the user
            # message, whose '{prompt}' placeholder names the input column.
            messages=[
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': '{prompt}'},
            ],
        ),
        retriever=dict(type=ZeroRetriever),
        inferencer=dict(type=GenInferencer),
    )
    # The evaluator is the only per-subset difference in the eval config.
    moleculariq_eval_cfg = dict(
        evaluator=dict(type=_evaluator_map[_name]),
    )

    moleculariq_datasets.append(
        dict(
            abbr=f'MolecularIQ-{_name}',
            type=MoleculariqDataset,
            name=_name,
            path='opencompass/MolecularIQ',
            reader_cfg=moleculariq_reader_cfg,
            infer_cfg=moleculariq_infer_cfg,
            eval_cfg=moleculariq_eval_cfg,
        )
    )

opencompass/datasets/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@
123123
from .MMLUArabic import * # noqa: F401, F403
124124
from .mmmlu import * # noqa: F401, F403
125125
from .mol_instructions_chem import * # noqa: F401, F403
126+
from .moleculariq import * # noqa: F401, F403
126127
from .multipl_e import * # noqa: F401, F403
127128
from .multirc import * # noqa: F401, F403
128129
from .musr import * # noqa: F401, F403
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
from .moleculariq import MoleculariqCountEvaluator # noqa: F401, F403
2+
from .moleculariq import MoleculariqDataset # noqa: F401, F403
3+
from .moleculariq import MoleculariqGenerationEvaluator # noqa: F401, F403
4+
from .moleculariq import MoleculariqIndexEvaluator # noqa: F401, F403

0 commit comments

Comments
 (0)