Skip to content

Commit 943cf3d

Browse files
rudolf-adamkovic and claude
authored and committed
Add support for the Elm programming language
Based on PR #188. Creates a language-specific Elm.dockerfile instead of modifying the main Dockerfile. Tested with claude-haiku-4-5 on HumanEval (1 completion, temperature 0.2): pass@1 = 73.1%. Co-Authored-By: Rudolf Adamkovic <rudolf@adamkovic.org> Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 9e19574 commit 943cf3d

6 files changed

Lines changed: 238 additions & 1 deletion

File tree

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
# This script translates problems from the OpenAI HumanEval dataset into Elm.
2+
import re
3+
import ast
4+
from typing import List
5+
6+
7+
class Translator:
8+
9+
stop = ["\n\n", "\n--", "\ntype", "\nmodule"]
10+
11+
def __init__(self):
12+
self.type = None
13+
14+
def translate_identifier(self, name: str) -> str:
15+
parts = name.lower().split("_")
16+
return parts[0] + "".join(p.capitalize() for p in parts[1:])
17+
18+
def file_ext(self):
19+
return "elm"
20+
21+
def translate_type(self, t):
22+
match t:
23+
case ast.Subscript(ast.Name(id), slice, _ctx):
24+
match id:
25+
case "List":
26+
inner = self.translate_type(slice)
27+
return f"List {inner}" if " " not in inner else f"List ({inner})"
28+
case "Tuple":
29+
match slice:
30+
case ast.Tuple(elts, _ctx):
31+
tys = [self.translate_type(e) for e in elts]
32+
return "(" + ", ".join(tys) + ")"
33+
case _other:
34+
raise Exception(f"Bad tuple: {slice}")
35+
case "Dict":
36+
match slice:
37+
case ast.Tuple([k, v], _ctx):
38+
kt = self.translate_type(k)
39+
vt = self.translate_type(v)
40+
return f"Dict.Dict {kt} {vt}"
41+
case _other:
42+
raise Exception(f"Bad dict: {slice}")
43+
case "Optional":
44+
inner = self.translate_type(slice)
45+
return f"Maybe {inner}" if " " not in inner else f"Maybe ({inner})"
46+
case "Union":
47+
raise Exception("Union is not supported")
48+
case other:
49+
raise Exception(f"Bad generic {other}")
50+
case ast.Name("int") | "int":
51+
return "Int"
52+
case ast.Name("float") | "float":
53+
return "Float"
54+
case ast.Name("bool"):
55+
return "Bool"
56+
case ast.Name("str") | "str":
57+
return "String"
58+
case None:
59+
raise Exception("implicitly untyped argument")
60+
case ast.Name("Any"):
61+
raise Exception("Any is not supported")
62+
case ast.Name(x):
63+
raise Exception(f"unknown name {x}")
64+
case ast.Constant(Ellipsis):
65+
raise Exception("no ellipsis")
66+
case _other:
67+
raise Exception(f"unknown annotation: {t}")
68+
69+
def translate_prompt(self, name: str, args: List[ast.arg], returns, description: str):
70+
self.type = [[arg.annotation for arg in args], returns]
71+
elm_name = self.translate_identifier(name)
72+
comment = "-- " + re.sub(r"\n(\s*)", "\n-- ", description.strip()) + "\n"
73+
try:
74+
arg_types = [self.translate_type(arg.annotation) for arg in args]
75+
ret_type = self.translate_type(returns)
76+
except Exception as e:
77+
print(e)
78+
return None
79+
type_parts = arg_types + [ret_type]
80+
type_sig = elm_name + " : " + " -> ".join(type_parts)
81+
arg_names = [arg.arg for arg in args]
82+
func_decl = elm_name + " " + " ".join(arg_names) + " ="
83+
imports = "import Platform\n"
84+
all_types = " ".join(type_parts)
85+
if "Dict.Dict" in all_types:
86+
imports += "import Dict\n"
87+
return f"module Main exposing (..)\n\n{imports}\n{comment}{type_sig}\n{func_decl}\n"
88+
89+
def test_suite_prefix_lines(self, entry_point) -> List[str]:
90+
return [
91+
"",
92+
"assert : Bool -> ()",
93+
"assert b = if b then () else Debug.todo \"assertion failed\"",
94+
"",
95+
"main : Program () () ()",
96+
"main =",
97+
" Platform.worker",
98+
" { init = \\_ ->",
99+
f" let",
100+
f" candidate = {self.translate_identifier(entry_point)}",
101+
]
102+
103+
def test_suite_suffix_lines(self) -> List[str]:
104+
return [
105+
" in",
106+
" ((), Cmd.none)",
107+
" , update = \\_ _ -> ((), Cmd.none)",
108+
" , subscriptions = \\_ -> Sub.none",
109+
" }",
110+
]
111+
112+
def deep_equality(self, left: str, right: str) -> str:
113+
return f" _ = assert ({left} == {right})"
114+
115+
def gen_literal(self, c: bool | str | int | float | None):
116+
if type(c) == bool:
117+
return str(c)
118+
if type(c) == str:
119+
escaped = c.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
120+
return f'"{escaped}"'
121+
if c is None:
122+
return "Nothing"
123+
if type(c) == int:
124+
if c < 0:
125+
return f"({repr(c)})"
126+
return repr(c)
127+
if type(c) == float:
128+
return repr(c)
129+
return repr(c)
130+
131+
def gen_var(self, v: str):
132+
return self.translate_identifier(v)
133+
134+
def gen_list(self, l: List[str]):
135+
return "[" + ", ".join(l) + "]"
136+
137+
def gen_tuple(self, t: List[str]):
138+
return "(" + ", ".join(t) + ")"
139+
140+
def gen_dict(self, keys: List[str], values: List[str]):
141+
pairs = ", ".join(f"({k}, {v})" for k, v in zip(keys, values))
142+
return f"Dict.fromList [{pairs}]"
143+
144+
def gen_call(self, func: str, args: List[str]):
145+
if func == "candidate":
146+
args = [self._coerce(arg, self.type[0][i]) for i, arg in enumerate(args)]
147+
return "(" + func + " " + " ".join(args) + ")"
148+
149+
def _coerce(self, expr: str, ann) -> str:
150+
match expr, ann:
151+
case expr, ast.Subscript(ast.Name("Optional"), _):
152+
if expr == "Nothing":
153+
return expr
154+
return f"(Just {expr})"
155+
case expr, ast.Name("float") | "float" if "." not in expr and expr not in ("Nothing",):
156+
return f"(toFloat {expr})"
157+
case _:
158+
return expr
159+
160+
def finalize(self, result, context) -> str:
161+
match context:
162+
case "lhs":
163+
return result
164+
case "rhs":
165+
return self._coerce(result, self.type[1])
166+
case _other:
167+
raise Exception("bad context to finalize")

dataset_builder/terms.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,3 +29,4 @@ Dart,dart,list,list,record,map,null,true,false
2929
Hy,hy,list,list,tuple,dictionary,None,True,False
3030
Zig,zig,slice,slice,tuple,hash map,null,true,false
3131
Tcl,tcl,list,list,list,dictionary,empty string,true,false
32+
Elm,elm,list,list,tuple,dictionary,Nothing,True,False

evaluation/Elm.dockerfile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Evaluation image for Elm programs: Ubuntu with Python, Node.js and the Elm
# compiler, plus an initialized Elm project in /etc/elm.
FROM ubuntu:22.04
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC

RUN apt-get update -yqq && apt-get install -yqq python3-tqdm curl

# Node.js (required for Elm)
# The setup script is saved to a file instead of being piped into bash: under
# the default /bin/sh a pipeline reports only the status of its last command,
# so a failed download would otherwise go unnoticed at this step.
RUN curl -fsSL https://deb.nodesource.com/setup_current.x -o /tmp/nodesource_setup.sh && \
    bash /tmp/nodesource_setup.sh && \
    rm /tmp/nodesource_setup.sh && \
    apt-get install -y nodejs

# Elm
RUN npm install -g elm
WORKDIR /etc/elm
# `elm init` asks for confirmation before creating elm.json; answer "y".
RUN echo y | elm init > /dev/null
# Build a trivial module once at image build time.
# NOTE(review): presumably this pre-fetches the project's packages so later
# builds need no network access -- confirm.
RUN printf 'module Main exposing (..)\nimport Html\nmain = Html.text ""\n' > src/Main.elm
RUN elm make src/Main.elm --output=/dev/null

COPY src /code
WORKDIR /code
ENTRYPOINT ["python3", "main.py"]

evaluation/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
DOCKER_EXEC=podman
22

3-
build: Dockerfile Adb.dockerfile Hy.dockerfile Zig.dockerfile Tcl.dockerfile
3+
build: Dockerfile Adb.dockerfile Hy.dockerfile Zig.dockerfile Tcl.dockerfile Elm.dockerfile
44
${DOCKER_EXEC} build -t multipl-e-evaluation .
55
${DOCKER_EXEC} build -t ghcr.io/nuprl/multipl-e:adb -f Adb.dockerfile .
66
${DOCKER_EXEC} build -t ghcr.io/nuprl/multipl-e:hy -f Hy.dockerfile .
77
${DOCKER_EXEC} build -t ghcr.io/nuprl/multipl-e:zig -f Zig.dockerfile .
88
${DOCKER_EXEC} build -t ghcr.io/nuprl/multipl-e:tcl -f Tcl.dockerfile .
9+
${DOCKER_EXEC} build -t ghcr.io/nuprl/multipl-e:elm -f Elm.dockerfile .
910

1011
test: build
1112
${DOCKER_EXEC} run --rm \

evaluation/src/containerized_eval.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import eval_hy
3333
import eval_zig
3434
import eval_tcl
35+
import eval_elm
3536
import tempfile
3637

3738

@@ -71,6 +72,7 @@
7172
"hy": (eval_hy.eval_script, ".hy"),
7273
"zig": (eval_zig.eval_script, ".zig"),
7374
"tcl": (eval_tcl.eval_script, ".tcl"),
75+
"elm": (eval_elm.eval_script, ".elm"),
7476
}
7577

7678
def eval_string_script(language, program):

evaluation/src/eval_elm.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import shutil
2+
import tempfile
3+
from pathlib import Path
4+
from safe_subprocess import run
5+
6+
def eval_script(path: Path):
    """Compile the Elm program at ``path`` and run it under Node.js.

    The Elm project in ``/etc/elm`` is copied to a temporary directory, the
    program is installed as its ``src/Main.elm``, compiled with ``elm make``
    and, if compilation succeeds, started with ``node``.

    Args:
        path: location of the Elm source file to evaluate.

    Returns:
        A dict with ``status``, ``exit_code``, ``stdout`` and ``stderr`` of
        the last step that ran. ``status`` is "Timeout" if either step timed
        out, "SyntaxError" if compilation failed, "Exception" if the program
        exited with a non-zero code, and "OK" otherwise.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        project_dir = Path(tmpdir) / "elm"
        shutil.copytree("/etc/elm", str(project_dir))
        shutil.copy(str(path), str(project_dir / "src" / "Main.elm"))

        output_js = project_dir / "main.js"
        result = run(
            ["elm", "make", "src/Main.elm", f"--output={output_js}"],
            cwd=str(project_dir),
            timeout_seconds=30,
        )

        if result.timeout:
            # A compiler that ran out of time says nothing about the syntax
            # of the program, so do not report it as a syntax error.
            status = "Timeout"
        elif result.exit_code != 0:
            status = "SyntaxError"
        else:
            result = run(
                ["node", "-e", "require('./main.js').Elm.Main.init()"],
                cwd=str(project_dir),
                timeout_seconds=15,
            )
            if result.timeout:
                status = "Timeout"
            elif result.exit_code == 0:
                status = "OK"
            else:
                status = "Exception"

        return {
            "status": status,
            "exit_code": result.exit_code,
            "stdout": result.stdout,
            "stderr": result.stderr,
        }

0 commit comments

Comments
 (0)