Skip to content

Commit 7959de2

Browse files
authored
Merge pull request #24 from AgentOps-AI/23-update-pricing-to-decimals
23 update pricing to decimals
2 parents 9316192 + b22a625 commit 7959de2

11 files changed

Lines changed: 1655 additions & 783 deletions

MANIFEST.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
include model_prices.yaml
1+
include model_prices.json

README.md

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ prompt_cost = calculate_prompt_cost(prompt, model)
2525
completion_cost = calculate_completion_cost(completion, model)
2626

2727
print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
28-
# 135 + 140 = 275 ($0.0000275)
29-
# Priced in TPUs (token price units), which is 1/100,000,000th of a USD.
28+
# 0.0000135 + 0.000014 = 0.0000275
3029
```
3130

3231
## Installation
@@ -58,23 +57,19 @@ completion = chat_completion.choices[0].message.content
5857
prompt_cost = calculate_prompt_cost(prompt, model)
5958
completion_cost = calculate_completion_cost(completion, model)
6059
print(f"{prompt_cost} + {completion_cost} = {prompt_cost + completion_cost}")
61-
# 1800 + 1000 = 2800 ($0.0000280)
62-
63-
from tokencost import USD_PER_TPU
64-
print(f"Cost USD: ${(prompt_cost + completion_cost)/USD_PER_TPU}")
65-
# $2.8e-05
60+
# 0.0000180 + 0.000010 = 0.0000280
6661
```
6762

6863
**Calculating cost using string prompts instead of messages:**
6964
```python
70-
from tokencost import calculate_prompt_cost, USD_PER_TPU
65+
from tokencost import calculate_prompt_cost
7166

7267
prompt_string = "Hello world"
7368
response = "How may I assist you today?"
7469
model= "gpt-3.5-turbo"
7570

7671
prompt_cost = calculate_prompt_cost(prompt_string, model)
77-
print(f"Cost: ${prompt_cost/USD_PER_TPU}")
72+
print(f"Cost: ${prompt_cost}")
7873
# Cost: $0.0000030
7974
```
8075

@@ -95,7 +90,11 @@ print(count_string_tokens(prompt="Hello world", model="gpt-3.5-turbo"))
9590
```
9691

9792
## Cost table
98-
Units denominated in TPUs (Token Price Units = 1/10,000,000 USD). All prices can be located in `model_prices.yaml`.
93+
Units denominated in USD. All prices can be located in `model_prices.json`.
94+
95+
96+
* Prices last updated Jan 30, 2024 from: https://openai.com/pricing and https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json
97+
9998

10099
| Model Name | Prompt Cost (USD) | Completion Cost (USD) | Max Prompt Tokens |
101100
| --- | --- | --- | --- |

pyproject.toml

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ build-backend = "setuptools.build_meta"
66
include-package-data = true
77

88
[tool.setuptools.package-data]
9-
tokencost = ["model_prices.yaml"]
9+
tokencost = ["model_prices.json"]
1010

1111
[project]
1212
name = "tokencost"
13-
version = "0.0.6"
13+
version = "0.1.0"
1414
authors = [
1515
{ name = "Trisha Pan", email = "trishaepan@gmail.com" },
1616
{ name = "Alex Reibman", email = "areibman@gmail.com" },
@@ -24,8 +24,7 @@ classifiers = [
2424
"Operating System :: OS Independent",
2525
]
2626
dependencies = [
27-
"tiktoken>=0.5.2",
28-
"pyyaml>=6.0.1"
27+
"tiktoken>=0.5.2"
2928
]
3029

3130
[project.optional-dependencies]

tests/test_costs.py

Lines changed: 30 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
# -*- coding: utf-8 -*-
33

44
import pytest
5+
from decimal import Decimal
56
from tokencost.costs import (
67
count_message_tokens,
78
count_string_tokens,
@@ -129,21 +130,21 @@ def test_count_string_invalid_model():
129130
@pytest.mark.parametrize(
130131
"prompt,model,expected_output",
131132
[
132-
(MESSAGES, "gpt-3.5-turbo", 2250),
133-
(MESSAGES, "gpt-3.5-turbo-0301", 2550),
134-
(MESSAGES, "gpt-3.5-turbo-0613", 2250),
135-
(MESSAGES, "gpt-3.5-turbo-16k", 4500),
136-
(MESSAGES, "gpt-3.5-turbo-16k-0613", 4500),
137-
(MESSAGES, "gpt-3.5-turbo-1106", 750),
138-
(MESSAGES, "gpt-3.5-turbo-instruct", 2250),
139-
(MESSAGES, "gpt-4", 45000),
140-
(MESSAGES, "gpt-4-0314", 45000),
141-
(MESSAGES, "gpt-4-32k", 90000),
142-
(MESSAGES, "gpt-4-32k-0314", 90000),
143-
(MESSAGES, "gpt-4-0613", 45000),
144-
(MESSAGES, "gpt-4-1106-preview", 15000),
145-
(MESSAGES, "gpt-4-vision-preview", 15000),
146-
(STRING, "text-embedding-ada-002", 40),
133+
(MESSAGES, "gpt-3.5-turbo", Decimal('0.0000225')),
134+
(MESSAGES, "gpt-3.5-turbo-0301", Decimal('0.0000255')),
135+
(MESSAGES, "gpt-3.5-turbo-0613", Decimal('0.0000225')),
136+
(MESSAGES, "gpt-3.5-turbo-16k", Decimal('0.000045')),
137+
(MESSAGES, "gpt-3.5-turbo-16k-0613", Decimal('0.000045')),
138+
(MESSAGES, "gpt-3.5-turbo-1106", Decimal('0.000015')),
139+
(MESSAGES, "gpt-3.5-turbo-instruct", Decimal('0.0000225')),
140+
(MESSAGES, "gpt-4", Decimal('0.00045')),
141+
(MESSAGES, "gpt-4-0314", Decimal('0.00045')),
142+
(MESSAGES, "gpt-4-32k", Decimal('0.00090')),
143+
(MESSAGES, "gpt-4-32k-0314", Decimal('0.00090')),
144+
(MESSAGES, "gpt-4-0613", Decimal('0.00045')),
145+
(MESSAGES, "gpt-4-1106-preview", Decimal('0.00015')),
146+
(MESSAGES, "gpt-4-vision-preview", Decimal('0.00015')),
147+
(STRING, "text-embedding-ada-002", Decimal('0.0000004')),
147148
],
148149
)
149150
def test_calculate_prompt_cost(prompt, model, expected_output):
@@ -163,20 +164,20 @@ def test_invalid_prompt_format():
163164
@pytest.mark.parametrize(
164165
"prompt,model,expected_output",
165166
[
166-
(STRING, "gpt-3.5-turbo", 800),
167-
(STRING, "gpt-3.5-turbo-0301", 800),
168-
(STRING, "gpt-3.5-turbo-0613", 800),
169-
(STRING, "gpt-3.5-turbo-16k", 1600),
170-
(STRING, "gpt-3.5-turbo-16k-0613", 1600),
171-
(STRING, "gpt-3.5-turbo-1106", 600),
172-
(STRING, "gpt-3.5-turbo-instruct", 800),
173-
(STRING, "gpt-4", 24000),
174-
(STRING, "gpt-4-0314", 24000),
175-
(STRING, "gpt-4-32k", 48000),
176-
(STRING, "gpt-4-32k-0314", 48000),
177-
(STRING, "gpt-4-0613", 24000),
178-
(STRING, "gpt-4-1106-preview", 12000),
179-
(STRING, "gpt-4-vision-preview", 12000),
167+
(STRING, "gpt-3.5-turbo", Decimal('0.000008')),
168+
(STRING, "gpt-3.5-turbo-0301", Decimal('0.000008')),
169+
(STRING, "gpt-3.5-turbo-0613", Decimal('0.000008')),
170+
(STRING, "gpt-3.5-turbo-16k", Decimal('0.000016')),
171+
(STRING, "gpt-3.5-turbo-16k-0613", Decimal('0.000016')),
172+
(STRING, "gpt-3.5-turbo-1106", Decimal('0.000008')),
173+
(STRING, "gpt-3.5-turbo-instruct", Decimal('0.000008')),
174+
(STRING, "gpt-4", Decimal('0.00024')),
175+
(STRING, "gpt-4-0314", Decimal('0.00024')),
176+
(STRING, "gpt-4-32k", Decimal('0.00048')),
177+
(STRING, "gpt-4-32k-0314", Decimal('0.00048')),
178+
(STRING, "gpt-4-0613", Decimal('0.00024')),
179+
(STRING, "gpt-4-1106-preview", Decimal('0.00012')),
180+
(STRING, "gpt-4-vision-preview", Decimal('0.00012')),
180181
(STRING, "text-embedding-ada-002", 0),
181182
],
182183
)

tests/test_llama_index_callbacks.py

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,12 @@
55
from unittest.mock import MagicMock
66

77
# Mock the calculate_prompt_cost and calculate_completion_cost functions
8-
# and the USD_PER_TPU constant
98

9+
# 4 tokens
1010
STRING = "Hello, world!"
1111

1212

13-
@pytest.fixture
14-
def mock_tokencost(monkeypatch):
15-
monkeypatch.setattr('tokencost.calculate_prompt_cost', MagicMock(return_value=100))
16-
monkeypatch.setattr('tokencost.calculate_completion_cost', MagicMock(return_value=200))
17-
monkeypatch.setattr('tokencost.USD_PER_TPU', 10)
18-
19-
# Mock the ChatMessage class
20-
21-
13+
# Mock the ChatMessage class in LlamaIndex
2214
@pytest.fixture
2315
def mock_chat_message(monkeypatch):
2416
class MockChatMessage:
@@ -34,30 +26,30 @@ def __str__(self):
3426
# Test the _calc_llm_event_cost method for prompt and completion
3527

3628

37-
def test_calc_llm_event_cost_prompt_completion(mock_tokencost, capsys):
29+
def test_calc_llm_event_cost_prompt_completion(capsys):
3830
handler = llama_index.TokenCostHandler(model='gpt-3.5-turbo')
3931
payload = {
4032
EventPayload.PROMPT: STRING,
4133
EventPayload.COMPLETION: STRING
4234
}
4335
handler._calc_llm_event_cost(payload)
4436
captured = capsys.readouterr()
45-
assert "# Prompt cost: 6e-06" in captured.out
46-
assert "# Completion: 8e-06" in captured.out
37+
assert "# Prompt cost: 0.0000060" in captured.out
38+
assert "# Completion: 0.000008" in captured.out
4739

4840
# Test the _calc_llm_event_cost method for messages and response
4941

5042

51-
def test_calc_llm_event_cost_messages_response(mock_tokencost, mock_chat_message, capsys):
43+
def test_calc_llm_event_cost_messages_response(mock_chat_message, capsys):
5244
handler = llama_index.TokenCostHandler(model='gpt-3.5-turbo')
5345
payload = {
5446
EventPayload.MESSAGES: [mock_chat_message("message 1"), mock_chat_message("message 2")],
5547
EventPayload.RESPONSE: "test response"
5648
}
5749
handler._calc_llm_event_cost(payload)
5850
captured = capsys.readouterr()
59-
assert "# Prompt cost: 1.05e-05" in captured.out
60-
assert "# Completion: 4e-06" in captured.out
51+
assert "# Prompt cost: 0.0000105" in captured.out
52+
assert "# Completion: 0.000004" in captured.out
6153

6254
# Additional tests can be written for start_trace, end_trace, on_event_start, and on_event_end
6355
# depending on the specific logic and requirements of those methods.

tokencost/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@
44
calculate_completion_cost,
55
calculate_prompt_cost,
66
)
7-
from .constants import TOKEN_COSTS, USD_PER_TPU
7+
from .constants import TOKEN_COSTS

tokencost/callbacks/llama_index.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from typing import Any, Dict, List, Optional, cast
22
from llama_index.callbacks.base_handler import BaseCallbackHandler
33
from llama_index.callbacks.schema import CBEventType, EventPayload
4-
from tokencost import calculate_prompt_cost, calculate_completion_cost, USD_PER_TPU
4+
from tokencost import calculate_prompt_cost, calculate_completion_cost
55

66

77
class TokenCostHandler(BaseCallbackHandler):
@@ -29,15 +29,15 @@ def _calc_llm_event_cost(self, payload: dict) -> None:
2929
if EventPayload.PROMPT in payload:
3030
prompt = str(payload.get(EventPayload.PROMPT))
3131
completion = str(payload.get(EventPayload.COMPLETION))
32-
prompt_cost = calculate_prompt_cost(prompt, self.model) / USD_PER_TPU
33-
completion_cost = calculate_completion_cost(completion, self.model) / USD_PER_TPU
32+
prompt_cost = calculate_prompt_cost(prompt, self.model)
33+
completion_cost = calculate_completion_cost(completion, self.model)
3434

3535
elif EventPayload.MESSAGES in payload:
3636
messages = cast(List[ChatMessage], payload.get(EventPayload.MESSAGES, []))
3737
messages_str = "\n".join([str(x) for x in messages])
38-
prompt_cost = calculate_prompt_cost(messages_str, self.model) / USD_PER_TPU
38+
prompt_cost = calculate_prompt_cost(messages_str, self.model)
3939
response = str(payload.get(EventPayload.RESPONSE))
40-
completion_cost = calculate_completion_cost(response, self.model) / USD_PER_TPU
40+
completion_cost = calculate_completion_cost(response, self.model)
4141

4242
print(f"# Prompt cost: {prompt_cost}")
4343
print(f"# Completion: {completion_cost}")

tokencost/constants.py

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import os
2-
import yaml
2+
import json
3+
from urllib.request import urlopen
4+
35
"""
46
Prompt (aka context) tokens are based on number of words + other chars (eg spaces and punctuation) in input.
57
Completion tokens are similarly based on how long chatGPT's response is.
@@ -11,16 +13,23 @@
1113
1214
Note: When asking follow-up questions, everything above and including your follow-up question
1315
is considered a prompt (for the purpose of context) and will thus cost prompt tokens.
14-
15-
1 Token Price Unit (TPU) is defined as 1/100,000,000 of $1 (USD). 1,000,000 TPUs would equate to $0.01.
1616
"""
1717

18-
USD_PER_TPU = 100_000_000
19-
2018
# How to read TOKEN_COSTS:
21-
# Each prompt token costs __ TPUs per token.
22-
# Each completion token costs __ TPUs per token.
19+
# Each prompt token costs __ USD per token.
20+
# Each completion token costs __ USD per token.
2321
# Max prompt limit of each model is __ tokens.
2422

25-
with open(os.path.join(os.path.dirname(__file__), "model_prices.yaml"), "r") as f:
26-
TOKEN_COSTS = yaml.safe_load(f)
23+
# Fetch the latest prices using urllib.request
24+
PRICES_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
25+
26+
try:
27+
with urlopen(PRICES_URL) as response:
28+
if response.status == 200:
29+
TOKEN_COSTS = json.loads(response.read())
30+
else:
31+
raise Exception("Failed to fetch token costs, status code: {}".format(response.status))
32+
except Exception:
33+
# If fetching fails, use the local model_prices.json as a fallback
34+
with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
35+
TOKEN_COSTS = json.load(f)

tokencost/costs.py

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import tiktoken
55
from typing import Union, List, Dict
66
from .constants import TOKEN_COSTS
7+
from decimal import Decimal
78

89

910
# TODO: Add Claude support
@@ -90,27 +91,26 @@ def count_string_tokens(prompt: str, model: str) -> int:
9091
return len(encoding.encode(prompt))
9192

9293

93-
def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
94+
def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
9495
"""
95-
Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
96-
e.g. 100,000 TPUs = $0.01.
96+
Calculate the prompt's cost in USD.
9797
9898
Args:
9999
prompt (Union[List[dict], str]): List of message objects or single string prompt.
100100
model (str): The model name.
101101
102102
Returns:
103-
int: The calculated cost in TPUs.
103+
Decimal: The calculated cost in USD.
104104
105105
e.g.:
106106
>>> prompt = [{ "role": "user", "content": "Hello world"},
107107
{ "role": "assistant", "content": "How may I assist you today?"}]
108108
>>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
109-
300
109+
Decimal('0.0000300')
110110
# or
111111
>>> prompt = "Hello world"
112112
>>> calculate_prompt_cost(prompt, "gpt-3.5-turbo")
113-
30
113+
Decimal('0.0000030')
114114
"""
115115
model = model.lower()
116116
if model not in TOKEN_COSTS:
@@ -129,34 +129,32 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> int:
129129
if isinstance(prompt, str)
130130
else count_message_tokens(prompt, model)
131131
)
132-
prompt_cost = TOKEN_COSTS[model]["prompt"]
132+
prompt_cost = TOKEN_COSTS[model]["input_cost_per_token"]
133+
return Decimal(str(prompt_cost)) * Decimal(prompt_tokens)
133134

134-
return prompt_cost * prompt_tokens
135135

136-
137-
def calculate_completion_cost(completion: str, model: str) -> int:
136+
def calculate_completion_cost(completion: str, model: str) -> Decimal:
138137
"""
139-
Calculate the prompt's cost in token price units (TPU). 1 TPU = $1/10,000,000.
140-
e.g. 100,000 TPUs = $0.01.
138+
Calculate the completion's cost in USD.
141139
142140
Args:
143141
completion (str): Completion string.
144142
model (str): The model name.
145143
146144
Returns:
147-
int: The calculated cost in TPUs.
145+
Decimal: The calculated cost in USD.
148146
149147
e.g.:
150148
>>> completion = "How may I assist you today?"
151149
>>> calculate_completion_cost(completion, "gpt-3.5-turbo")
152-
140
150+
Decimal('0.000014')
153151
"""
154152
if model not in TOKEN_COSTS:
155153
raise KeyError(
156154
f"""Model {model} is not implemented.
157155
Double-check your spelling, or submit an issue/PR"""
158156
)
159157
completion_tokens = count_string_tokens(completion, model)
160-
completion_cost = TOKEN_COSTS[model]["completion"]
158+
completion_cost = TOKEN_COSTS[model]["output_cost_per_token"]
161159

162-
return completion_cost * completion_tokens
160+
return Decimal(str(completion_cost)) * Decimal(completion_tokens)

0 commit comments

Comments
 (0)