Token Count

from langchain_community.callbacks import get_openai_callback
from langchain_community.llms import OpenAI

llm = OpenAI()  # any OpenAI completion model; defined here so the snippet runs standalone

with get_openai_callback() as cb:
    result = llm("Tell me a joke")
    print(cb)
Tokens Used: 6659
        Prompt Tokens: 6321
        Completion Tokens: 338
Successful Requests: 4
Total Cost (USD): $0.13318

↑ This does not support streaming (https://github.com/langchain-ai/langchain/issues/4583), so to use it with streaming, define the following:

from typing import Any, Dict, List

import tiktoken
from langchain.callbacks.base import BaseCallbackHandler
from langchain.callbacks.openai_info import (
    MODEL_COST_PER_1K_TOKENS,
    get_openai_token_cost_for_model,
)
from langchain.schema import LLMResult


class Cost:
    """Accumulates token counts and total cost across callback invocations."""

    def __init__(
        self, total_cost=0, total_tokens=0, prompt_tokens=0, completion_tokens=0
    ):
        self.total_cost = total_cost
        self.total_tokens = total_tokens
        self.prompt_tokens = prompt_tokens
        self.completion_tokens = completion_tokens

    def __str__(self):
        return (
            f"Tokens Used: {self.total_tokens}\n"
            f"\tPrompt Tokens: {self.prompt_tokens}\n"
            f"\tCompletion Tokens: {self.completion_tokens}\n"
            f"Total Cost (USD): ${self.total_cost}"
        )

    def add_prompt_tokens(self, prompt_tokens):
        self.prompt_tokens += prompt_tokens
        self.total_tokens += prompt_tokens

    def add_completion_tokens(self, completion_tokens):
        self.completion_tokens += completion_tokens
        self.total_tokens += completion_tokens


class CostCalcCallbackHandler(BaseCallbackHandler):
    """Counts tokens with tiktoken so cost can be tracked even when streaming."""

    def __init__(self, model_name, cost, *args, **kwargs):
        self.model_name = model_name
        self.encoding = tiktoken.encoding_for_model(model_name)
        self.cost = cost
        super().__init__(*args, **kwargs)

    def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any
    ) -> None:
        # Streaming responses carry no usage info, so count the prompt tokens ourselves
        for prompt in prompts:
            self.cost.add_prompt_tokens(len(self.encoding.encode(prompt)))

    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
        # Count completion tokens from the generated text, then price both sides
        for generation_list in response.generations:
            for generation in generation_list:
                self.cost.add_completion_tokens(
                    len(self.encoding.encode(generation.text))
                )
        if self.model_name in MODEL_COST_PER_1K_TOKENS:
            prompt_cost = get_openai_token_cost_for_model(
                self.model_name, self.cost.prompt_tokens
            )
            completion_cost = get_openai_token_cost_for_model(
                self.model_name, self.cost.completion_tokens, is_completion=True
            )
            self.cost.total_cost = prompt_cost + completion_cost

Define the above, and it can be used as follows.

cost = Cost()
qa = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(
        model_name=MODEL_NAME,
        streaming=True,
        # `handler` is the streaming display callback defined elsewhere in this app
        callbacks=[handler, CostCalcCallbackHandler(MODEL_NAME, cost)],
    ),
    retriever=retriever,
    return_source_documents=True,
)

# Display the assistant's message
result = qa(
    {"question": user_msg, "chat_history": st.session_state.chat_history}
)

print(result)
# Record this request's accumulated cost in the Streamlit session state
st.session_state[SESSION_KEY_FOR_COST].append(
    {"usage": "normal", "amount": cost.total_cost}
)
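
The handler also works when attached directly to a streaming ChatOpenAI, outside a chain. A minimal sketch (the model name and prompt are illustrative; note that encoding the raw prompt string with tiktoken slightly undercounts chat prompts, since the chat API's per-message formatting tokens are not included):

from langchain.chat_models import ChatOpenAI

cost = Cost()
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo",
    streaming=True,
    callbacks=[CostCalcCallbackHandler("gpt-3.5-turbo", cost)],
)

llm.predict("Tell me a joke")  # token counts accumulate in `cost` as the callbacks fire
print(cost)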