import re
from contextlib import suppress
from typing import Generator, Mapping, Optional, Union

from dify_plugin.entities.model import (
    AIModelEntity,
    DefaultParameterName,
    I18nObject,
    ModelFeature,
    ParameterRule,
    ParameterType,
)
from dify_plugin.entities.model.llm import LLMResult
from dify_plugin.entities.model.message import (
    AssistantPromptMessage,
    PromptMessage,
    PromptMessageRole,
    PromptMessageTool,
    SystemPromptMessage,
)
from dify_plugin.interfaces.model.openai_compatible.llm import OAICompatLargeLanguageModel


class OpenAILargeLanguageModel(OAICompatLargeLanguageModel):
    # Pre-compiled regex (compiled once for performance) that matches a leading
    # <think>...</think> block, plus any trailing whitespace, at the start of a message.
    _THINK_PATTERN = re.compile(r"^<think>.*?</think>\s*", re.DOTALL)
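    # Illustration (hypothetical content): applied to
    #   "<think>step-by-step reasoning</think>\nFinal answer"
    # the pattern strips everything through "</think>" and the trailing
    # whitespace, leaving "Final answer". _drop_analyze_channel below relies
    # on this to remove thinking blocks from conversation history.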

    def get_customizable_model_schema(
        self, model: str, credentials: Mapping | dict
    ) -> AIModelEntity:
        entity = super().get_customizable_model_schema(model, credentials)

        # Note: "agent_though_support" (sic) mirrors the key name used in the
        # provider's credential schema.
        agent_though_support = credentials.get("agent_though_support", "not_supported")
        if agent_though_support == "supported":
            if ModelFeature.AGENT_THOUGHT not in entity.features:
                entity.features.append(ModelFeature.AGENT_THOUGHT)

        structured_output_support = credentials.get("structured_output_support", "not_supported")
        if structured_output_support == "supported":
            # ----
            # The following section should be enabled after the new version of
            # `dify-plugin-sdks` is released.
            # Related commit:
            # https://github.com/langgenius/dify-plugin-sdks/commit/0690573a879caf43f92494bf411f45a1835d96f6
            # ----
            # if ModelFeature.STRUCTURED_OUTPUT not in entity.features:
            #     entity.features.append(ModelFeature.STRUCTURED_OUTPUT)
            entity.parameter_rules.append(
                ParameterRule(
                    name=DefaultParameterName.RESPONSE_FORMAT.value,
                    label=I18nObject(en_US="Response Format", zh_Hans="回复格式"),
                    help=I18nObject(
                        en_US="Specify the format that the model must output.",
                        zh_Hans="指定模型必须输出的格式。",
                    ),
                    type=ParameterType.STRING,
                    options=["text", "json_object", "json_schema"],
                    required=False,
                )
            )
            entity.parameter_rules.append(
                ParameterRule(
                    name=DefaultParameterName.JSON_SCHEMA.value,
                    use_template=DefaultParameterName.JSON_SCHEMA.value,
                )
            )
if "display_name" in credentials and credentials["display_name"] != "":
entity.label = I18nObject(
en_US=credentials["display_name"], zh_Hans=credentials["display_name"]
)
entity.parameter_rules += [
ParameterRule(
name="enable_thinking",
label=I18nObject(en_US="Thinking mode", zh_Hans="思考模式"),
help=I18nObject(
en_US="Whether to enable thinking mode, applicable to various thinking mode models deployed on reasoning frameworks such as vLLM and SGLang, for example Qwen3.",
zh_Hans="是否开启思考模式适用于vLLM和SGLang等推理框架部署的多种思考模式模型例如Qwen3。",
),
type=ParameterType.BOOLEAN,
required=False,
)
]
return entity
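
    # Example credential mapping that exercises the branches above
    # (illustrative values; the keys follow the checks in
    # get_customizable_model_schema):
    #
    #   {
    #       "agent_though_support": "supported",
    #       "structured_output_support": "supported",
    #       "display_name": "Qwen3-8B (vLLM)",
    #   }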

    @classmethod
    def _drop_analyze_channel(cls, prompt_messages: list[PromptMessage]) -> None:
        """
        Strip leading <think>...</think> blocks from assistant messages in place.

        Uses early exits and the pre-compiled regex to keep overhead minimal.

        Args:
            prompt_messages: Conversation history to sanitize; modified in place.
        """
        for p in prompt_messages:
            # Early exits: only assistant messages with string content can
            # carry a thinking block.
            if not isinstance(p, AssistantPromptMessage):
                continue
            if not isinstance(p.content, str):
                continue
            # Quick check to avoid running the regex when it cannot match
            if not p.content.startswith("<think>"):
                continue
            # Only perform the regex substitution when necessary
            new_content = cls._THINK_PATTERN.sub("", p.content, count=1)
            # Only update if changed
            if new_content != p.content:
                p.content = new_content

    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        # Compatibility adapter for Dify's "json_schema" structured-output mode.
        # The base class does not natively handle the "json_schema" parameter, so
        # this block translates it into a standard OpenAI-compatible request by
        # injecting the JSON schema directly into the system prompt, which guides
        # models such as gpt-4o to produce the expected structured output.
        if model_parameters.get("response_format") == "json_schema":
            # Use .get() instead of .pop() so the parameter remains available downstream
            json_schema_str = model_parameters.get("json_schema")
            if json_schema_str:
                structured_output_prompt = (
                    "Your response must be a JSON object that validates against the following JSON schema, and nothing else.\n"
                    f"JSON Schema: ```json\n{json_schema_str}\n```"
                )
                existing_system_prompt = next(
                    (p for p in prompt_messages if p.role == PromptMessageRole.SYSTEM), None
                )
                if existing_system_prompt:
                    existing_system_prompt.content = (
                        structured_output_prompt + "\n\n" + existing_system_prompt.content
                    )
                else:
                    prompt_messages.insert(0, SystemPromptMessage(content=structured_output_prompt))

        enable_thinking = model_parameters.pop("enable_thinking", None)
        if enable_thinking is not None:
            model_parameters["chat_template_kwargs"] = {"enable_thinking": bool(enable_thinking)}

        # Strip previously generated thinking content from assistant messages so it
        # is not re-sent with the conversation history; failures here must never
        # break the request.
        with suppress(Exception):
            self._drop_analyze_channel(prompt_messages)

        return super()._invoke(
            model, credentials, prompt_messages, model_parameters, tools, stop, stream, user
        )
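
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only: in practice the Dify plugin runtime
# instantiates this class and drives _invoke; the endpoint, key, and model
# name below are placeholders):
#
#   llm = OpenAILargeLanguageModel(...)
#   result = llm._invoke(
#       model="qwen3-8b",
#       credentials={"endpoint_url": "http://localhost:8000/v1", "api_key": "..."},
#       prompt_messages=[SystemPromptMessage(content="You are helpful.")],
#       model_parameters={"enable_thinking": False},
#       stream=False,
#   )
# ---------------------------------------------------------------------------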