import re
from contextlib import suppress
from typing import Generator, List, Mapping, Optional, Union

from dify_plugin.entities.model import (
    AIModelEntity,
    DefaultParameterName,
    I18nObject,
    ModelFeature,
    ParameterRule,
    ParameterType,
)
from dify_plugin.entities.model.llm import LLMResult
from dify_plugin.entities.model.message import (
    AssistantPromptMessage,
    PromptMessage,
    PromptMessageRole,
    PromptMessageTool,
    SystemPromptMessage,
)
from dify_plugin.interfaces.model.openai_compatible.llm import OAICompatLargeLanguageModel


class OpenAILargeLanguageModel(OAICompatLargeLanguageModel):
    # Pre-compiled regex for better performance: strips a leading <think>...</think> block.
    _THINK_PATTERN = re.compile(r"^<think>.*?</think>\s*", re.DOTALL)

    def get_customizable_model_schema(
        self, model: str, credentials: Mapping | dict
    ) -> AIModelEntity:
        entity = super().get_customizable_model_schema(model, credentials)

        agent_though_support = credentials.get("agent_though_support", "not_supported")
        if agent_though_support == "supported":
            try:
                entity.features.index(ModelFeature.AGENT_THOUGHT)
            except ValueError:
                entity.features.append(ModelFeature.AGENT_THOUGHT)

        structured_output_support = credentials.get("structured_output_support", "not_supported")
        if structured_output_support == "supported":
            # ----
            # The following section should be enabled once a new version of `dify-plugin-sdks`
            # is released.
            # Related commit:
            # https://github.com/langgenius/dify-plugin-sdks/commit/0690573a879caf43f92494bf411f45a1835d96f6
            # ----
            # try:
            #     entity.features.index(ModelFeature.STRUCTURED_OUTPUT)
            # except ValueError:
            #     entity.features.append(ModelFeature.STRUCTURED_OUTPUT)
            entity.parameter_rules.append(
                ParameterRule(
                    name=DefaultParameterName.RESPONSE_FORMAT.value,
                    label=I18nObject(en_US="Response Format", zh_Hans="回复格式"),
                    help=I18nObject(
                        en_US="Specify the format that the model must output.",
                        zh_Hans="指定模型必须输出的格式。",
                    ),
                    type=ParameterType.STRING,
                    options=["text", "json_object", "json_schema"],
                    required=False,
                )
            )
            entity.parameter_rules.append(
                ParameterRule(
                    name=DefaultParameterName.JSON_SCHEMA.value,
                    use_template=DefaultParameterName.JSON_SCHEMA.value,
                )
            )

        if "display_name" in credentials and credentials["display_name"] != "":
            entity.label = I18nObject(
                en_US=credentials["display_name"], zh_Hans=credentials["display_name"]
            )

        entity.parameter_rules += [
            ParameterRule(
                name="enable_thinking",
                label=I18nObject(en_US="Thinking mode", zh_Hans="思考模式"),
                help=I18nObject(
                    en_US=(
                        "Whether to enable thinking mode. Applies to thinking-mode models "
                        "such as Qwen3 deployed on inference frameworks like vLLM and SGLang."
                    ),
                    zh_Hans="是否开启思考模式,适用于vLLM和SGLang等推理框架部署的多种思考模式模型,例如Qwen3。",
                ),
                type=ParameterType.BOOLEAN,
                required=False,
            )
        ]
        return entity

    @classmethod
    def _drop_analyze_channel(cls, prompt_messages: List[PromptMessage]) -> None:
        """
        Remove <think>...</think> reasoning content from assistant messages.
        Uses early-exit checks and a pre-compiled regex to minimize overhead.

        Args:
            prompt_messages: Prompt history to clean; modified in place.

        Returns:
            None
        """
        for p in prompt_messages:
            # Early exit conditions
            if not isinstance(p, AssistantPromptMessage):
                continue
            if not isinstance(p.content, str):
                continue
            # Quick check to avoid running the regex when it is not needed
            if not p.content.startswith("<think>"):
                continue
            # Only perform regex substitution when necessary
            new_content = cls._THINK_PATTERN.sub("", p.content, count=1)
            # Only update if changed
            if new_content != p.content:
                p.content = new_content

    def _invoke(
        self,
        model: str,
        credentials: dict,
        prompt_messages: list[PromptMessage],
        model_parameters: dict,
        tools: Optional[list[PromptMessageTool]] = None,
        stop: Optional[list[str]] = None,
        stream: bool = True,
        user: Optional[str] = None,
    ) -> Union[LLMResult, Generator]:
        # Compatibility adapter for Dify's 'json_schema' structured output mode.
        # The base class does not natively handle the 'json_schema' parameter, so this block
        # translates it into a standard OpenAI-compatible request by injecting the JSON schema
        # directly into the system prompt to guide the model. This ensures models like gpt-4o
        # produce the correct structured output.
        if model_parameters.get("response_format") == "json_schema":
            # Use .get() instead of .pop() for safety
            json_schema_str = model_parameters.get("json_schema")
            if json_schema_str:
                structured_output_prompt = (
                    "Your response must be a JSON object that validates against the following "
                    "JSON schema, and nothing else.\n"
                    f"JSON Schema: ```json\n{json_schema_str}\n```"
                )
                existing_system_prompt = next(
                    (p for p in prompt_messages if p.role == PromptMessageRole.SYSTEM), None
                )
                if existing_system_prompt:
                    existing_system_prompt.content = (
                        structured_output_prompt + "\n\n" + existing_system_prompt.content
                    )
                else:
                    prompt_messages.insert(
                        0, SystemPromptMessage(content=structured_output_prompt)
                    )

        # Translate the custom 'enable_thinking' parameter into chat_template_kwargs,
        # which inference frameworks such as vLLM and SGLang understand.
        enable_thinking = model_parameters.pop("enable_thinking", None)
        if enable_thinking is not None:
            model_parameters["chat_template_kwargs"] = {"enable_thinking": bool(enable_thinking)}

        # Remove thinking content from earlier assistant messages; suppress any errors so a
        # malformed history never blocks the request.
        with suppress(Exception):
            self._drop_analyze_channel(prompt_messages)

        return super()._invoke(
            model, credentials, prompt_messages, model_parameters, tools, stop, stream, user
        )