generate - 生成模型(图、音、视)
2026年6月16日 上午8:41:40
生成模型(GenerateModel) 与 聊天模型(ChatModel)用途区别很大。GenerateModel 只能一次性生成内容,不能对话。比如:
- 通过文本,生成图片、声音、视频
- 通过图片,生成视频
- 等等(只要是一次性生成内容的场景都适用)
补充:GenerateModel 是替代之前的 ImageModel 而新设计的接口,完全兼容 ImageModel 且概念范围更广(旧接口仍可用)。
1、构建生成模型
添加配置
solon.ai.generate:
  demo:
    apiUrl: "https://api.moark.com/v1/images/generations" # 使用完整地址(而不是 api_base)
    model: "stable-diffusion-3.5-large-turbo"
构建并测试
import org.noear.solon.ai.generate.GenerateConfig;
import org.noear.solon.ai.generate.GenerateModel;
import org.noear.solon.ai.generate.GenerateResponse;
import org.noear.solon.annotation.Bean;
import org.noear.solon.annotation.Configuration;
import org.noear.solon.annotation.Inject;
import java.io.IOException;
/**
 * Demo configuration: builds a {@link GenerateModel} from the {@code demo}
 * configuration node and runs a single generation against it.
 */
@Configuration
public class DemoConfig {
    /**
     * Builds the generate model from the injected configuration.
     *
     * @param config configuration bound from the {@code solon.ai.generate.demo} node
     *               (the node that actually holds {@code apiUrl} and {@code model})
     * @return the model built from {@code config}
     */
    @Bean
    public GenerateModel build(@Inject("${solon.ai.generate.demo}") GenerateConfig config) {
        return GenerateModel.of(config).build();
    }

    /**
     * Issues one prompt and prints the URL of the returned content.
     *
     * @param generateModel the model to call
     * @throws IOException declared by the call chain
     */
    @Bean
    public void test(GenerateModel generateModel) throws IOException {
        // One-shot call: the whole result is returned at once
        GenerateResponse resp = generateModel.prompt("一只白色的小花猫").call();

        // Print the URL exposed by the response content
        System.out.println(resp.getContent().getUrl());
    }
}
2、使用选项
// Example 1: set the "size" option to "1024x1024" through the typed options setter.
generateModel.prompt("一只白色的小花猫")
.options(o -> o.size("1024x1024"))
.call();
// Example 2: set free-form options by name through optionSet(key, value).
// NOTE(review): the key names below are presumably specific to the target model/provider — confirm against its API.
generateModel.prompt("一只白色的小花猫")
.options(o -> {
o.optionSet("negative_prompt", "");
o.optionSet("sampler_name", "Euler");
o.optionSet("scheduler", "Simple");
o.optionSet("steps", 25);
o.optionSet("width", 512);
o.optionSet("height", 768);
o.optionSet("batch_size", 1);
o.optionSet("cfg_scale", 1);
o.optionSet("distilled_cfg_scale", 3.5);
o.optionSet("seed", -1);
o.optionSet("n_iter", 1);
})
.call();
3、方言适配
生成模型(GenerateModel)同样支持方言适配。框架已内置 OllamaGenerateDialect、DashscopeGenerateDialect、OpenaiGenerateDialect(默认) 三种方言(基本够用),自动支持 Ollama 提供的模型接口、Dashscope 提供的模型接口及 Openai 规范的模型接口。
也可以通过定制,实现更多的模型兼容。方言接口:
package org.noear.solon.ai.generate.dialect;
import org.noear.solon.ai.AiModelDialect;
import org.noear.solon.ai.generate.GenerateConfig;
import org.noear.solon.ai.generate.GenerateOptions;
import org.noear.solon.ai.generate.GenerateResponse;
import org.noear.solon.lang.Preview;
import java.util.Map;
/**
 * Dialect of a generate model: builds the request JSON for a model API and
 * parses its response JSON.
 */
public interface GenerateDialect extends AiModelDialect {
/**
 * Whether this dialect is the default one. Returns {@code false} unless overridden.
 */
default boolean isDefault() {
return false;
}
/**
 * Match check.
 *
 * @param config generate model configuration
 * @return presumably {@code true} when this dialect applies to {@code config} — confirm against implementations
 */
boolean matched(GenerateConfig config);
/**
 * Builds the request data.
 *
 * @param config    generate model configuration
 * @param options   generate options
 * @param promptStr the prompt in text form
 * @param promptMap the prompt in map form
 * @return the request body as a JSON string
 */
String buildRequestJson(GenerateConfig config, GenerateOptions options, String promptStr, Map promptMap);
/**
 * Parses the response data.
 *
 * @param config   generate model configuration
 * @param respJson the response data as a JSON string
 * @return the parsed response
 */
GenerateResponse parseResponseJson(GenerateConfig config, String respJson);
}
OllamaGenerateDialect 适配参考(注意:下面列出的示例代码实际为聊天模型方言 OllamaChatDialect 的实现,其匹配检测、请求构建与响应解析的写法可供定制生成模型方言时参照):
package org.noear.solon.ai.llm.dialect.ollama;
import org.noear.snack4.Feature;
import org.noear.snack4.ONode;
import org.noear.snack4.Options;
import org.noear.snack4.json.JsonReader;
import org.noear.solon.Utils;
import org.noear.solon.ai.chat.content.ContentBlock;
import org.noear.solon.ai.AiUsage;
import org.noear.solon.ai.chat.content.AudioBlock;
import org.noear.solon.ai.chat.ChatChoice;
import org.noear.solon.ai.chat.ChatConfig;
import org.noear.solon.ai.chat.ChatException;
import org.noear.solon.ai.chat.ChatResponseDefault;
import org.noear.solon.ai.chat.dialect.AbstractChatDialect;
import org.noear.solon.ai.chat.message.AssistantMessage;
import org.noear.solon.ai.chat.message.UserMessage;
import org.noear.solon.ai.chat.tool.ToolCall;
import org.noear.solon.ai.chat.tool.ToolCallBuilder;
import org.noear.solon.ai.chat.content.ImageBlock;
import org.noear.solon.ai.chat.content.VideoBlock;
import org.noear.solon.core.util.Assert;
import org.noear.solon.core.util.DateUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
* Ollama 聊天模型方言
*/
public class OllamaChatDialect extends AbstractChatDialect {
private static final Logger LOG = LoggerFactory.getLogger(OllamaChatDialect.class);
private static OllamaChatDialect instance = new OllamaChatDialect();
public static OllamaChatDialect getInstance() {
return instance;
}
/**
* 匹配检测
*
* @param config 聊天配置
*/
@Override
public boolean matched(ChatConfig config) {
String standard = config.getStandardOrProvider();
return "ollama".equalsIgnoreCase(standard) ||
(Assert.isEmpty(standard) && config.getApiUrl().endsWith("/api/chat"));
}
@Override
protected String getApiUrl(ChatConfig config) {
//处理后缀#
int index = config.getApiUrl().indexOf('#');
if (index > 0) {
return config.getApiUrl().substring(0, index);
}
//自动补全地址
if (config.getApiUrl().endsWith("/api/chat")) {
return config.getApiUrl();
} else {
if (config.getApiUrl().endsWith("/")) {
return config.getApiUrl() + "api/chat";
} else {
return config.getApiUrl() + "/api/chat";
}
}
}
@Override
protected void buildUserMessageNodeDo(ChatConfig config, ONode oNode, UserMessage msg) {
oNode.set("role", msg.getRole().name().toLowerCase());
if (msg.isMultiModal() == false) {
//单模态
oNode.set("content", msg.getContent());
} else {
//多模态
oNode.set("content", msg.getContent());
for (ContentBlock block1 : msg.getBlocks()) {
if (block1 instanceof ImageBlock) {
oNode.getOrNew("images").add(block1.toDataString(false));
} else if (block1 instanceof AudioBlock) {
oNode.getOrNew("audios").add(block1.toDataString(false));
} else if (block1 instanceof VideoBlock) {
oNode.getOrNew("videos").add(block1.toDataString(false));
}
}
}
}
@Override
public ONode buildAssistantToolCallMessageNode(ChatResponseDefault resp, Map<String, ToolCallBuilder> toolCallBuilders) {
ONode oNode = new ONode();
oNode.set("role", "assistant");
oNode.set("content", resp.getAggregationContent());
oNode.getOrNew("tool_calls").asArray().then(n1 -> {
for (Map.Entry<String, ToolCallBuilder> kv : toolCallBuilders.entrySet()) {
//有可能没有
n1.addNew().set("id", kv.getValue().idBuilder.toString())
.set("type", "function")
.getOrNew("function").then(n2 -> {
n2.set("name", kv.getValue().nameBuilder.toString());
n2.set("arguments", ONode.ofJson(kv.getValue().argumentsBuilder.toString()));
});
}
});
return oNode;
}
@Override
public boolean parseResponseJson(ChatConfig config, ChatResponseDefault resp, String json) {
//解析
ONode oResp = ONode.ofJson(json);
if (oResp.isObject() == false) {
return false;
}
if (oResp.hasKey("error")) {
resp.setError(new ChatException(oResp.get("error").getString()));
} else {
resp.setModel(oResp.get("model").getString());
resp.setFinished(oResp.get("done").getBoolean());
String done_reason = oResp.get("done_reason").getString();
String createdStr = oResp.get("created_at").getString();
if (createdStr != null) {
createdStr = createdStr.substring(0, createdStr.indexOf(".") + 4);
}
Date created = DateUtil.parseTry(createdStr);
List<AssistantMessage> messageList = parseAssistantMessage(resp, oResp.get("message"));
for (AssistantMessage msg1 : messageList) {
resp.addChoice(new ChatChoice(0, created, done_reason, msg1));
}
if (Utils.isNotEmpty(done_reason)) {
resp.lastFinishReason = done_reason;
}
if (resp.isFinished()) {
long promptTokens = oResp.get("prompt_eval_count").getLong();
long completionTokens = oResp.get("eval_count").getLong();
long totalTokens = promptTokens + completionTokens;
resp.setUsage(new AiUsage(promptTokens, 0L, completionTokens, totalTokens, oResp));
if (resp.hasChoices() == false) {
resp.addChoice(new ChatChoice(0, created, resp.getLastFinishReasonNormalized(), new AssistantMessage("")));
}
}
}
return true;
}
@Override
protected ToolCall parseToolCall(ChatResponseDefault resp, ONode n1) {
String callId = n1.get("id").getString();//可能是空的
ONode n1f = n1.get("function");
String name = n1f.get("name").getString();
ONode n1fArgs = n1f.get("arguments");
String argStr = n1fArgs.getString();
String index = name;
if (n1fArgs.isValue()) {
//有可能是 json string(还可能只是流的中间消息)
if (hasNestedJsonBlock(argStr)) {
JsonReader reader = new JsonReader(argStr, Options.of(Feature.Read_AutoRepair));
n1fArgs = reader.readLast();
if (n1fArgs == null) {
LOG.warn("Parse tool arguments failed: {}", argStr);
}
}
}
Map<String, Object> argMap = null;
if (n1fArgs != null) {
if (n1fArgs.isObject()) {
argMap = n1fArgs.toBean(Map.class);
}
}
return new ToolCall(index, callId, name, argStr, argMap);
}
}