From f9e74ad166b67a7517340aec485bc54ac3016316 Mon Sep 17 00:00:00 2001 From: river Date: Thu, 16 Nov 2023 21:18:47 +0800 Subject: [PATCH 01/12] =?UTF-8?q?chore:=20=E4=BF=AE=E6=94=B9readme?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- readme.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readme.md b/readme.md index eae9410b..0c0dfb38 100644 --- a/readme.md +++ b/readme.md @@ -94,8 +94,8 @@ ## 🌟 项目特点 -- 🍏 对话基于 OpenAI (https://platform.openai.com/account/api-keys) 接口 -- 🍎 通过 lark,将 ChatGPT 接入[飞书](https://open.feishu.cn/app)和[飞书国际版](https://www.larksuite.com/) +- 🍏 支持 OpenAI (https://platform.openai.com/account/api-keys) 主要Chat接口:GPT4、DALL·E-3、Whisper、GPT-4V +- 🍎 将 ChatGPT 接入[飞书](https://open.feishu.cn/app)和[飞书国际版](https://www.larksuite.com/) - 🥒 支持[Serverless 云函数](https://github.com/serverless-devs/serverless-devs)、[本地环境](https://dashboard.cpolar.com/login)、[Docker](https://www.docker.com/)、[二进制安装包](https://github.com/Leizhenpeng/feishu-chatgpt/releases/) 等多种渠道部署 From df271db72bcf5b82cc4568f00441d3cba7ad177e Mon Sep 17 00:00:00 2001 From: river Date: Thu, 16 Nov 2023 21:56:10 +0800 Subject: [PATCH 02/12] chore: update msg --- code/handlers/msg.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/handlers/msg.go b/code/handlers/msg.go index 6c486f3e..a0b14256 100644 --- a/code/handlers/msg.go +++ b/code/handlers/msg.go @@ -701,7 +701,7 @@ func sendHelpCard(ctx context.Context, sessionId *string, msgId *string) { newCard, _ := newSendCard( withHeader("🎒需要帮助吗?", larkcard.TemplateBlue), - withMainMd("**我是小飞机,一款基于chatGpt技术的智能聊天机器人!**"), + withMainMd("**你好呀~我来自企联AI,一款基于OpenAI的智能助手!**"), withSplitLine(), withMdAndExtraBtn( "** 🆑 清除话题上下文**\n文本回复 *清除* 或 */clear*", From c9f3c60502ed063373fa27c9544635854635ac6a Mon Sep 17 00:00:00 2001 From: river Date: Thu, 16 Nov 2023 21:59:18 +0800 Subject: [PATCH 03/12] chore: add vision help card --- code/handlers/msg.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/code/handlers/msg.go b/code/handlers/msg.go index a0b14256..2131a188 100644 --- a/code/handlers/msg.go +++ b/code/handlers/msg.go @@ -701,7 +701,7 @@ func sendHelpCard(ctx context.Context, sessionId *string, msgId *string) { newCard, _ := newSendCard( withHeader("🎒需要帮助吗?", larkcard.TemplateBlue), - withMainMd("**你好呀~我来自企联AI,一款基于OpenAI的智能助手!**"), + withMainMd("**🤠你好呀~ 我来自企联AI,一款基于OpenAI的智能助手!**"), withSplitLine(), withMdAndExtraBtn( "** 🆑 清除话题上下文**\n文本回复 *清除* 或 */clear*", @@ -722,6 +722,8 @@ func sendHelpCard(ctx context.Context, withSplitLine(), withMainMd("🎨 **图片创作模式**\n回复*图片创作* 或 */picture*"), withSplitLine(), + withMainMd("🕵️ **图片理解模式** \n"+" 文本回复 *图片理解* 或 */vision*"), + withSplitLine(), withMainMd("🎰 **Token余额查询**\n回复*余额* 或 */balance*"), withSplitLine(), withMainMd("🔃️ **历史话题回档** 🚧\n"+" 进入话题的回复详情页,文本回复 *恢复* 或 */reload*"), From 79853ca0502e12d560e5088684585b57419c702e Mon Sep 17 00:00:00 2001 From: river Date: Thu, 16 Nov 2023 22:23:25 +0800 Subject: [PATCH 04/12] chore: add msg tip --- code/handlers/event_vision_action.go | 110 +++++++++++++++++++++++++++ code/handlers/handler.go | 1 + code/handlers/msg.go | 38 ++++++++- code/services/sessionCache.go | 42 ++++++++-- 4 files changed, 185 insertions(+), 6 deletions(-) create mode 100644 code/handlers/event_vision_action.go diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go new file mode 100644 index 00000000..fb6614a1 --- /dev/null +++ b/code/handlers/event_vision_action.go @@ -0,0 +1,110 @@ +package handlers + +import ( + "context" + "fmt" + "os" + "start-feishubot/logger" + + "start-feishubot/initialization" + "start-feishubot/services" + "start-feishubot/services/openai" + "start-feishubot/utils" + + larkim "github.com/larksuite/oapi-sdk-go/v3/service/im/v1" +) + +type VisionAction struct { /*图片推理*/ +} + +func (*VisionAction) Execute(a *ActionInfo) bool { + check := AzureModeCheck(a) + if !check { + return true + } + // 开启图片创作模式 + if _, foundPic := utils.EitherTrimEqual(a.info.qParsed, + "/vision", "图片推理"); foundPic { + a.handler.sessionCache.Clear(*a.info.sessionId) + a.handler.sessionCache.SetMode(*a.info.sessionId, + services.ModeVision) + a.handler.sessionCache.SetVisionDetail(*a.info.sessionId, + services.VisionDetailHigh) + sendVisionInstructionCard(*a.ctx, a.info.sessionId, + a.info.msgId) + return false + } + + mode := a.handler.sessionCache.GetMode(*a.info.sessionId) + //fmt.Println("mode: ", mode) + logger.Debug("MODE:", mode) + // 收到一张图片,且不在图片创作模式下, 提醒是否切换到图片创作模式 + if a.info.msgType == "image" && mode != services.ModePicCreate { + sendPicModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId) + return false + } + + if a.info.msgType == "image" && mode == services.ModePicCreate { + //保存图片 + imageKey := a.info.imageKey + //fmt.Printf("fileKey: %s \n", imageKey) + msgId := a.info.msgId + //fmt.Println("msgId: ", *msgId) + req := larkim.NewGetMessageResourceReqBuilder().MessageId( + *msgId).FileKey(imageKey).Type("image").Build() + resp, err := initialization.GetLarkClient().Im.MessageResource.Get(context.Background(), req) + //fmt.Println(resp, err) + if err != nil { + //fmt.Println(err) + replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), + a.info.msgId) + return false + } + + f := fmt.Sprintf("%s.png", imageKey) + resp.WriteFile(f) + defer os.Remove(f) + resolution := a.handler.sessionCache.GetPicResolution(*a. + info.sessionId) + + openai.ConvertJpegToPNG(f) + openai.ConvertToRGBA(f, f) + + //图片校验 + err = openai.VerifyPngs([]string{f}) + if err != nil { + replyMsg(*a.ctx, fmt.Sprintf("🤖️:无法解析图片,请发送原图并尝试重新操作~"), + a.info.msgId) + return false + } + bs64, err := a.handler.gpt.GenerateOneImageVariation(f, resolution) + if err != nil { + replyMsg(*a.ctx, fmt.Sprintf( + "🤖️:图片生成失败,请稍后再试~\n错误信息: %v", err), a.info.msgId) + return false + } + replayImagePlainByBase64(*a.ctx, bs64, a.info.msgId) + return false + + } + + // 生成图片 + if mode == services.ModePicCreate { + resolution := a.handler.sessionCache.GetPicResolution(*a. + info.sessionId) + style := a.handler.sessionCache.GetPicStyle(*a. + info.sessionId) + bs64, err := a.handler.gpt.GenerateOneImage(a.info.qParsed, + resolution, style) + if err != nil { + replyMsg(*a.ctx, fmt.Sprintf( + "🤖️:图片生成失败,请稍后再试~\n错误信息: %v", err), a.info.msgId) + return false + } + replayImageCardByBase64(*a.ctx, bs64, a.info.msgId, a.info.sessionId, + a.info.qParsed) + return false + } + + return true +} diff --git a/code/handlers/handler.go b/code/handlers/handler.go index 3458c99d..8c243bc2 100644 --- a/code/handlers/handler.go +++ b/code/handlers/handler.go @@ -97,6 +97,7 @@ func (m MessageHandler) msgReceivedHandler(ctx context.Context, event *larkim.P2 &EmptyAction{}, //空消息处理 &ClearAction{}, //清除消息处理 &PicAction{}, //图片处理 + &VisionAction{}, //图片推理处理 &AIModeAction{}, //模式切换处理 &RoleListAction{}, //角色列表处理 &HelpAction{}, //帮助处理 diff --git a/code/handlers/msg.go b/code/handlers/msg.go index 2131a188..2162e6c4 100644 --- a/code/handlers/msg.go +++ b/code/handlers/msg.go @@ -25,6 +25,7 @@ var ( PicModeChangeKind = CardKind("pic_mode_change") // 切换图片创作模式 PicResolutionKind = CardKind("pic_resolution") // 图片分辨率调整 PicStyleKind = CardKind("pic_style") // 图片风格调整 + VisionStyleKind = CardKind("vision_style") // 图片推理级别调整 PicTextMoreKind = CardKind("pic_text_more") // 重新根据文本生成图片 PicVarMoreKind = CardKind("pic_var_more") // 变量图片 RoleTagsChooseKind = CardKind("role_tags_choose") // 内置角色所属标签选择 @@ -380,6 +381,32 @@ func withPicResolutionBtn(sessionID *string) larkcard. return actions } +func withVisionDetailLevelBtn(sessionID *string) larkcard. + MessageCardElement { + detailMenu := newMenu("选择图片解析度,默认为高", + map[string]interface{}{ + "value": "0", + "kind": VisionStyleKind, + "sessionId": *sessionID, + "msgId": *sessionID, + }, + MenuOption{ + label: "高", + value: string(services.VisionDetailHigh), + }, + MenuOption{ + label: "低", + value: string(services.VisionDetailLow), + }, + ) + + actions := larkcard.NewMessageCardAction(). + Actions([]larkcard.MessageCardActionElement{detailMenu}). + Layout(larkcard.MessageCardActionLayoutBisected.Ptr()). + Build() + + return actions +} func withRoleTagsBtn(sessionID *string, tags ...string) larkcard. MessageCardElement { var menuOptions []MenuOption @@ -669,6 +696,15 @@ func sendPicCreateInstructionCard(ctx context.Context, replyCard(ctx, msgId, newCard) } +func sendVisionInstructionCard(ctx context.Context, + sessionId *string, msgId *string) { + newCard, _ := newSendCard( + withHeader("🕵️️ 已进入图片推理模式", larkcard.TemplateBlue), + withVisionDetailLevelBtn(sessionId), + withNote("提醒:回复图片,让LLM和你一起推理图片的内容。")) + replyCard(ctx, msgId, newCard) +} + func sendPicModeCheckCard(ctx context.Context, sessionId *string, msgId *string) { newCard, _ := newSendCard( @@ -722,7 +758,7 @@ func sendHelpCard(ctx context.Context, withSplitLine(), withMainMd("🎨 **图片创作模式**\n回复*图片创作* 或 */picture*"), withSplitLine(), - withMainMd("🕵️ **图片理解模式** \n"+" 文本回复 *图片理解* 或 */vision*"), + withMainMd("🕵️ **图片推理模式** \n"+" 文本回复 *图片推理* 或 */vision*"), withSplitLine(), withMainMd("🎰 **Token余额查询**\n回复*余额* 或 */balance*"), withSplitLine(), diff --git a/code/services/sessionCache.go b/code/services/sessionCache.go index 3fd9f195..21c12ce8 100644 --- a/code/services/sessionCache.go +++ b/code/services/sessionCache.go @@ -8,6 +8,7 @@ import ( ) type SessionMode string +type VisionDetail string type SessionService struct { cache *cache.Cache } @@ -19,10 +20,11 @@ type Resolution string type PicStyle string type SessionMeta struct { - Mode SessionMode `json:"mode"` - Msg []openai.Messages `json:"msg,omitempty"` - PicSetting PicSetting `json:"pic_setting,omitempty"` - AIMode openai.AIMode `json:"ai_mode,omitempty"` + Mode SessionMode `json:"mode"` + Msg []openai.Messages `json:"msg,omitempty"` + PicSetting PicSetting `json:"pic_setting,omitempty"` + AIMode openai.AIMode `json:"ai_mode,omitempty"` + VisionDetail VisionDetail `json:"vision_detail,omitempty"` } const ( @@ -36,10 +38,15 @@ const ( PicStyleVivid PicStyle = "vivid" PicStyleNatural PicStyle = "natural" ) +const ( + VisionDetailHigh VisionDetail = "high" + VisionDetailLow VisionDetail = "low" +) const ( ModePicCreate SessionMode = "pic_create" ModePicVary SessionMode = "pic_vary" ModeGPT SessionMode = "gpt" + ModeVision SessionMode = "vision" ) type SessionServiceCacheInterface interface { @@ -52,9 +59,11 @@ type SessionServiceCacheInterface interface { GetAIMode(sessionId string) openai.AIMode SetAIMode(sessionId string, aiMode openai.AIMode) SetPicResolution(sessionId string, resolution Resolution) - SetPicStyle(sessionId string, resolution PicStyle) GetPicResolution(sessionId string) string + SetPicStyle(sessionId string, resolution PicStyle) GetPicStyle(sessionId string) string + SetVisionDetail(sessionId string, visionDetail VisionDetail) + GetVisionDetail(sessionId string) string Clear(sessionId string) } @@ -218,6 +227,29 @@ func (s *SessionService) Clear(sessionId string) { s.cache.Delete(sessionId) } +func (s *SessionService) GetVisionDetail(sessionId string) string { + sessionContext, ok := s.cache.Get(sessionId) + if !ok { + return "" + } + sessionMeta := sessionContext.(*SessionMeta) + return string(sessionMeta.VisionDetail) +} + +func (s *SessionService) SetVisionDetail(sessionId string, + visionDetail VisionDetail) { + maxCacheTime := time.Hour * 12 + sessionContext, ok := s.cache.Get(sessionId) + if !ok { + sessionMeta := &SessionMeta{VisionDetail: visionDetail} + s.cache.Set(sessionId, sessionMeta, maxCacheTime) + return + } + sessionMeta := sessionContext.(*SessionMeta) + sessionMeta.VisionDetail = visionDetail + s.cache.Set(sessionId, sessionMeta, maxCacheTime) +} + func GetSessionCache() SessionServiceCacheInterface { if sessionServices == nil { sessionServices = &SessionService{cache: cache.New(time.Hour*12, time.Hour*1)} From bb2e403a7f6a23d17dd2c571ea83d420b68fa48b Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 11:48:40 +0800 Subject: [PATCH 05/12] =?UTF-8?q?feat:=20=E5=AE=8C=E6=88=90gpt4v=E7=9A=84o?= =?UTF-8?q?penai=E5=AF=B9=E6=8E=A5=E5=92=8C=E5=8D=95=E5=85=83=E6=B5=8B?= =?UTF-8?q?=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/handlers/card_common_action.go | 5 +-- code/handlers/card_vision_action.go | 31 +++++++++++++++ code/handlers/event_vision_action.go | 2 +- code/services/openai/common.go | 11 +++++- code/services/openai/gpt3_test.go | 21 +++++++++++ code/services/openai/vision.go | 56 ++++++++++++++++++++++++++++ 6 files changed, 121 insertions(+), 5 deletions(-) create mode 100644 code/handlers/card_vision_action.go create mode 100644 code/services/openai/vision.go diff --git a/code/handlers/card_common_action.go b/code/handlers/card_common_action.go index 29ef6a8e..e2449e48 100644 --- a/code/handlers/card_common_action.go +++ b/code/handlers/card_common_action.go @@ -4,8 +4,6 @@ import ( "context" "encoding/json" "fmt" - "start-feishubot/logger" - larkcard "github.com/larksuite/oapi-sdk-go/v3/card" ) @@ -20,6 +18,7 @@ func NewCardHandler(m MessageHandler) CardHandlerFunc { handlers := []CardHandlerMeta{ NewClearCardHandler, NewPicResolutionHandler, + NewVisionResolutionHandler, NewPicTextMoreHandler, NewPicModeChangeHandler, NewRoleTagCardHandler, @@ -35,7 +34,7 @@ func NewCardHandler(m MessageHandler) CardHandlerFunc { return nil, err } //pp.Println(cardMsg) - logger.Debug("cardMsg ", cardMsg) + //logger.Debug("cardMsg ", cardMsg) for _, handler := range handlers { h := handler(cardMsg, m) i, err := h(ctx, cardAction) diff --git a/code/handlers/card_vision_action.go b/code/handlers/card_vision_action.go new file mode 100644 index 00000000..7468b1bb --- /dev/null +++ b/code/handlers/card_vision_action.go @@ -0,0 +1,31 @@ +package handlers + +import ( + "context" + "fmt" + larkcard "github.com/larksuite/oapi-sdk-go/v3/card" + larkcore "github.com/larksuite/oapi-sdk-go/v3/core" + "start-feishubot/services" +) + +func NewVisionResolutionHandler(cardMsg CardMsg, + m MessageHandler) CardHandlerFunc { + return func(ctx context.Context, cardAction *larkcard.CardAction) (interface{}, error) { + if cardMsg.Kind == VisionStyleKind { + CommonProcessVisionStyle(cardMsg, cardAction, m.sessionCache) + return nil, nil + } + return nil, ErrNextHandler + } +} + +func CommonProcessVisionStyle(msg CardMsg, + cardAction *larkcard.CardAction, + cache services.SessionServiceCacheInterface) { + option := cardAction.Action.Option + fmt.Println(larkcore.Prettify(msg)) + cache.SetVisionDetail(msg.SessionId, services.VisionDetail(option)) + //send text + replyMsg(context.Background(), "图片解析度调整为:"+option, + &msg.MsgId) +} diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go index fb6614a1..b0480ee2 100644 --- a/code/handlers/event_vision_action.go +++ b/code/handlers/event_vision_action.go @@ -44,7 +44,7 @@ func (*VisionAction) Execute(a *ActionInfo) bool { return false } - if a.info.msgType == "image" && mode == services.ModePicCreate { + if a.info.msgType == "image" && mode == services.ModeVision { //保存图片 imageKey := a.info.imageKey //fmt.Printf("fileKey: %s \n", imageKey) diff --git a/code/services/openai/common.go b/code/services/openai/common.go index e7db23b6..104b42ce 100644 --- a/code/services/openai/common.go +++ b/code/services/openai/common.go @@ -18,6 +18,9 @@ import ( type PlatForm string +const ( + MaxRetries = 1 +) const ( AzureApiUrlV1 = "openai.azure.com/openai/deployments/" ) @@ -104,6 +107,7 @@ func (gpt *ChatGPT) doAPIRequestWithRetry(url, method string, return errors.New("no available API") } + fmt.Println("requestBodyData", string(requestBodyData)) req, err := http.NewRequest(method, url, bytes.NewReader(requestBodyData)) if err != nil { return err @@ -182,7 +186,7 @@ func (gpt *ChatGPT) sendRequestWithBodyType(link, method string, } err = gpt.doAPIRequestWithRetry(link, method, bodyType, - requestBody, responseBody, client, 3) + requestBody, responseBody, client, MaxRetries) return err } @@ -252,3 +256,8 @@ func GetProxyClient(proxyString string) (*http.Client, error) { } return client, nil } + +func (gpt *ChatGPT) ChangeMode(model string) *ChatGPT { + gpt.Model = model + return gpt +} diff --git a/code/services/openai/gpt3_test.go b/code/services/openai/gpt3_test.go index fb442972..b960705b 100644 --- a/code/services/openai/gpt3_test.go +++ b/code/services/openai/gpt3_test.go @@ -23,6 +23,27 @@ func TestCompletions(t *testing.T) { fmt.Println(resp.Content, resp.Role) } +func TestVisionOnePic(t *testing.T) { + config := initialization.LoadConfig("../../config.yaml") + content := []ContentType{ + {Type: "text", Text: "What’s in this image?"}, + {Type: "image_url", URL: ImageURL{ + URL: "https://resource.liaobots." + + "com/1849d492904448a0ac17f975f0b7ca8b.jpg", + Detail: "low", + }}, + } + msgs := []VisionMessages{ + {Role: "user", Content: content}, + } + gpt := NewChatGPT(*config) + resp, err := gpt.GetVisionInfo(msgs) + if err != nil { + t.Errorf("TestCompletions failed with error: %v", err) + } + fmt.Println(resp.Content, resp.Role) +} + func TestGenerateOneImage(t *testing.T) { config := initialization.LoadConfig("../../config.yaml") gpt := NewChatGPT(*config) diff --git a/code/services/openai/vision.go b/code/services/openai/vision.go new file mode 100644 index 00000000..e825bbe1 --- /dev/null +++ b/code/services/openai/vision.go @@ -0,0 +1,56 @@ +package openai + +import ( + "errors" + "start-feishubot/logger" +) + +type ImageURL struct { + URL string `json:"url"` + Detail string `json:"detail,omitempty"` +} + +type ContentType struct { + Type string `json:"type"` + Text string `json:"text,omitempty"` + URL ImageURL `json:"image_url,omitempty"` + // Add other fields as needed for different content types +} + +type VisionMessages struct { + Role string `json:"role"` + Content []ContentType `json:"content"` +} + +type VisionRequestBody struct { + Model string `json:"model"` + Messages []VisionMessages `json:"messages"` + MaxTokens int `json:"max_tokens"` +} + +func (gpt *ChatGPT) GetVisionInfo(msg []VisionMessages) ( + resp Messages, err error) { + requestBody := VisionRequestBody{ + Model: "gpt-4-vision-preview", + Messages: msg, + MaxTokens: gpt.MaxTokens, + } + gptResponseBody := &ChatGPTResponseBody{} + url := gpt.FullUrl("chat/completions") + logger.Debug(url) + logger.Debug("request body ", requestBody) + if url == "" { + return resp, errors.New("无法获取openai请求地址") + } + //gpt.ChangeMode("gpt-4-vision-preview") + //fmt.Println("model", gpt.Model) + err = gpt.sendRequestWithBodyType(url, "POST", jsonBody, requestBody, gptResponseBody) + if err == nil && len(gptResponseBody.Choices) > 0 { + resp = gptResponseBody.Choices[0].Message + } else { + logger.Errorf("ERROR %v", err) + resp = Messages{} + err = errors.New("openai 请求失败") + } + return resp, err +} From 2718b6132fca22f6bee3f382513fdf080a621178 Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 13:32:39 +0800 Subject: [PATCH 06/12] =?UTF-8?q?feat:=20=E6=A3=80=E6=B5=8B=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E8=87=AA=E5=8A=A8=E5=90=8E=E6=8E=A8=E8=8D=90=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E5=9B=BE=E7=89=87=E5=88=9B=E4=BD=9C=E6=A8=A1=E5=BC=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/handlers/card_common_action.go | 1 + code/handlers/card_vision_action.go | 43 ++++++++++++++++++++++ code/handlers/event_vision_action.go | 6 ++-- code/handlers/handler.go | 5 ++- code/handlers/msg.go | 54 ++++++++++++++++++++++------ code/logger/logger.go | 2 +- 6 files changed, 94 insertions(+), 17 deletions(-) diff --git a/code/handlers/card_common_action.go b/code/handlers/card_common_action.go index e2449e48..1f1a7ac8 100644 --- a/code/handlers/card_common_action.go +++ b/code/handlers/card_common_action.go @@ -24,6 +24,7 @@ func NewCardHandler(m MessageHandler) CardHandlerFunc { NewRoleTagCardHandler, NewRoleCardHandler, NewAIModeCardHandler, + NewVisionModeChangeHandler, } return func(ctx context.Context, cardAction *larkcard.CardAction) (interface{}, error) { diff --git a/code/handlers/card_vision_action.go b/code/handlers/card_vision_action.go index 7468b1bb..9e056492 100644 --- a/code/handlers/card_vision_action.go +++ b/code/handlers/card_vision_action.go @@ -18,6 +18,19 @@ func NewVisionResolutionHandler(cardMsg CardMsg, return nil, ErrNextHandler } } +func NewVisionModeChangeHandler(cardMsg CardMsg, + m MessageHandler) CardHandlerFunc { + return func(ctx context.Context, cardAction *larkcard.CardAction) (interface{}, error) { + if cardMsg.Kind == VisionModeChangeKind { + newCard, err, done := CommonProcessVisionModeChange(cardMsg, m.sessionCache) + if done { + return newCard, err + } + return nil, nil + } + return nil, ErrNextHandler + } +} func CommonProcessVisionStyle(msg CardMsg, cardAction *larkcard.CardAction, @@ -29,3 +42,33 @@ func CommonProcessVisionStyle(msg CardMsg, replyMsg(context.Background(), "图片解析度调整为:"+option, &msg.MsgId) } + +func CommonProcessVisionModeChange(cardMsg CardMsg, + session services.SessionServiceCacheInterface) ( + interface{}, error, bool) { + if cardMsg.Value == "1" { + + sessionId := cardMsg.SessionId + session.Clear(sessionId) + session.SetMode(sessionId, + services.ModeVision) + session.SetVisionDetail(sessionId, + services.VisionDetailLow) + + newCard, _ := + newSendCard( + withHeader("🕵️️ 已进入图片推理模式", larkcard.TemplateBlue), + withVisionDetailLevelBtn(&sessionId), + withNote("提醒:回复图片,让LLM和你一起推理图片的内容。")) + return newCard, nil, true + } + if cardMsg.Value == "0" { + newCard, _ := newSendCard( + withHeader("️🎒 机器人提醒", larkcard.TemplateGreen), + withMainMd("依旧保留此话题的上下文信息"), + withNote("我们可以继续探讨这个话题,期待和您聊天。如果您有其他问题或者想要讨论的话题,请告诉我哦"), + ) + return newCard, nil, true + } + return nil, nil, false +} diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go index b0480ee2..8b134db4 100644 --- a/code/handlers/event_vision_action.go +++ b/code/handlers/event_vision_action.go @@ -36,11 +36,11 @@ func (*VisionAction) Execute(a *ActionInfo) bool { } mode := a.handler.sessionCache.GetMode(*a.info.sessionId) - //fmt.Println("mode: ", mode) + fmt.Println("a.info.msgType: ", a.info.msgType) logger.Debug("MODE:", mode) - // 收到一张图片,且不在图片创作模式下, 提醒是否切换到图片创作模式 + // 收到一张图片,且不在图片推理模式下, 提醒是否切换到图片推理模式 if a.info.msgType == "image" && mode != services.ModePicCreate { - sendPicModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId) + sendVisionModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId) return false } diff --git a/code/handlers/handler.go b/code/handlers/handler.go index 8c243bc2..f3dd51b7 100644 --- a/code/handlers/handler.go +++ b/code/handlers/handler.go @@ -94,18 +94,17 @@ func (m MessageHandler) msgReceivedHandler(ctx context.Context, event *larkim.P2 &ProcessedUniqueAction{}, //避免重复处理 &ProcessMentionAction{}, //判断机器人是否应该被调用 &AudioAction{}, //语音处理 - &EmptyAction{}, //空消息处理 &ClearAction{}, //清除消息处理 - &PicAction{}, //图片处理 &VisionAction{}, //图片推理处理 + &PicAction{}, //图片处理 &AIModeAction{}, //模式切换处理 &RoleListAction{}, //角色列表处理 &HelpAction{}, //帮助处理 &BalanceAction{}, //余额处理 &RolePlayAction{}, //角色扮演处理 &MessageAction{}, //消息处理 + &EmptyAction{}, //空消息处理 &StreamMessageAction{}, //流式消息处理 - } chain(data, actions...) return nil diff --git a/code/handlers/msg.go b/code/handlers/msg.go index 2162e6c4..b76f1f29 100644 --- a/code/handlers/msg.go +++ b/code/handlers/msg.go @@ -21,16 +21,17 @@ type CardKind string type CardChatType string var ( - ClearCardKind = CardKind("clear") // 清空上下文 - PicModeChangeKind = CardKind("pic_mode_change") // 切换图片创作模式 - PicResolutionKind = CardKind("pic_resolution") // 图片分辨率调整 - PicStyleKind = CardKind("pic_style") // 图片风格调整 - VisionStyleKind = CardKind("vision_style") // 图片推理级别调整 - PicTextMoreKind = CardKind("pic_text_more") // 重新根据文本生成图片 - PicVarMoreKind = CardKind("pic_var_more") // 变量图片 - RoleTagsChooseKind = CardKind("role_tags_choose") // 内置角色所属标签选择 - RoleChooseKind = CardKind("role_choose") // 内置角色选择 - AIModeChooseKind = CardKind("ai_mode_choose") // AI模式选择 + ClearCardKind = CardKind("clear") // 清空上下文 + PicModeChangeKind = CardKind("pic_mode_change") // 切换图片创作模式 + VisionModeChangeKind = CardKind("vision_mode") // 切换图片解析模式 + PicResolutionKind = CardKind("pic_resolution") // 图片分辨率调整 + PicStyleKind = CardKind("pic_style") // 图片风格调整 + VisionStyleKind = CardKind("vision_style") // 图片推理级别调整 + PicTextMoreKind = CardKind("pic_text_more") // 重新根据文本生成图片 + PicVarMoreKind = CardKind("pic_var_more") // 变量图片 + RoleTagsChooseKind = CardKind("role_tags_choose") // 内置角色所属标签选择 + RoleChooseKind = CardKind("role_choose") // 内置角色选择 + AIModeChooseKind = CardKind("ai_mode_choose") // AI模式选择 ) var ( @@ -312,6 +313,30 @@ func withPicModeDoubleCheckBtn(sessionID *string) larkcard. return actions } +func withVisionModeDoubleCheckBtn(sessionID *string) larkcard. + MessageCardElement { + confirmBtn := newBtn("切换模式", map[string]interface{}{ + "value": "1", + "kind": VisionModeChangeKind, + "chatType": UserChatType, + "sessionId": *sessionID, + }, larkcard.MessageCardButtonTypeDanger, + ) + cancelBtn := newBtn("我再想想", map[string]interface{}{ + "value": "0", + "kind": VisionModeChangeKind, + "sessionId": *sessionID, + "chatType": UserChatType, + }, + larkcard.MessageCardButtonTypeDefault) + + actions := larkcard.NewMessageCardAction(). + Actions([]larkcard.MessageCardActionElement{confirmBtn, cancelBtn}). + Layout(larkcard.MessageCardActionLayoutBisected.Ptr()). + Build() + + return actions +} func withOneBtn(btn *larkcard.MessageCardEmbedButton) larkcard. MessageCardElement { @@ -714,6 +739,15 @@ func sendPicModeCheckCard(ctx context.Context, withPicModeDoubleCheckBtn(sessionId)) replyCard(ctx, msgId, newCard) } +func sendVisionModeCheckCard(ctx context.Context, + sessionId *string, msgId *string) { + newCard, _ := newSendCard( + withHeader("🕵️ 机器人提醒", larkcard.TemplateBlue), + withMainMd("检测到图片,是否进入图片推理模式?"), + withNote("请注意,这将开始一个全新的对话,您将无法利用之前话题的历史信息"), + withVisionModeDoubleCheckBtn(sessionId)) + replyCard(ctx, msgId, newCard) +} func sendNewTopicCard(ctx context.Context, sessionId *string, msgId *string, content string) { diff --git a/code/logger/logger.go b/code/logger/logger.go index 75442471..df9aadc8 100644 --- a/code/logger/logger.go +++ b/code/logger/logger.go @@ -24,7 +24,7 @@ func init() { //WarnLevel //InfoLevel //DebugLevel - logger.Level = logrus.InfoLevel + logger.Level = logrus.DebugLevel } From 4bb5cb078728cafc59d76ec057321160e0ac2e75 Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 14:14:42 +0800 Subject: [PATCH 07/12] =?UTF-8?q?feat:=20=E8=AF=BB=E5=8F=96=E5=9B=BE?= =?UTF-8?q?=E7=89=87base64?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/handlers/event_vision_action.go | 41 +++++++++++++++------------- code/services/openai/picture.go | 21 ++++++++++++++ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go index 8b134db4..1b91bc3b 100644 --- a/code/handlers/event_vision_action.go +++ b/code/handlers/event_vision_action.go @@ -4,9 +4,8 @@ import ( "context" "fmt" "os" - "start-feishubot/logger" - "start-feishubot/initialization" + "start-feishubot/logger" "start-feishubot/services" "start-feishubot/services/openai" "start-feishubot/utils" @@ -39,7 +38,7 @@ func (*VisionAction) Execute(a *ActionInfo) bool { fmt.Println("a.info.msgType: ", a.info.msgType) logger.Debug("MODE:", mode) // 收到一张图片,且不在图片推理模式下, 提醒是否切换到图片推理模式 - if a.info.msgType == "image" && mode != services.ModePicCreate { + if a.info.msgType == "image" && mode != services.ModeVision { sendVisionModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId) return false } @@ -53,7 +52,7 @@ func (*VisionAction) Execute(a *ActionInfo) bool { req := larkim.NewGetMessageResourceReqBuilder().MessageId( *msgId).FileKey(imageKey).Type("image").Build() resp, err := initialization.GetLarkClient().Im.MessageResource.Get(context.Background(), req) - //fmt.Println(resp, err) + fmt.Println(resp, err) if err != nil { //fmt.Println(err) replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), @@ -62,28 +61,32 @@ func (*VisionAction) Execute(a *ActionInfo) bool { } f := fmt.Sprintf("%s.png", imageKey) + fmt.Println(f) resp.WriteFile(f) defer os.Remove(f) - resolution := a.handler.sessionCache.GetPicResolution(*a. - info.sessionId) - - openai.ConvertJpegToPNG(f) - openai.ConvertToRGBA(f, f) + //resolution := a.handler.sessionCache.GetPicResolution(*a. + // info.sessionId) - //图片校验 - err = openai.VerifyPngs([]string{f}) + base64, err := openai.GetBase64FromImage(f) if err != nil { - replyMsg(*a.ctx, fmt.Sprintf("🤖️:无法解析图片,请发送原图并尝试重新操作~"), + replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), a.info.msgId) return false } - bs64, err := a.handler.gpt.GenerateOneImageVariation(f, resolution) - if err != nil { - replyMsg(*a.ctx, fmt.Sprintf( - "🤖️:图片生成失败,请稍后再试~\n错误信息: %v", err), a.info.msgId) - return false - } - replayImagePlainByBase64(*a.ctx, bs64, a.info.msgId) + ////图片校验 + //err = openai.VerifyPngs([]string{f}) + //if err != nil { + // replyMsg(*a.ctx, fmt.Sprintf("🤖️:无法解析图片,请发送原图并尝试重新操作~"), + // a.info.msgId) + // return false + //} + //bs64, err := a.handler.gpt.GenerateOneImageVariation(f, resolution) + //if err != nil { + // replyMsg(*a.ctx, fmt.Sprintf( + // "🤖️:图片生成失败,请稍后再试~\n错误信息: %v", err), a.info.msgId) + // return false + //} + replayImagePlainByBase64(*a.ctx, base64, a.info.msgId) return false } diff --git a/code/services/openai/picture.go b/code/services/openai/picture.go index d93bedda..1c0ace43 100644 --- a/code/services/openai/picture.go +++ b/code/services/openai/picture.go @@ -2,11 +2,13 @@ package openai import ( "bufio" + "encoding/base64" "fmt" "image" "image/jpeg" "image/png" "io" + "io/ioutil" "mime/multipart" "os" ) @@ -300,3 +302,22 @@ func GetImageCompressionType(path string) (string, error) { // 返回压缩类型 return format, nil } + +func GetBase64FromImage(imagePath string) (string, error) { + // 打开文件 + // 读取图片文件 + imageFile, err := os.Open(imagePath) + if err != nil { + return "", err + } + defer imageFile.Close() + // 读取图片内容 + imageData, err := ioutil.ReadAll(imageFile) + if err != nil { + return "", err + } + // 将图片内容转换为base64编码 + base64String := base64.StdEncoding.EncodeToString(imageData) + + return base64String, nil +} From c1b49363aae0ecc7044c1ddf67bdfad6823ec94e Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 15:09:19 +0800 Subject: [PATCH 08/12] =?UTF-8?q?feat:=20=E4=BF=AE=E6=94=B9content?= =?UTF-8?q?=E4=B8=8A=E4=BC=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/handlers/event_msg_action.go | 17 +++++++++++++++++ code/handlers/event_vision_action.go | 23 +++++++++++++++++++++++ code/services/openai/gpt3_test.go | 13 +++++++++---- code/services/openai/vision.go | 14 ++++++-------- 4 files changed, 55 insertions(+), 12 deletions(-) diff --git a/code/handlers/event_msg_action.go b/code/handlers/event_msg_action.go index da7e5b4b..f9e10f71 100644 --- a/code/handlers/event_msg_action.go +++ b/code/handlers/event_msg_action.go @@ -23,6 +23,23 @@ func setDefaultPrompt(msg []openai.Messages) []openai.Messages { return msg } +//func setDefaultVisionPrompt(msg []openai.VisionMessages) []openai.VisionMessages { +// if !hasSystemRole(msg) { +// msg = append(msg, openai.VisionMessages{ +// Role: "system", Content: []openai.ContentType{ +// {Type: "text", Text: "You are ChatGPT4V, " + +// "You are ChatGPT4V, " + +// "a large language and picture model trained by" + +// " OpenAI. " + +// "Answer in user's language as concisely as" + +// " possible. Knowledge cutoff: 20230601 " + +// "Current date" + time.Now().Format("20060102"), +// }}, +// }) +// } +// return msg +//} + type MessageAction struct { /*消息*/ } diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go index 1b91bc3b..03c31c23 100644 --- a/code/handlers/event_vision_action.go +++ b/code/handlers/event_vision_action.go @@ -73,6 +73,29 @@ func (*VisionAction) Execute(a *ActionInfo) bool { a.info.msgId) return false } + // + var msg []openai.VisionMessages + detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId) + // 如果没有提示词,默认模拟ChatGPT + msg = append(msg, openai.VisionMessages{ + Role: "user", Content: []openai.ContentType{ + { + Type: "image", ImageURL: openai. + ImageURL{URL: base64, Detail: detail}, + }, + }, + }) + // get ai mode as temperature + fmt.Println("msg: ", msg) + completions, err := a.handler.gpt.GetVisionInfo(msg) + if err != nil { + replyMsg(*a.ctx, fmt.Sprintf( + "🤖️:消息机器人摆烂了,请稍后再试~\n错误信息: %v", err), a.info.msgId) + return false + } + msg = append(msg, completions) + a.handler.sessionCache.SetMsg(*a.info.sessionId, msg) + ////图片校验 //err = openai.VerifyPngs([]string{f}) //if err != nil { diff --git a/code/services/openai/gpt3_test.go b/code/services/openai/gpt3_test.go index b960705b..459eb16f 100644 --- a/code/services/openai/gpt3_test.go +++ b/code/services/openai/gpt3_test.go @@ -26,15 +26,20 @@ func TestCompletions(t *testing.T) { func TestVisionOnePic(t *testing.T) { config := initialization.LoadConfig("../../config.yaml") content := []ContentType{ - {Type: "text", Text: "What’s in this image?"}, - {Type: "image_url", URL: ImageURL{ + {Type: "text", Text: "What’s in this image?", ImageURL: nil}, + {Type: "image_url", ImageURL: &ImageURL{ URL: "https://resource.liaobots." + "com/1849d492904448a0ac17f975f0b7ca8b.jpg", - Detail: "low", + Detail: "high", }}, } + //turn content-json to str + //contentStr, err2 := json.Marshal(content) + //if err2 != nil { + // return + //} msgs := []VisionMessages{ - {Role: "user", Content: content}, + {Role: "assistant", Content: content}, } gpt := NewChatGPT(*config) resp, err := gpt.GetVisionInfo(msgs) diff --git a/code/services/openai/vision.go b/code/services/openai/vision.go index e825bbe1..0d068b66 100644 --- a/code/services/openai/vision.go +++ b/code/services/openai/vision.go @@ -6,20 +6,18 @@ import ( ) type ImageURL struct { - URL string `json:"url"` + URL string `json:"url,omitempty"` Detail string `json:"detail,omitempty"` } type ContentType struct { - Type string `json:"type"` - Text string `json:"text,omitempty"` - URL ImageURL `json:"image_url,omitempty"` - // Add other fields as needed for different content types + Type string `json:"type"` + Text string `json:"text,omitempty"` + ImageURL *ImageURL `json:"image_url,omitempty"` } - type VisionMessages struct { - Role string `json:"role"` - Content []ContentType `json:"content"` + Role string `json:"role"` + Content interface{} `json:"content"` } type VisionRequestBody struct { From 15d0099d6f2067daad386ddc03bd08cd2fd7cf5b Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 15:36:19 +0800 Subject: [PATCH 09/12] =?UTF-8?q?feat:=20=20=E5=8D=95=E5=9B=BE=E6=B5=8B?= =?UTF-8?q?=E8=AF=95=E5=AE=8C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/handlers/event_vision_action.go | 45 +++++++++++----------------- code/services/openai/common.go | 2 +- code/services/openai/gpt3_test.go | 6 +--- code/services/openai/vision.go | 1 - 4 files changed, 20 insertions(+), 34 deletions(-) diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go index 03c31c23..85fe642a 100644 --- a/code/handlers/event_vision_action.go +++ b/code/handlers/event_vision_action.go @@ -43,6 +43,8 @@ func (*VisionAction) Execute(a *ActionInfo) bool { return false } + // todo + if a.info.msgType == "image" && mode == services.ModeVision { //保存图片 imageKey := a.info.imageKey @@ -77,14 +79,19 @@ func (*VisionAction) Execute(a *ActionInfo) bool { var msg []openai.VisionMessages detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId) // 如果没有提示词,默认模拟ChatGPT + + content2 := []openai.ContentType{ + {Type: "text", Text: "图片里面有什么", ImageURL: nil}, + {Type: "image_url", ImageURL: &openai.ImageURL{ + URL: "data:image/jpeg;base64," + base64, + Detail: detail, + }}, + } + msg = append(msg, openai.VisionMessages{ - Role: "user", Content: []openai.ContentType{ - { - Type: "image", ImageURL: openai. - ImageURL{URL: base64, Detail: detail}, - }, - }, + Role: "user", Content: content2, }) + // get ai mode as temperature fmt.Println("msg: ", msg) completions, err := a.handler.gpt.GetVisionInfo(msg) @@ -93,8 +100,10 @@ func (*VisionAction) Execute(a *ActionInfo) bool { "🤖️:消息机器人摆烂了,请稍后再试~\n错误信息: %v", err), a.info.msgId) return false } - msg = append(msg, completions) - a.handler.sessionCache.SetMsg(*a.info.sessionId, msg) + sendOldTopicCard(*a.ctx, a.info.sessionId, a.info.msgId, + completions.Content) + return false + //a.handler.sessionCache.SetMsg(*a.info.sessionId, msg) ////图片校验 //err = openai.VerifyPngs([]string{f}) @@ -109,28 +118,10 @@ func (*VisionAction) Execute(a *ActionInfo) bool { // "🤖️:图片生成失败,请稍后再试~\n错误信息: %v", err), a.info.msgId) // return false //} - replayImagePlainByBase64(*a.ctx, base64, a.info.msgId) + //replayImagePlainByBase64(*a.ctx, base64, a.info.msgId) return false } - // 生成图片 - if mode == services.ModePicCreate { - resolution := a.handler.sessionCache.GetPicResolution(*a. - info.sessionId) - style := a.handler.sessionCache.GetPicStyle(*a. - info.sessionId) - bs64, err := a.handler.gpt.GenerateOneImage(a.info.qParsed, - resolution, style) - if err != nil { - replyMsg(*a.ctx, fmt.Sprintf( - "🤖️:图片生成失败,请稍后再试~\n错误信息: %v", err), a.info.msgId) - return false - } - replayImageCardByBase64(*a.ctx, bs64, a.info.msgId, a.info.sessionId, - a.info.qParsed) - return false - } - return true } diff --git a/code/services/openai/common.go b/code/services/openai/common.go index 104b42ce..7fd812f8 100644 --- a/code/services/openai/common.go +++ b/code/services/openai/common.go @@ -107,7 +107,7 @@ func (gpt *ChatGPT) doAPIRequestWithRetry(url, method string, return errors.New("no available API") } - fmt.Println("requestBodyData", string(requestBodyData)) + //fmt.Println("requestBodyData", string(requestBodyData)) req, err := http.NewRequest(method, url, bytes.NewReader(requestBodyData)) if err != nil { return err diff --git a/code/services/openai/gpt3_test.go b/code/services/openai/gpt3_test.go index 459eb16f..e13c4347 100644 --- a/code/services/openai/gpt3_test.go +++ b/code/services/openai/gpt3_test.go @@ -33,11 +33,7 @@ func TestVisionOnePic(t *testing.T) { Detail: "high", }}, } - //turn content-json to str - //contentStr, err2 := json.Marshal(content) - //if err2 != nil { - // return - //} + msgs := []VisionMessages{ {Role: "assistant", Content: content}, } diff --git a/code/services/openai/vision.go b/code/services/openai/vision.go index 0d068b66..8523ab3f 100644 --- a/code/services/openai/vision.go +++ b/code/services/openai/vision.go @@ -35,7 +35,6 @@ func (gpt *ChatGPT) GetVisionInfo(msg []VisionMessages) ( } gptResponseBody := &ChatGPTResponseBody{} url := gpt.FullUrl("chat/completions") - logger.Debug(url) logger.Debug("request body ", requestBody) if url == "" { return resp, errors.New("无法获取openai请求地址") From 4624006bd98dc25ddae0ebdcdd7e7c7b07f8f444 Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 22:22:16 +0800 Subject: [PATCH 10/12] feat: support gpt4v --- code/handlers/common.go | 28 +++++++- code/handlers/event_common_action.go | 1 + code/handlers/event_vision_action.go | 97 +++++++++++++++++++++++----- code/handlers/handler.go | 1 + 4 files changed, 110 insertions(+), 17 deletions(-) diff --git a/code/handlers/common.go b/code/handlers/common.go index 1d0f7b64..ffe4912a 100644 --- a/code/handlers/common.go +++ b/code/handlers/common.go @@ -13,7 +13,6 @@ func msgFilter(msg string) string { //replace @到下一个非空的字段 为 '' regex := regexp.MustCompile(`@[^ ]*`) return regex.ReplaceAllString(msg, "") - } // Parse rich text json to text @@ -47,6 +46,33 @@ func parsePostContent(content string) string { return msgFilter(text) } +func parsePostImageKeys(content string) []string { + var contentMap map[string]interface{} + err := json.Unmarshal([]byte(content), &contentMap) + + if err != nil { + fmt.Println(err) + return nil + } + + var imageKeys []string + + if contentMap["content"] == nil { + return imageKeys + } + + contentList := contentMap["content"].([]interface{}) + for _, v := range contentList { + for _, v1 := range v.([]interface{}) { + if v1.(map[string]interface{})["tag"] == "img" { + imageKeys = append(imageKeys, v1.(map[string]interface{})["image_key"].(string)) + } + } + } + + return imageKeys +} + func parseContent(content, msgType string) string { //"{\"text\":\"@_user_1 hahaha\"}", //only get text content hahaha diff --git a/code/handlers/event_common_action.go b/code/handlers/event_common_action.go index 5139060d..190d66bd 100644 --- a/code/handlers/event_common_action.go +++ b/code/handlers/event_common_action.go @@ -19,6 +19,7 @@ type MsgInfo struct { qParsed string fileKey string imageKey string + imageKeys []string // post 消息卡片中的图片组 sessionId *string mention []*larkim.MentionEvent } diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go index 85fe642a..37bd6ac7 100644 --- a/code/handlers/event_vision_action.go +++ b/code/handlers/event_vision_action.go @@ -36,7 +36,9 @@ func (*VisionAction) Execute(a *ActionInfo) bool { mode := a.handler.sessionCache.GetMode(*a.info.sessionId) fmt.Println("a.info.msgType: ", a.info.msgType) + logger.Debug("MODE:", mode) + // 收到一张图片,且不在图片推理模式下, 提醒是否切换到图片推理模式 if a.info.msgType == "image" && mode != services.ModeVision { sendVisionModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId) @@ -44,6 +46,7 @@ func (*VisionAction) Execute(a *ActionInfo) bool { } // todo + //return false if a.info.msgType == "image" && mode == services.ModeVision { //保存图片 @@ -66,8 +69,6 @@ func (*VisionAction) Execute(a *ActionInfo) bool { fmt.Println(f) resp.WriteFile(f) defer os.Remove(f) - //resolution := a.handler.sessionCache.GetPicResolution(*a. - // info.sessionId) base64, err := openai.GetBase64FromImage(f) if err != nil { @@ -105,20 +106,84 @@ func (*VisionAction) Execute(a *ActionInfo) bool { return false //a.handler.sessionCache.SetMsg(*a.info.sessionId, msg) - ////图片校验 - //err = openai.VerifyPngs([]string{f}) - //if err != nil { - // replyMsg(*a.ctx, fmt.Sprintf("🤖️:无法解析图片,请发送原图并尝试重新操作~"), - // a.info.msgId) - // return false - //} - //bs64, err := a.handler.gpt.GenerateOneImageVariation(f, resolution) - //if err != nil { - // replyMsg(*a.ctx, fmt.Sprintf( - // "🤖️:图片生成失败,请稍后再试~\n错误信息: %v", err), a.info.msgId) - // return false - //} - //replayImagePlainByBase64(*a.ctx, base64, a.info.msgId) + } + + if a.info.msgType == "post" && mode == services.ModeVision { + fmt.Println(a.info.imageKeys) + fmt.Println(a.info.qParsed) + imagesKeys := a.info.imageKeys + var base64s []string + if len(imagesKeys) == 0 { + replyMsg(*a.ctx, "🤖️:请发送一张图片", a.info.msgId) + return false + } + //保存图片 + for i := 0; i < len(imagesKeys); i++ { + if imagesKeys[i] == "" { + continue + } + imageKey := imagesKeys[i] + msgId := a.info.msgId + //fmt.Println("msgId: ", *msgId) + req := larkim.NewGetMessageResourceReqBuilder().MessageId( + *msgId).FileKey(imageKey).Type("image").Build() + resp, err := initialization.GetLarkClient().Im.MessageResource.Get(context.Background(), req) + if err != nil { + //fmt.Println(err) + replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), + a.info.msgId) + return false + } + + f := fmt.Sprintf("%s.png", imageKey) + fmt.Println(f) + resp.WriteFile(f) + defer os.Remove(f) + + base64, err := openai.GetBase64FromImage(f) + base64s = append(base64s, base64) + if err != nil { + replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), + a.info.msgId) + return false + } + } + + var msg []openai.VisionMessages + detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId) + // 如果没有提示词,默认模拟ChatGPT + + content0 := []openai.ContentType{ + {Type: "text", Text: a.info.qParsed, ImageURL: nil}, + } + // 循环数组 + for i := 0; i < len(base64s); i++ { + content1 := []openai.ContentType{ + {Type: "image_url", ImageURL: &openai.ImageURL{ + URL: "data:image/jpeg;base64," + base64s[i], + Detail: detail, + }}, + } + content0 = append(content0, content1...) + } + + msg = append(msg, openai.VisionMessages{ + Role: "user", Content: content0, + }) + + // get ai mode as temperature + fmt.Println("msg: ", msg) + completions, err := a.handler.gpt.GetVisionInfo(msg) + if err != nil { + replyMsg(*a.ctx, fmt.Sprintf( + "🤖️:消息机器人摆烂了,请稍后再试~\n错误信息: %v", err), a.info.msgId) + return false + } + sendOldTopicCard(*a.ctx, a.info.sessionId, a.info.msgId, + completions.Content) + return false + //a.handler.sessionCache.SetMsg(*a.info.sessionId, msg) + return false } diff --git a/code/handlers/handler.go b/code/handlers/handler.go index f3dd51b7..d7622f1b 100644 --- a/code/handlers/handler.go +++ b/code/handlers/handler.go @@ -82,6 +82,7 @@ func (m MessageHandler) msgReceivedHandler(ctx context.Context, event *larkim.P2 qParsed: strings.Trim(parseContent(*content, msgType), " "), fileKey: parseFileKey(*content), imageKey: parseImageKey(*content), + imageKeys: parsePostImageKeys(*content), sessionId: sessionId, mention: mention, } From 2c1b1cff5517d7067716d7600ed12cdcb2d089e7 Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 23:17:44 +0800 Subject: [PATCH 11/12] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E4=BB=A3?= =?UTF-8?q?=E7=A0=81=E7=BB=93=E6=9E=84=E5=92=8CUI=E6=96=87=E6=A1=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/handlers/event_vision_action.go | 256 ++++++++++++--------------- code/handlers/msg.go | 9 + 2 files changed, 121 insertions(+), 144 deletions(-) diff --git a/code/handlers/event_vision_action.go b/code/handlers/event_vision_action.go index 37bd6ac7..ae67873b 100644 --- a/code/handlers/event_vision_action.go +++ b/code/handlers/event_vision_action.go @@ -5,7 +5,6 @@ import ( "fmt" "os" "start-feishubot/initialization" - "start-feishubot/logger" "start-feishubot/services" "start-feishubot/services/openai" "start-feishubot/utils" @@ -16,177 +15,146 @@ import ( type VisionAction struct { /*图片推理*/ } -func (*VisionAction) Execute(a *ActionInfo) bool { - check := AzureModeCheck(a) - if !check { +func (va *VisionAction) Execute(a *ActionInfo) bool { + if !AzureModeCheck(a) { return true } - // 开启图片创作模式 - if _, foundPic := utils.EitherTrimEqual(a.info.qParsed, - "/vision", "图片推理"); foundPic { - a.handler.sessionCache.Clear(*a.info.sessionId) - a.handler.sessionCache.SetMode(*a.info.sessionId, - services.ModeVision) - a.handler.sessionCache.SetVisionDetail(*a.info.sessionId, - services.VisionDetailHigh) - sendVisionInstructionCard(*a.ctx, a.info.sessionId, - a.info.msgId) + + if isVisionCommand(a) { + initializeVisionMode(a) + sendVisionInstructionCard(*a.ctx, a.info.sessionId, a.info.msgId) return false } mode := a.handler.sessionCache.GetMode(*a.info.sessionId) - fmt.Println("a.info.msgType: ", a.info.msgType) - logger.Debug("MODE:", mode) + if a.info.msgType == "image" { + if mode != services.ModeVision { + sendVisionModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId) + return false + } - // 收到一张图片,且不在图片推理模式下, 提醒是否切换到图片推理模式 - if a.info.msgType == "image" && mode != services.ModeVision { - sendVisionModeCheckCard(*a.ctx, a.info.sessionId, a.info.msgId) - return false + return va.handleVisionImage(a) } - // todo - //return false - - if a.info.msgType == "image" && mode == services.ModeVision { - //保存图片 - imageKey := a.info.imageKey - //fmt.Printf("fileKey: %s \n", imageKey) - msgId := a.info.msgId - //fmt.Println("msgId: ", *msgId) - req := larkim.NewGetMessageResourceReqBuilder().MessageId( - *msgId).FileKey(imageKey).Type("image").Build() - resp, err := initialization.GetLarkClient().Im.MessageResource.Get(context.Background(), req) - fmt.Println(resp, err) - if err != nil { - //fmt.Println(err) - replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), - a.info.msgId) - return false - } + if a.info.msgType == "post" && mode == services.ModeVision { + return va.handleVisionPost(a) + } - f := fmt.Sprintf("%s.png", imageKey) - fmt.Println(f) - resp.WriteFile(f) - defer os.Remove(f) + return true +} - base64, err := openai.GetBase64FromImage(f) - if err != nil { - replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), - a.info.msgId) - return false - } - // - var msg []openai.VisionMessages - detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId) - // 如果没有提示词,默认模拟ChatGPT - - content2 := []openai.ContentType{ - {Type: "text", Text: "图片里面有什么", ImageURL: nil}, - {Type: "image_url", ImageURL: &openai.ImageURL{ - URL: "data:image/jpeg;base64," + base64, - Detail: detail, - }}, - } +func isVisionCommand(a *ActionInfo) bool { + _, foundPic := utils.EitherTrimEqual(a.info.qParsed, "/vision", "图片推理") + return foundPic +} - msg = append(msg, openai.VisionMessages{ - Role: "user", Content: content2, - }) +func initializeVisionMode(a *ActionInfo) { + a.handler.sessionCache.Clear(*a.info.sessionId) + a.handler.sessionCache.SetMode(*a.info.sessionId, services.ModeVision) + a.handler.sessionCache.SetVisionDetail(*a.info.sessionId, services.VisionDetailHigh) +} - // get ai mode as temperature - fmt.Println("msg: ", msg) - completions, err := a.handler.gpt.GetVisionInfo(msg) +func (va *VisionAction) handleVisionImage(a *ActionInfo) bool { + detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId) + base64, err := downloadAndEncodeImage(a.info.imageKey, a.info.msgId) + if err != nil { + replyWithErrorMsg(*a.ctx, err, a.info.msgId) + return false + } + + return va.processImageAndReply(a, base64, detail) +} + +func (va *VisionAction) handleVisionPost(a *ActionInfo) bool { + detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId) + var base64s []string + + for _, imageKey := range a.info.imageKeys { + if imageKey == "" { + continue + } + base64, err := downloadAndEncodeImage(imageKey, a.info.msgId) if err != nil { - replyMsg(*a.ctx, fmt.Sprintf( - "🤖️:消息机器人摆烂了,请稍后再试~\n错误信息: %v", err), a.info.msgId) + replyWithErrorMsg(*a.ctx, err, a.info.msgId) return false } - sendOldTopicCard(*a.ctx, a.info.sessionId, a.info.msgId, - completions.Content) - return false - //a.handler.sessionCache.SetMsg(*a.info.sessionId, msg) + base64s = append(base64s, base64) + } + if len(base64s) == 0 { + replyMsg(*a.ctx, "🤖️:请发送一张图片", a.info.msgId) + return false } - if a.info.msgType == "post" && mode == services.ModeVision { - fmt.Println(a.info.imageKeys) - fmt.Println(a.info.qParsed) - imagesKeys := a.info.imageKeys - var base64s []string - if len(imagesKeys) == 0 { - replyMsg(*a.ctx, "🤖️:请发送一张图片", a.info.msgId) - return false - } - //保存图片 - for i := 0; i < len(imagesKeys); i++ { - if imagesKeys[i] == "" { - continue - } - imageKey := imagesKeys[i] - msgId := a.info.msgId - //fmt.Println("msgId: ", *msgId) - req := larkim.NewGetMessageResourceReqBuilder().MessageId( - *msgId).FileKey(imageKey).Type("image").Build() - resp, err := initialization.GetLarkClient().Im.MessageResource.Get(context.Background(), req) - if err != nil { - //fmt.Println(err) - replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), - a.info.msgId) - return false - } - - f := fmt.Sprintf("%s.png", imageKey) - fmt.Println(f) - resp.WriteFile(f) - defer os.Remove(f) - - base64, err := openai.GetBase64FromImage(f) - base64s = append(base64s, base64) - if err != nil { - replyMsg(*a.ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), - a.info.msgId) - return false - } - } + return va.processMultipleImagesAndReply(a, base64s, detail) +} - var msg []openai.VisionMessages - detail := a.handler.sessionCache.GetVisionDetail(*a.info.sessionId) - // 如果没有提示词,默认模拟ChatGPT +func downloadAndEncodeImage(imageKey string, msgId *string) (string, error) { + f := fmt.Sprintf("%s.png", imageKey) + defer os.Remove(f) - content0 := []openai.ContentType{ - {Type: "text", Text: a.info.qParsed, ImageURL: nil}, - } - // 循环数组 - for i := 0; i < len(base64s); i++ { - content1 := []openai.ContentType{ - {Type: "image_url", ImageURL: &openai.ImageURL{ - URL: "data:image/jpeg;base64," + base64s[i], - Detail: detail, - }}, - } - content0 = append(content0, content1...) - } + req := larkim.NewGetMessageResourceReqBuilder().MessageId(*msgId).FileKey(imageKey).Type("image").Build() + resp, err := initialization.GetLarkClient().Im.MessageResource.Get(context.Background(), req) + if err != nil { + return "", err + } - msg = append(msg, openai.VisionMessages{ - Role: "user", Content: content0, - }) + resp.WriteFile(f) + return openai.GetBase64FromImage(f) +} - // get ai mode as temperature - fmt.Println("msg: ", msg) - completions, err := a.handler.gpt.GetVisionInfo(msg) - if err != nil { - replyMsg(*a.ctx, fmt.Sprintf( - "🤖️:消息机器人摆烂了,请稍后再试~\n错误信息: %v", err), a.info.msgId) - return false - } - sendOldTopicCard(*a.ctx, a.info.sessionId, a.info.msgId, - completions.Content) +func replyWithErrorMsg(ctx context.Context, err error, msgId *string) { + replyMsg(ctx, fmt.Sprintf("🤖️:图片下载失败,请稍后再试~\n 错误信息: %v", err), msgId) +} + +func (va *VisionAction) processImageAndReply(a *ActionInfo, base64 string, detail string) bool { + msg := createVisionMessages("解释这个图片", base64, detail) + completions, err := a.handler.gpt.GetVisionInfo(msg) + if err != nil { + replyWithErrorMsg(*a.ctx, err, a.info.msgId) return false - //a.handler.sessionCache.SetMsg(*a.info.sessionId, msg) + } + sendVisionTopicCard(*a.ctx, a.info.sessionId, a.info.msgId, completions.Content) + return false +} +func (va *VisionAction) processMultipleImagesAndReply(a *ActionInfo, base64s []string, detail string) bool { + msg := createMultipleVisionMessages(a.info.qParsed, base64s, detail) + completions, err := a.handler.gpt.GetVisionInfo(msg) + if err != nil { + replyWithErrorMsg(*a.ctx, err, a.info.msgId) return false + } + sendVisionTopicCard(*a.ctx, a.info.sessionId, a.info.msgId, completions.Content) + return false +} +func createVisionMessages(query, base64Image, detail string) []openai.VisionMessages { + return []openai.VisionMessages{ + { + Role: "user", + Content: []openai.ContentType{ + {Type: "text", Text: query}, + {Type: "image_url", ImageURL: &openai.ImageURL{ + URL: "data:image/jpeg;base64," + base64Image, + Detail: detail, + }}, + }, + }, } +} - return true +func createMultipleVisionMessages(query string, base64Images []string, detail string) []openai.VisionMessages { + content := []openai.ContentType{{Type: "text", Text: query}} + for _, base64Image := range base64Images { + content = append(content, openai.ContentType{ + Type: "image_url", + ImageURL: &openai.ImageURL{ + URL: "data:image/jpeg;base64," + base64Image, + Detail: detail, + }, + }) + } + return []openai.VisionMessages{{Role: "user", Content: content}} } diff --git a/code/handlers/msg.go b/code/handlers/msg.go index 68e8fff6..3b752149 100644 --- a/code/handlers/msg.go +++ b/code/handlers/msg.go @@ -767,6 +767,15 @@ func sendOldTopicCard(ctx context.Context, replyCard(ctx, msgId, newCard) } +func sendVisionTopicCard(ctx context.Context, + sessionId *string, msgId *string, content string) { + newCard, _ := newSendCard( + withHeader("🕵️图片推理结果", larkcard.TemplateBlue), + withMainText(content), + withNote("让LLM和你一起推理图片的内容~")) + replyCard(ctx, msgId, newCard) +} + func sendHelpCard(ctx context.Context, sessionId *string, msgId *string) { newCard, _ := newSendCard( From c1d811e59af926ec0a0084ede540963d1a9b6a88 Mon Sep 17 00:00:00 2001 From: river Date: Mon, 20 Nov 2023 23:20:51 +0800 Subject: [PATCH 12/12] =?UTF-8?q?feat:=20=E8=BF=98=E5=8E=9F=E5=8F=82?= =?UTF-8?q?=E6=95=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- code/logger/logger.go | 2 +- code/services/openai/common.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/code/logger/logger.go b/code/logger/logger.go index df9aadc8..75442471 100644 --- a/code/logger/logger.go +++ b/code/logger/logger.go @@ -24,7 +24,7 @@ func init() { //WarnLevel //InfoLevel //DebugLevel - logger.Level = logrus.DebugLevel + logger.Level = logrus.InfoLevel } diff --git a/code/services/openai/common.go b/code/services/openai/common.go index 7fd812f8..85f6341e 100644 --- a/code/services/openai/common.go +++ b/code/services/openai/common.go @@ -19,7 +19,7 @@ import ( type PlatForm string const ( - MaxRetries = 1 + MaxRetries = 3 ) const ( AzureApiUrlV1 = "openai.azure.com/openai/deployments/"