
Conversation

@duxin40 (Contributor) commented Nov 25, 2024

Ⅰ. Describe what this PR did

This PR adds support for OpenAI embedding (used by the ai-cache plugin).
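
For reference, the embedding calls added here go to OpenAI's /v1/embeddings endpoint. A minimal standalone request against that endpoint looks roughly like the following (a sketch for context only; the model name is illustrative and not necessarily the one the plugin uses):

curl "https://api.openai.com/v1/embeddings" \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer $OPENAI_API_KEY" \
  -d '{
    "model": "text-embedding-ada-002",
    "input": "你好,你是谁?"
  }'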

Ⅱ. Does this pull request fix one issue?

Fix #1443.

Ⅲ. Why don't you add test cases (unit test/integration test)?

Ⅳ. Describe how to verify it

  1. Build ai-proxy:
cd ./higress/plugins/wasm-go/extensions/ai-proxy

tinygo build -o ai.wasm -scheduler=none -target=wasi -gc=custom -tags="custommalloc nottinygc_finalizer proxy_wasm_version_0_2_100" ./
  2. Build ai-cache:
cd ./higress/plugins/wasm-go/extensions/ai-cache

tinygo build -o main.wasm -scheduler=none -target=wasi -gc=custom -tags="custommalloc nottinygc_finalizer proxy_wasm_version_0_2_100" ./
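
To confirm that both artifacts ended up where the compose file below expects them (it mounts ./main.wasm from ai-cache and ../ai-proxy/ai.wasm), a quick check could be:

# Run from plugins/wasm-go/extensions/ai-cache
ls -lh ./main.wasm ../ai-proxy/ai.wasm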
  3. Start Higress:

docker-compose.yaml:

services:
  envoy:
    image: higress-registry.cn-hangzhou.cr.aliyuncs.com/higress/gateway:v2.0.2
    entrypoint: /usr/local/bin/envoy
    command: -c /etc/envoy/envoy.yaml --component-log-level wasm:debug
    networks:
      - wasmtest
    ports:
      - "10002:10002"
    volumes:
      - ./envoy.yaml:/etc/envoy/envoy.yaml
      - ./main.wasm:/etc/envoy/main.wasm
      - ../ai-proxy/ai.wasm:/etc/envoy/ai.wasm

networks:
  wasmtest: {}

envoy.yaml:

admin:
  address:
    socket_address:
      protocol: TCP
      address: 0.0.0.0
      port_value: 9901
static_resources:
  listeners:
    - name: listener_0
      address:
        socket_address:
          protocol: TCP
          address: 0.0.0.0
          port_value: 10002
      filter_chains:
        - filters:
            - name: envoy.filters.network.http_connection_manager
              typed_config:
                "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager
                scheme_header_transformation:
                  scheme_to_overwrite: https
                stat_prefix: ingress_http
                # Output envoy logs to stdout
                access_log:
                  - name: envoy.access_loggers.stdout
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.access_loggers.stream.v3.StdoutAccessLog
                # Modify as required
                route_config:
                  name: local_route
                  virtual_hosts:
                    - name: local_service
                      domains: [ "*" ]
                      routes:
                        - match:
                            prefix: "/"
                          route:
                            cluster: openai
                            timeout: 300s
                http_filters:
                  - name: wasmtest
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: wasmtest
                          vm_config:
                            runtime: envoy.wasm.runtime.v8
                            code:
                              local:
                                filename: /etc/envoy/ai.wasm
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            value: |
                              {
                                "provider": {
                                  "type": "openai",
                                  "apiTokens": [
                                    "sk-"
                                  ]
                                }
                              }

                  - name: cache
                    typed_config:
                      "@type": type.googleapis.com/udpa.type.v1.TypedStruct
                      type_url: type.googleapis.com/envoy.extensions.filters.http.wasm.v3.Wasm
                      value:
                        config:
                          name: cache
                          vm_config:
                            runtime: envoy.wasm.runtime.v8
                            code:
                              local:
                                filename: /etc/envoy/main.wasm
                          configuration:
                            "@type": "type.googleapis.com/google.protobuf.StringValue"
                            value: |
                              {
                                "embedding": {
                                  "type": "openai",
                                  "serviceName": "openai.dns",
                                  "apiKey": "sk-"
                                },
                                "vector": {
                                  "type": "dashvector",
                                  "serviceName": "dashvector.dns",
                                  "collectionID": "test1",
                                  "serviceHost": "your host",
                                  "apiKey": "your key",
                                  "threshold": 0.4
                                },
                                "cache": {
                                  "serviceName": "",
                                  "type": ""
                                }
                              }
                  - name: envoy.filters.http.router
                    typed_config:
                      "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router
  clusters:
    - name: openai
      connect_timeout: 30s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: openai
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          "sni": "api.openai.com"

    - name: outbound|443||openai.dns
      connect_timeout: 30s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: outbound|443||openai.dns
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: api.openai.com
                      port_value: 443
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          "sni": "api.openai.com"

    - name: outbound|443||dashvector.dns
      connect_timeout: 30s
      type: LOGICAL_DNS
      dns_lookup_family: V4_ONLY
      lb_policy: ROUND_ROBIN
      load_assignment:
        cluster_name: outbound|443||dashvector.dns
        endpoints:
          - lb_endpoints:
              - endpoint:
                  address:
                    socket_address:
                      address: vrs-*aliyuncs.com
                      port_value: 443
      transport_socket:
        name: envoy.transport_sockets.tls
        typed_config:
          "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
          "sni": "vrs-*aliyuncs.com"
  4. Test with curl:
    request:
curl "http://localhost:10002/ai/v1/chat/completions"  -H "Content-Type: application/json"  -H "Authorization: Bearer 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" -d '{
  "model": "gpt-3.5-turbo",
  "messages": [
    {
      "role": "user",
      "content": "你好,你是谁?"
    }
  ]
}'

embedding response log:
[screenshot of the embedding response log]
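
To further confirm the embedding path is exercised, one option is to send the same request twice and then search the gateway log for embedding-related entries (a rough sketch; the exact log lines depend on the plugin's logging, and the vector service must be reachable for a semantic-cache hit):

# Send the identical chat request twice; the first call should trigger an
# OpenAI embedding request plus a vector lookup, and a repeated request may
# then be answered from the semantic cache
for i in 1 2; do
  curl "http://localhost:10002/ai/v1/chat/completions" \
    -H "Content-Type: application/json" \
    -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "你好,你是谁?"}]}'
done

# Look for embedding-related debug output from the ai-cache plugin
docker compose logs envoy | grep -i embedding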

Ⅴ. Special notes for reviews

@CLAassistant commented Nov 25, 2024

CLA assistant check
All committers have signed the CLA.

duxin40 changed the title from "Support openai embedding." to "Support OpenAI embedding." on Nov 25, 2024
@duxin40 (Contributor, Author) commented Nov 26, 2024

@CH3CHO Please help review this when you have time~

@CH3CHO (Collaborator) commented Nov 26, 2024

> @CH3CHO Please help review this when you have time~

Sure. I expect to provide review comments today. Also, please sign the CLA first. Thanks!

@CH3CHO (Collaborator) left a comment


LGTM. Thanks.

@codecov-commenter commented Dec 6, 2024

Codecov Report

All modified and coverable lines are covered by tests ✅

Project coverage is 43.50%. Comparing base (ef31e09) to head (0328232).
Report is 225 commits behind head on main.

Additional details and impacted files


@@            Coverage Diff             @@
##             main    #1542      +/-   ##
==========================================
+ Coverage   35.91%   43.50%   +7.59%     
==========================================
  Files          69       76       +7     
  Lines       11576    12325     +749     
==========================================
+ Hits         4157     5362    +1205     
+ Misses       7104     6627     -477     
- Partials      315      336      +21     

see 69 files with indirect coverage changes

@johnlanni (Collaborator) commented:

@duxin40 There are non-GitHub accounts in the historical commit messages, which cannot pass the CLA check

[screenshot of the failing CLA check]

@duxin40 (Contributor, Author) commented Dec 9, 2024

@CH3CHO @johnlanni Thanks for the reminder. I have now passed the CLA check; please help review again~

CH3CHO merged commit be57960 into alibaba:main on Dec 11, 2024
13 checks passed
VinceCui pushed a commit to VinceCui/higress that referenced this pull request May 21, 2025


Development

Successfully merging this pull request may close these issues.

AI cache plugin integration with OpenAI embeddings: https://platform.openai.com/docs/guides/embeddings
