Creating and Testing the cf-openai-azure-proxy Script

// The name of your Azure OpenAI Resource (replace the placeholder below with your own value).
const resourceName = "RESOURCE_NAME"

// The deployment names you chose when you deployed the models (replace the placeholders below).
const mapper = {
    'gpt-3.5-turbo': "DEPLOY_NAME_GPT35",
    'gpt-4': "DEPLOY_NAME_GPT4"
};

// Azure OpenAI REST API version used by this proxy.
const apiVersion = "2023-05-15"

addEventListener("fetch", (event) => {
  event.respondWith(handleRequest(event.request));
});

async function handleRequest(request) {
  if (request.method === 'OPTIONS') {
    return handleOPTIONS(request)
  }

  const url = new URL(request.url);
  if (url.pathname.startsWith("//")) {
    url.pathname = url.pathname.replace('/',"")
  }
  let path;
  if (url.pathname === '/v1/chat/completions') {
    path = "chat/completions"
  } else if (url.pathname === '/v1/completions') {
    path = "completions"
  } else if (url.pathname === '/v1/models') {
    return handleModels(request)
  } else {
    return new Response('404 Not Found', { status: 404 })
  }

  let body;
  if (request.method === 'POST') {
    body = await request.json();
  }

  const modelName = body?.model;  
  const deployName = mapper[modelName] || '' 

  if (deployName === '') {
    return new Response('Missing model mapper', {
        status: 403
    });
  }
  const fetchAPI = `https://${resourceName}.openai.azure.com/openai/deployments/${deployName}/${path}?api-version=${apiVersion}`

  // The client sends an OpenAI-style "Authorization: Bearer <key>" header;
  // Azure expects the same key in an "api-key" header instead.
  const authKey = request.headers.get('Authorization');
  if (!authKey) {
    return new Response("Not allowed", {
      status: 403
    });
  }

  const payload = {
    method: request.method,
    headers: {
      "Content-Type": "application/json",
      "api-key": authKey.replace('Bearer ', ''),
    },
    body: typeof body === 'object' ? JSON.stringify(body) : '{}',
  };

  let response = await fetch(fetchAPI, payload);
  response = new Response(response.body, response);
  response.headers.set("Access-Control-Allow-Origin", "*");

  if (body?.stream != true){
    return response
  } 

  let { readable, writable } = new TransformStream()
  stream(response.body, writable);
  return new Response(readable, response);

}

function sleep(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}

// Re-chunk the upstream SSE stream ("printer mode") and make sure it ends with a newline.
async function stream(readable, writable) {
  const reader = readable.getReader();
  const writer = writable.getWriter();

  const encoder = new TextEncoder();
  const decoder = new TextDecoder();
  const newline = "\n";
  const delimiter = "\n\n"
  const encodedNewline = encoder.encode(newline);

  let buffer = "";
  while (true) {
    let { value, done } = await reader.read();
    if (done) {
      break;
    }
    buffer += decoder.decode(value, { stream: true }); // stream: true matters here: it correctly handles chunks that end in the middle of a multi-byte character
    let lines = buffer.split(delimiter);

    // Loop through all but the last line, which may be incomplete.
    for (let i = 0; i < lines.length - 1; i++) {
      await writer.write(encoder.encode(lines[i] + delimiter));
      await sleep(20);
    }

    buffer = lines[lines.length - 1];
  }

  if (buffer) {
    await writer.write(encoder.encode(buffer));
  }
  await writer.write(encodedNewline)
  await writer.close();
}

async function handleModels(request) {
  const data = {
    "object": "list",
    "data": []  
  };

  for (let key in mapper) {
    data.data.push({
      "id": key,
      "object": "model",
      "created": 1677610602,
      "owned_by": "openai",
      "permission": [{
        "id": "modelperm-M56FXnG1AsIr3SXq8BYPvXJA",
        "object": "model_permission",
        "created": 1679602088,
        "allow_create_engine": false,
        "allow_sampling": true,
        "allow_logprobs": true,
        "allow_search_indices": false,
        "allow_view": true,
        "allow_fine_tuning": false,
        "organization": "*",
        "group": null,
        "is_blocking": false
      }],
      "root": key,
      "parent": null
    });  
  }

  const json = JSON.stringify(data, null, 2);
  return new Response(json, {
    headers: { 'Content-Type': 'application/json' },
  });
}

async function handleOPTIONS(request) {
    return new Response(null, {
      headers: {
        'Access-Control-Allow-Origin': '*',
        'Access-Control-Allow-Methods': '*',
        'Access-Control-Allow-Headers': '*'
      }
    })
}

Create a Worker in the Workers & Pages page, paste in the code above, and enter the domain you want to use for access under Custom Domains. The test scripts are given further below.
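
If you prefer the command line to the dashboard, a rough Wrangler-based deployment could look like the sketch below. The file name worker.js and the Worker name cf-openai-azure-proxy are placeholders I chose for illustration; this assumes Wrangler v3 is installed, that you are logged in to the right Cloudflare account, and that Wrangler still accepts the Service Worker (addEventListener) format used above.

# Save the script above as worker.js, then:
npm install -g wrangler
wrangler login
wrangler deploy worker.js --name cf-openai-azure-proxy --compatibility-date 2023-05-15

The custom domain can still be attached afterwards from the Worker's Custom Domains tab, exactly as in the dashboard flow.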

Here are curl command-line examples based on the Cloudflare Worker above. I assume the Worker is deployed at https://your-worker.example.com; replace this with your actual URL.

  1. GET request to list the available models:
curl -X GET https://your-worker.example.com/v1/models
  2. POST request to generate a text completion:

This request needs a request body containing the model name and a prompt. The example below uses the model "gpt-3.5-turbo" with the prompt "Translate the following English text to French: {}". Remember to replace AUTH_TOKEN with your actual token.

curl -X POST https://your-worker.example.com/v1/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer AUTH_TOKEN" \
-d '{
  "model": "gpt-3.5-turbo",
  "prompt": "Translate the following English text to French: {}",
  "max_tokens": 60
}'
  3. POST request for a multi-turn conversation:

This request needs a request body containing the model name and an array of messages. The example below uses the model "gpt-3.5-turbo" and sets one system message and one user message. Remember to replace AUTH_TOKEN with your actual token.

curl -X POST https://your-worker.example.com/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer AUTH_TOKEN" \
-d '{
  "model": "gpt-3.5-turbo",
  "messages": [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Who won the world series in 2020?"}
  ]
}'
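  4. POST request with a streaming response:

This fourth example is not part of the original test set; it is just a sketch for exercising the Worker's streaming path. Setting "stream": true makes the Worker re-chunk the upstream SSE events, and curl's -N flag disables output buffering so the chunks appear as they arrive. As before, replace AUTH_TOKEN with your actual token.

curl -N -X POST https://your-worker.example.com/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer AUTH_TOKEN" \
-d '{
  "model": "gpt-3.5-turbo",
  "stream": true,
  "messages": [
    {"role": "user", "content": "Write one sentence about Cloudflare Workers."}
  ]
}'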

Keep in mind that the backslashes at the ends of the curl command lines only split the command across multiple lines for readability. If you type the whole command on one line, the backslashes are not needed.

These are just examples. In actual use, set the contents of the request body according to your own needs and the characteristics of the model.