Microsoft.Extensions.AI (MEAI), which I tried out last time, ships client libraries for OpenAI and Azure AI Inference, so Generative AI features are very easy to use. The big advantage is the abstracted interface, which absorbs the differences between backends such as OpenAI and Azure AI Inference.
The abstracted interfaces of MEAI are published as a standalone NuGet package, so by implementing against them you can target other APIs as well.
As with ASP.NET Core and friends, a separate Abstractions package is provided, as shown below.
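For reference, pulling in just the abstractions should look something like this (the packages were still in preview at the time of writing, hence the --prerelease flag):

dotnet add package Microsoft.Extensions.AI.Abstractions --prerelease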
The official blog and the README of the Abstractions package show a sample implementation that returns a fixed value, but this time I implemented a client for ONNX Runtime Generative AI, which I have been tinkering with for a while.
I wrote before about using SLMs with ONNX Runtime Generative AI, so the plan is to build an MEAI client on top of that.
MEAI 㯠IChatClient
ã¨ããã¤ã³ã¿ã¼ãã§ã¼ã¹ãå®è£
ããã°è¯ãã®ã§ãããã«åãããã« ONNX Runtime Generative AI ãå¼ã³åºãã¨ããã·ã³ãã«ãªå®è£
ã§ããONNX Runtime Generative AI ã使ã£ãå ´åã§ãã¹ããªã¼ãã³ã°ã¯å¯¾å¿ã§ããã®ã§å®è£
ãã¦ã¿ã¾ããã
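For context, the preview build of Microsoft.Extensions.AI.Abstractions that this post targets defines IChatClient roughly as follows (reconstructed from the members implemented below, so treat it as a sketch rather than the authoritative definition):

public interface IChatClient : IDisposable
{
    Task<ChatCompletion> CompleteAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default);

    IAsyncEnumerable<StreamingChatCompletionUpdate> CompleteStreamingAsync(
        IList<ChatMessage> chatMessages,
        ChatOptions? options = null,
        CancellationToken cancellationToken = default);

    ChatClientMetadata Metadata { get; }

    TService? GetService<TService>(object? key = null) where TService : class;
}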
The whole OnnxRuntimeChatClient I implemented is shown below. It is still a rough implementation, but I believe it covers all the necessary features.
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.Extensions.AI;
using Microsoft.ML.OnnxRuntimeGenAI;

public class OnnxRuntimeChatClient : IChatClient
{
    public OnnxRuntimeChatClient(string modelPath)
    {
        _model = new Model(modelPath);
        _tokenizer = new Tokenizer(_model);
    }

    private readonly Model _model;
    private readonly Tokenizer _tokenizer;

    public void Dispose()
    {
        _tokenizer.Dispose();
        _model.Dispose();
    }

    // async without await: ONNX Runtime Generative AI has no async APIs (see note below)
    public async Task<ChatCompletion> CompleteAsync(IList<ChatMessage> chatMessages, ChatOptions? options = null, CancellationToken cancellationToken = default)
    {
        var sequences = _tokenizer.Encode(BuildPrompt(chatMessages));

        var generatorParams = new GeneratorParams(_model);

        // Map the MEAI ChatOptions onto ONNX Runtime Generative AI search options
        if (options?.MaxOutputTokens is not null)
        {
            generatorParams.SetSearchOption("max_length", options.MaxOutputTokens.Value);
        }
        if (options?.Temperature is not null)
        {
            generatorParams.SetSearchOption("temperature", options.Temperature.Value);
        }
        if (options?.TopP is not null)
        {
            generatorParams.SetSearchOption("top_p", options.TopP.Value);
        }

        generatorParams.SetInputSequences(sequences);
        generatorParams.TryGraphCaptureWithMaxBatchSize(1);

        // Generate runs synchronously; decode the whole output in one go
        var outputSequences = _model.Generate(generatorParams);
        var outputText = _tokenizer.Decode(outputSequences[0]);

        return new(new ChatMessage { Role = ChatRole.Assistant, Text = outputText });
    }

    public async IAsyncEnumerable<StreamingChatCompletionUpdate> CompleteStreamingAsync(IList<ChatMessage> chatMessages, ChatOptions? options = null, [EnumeratorCancellation] CancellationToken cancellationToken = default)
    {
        var sequences = _tokenizer.Encode(BuildPrompt(chatMessages));

        var generatorParams = new GeneratorParams(_model);

        if (options?.MaxOutputTokens is not null)
        {
            generatorParams.SetSearchOption("max_length", options.MaxOutputTokens.Value);
        }
        if (options?.Temperature is not null)
        {
            generatorParams.SetSearchOption("temperature", options.Temperature.Value);
        }
        if (options?.TopP is not null)
        {
            generatorParams.SetSearchOption("top_p", options.TopP.Value);
        }

        generatorParams.SetInputSequences(sequences);
        generatorParams.TryGraphCaptureWithMaxBatchSize(1);

        using var tokenizerStream = _tokenizer.CreateStream();
        using var generator = new Generator(_model, generatorParams);

        // Decode and yield one token at a time for streaming
        while (!generator.IsDone())
        {
            generator.ComputeLogits();
            generator.GenerateNextToken();

            var outputText = tokenizerStream.Decode(generator.GetSequence(0)[^1]);

            yield return new StreamingChatCompletionUpdate { Role = ChatRole.Assistant, Text = outputText };
        }
    }

    public TService? GetService<TService>(object? key = null) where TService : class => this as TService;

    public ChatClientMetadata Metadata { get; }

    // Build a Phi-3 style chat prompt from the MEAI message list
    private string BuildPrompt(IList<ChatMessage> chatMessages)
    {
        var prompt = new StringBuilder();

        foreach (var chatMessage in chatMessages)
        {
            if (chatMessage.Role == ChatRole.System)
            {
                prompt.Append($"<|system|>{chatMessage.Text}<|end|>");
            }
            else if (chatMessage.Role == ChatRole.User)
            {
                prompt.Append($"<|user|>{chatMessage.Text}<|end|>");
            }
            else if (chatMessage.Role == ChatRole.Assistant)
            {
                prompt.Append($"<|assistant|>{chatMessage.Text}<|end|>");
            }
        }

        prompt.Append("<|assistant|>");

        return prompt.ToString();
    }
}
The implementation is not difficult, so I won't walk through it; basically it works by just implementing CompleteAsync and CompleteStreamingAsync. ONNX Runtime Generative AI does not provide asynchronous methods, so this implementation does not use any. I wasn't sure how else to satisfy IAsyncEnumerable<T>, so I worked around it by marking the methods async.
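If blocking the calling thread is a concern, one workaround (a sketch of an alternative, not something the implementation above does) is to push the whole synchronous generation onto the thread pool:

// Hypothetical helper, not part of OnnxRuntimeChatClient above: the generation
// loop is CPU-bound and synchronous, so Task.Run moves it off the calling thread.
// CompleteAsync completes synchronously, and Task.Run unwraps the returned Task.
Task<ChatCompletion> CompleteOffloadedAsync(
    IChatClient client,
    IList<ChatMessage> chatMessages,
    ChatOptions? options = null,
    CancellationToken cancellationToken = default)
    => Task.Run(() => client.CompleteAsync(chatMessages, options, cancellationToken), cancellationToken);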
To verify it works, I tried the just-released Phi-3.5 mini. An ONNX version is available, so it can now be tried just by downloading it.
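Downloading the CPU int4 variant used below should look something like this (the repository and folder names are taken from the path in the sample code; check Hugging Face for the current layout):

huggingface-cli download microsoft/Phi-3.5-mini-instruct-onnx --include "cpu_and_mobile/cpu-int4-awq-block-128-acc-level-4/*" --local-dir .\Phi-3.5-mini-instruct-onnx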
Sample code that uses this class for inference is as simple as the following. Since it supports async iterators, it is pleasant to use.
using Microsoft.Extensions.AI;

var client = new OnnxRuntimeChatClient(@".\Phi-3.5-mini-instruct-onnx\cpu_and_mobile\cpu-int4-awq-block-128-acc-level-4");

await foreach (var update in client.CompleteStreamingAsync("Please briefly explain Microsoft"))
{
    Console.Write(update);
}
Running this code, the Phi-3.5 mini inference results are displayed as they stream in. As usual the content is the somewhat loose output you'd expect from an SLM, but the point is that the SLM is driven by the same code as in the OpenAI case.
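For completeness, the non-streaming path works through the same abstraction; here is a minimal sketch (the ChatMessage constructor shown is the preview-era one, and the option values are arbitrary examples):

// Minimal non-streaming sketch; option values are arbitrary
var completion = await client.CompleteAsync(
    [new ChatMessage(ChatRole.User, "Please briefly explain Microsoft")],
    new ChatOptions { MaxOutputTokens = 512, Temperature = 0.7f });

// ChatCompletion.Message holds the assistant reply in the preview API
Console.WriteLine(completion.Message.Text);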
Honestly, the best outcome would be for an official library supporting ONNX Runtime Generative AI to be released, but since the abstracted interfaces are provided, being able to implement your own client this easily is a pretty big advantage.