Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
ffe9a92
Prototype of using ImageGenerationTool
ericstj Aug 5, 2025
e5edc77
Handle DataContent returned from ImageGen
ericstj Aug 8, 2025
2d19cce
React to rename and improve metadata
ericstj Aug 9, 2025
5eef474
Handle image_generation tool content from streaming
ericstj Aug 20, 2025
ff80804
Add handling for combining updates with images
ericstj Aug 25, 2025
1725ce1
Add tests for new ChatResponseUpdateExtensions
ericstj Aug 26, 2025
c44f5fb
Merge branch 'main' of https://github.com/dotnet/extensions into Imag…
ericstj Sep 19, 2025
b4fe94b
Rename ImageGenerationTool to HostedImageGenerationTool
ericstj Sep 20, 2025
06bfa30
Remove ChatResponseUpdateCoalescingOptions
ericstj Sep 20, 2025
ca8b15d
Add ImageGeneratingChatClient
ericstj Sep 23, 2025
62e0ac5
Fix namespace of tool
ericstj Sep 26, 2025
81e6e5a
Replace traces of function calling
ericstj Sep 26, 2025
6559a66
More namespace fix
ericstj Sep 26, 2025
398bbdb
Enable editing
ericstj Sep 30, 2025
ac2de35
Merge branch 'main' of https://github.com/dotnet/extensions into Imag…
ericstj Sep 30, 2025
1d96532
Update to preview OpenAI with image tool support
ericstj Oct 1, 2025
6a6ffa2
Temporary OpenAI feed
ericstj Oct 3, 2025
94ceab2
Fix tests
ericstj Oct 3, 2025
96e9747
Add integration tests for ImageGeneratingChatClient
ericstj Oct 3, 2025
9ddc91a
Remove ChatRole.Tool -> Assistant workaround
ericstj Oct 4, 2025
3b589ac
Remove use of private reflection for Image results
ericstj Oct 6, 2025
20919ab
Add ChatResponseUpdate.Clone
ericstj Oct 6, 2025
e5f68a6
Move all mutable state into RequestState object
ericstj Oct 7, 2025
9f9a430
Adjust prompt to improve integration test reliability
ericstj Oct 7, 2025
799a72e
Refactor tool initialization
ericstj Oct 7, 2025
6029b01
Add integration tests for streaming
ericstj Oct 7, 2025
173352a
Merge remote-tracking branch 'upstream/main' into ImageGenerationTool
ericstj Oct 27, 2025
69d2d98
React to changes and fix tests
ericstj Oct 27, 2025
86363f8
Address feedback
ericstj Oct 28, 2025
94cffbd
Fix SkipTestException from ConditionalTheory
ericstj Oct 30, 2025
67089ab
Merge branch 'main' of https://github.com/dotnet/extensions into Imag…
ericstj Oct 30, 2025
56cf3b4
Fix formatting
ericstj Oct 30, 2025
ad2b953
Add back image replacement coalescing (removed in merge)
ericstj Oct 30, 2025
e20b768
Fix template tests and use new OpenAI
ericstj Oct 30, 2025
b392df3
Merge branch 'main' of https://github.com/dotnet/extensions into Imag…
ericstj Nov 3, 2025
a38fd6c
Remove use of temporary staging nuget feed
ericstj Nov 3, 2025
cfa3f16
Address feedback
ericstj Nov 4, 2025
dcdebef
Make ImageGeneratingChatClient use ImageGenerationTool*Content
ericstj Nov 4, 2025
a60ddd2
Remove ApplyUpdates and Coalesce ImageResults instead of DataContent.
ericstj Nov 5, 2025
50db985
Workaround OpenAI issue where image data is not read for partial images.
ericstj Nov 5, 2025
10467a5
Improved workaround
ericstj Nov 5, 2025
8cff0ae
Return ImageGenerationToolCallContent from OpenAI
ericstj Nov 5, 2025
7780595
Add OpenAI image tool tests with representation of real traffic
ericstj Nov 6, 2025
9b94d68
Correct the event sequence for streaming single image
ericstj Nov 6, 2025
87dfaa4
Fix some docs and refactor for clarity
ericstj Nov 7, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ namespace Microsoft.Extensions.AI;
/// </para>
/// <para>
/// The relationship between <see cref="ChatResponse"/> and <see cref="ChatResponseUpdate"/> is
/// codified in the <see cref="ChatResponseExtensions.ToChatResponseAsync"/> and
/// codified in the <see cref="ChatResponseExtensions.ToChatResponseAsync(IAsyncEnumerable{ChatResponseUpdate}, System.Threading.CancellationToken)"/> and
/// <see cref="ChatResponse.ToChatResponseUpdates"/>, which enable bidirectional conversions
/// between the two. Note, however, that the provided conversions may be lossy, for example if multiple
/// updates all have different <see cref="RawRepresentation"/> objects whereas there's only one slot for
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics.CodeAnalysis;

namespace Microsoft.Extensions.AI;

/// <summary>
/// Provides options for configuring how <see cref="ChatResponseUpdate"/> instances are coalesced
/// when converting them to <see cref="ChatMessage"/> instances.
/// </summary>
/// <remarks>
/// The type is experimental and reports the same diagnostic ID as the other experimental
/// types in this namespace, so a single suppression covers all of them.
/// </remarks>
[Experimental("MEAI001")]
public class ChatResponseUpdateCoalescingOptions
{
    /// <summary>
    /// Gets or sets a value indicating whether to replace existing <see cref="DataContent"/> items
    /// when a new <see cref="DataContent"/> item with the same <see cref="DataContent.Name"/> is encountered.
    /// </summary>
    /// <value>
    /// <see langword="true"/> to replace existing <see cref="DataContent"/> items with the same name;
    /// <see langword="false"/> to keep all <see cref="DataContent"/> items. The default is <see langword="false"/>.
    /// </value>
    /// <remarks>
    /// When this property is <see langword="true"/>, if a <see cref="DataContent"/> item is being added
    /// and there's already a <see cref="DataContent"/> item in the content list with the same
    /// <see cref="DataContent.Name"/>, the existing item is replaced with the new one.
    /// This is useful for scenarios where updated data should override previous data with the same identifier.
    /// </remarks>
    public bool ReplaceDataContentWithSameName { get; set; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;

namespace Microsoft.Extensions.AI;

/// <summary>Represents a hosted tool that tells an AI service it is permitted to perform image generation.</summary>
/// <remarks>
/// The tool carries no image generation logic of its own. It acts purely as a marker informing a service
/// that it may generate images, provided the service is capable of doing so.
/// </remarks>
[Experimental("MEAI001")]
public class ImageGenerationTool : AITool
{
    /// <summary>
    /// Initializes a new instance of the <see cref="ImageGenerationTool"/> class with the specified options.
    /// </summary>
    /// <param name="options">
    /// The options to configure the image generation request. If <paramref name="options"/> is
    /// <see langword="null"/>, default options will be used.
    /// </param>
    public ImageGenerationTool(ImageGenerationOptions? options = null)
    {
        // The options (possibly null) are always stored under the ImageGenerationOptions type name.
        Dictionary<string, object?> properties = new()
        {
            [nameof(ImageGenerationOptions)] = options,
        };

        AdditionalProperties = new AdditionalPropertiesDictionary(properties);
    }

    /// <inheritdoc />
    public override IReadOnlyDictionary<string, object?> AdditionalProperties { get; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ namespace Microsoft.Extensions.AI;
WriteIndented = true)]
[JsonSerializable(typeof(OpenAIClientExtensions.ToolJson))]
[JsonSerializable(typeof(IDictionary<string, object?>))]
[JsonSerializable(typeof(IDictionary<string, string?>))]
[JsonSerializable(typeof(string[]))]
[JsonSerializable(typeof(IEnumerable<string>))]
[JsonSerializable(typeof(JsonElement))]
[JsonSerializable(typeof(int))]
internal sealed partial class OpenAIJsonContext : JsonSerializerContext;
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,17 @@
using System;
using System.ClientModel.Primitives;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.Json;
using System.Text.Json.Serialization.Metadata;
using System.Threading;
using System.Threading.Tasks;
using Microsoft.Shared.Diagnostics;
using OpenAI.Images;
using OpenAI.Responses;

#pragma warning disable S907 // "goto" statement should not be used
Expand Down Expand Up @@ -163,7 +166,15 @@ internal static IEnumerable<ChatMessage> ToChatMessages(IEnumerable<ResponseItem
break;

default:
message.Contents.Add(new() { RawRepresentation = outputItem });
if (outputItem.GetType().Name == "InternalImageGenToolCallItemResource")
{
message.Contents.Add(GetContentFromImageGen(outputItem));
}
else
{
message.Contents.Add(new() { RawRepresentation = outputItem });
}

break;
}
}
Expand All @@ -174,6 +185,118 @@ internal static IEnumerable<ChatMessage> ToChatMessages(IEnumerable<ResponseItem
}
}

/// <summary>Builds a <see cref="DataContent"/> from an image generation output item by reading it via reflection.</summary>
/// <param name="outputItem">The response item whose image payload and metadata are read.</param>
/// <returns>
/// A <see cref="DataContent"/> holding the decoded image bytes, with <paramref name="outputItem"/> as its raw
/// representation and the image metadata stored in its additional properties.
/// </returns>
/// <exception cref="InvalidOperationException">The image generation item type could not be resolved by name.</exception>
[DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(ResponseItem))]
private static DataContent GetContentFromImageGen(ResponseItem outputItem)
{
    const BindingFlags AnyInstanceMember = BindingFlags.Instance | BindingFlags.NonPublic | BindingFlags.Public;

    // The item type is looked up by its assembly-qualified name rather than referenced directly.
    Type? itemType = Type.GetType("OpenAI.Responses.InternalImageGenToolCallItemResource, OpenAI");
    if (itemType is null)
    {
        throw new InvalidOperationException("Unable to determine the type of the image generation result.");
    }

    object? statusValue = itemType.GetProperty("Status", AnyInstanceMember)?.GetValue(outputItem);
    string? status = statusValue?.ToString();

    object? resultValue = itemType.GetProperty("Result", AnyInstanceMember)?.GetValue(outputItem);
    string? base64Image = resultValue as string;

    object? rawDataValue = itemType.GetProperty("SerializedAdditionalRawData", AnyInstanceMember)?.GetValue(outputItem);
    var rawData = rawDataValue as IDictionary<string, BinaryData>;

    // Metadata read from the additional raw data:
    //   background, output_format, quality, revised_prompt, size

    // A missing output_format falls back to "png"; a missing result decodes to an empty payload.
    string format = ReadRawString("output_format") ?? "png";
    byte[] imageBytes = Convert.FromBase64String(base64Image ?? string.Empty);

    return new DataContent(imageBytes, "image/" + format)
    {
        RawRepresentation = outputItem,
        AdditionalProperties = new AdditionalPropertiesDictionary
        {
            ["background"] = ReadRawString("background"),
            ["output_format"] = format,
            ["quality"] = ReadRawString("quality"),
            ["revised_prompt"] = ReadRawString("revised_prompt"),
            ["size"] = ReadRawString("size"),
            ["status"] = status,
        }
    };

    // Deserializes the named raw-data entry as a JSON string; returns null when the entry is absent.
    string? ReadRawString(string key)
    {
        if (rawData is null || !rawData.TryGetValue(key, out var rawValue))
        {
            return null;
        }

        var stringTypeInfo = (JsonTypeInfo<string>)AIJsonUtilities.DefaultOptions.GetTypeInfo(typeof(string));
        return JsonSerializer.Deserialize(rawValue, stringTypeInfo);
    }
}

/// <summary>Builds a <see cref="DataContent"/> from a streaming partial-image event by reading it via reflection.</summary>
/// <param name="update">The streaming update whose partial image payload and metadata are read.</param>
/// <returns>
/// A <see cref="DataContent"/> holding the decoded partial image bytes, with <paramref name="update"/> as its raw
/// representation and the event's identifiers and image metadata stored in its additional properties.
/// </returns>
/// <exception cref="InvalidOperationException">The partial image event type could not be resolved by name.</exception>
[DynamicDependency(DynamicallyAccessedMemberTypes.All, typeof(ResponseItem))]
private static DataContent GetContentFromImageGenPartialImageEvent(StreamingResponseUpdate update)
{
    const BindingFlags InternalBindingFlags = BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Instance;

    // The event type is looked up by its assembly-qualified name rather than referenced directly.
    var partialImageEventType = Type.GetType("OpenAI.Responses.InternalResponseImageGenCallPartialImageEvent, OpenAI");
    if (partialImageEventType is null)
    {
        // Name the type that actually failed to resolve, so the failure is not mistaken for the
        // non-streaming image generation result path.
        throw new InvalidOperationException("Unable to determine the type of the image generation partial image event.");
    }

    var imageGenResult = partialImageEventType.GetProperty("PartialImageB64", InternalBindingFlags)?.GetValue(update) as string;
    var imageGenItemId = partialImageEventType.GetProperty("ItemId", InternalBindingFlags)?.GetValue(update) as string;
    var imageGenOutputIndex = partialImageEventType.GetProperty("OutputIndex", InternalBindingFlags)?.GetValue(update) as int?;
    var imageGenPartialImageIndex = partialImageEventType.GetProperty("PartialImageIndex", InternalBindingFlags)?.GetValue(update) as int?;

    IDictionary<string, BinaryData>? additionalRawData = partialImageEventType
        .GetProperty("SerializedAdditionalRawData", InternalBindingFlags)
        ?.GetValue(update) as IDictionary<string, BinaryData>;

    // Metadata read from the additional raw data:
    //   background, output_format, quality, revised_prompt, size

    // A missing output_format falls back to "png"; a missing payload decodes to an empty image.
    string outputFormat = GetStringProperty("output_format") ?? "png";

    var resultBytes = Convert.FromBase64String(imageGenResult ?? string.Empty);

    return new DataContent(resultBytes, $"image/{outputFormat}")
    {
        RawRepresentation = update,
        AdditionalProperties = new()
        {
            ["ItemId"] = imageGenItemId,
            ["OutputIndex"] = imageGenOutputIndex,
            ["PartialImageIndex"] = imageGenPartialImageIndex,
            ["background"] = GetStringProperty("background"),
            ["output_format"] = outputFormat,
            ["quality"] = GetStringProperty("quality"),
            ["revised_prompt"] = GetStringProperty("revised_prompt"),
            ["size"] = GetStringProperty("size"),
        }
    };

    // Deserializes the named raw-data entry as a JSON string; returns null when the entry is absent.
    string? GetStringProperty(string name)
    {
        if (additionalRawData?.TryGetValue(name, out var rawValue) == true)
        {
            var stringJsonTypeInfo = (JsonTypeInfo<string>)AIJsonUtilities.DefaultOptions.GetTypeInfo(typeof(string));
            return JsonSerializer.Deserialize(rawValue, stringJsonTypeInfo);
        }

        return null;
    }
}

/// <inheritdoc />
public IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(
IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default)
Expand Down Expand Up @@ -324,7 +447,16 @@ ChatResponseUpdate CreateUpdate(AIContent? content = null) =>
break;

default:
yield return CreateUpdate();

if (streamingUpdate.GetType().Name == "InternalResponseImageGenCallPartialImageEvent")
{
yield return CreateUpdate(GetContentFromImageGenPartialImageEvent(streamingUpdate));
}
else
{
yield return CreateUpdate();
}

break;
}
}
Expand All @@ -349,6 +481,59 @@ internal static ResponseTool ToResponseTool(AIFunction aiFunction, ChatOptions?
strict ?? false);
}

/// <summary>Creates an OpenAI <see cref="ResponseTool"/> of type <c>image_generation</c> from an <see cref="ImageGenerationTool"/>.</summary>
/// <param name="imageGenerationTool">
/// The tool whose <see cref="ImageGenerationTool.AdditionalProperties"/> may carry an <see cref="ImageGenerationOptions"/> instance.
/// </param>
/// <param name="options">
/// Optional chat options. Their additional properties are consulted for an <see cref="ImageGenerationOptions"/>
/// instance when the tool itself does not supply one.
/// </param>
/// <returns>The <see cref="ResponseTool"/> read back from the JSON form of the assembled tool options.</returns>
internal static ResponseTool ToImageResponseTool(ImageGenerationTool imageGenerationTool, ChatOptions? options = null)
{
    // Options stored on the tool take precedence. The key may be present with a null value, so the
    // presence of the key alone must not prevent falling back to the chat options.
    ImageGenerationOptions? imageGenerationOptions = null;
    if (imageGenerationTool.AdditionalProperties.TryGetValue(nameof(ImageGenerationOptions), out object? toolValue))
    {
        imageGenerationOptions = toolValue as ImageGenerationOptions;
    }

    if (imageGenerationOptions is null &&
        (options?.AdditionalProperties?.TryGetValue(nameof(ImageGenerationOptions), out object? chatValue) ?? false))
    {
        imageGenerationOptions = chatValue as ImageGenerationOptions;
    }

    // Start from the dictionary supplied by the raw representation factory (if any). That dictionary is
    // modified in place: "type" is always set, the remaining entries only when not already present.
    var toolOptions = imageGenerationOptions?.RawRepresentationFactory?.Invoke(null!) as Dictionary<string, object> ?? new();
    toolOptions["type"] = "image_generation";

    // Size: Image dimensions (e.g., 1024x1024, 1024x1536)
    if (imageGenerationOptions?.ImageSize is not null && !toolOptions.ContainsKey("size"))
    {
        // Use a custom type to ensure the size is formatted correctly.
        // This is a workaround for OpenAI's specific size format requirements.
        toolOptions["size"] = new GeneratedImageSize(
            imageGenerationOptions.ImageSize.Value.Width,
            imageGenerationOptions.ImageSize.Value.Height).ToString();
    }

    // Format: File output format. The guard checks the same key that is written, so an
    // "output_format" already supplied through the raw representation is preserved.
    if (imageGenerationOptions?.MediaType is string mediaType && !toolOptions.ContainsKey("output_format"))
    {
        string? outputFormat = mediaType switch
        {
            "image/png" => GeneratedImageFileFormat.Png.ToString(),
            "image/jpeg" => GeneratedImageFileFormat.Jpeg.ToString(),
            "image/webp" => GeneratedImageFileFormat.Webp.ToString(),
            _ => null,
        };

        // An unrecognized media type leaves the format unset instead of sending an empty value.
        if (outputFormat is not null)
        {
            toolOptions["output_format"] = outputFormat;
        }
    }

    // unexposed properties, string unless noted
    // background: transparent, opaque, auto
    // input_fidelity: effort model exerts to match input (high, low)
    // input_image_mask: optional image mask for inpainting. Object with property file_id string or image_url data string.
    // model: Model ID to use for image generation
    // moderation: Moderation level (auto, low)
    // output_compression: (int) Compression level (0-100%) for JPEG and WebP formats
    // partial_images: (int) Number of partial images to return (0-3)
    // quality: Rendering quality (e.g. low, medium, high)

    // Can't create the tool, but we can deserialize it from Json
    BinaryData toolOptionsData = BinaryData.FromBytes(
        JsonSerializer.SerializeToUtf8Bytes(toolOptions, OpenAIJsonContext.Default.IDictionaryStringObject));
    return ModelReaderWriter.Read<ResponseTool>(toolOptionsData, ModelReaderWriterOptions.Json)!;
}

/// <summary>Creates a <see cref="ChatRole"/> from a <see cref="MessageRole"/>.</summary>
private static ChatRole ToChatRole(MessageRole? role) =>
role switch
Expand Down Expand Up @@ -403,6 +588,10 @@ private ResponseCreationOptions ToOpenAIResponseCreationOptions(ChatOptions? opt
result.Tools.Add(ToResponseTool(aiFunction, options));
break;

case ImageGenerationTool imageGenerationTool:
result.Tools.Add(ToImageResponseTool(imageGenerationTool, options));
break;

case HostedWebSearchTool webSearchTool:
WebSearchUserLocation? location = null;
if (webSearchTool.AdditionalProperties.TryGetValue(nameof(WebSearchUserLocation), out object? objLocation))
Expand Down
Loading
Loading