diff --git a/docs/readme.md b/docs/readme.md index ada0f0d2..79e4ce10 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -112,6 +112,7 @@ Release Notes ### Upcoming - Changes - Firebase AI: Add support for Grounding with Google Maps. + - Firebase AI: Added `ImageConfig` to `GenerationConfig` for setting the aspect ratio and size of images generated with Nano Banana. - Storage: Added `ListAsync` API to list items and prefixes under a reference. ### 13.10.0 diff --git a/firebaseai/src/Candidate.cs b/firebaseai/src/Candidate.cs index 9565291f..6a91a7ea 100644 --- a/firebaseai/src/Candidate.cs +++ b/firebaseai/src/Candidate.cs @@ -64,6 +64,46 @@ public enum FinishReason /// Token generation was stopped because the function call generated by the model was invalid. /// MalformedFunctionCall, + /// + /// Token generation stopped because generated images contain safety violations. + /// + ImageSafety, + /// + /// Image generation stopped because generated images have other prohibited content. + /// + ImageProhibitedContent, + /// + /// Image generation stopped because of other miscellaneous issues. + /// + ImageOther, + /// + /// The model was expected to generate an image, but none was generated. + /// + NoImage, + /// + /// Image generation stopped due to recitation. + /// + ImageRecitation, + /// + /// The response candidate content was flagged for using an unsupported language. + /// + Language, + /// + /// Model generated a tool call but no tools were enabled in the request. + /// + UnexpectedToolCall, + /// + /// Model called too many tools consecutively, thus the system exited execution. + /// + TooManyToolCalls, + /// + /// Request has at least one thought signature missing. + /// + MissingThoughtSignature, + /// + /// Finished due to malformed response. 
+ /// + MalformedResponse, } /// @@ -137,6 +177,16 @@ private static FinishReason ParseFinishReason(string str) "PROHIBITED_CONTENT" => Firebase.AI.FinishReason.ProhibitedContent, "SPII" => Firebase.AI.FinishReason.SPII, "MALFORMED_FUNCTION_CALL" => Firebase.AI.FinishReason.MalformedFunctionCall, + "IMAGE_SAFETY" => Firebase.AI.FinishReason.ImageSafety, + "IMAGE_PROHIBITED_CONTENT" => Firebase.AI.FinishReason.ImageProhibitedContent, + "IMAGE_OTHER" => Firebase.AI.FinishReason.ImageOther, + "NO_IMAGE" => Firebase.AI.FinishReason.NoImage, + "IMAGE_RECITATION" => Firebase.AI.FinishReason.ImageRecitation, + "LANGUAGE" => Firebase.AI.FinishReason.Language, + "UNEXPECTED_TOOL_CALL" => Firebase.AI.FinishReason.UnexpectedToolCall, + "TOO_MANY_TOOL_CALLS" => Firebase.AI.FinishReason.TooManyToolCalls, + "MISSING_THOUGHT_SIGNATURE" => Firebase.AI.FinishReason.MissingThoughtSignature, + "MALFORMED_RESPONSE" => Firebase.AI.FinishReason.MalformedResponse, _ => Firebase.AI.FinishReason.Unknown, }; } diff --git a/firebaseai/src/GenerationConfig.cs b/firebaseai/src/GenerationConfig.cs index b8df13bb..926416c2 100644 --- a/firebaseai/src/GenerationConfig.cs +++ b/firebaseai/src/GenerationConfig.cs @@ -40,6 +40,7 @@ public readonly struct GenerationConfig private readonly JsonSchema _responseJsonSchema; private readonly List _responseModalities; private readonly ThinkingConfig? _thinkingConfig; + private readonly ImageConfig? _imageConfig; /// /// Creates a new `GenerationConfig` value. @@ -168,6 +169,9 @@ public readonly struct GenerationConfig /// An error will be returned if this field is set for models that don't /// support thinking. /// + /// + /// Configuration for the aspect ratio and size of generated images. + /// public GenerationConfig( float? temperature = null, float? topP = null, @@ -181,7 +185,8 @@ public GenerationConfig( Schema responseSchema = null, JsonSchema responseJsonSchema = null, IEnumerable responseModalities = null, - ThinkingConfig? 
thinkingConfig = null) + ThinkingConfig? thinkingConfig = null, + ImageConfig? imageConfig = null) { _temperature = temperature; _topP = topP; @@ -197,6 +202,7 @@ public GenerationConfig( _responseModalities = responseModalities != null ? new List(responseModalities) : null; _thinkingConfig = thinkingConfig; + _imageConfig = imageConfig; } /// @@ -222,7 +228,8 @@ internal Dictionary ToJson() jsonDict["responseModalities"] = _responseModalities.Select(EnumConverters.ResponseModalityToString).ToList(); } - if (_thinkingConfig != null) jsonDict["thinkingConfig"] = _thinkingConfig?.ToJson(); + if (_thinkingConfig != null) jsonDict["thinkingConfig"] = _thinkingConfig.Value.ToJson(); + if (_imageConfig != null) jsonDict["imageConfig"] = _imageConfig.Value.ToJson(); return jsonDict; } @@ -321,5 +328,81 @@ internal Dictionary ToJson() } } + /// + /// Configuration options for generating images with Gemini models. + /// + public readonly struct ImageConfig + { + /// + /// The aspect ratio of generated images. + /// + public readonly struct AspectRatio + { + public string Value { get; } + + /// + /// Constructs a custom AspectRatio, instead of one of the presets. + /// Note that the backend model needs to support the requested ratio. 
+ /// + public AspectRatio(string value) { Value = value; } + + public static readonly AspectRatio Square1x1 = new("1:1"); + public static readonly AspectRatio Portrait9x16 = new("9:16"); + public static readonly AspectRatio Landscape16x9 = new("16:9"); + public static readonly AspectRatio Portrait3x4 = new("3:4"); + public static readonly AspectRatio Landscape4x3 = new("4:3"); + public static readonly AspectRatio Portrait2x3 = new("2:3"); + public static readonly AspectRatio Landscape3x2 = new("3:2"); + public static readonly AspectRatio Portrait4x5 = new("4:5"); + public static readonly AspectRatio Landscape5x4 = new("5:4"); + public static readonly AspectRatio Portrait1x4 = new("1:4"); + public static readonly AspectRatio Landscape4x1 = new("4:1"); + public static readonly AspectRatio Portrait1x8 = new("1:8"); + public static readonly AspectRatio Landscape8x1 = new("8:1"); + public static readonly AspectRatio Ultrawide21x9 = new("21:9"); + public override string ToString() => Value; + } + + /// + /// The size of images to generate. + /// + public readonly struct ImageSize + { + public string Value { get; } + + /// + /// Constructs a custom ImageSize, instead of one of the presets. + /// Note that the backend model needs to support the requested size. + /// + public ImageSize(string value) { Value = value; } + + public static readonly ImageSize Size512 = new("512"); + public static readonly ImageSize Size1K = new("1K"); + public static readonly ImageSize Size2K = new("2K"); + public static readonly ImageSize Size4K = new("4K"); + + public override string ToString() => Value; + } + + public AspectRatio? Ratio { get; } + public ImageSize? Size { get; } + + /// + /// Creates a new `ImageConfig` with the given settings. + /// + public ImageConfig(AspectRatio? aspectRatio = null, ImageSize? 
imageSize = null) + { + Ratio = aspectRatio; + Size = imageSize; + } + + internal Dictionary ToJson() + { + Dictionary jsonDict = new(); + if (Ratio?.Value is string aspectRatio) jsonDict["aspectRatio"] = aspectRatio; + if (Size?.Value is string imageSize) jsonDict["imageSize"] = imageSize; + return jsonDict; + } + } } diff --git a/firebaseai/testapp/Assets/Firebase/Sample/FirebaseAI/UIHandlerAutomated.cs b/firebaseai/testapp/Assets/Firebase/Sample/FirebaseAI/UIHandlerAutomated.cs index 1cc30ad5..a2cc0ee8 100644 --- a/firebaseai/testapp/Assets/Firebase/Sample/FirebaseAI/UIHandlerAutomated.cs +++ b/firebaseai/testapp/Assets/Firebase/Sample/FirebaseAI/UIHandlerAutomated.cs @@ -188,6 +188,8 @@ protected override void Start() TestReadSecureFile, // Internal tests for Json parsing, requires using a source library. InternalTestBasicReplyShort, + InternalTestFinishReasonExpanded, + InternalTestImageConfigSerialization, InternalTestCitations, InternalTestBlockedSafetyWithMessage, InternalTestFinishReasonSafetyNoContent, @@ -878,7 +880,9 @@ async Task TestGenerateImage(Backend backend) { var model = GetFirebaseAI(backend).GetGenerativeModel("gemini-2.5-flash-image", generationConfig: new GenerationConfig( - responseModalities: new[] { ResponseModality.Text, ResponseModality.Image }) + responseModalities: new[] { ResponseModality.Text, ResponseModality.Image }, + imageConfig: new ImageConfig( + aspectRatio: ImageConfig.AspectRatio.Square1x1)) ); GenerateContentResponse response = await model.GenerateContentAsync( @@ -890,6 +894,7 @@ async Task TestGenerateImage(Backend backend) // We don't care much about the response, just that there is an image, and text. 
bool foundText = false; bool foundImage = false; + Texture2D image = new(1, 2); var candidate = response.Candidates.First(); foreach (var part in candidate.Content.Parts) { @@ -902,10 +907,13 @@ async Task TestGenerateImage(Backend backend) if (dataPart.MimeType.Contains("image")) { foundImage = true; + image.LoadImage(dataPart.Data.ToArray()); } } } Assert($"Missing expected modalities. Text: {foundText}, Image: {foundImage}", foundText && foundImage); + // The height and width should match, since we requested a 1:1 (square) aspect ratio. + AssertEq("Image dimensions should match.", image.height, image.width); } // Test generating an image via Imagen. @@ -1541,6 +1549,61 @@ async Task InternalTestBasicReplyShort() ValidateUsageMetadata(response.UsageMetadata, 6, 7, 0, 0, 13); } + // Test that parsing a response with expanded FinishReason works. + Task InternalTestFinishReasonExpanded() + { + string jsonStr = @"{ + ""candidates"": [{ + ""content"": { + ""parts"": [{""text"": ""Hello""}], + ""role"": ""model"" + }, + ""finishReason"": ""IMAGE_SAFETY"" + }] + }"; + Dictionary json = (Dictionary)Json.Deserialize(jsonStr); + GenerateContentResponse response = GenerateContentResponse.FromJson(json, FirebaseAI.Backend.InternalProvider.VertexAI); + + Assert("Response missing candidates.", response.Candidates.Any()); + Candidate candidate = response.Candidates.First(); + AssertEq("FinishReason", candidate.FinishReason, FinishReason.ImageSafety); + + // Repeat with a second new value, MALFORMED_RESPONSE, and an empty parts list. + jsonStr = @"{ + ""candidates"": [{ + ""content"": {""parts"": []}, + ""finishReason"": ""MALFORMED_RESPONSE"" + }] + }"; + json = (Dictionary)Json.Deserialize(jsonStr); + response = GenerateContentResponse.FromJson(json, FirebaseAI.Backend.InternalProvider.VertexAI); + candidate = response.Candidates.First(); + AssertEq("FinishReason", candidate.FinishReason, FinishReason.MalformedResponse); + + return Task.CompletedTask; + } + + // Test that ImageConfig serialization works as expected. 
+ Task InternalTestImageConfigSerialization() + { + var imageConfig = new ImageConfig(ImageConfig.AspectRatio.Landscape16x9, ImageConfig.ImageSize.Size1K); + var json = imageConfig.ToJson(); + + AssertEq("ImageConfig.aspectRatio", json["aspectRatio"], "16:9"); + AssertEq("ImageConfig.imageSize", json["imageSize"], "1K"); + + var genConfig = new GenerationConfig(imageConfig: imageConfig); + var genJson = genConfig.ToJson(); + + Assert("GenerationConfig missing imageConfig", genJson.ContainsKey("imageConfig")); + var imageConfigJson = genJson["imageConfig"] as Dictionary; + Assert("imageConfig is not a dictionary", imageConfigJson != null); + AssertEq("imageConfigJson.aspectRatio", imageConfigJson["aspectRatio"], "16:9"); + AssertEq("imageConfigJson.imageSize", imageConfigJson["imageSize"], "1K"); + + return Task.CompletedTask; + } + // Test that parsing a response including Citations works. // https://github.com/FirebaseExtended/vertexai-sdk-test-data/blob/main/mock-responses/unary-success-citations.json async Task InternalTestCitations()