Commit ea4d370

feat: Add GeneratedOutput enum for direct JSON returns (#1395)
feat: allow LlmGenerationClient::generate() to return JSON directly
1 parent 8a88925 commit ea4d370
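
In short: LlmGenerateResponse previously exposed a single text: String that callers had to re-parse; after this commit it carries a GeneratedOutput enum, so providers that already hold parsed JSON can return it directly. Condensed from the mod.rs diff below:

#[derive(Debug)]
pub enum GeneratedOutput {
    Json(serde_json::Value), // structured output, already parsed by the provider client
    Text(String),            // plain text, behavior unchanged
}

#[derive(Debug)]
pub struct LlmGenerateResponse {
    pub output: GeneratedOutput, // was: pub text: String
}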

File tree

7 files changed: +79 −23 lines

rust/cocoindex/src/llm/anthropic.rs
rust/cocoindex/src/llm/bedrock.rs
rust/cocoindex/src/llm/gemini.rs
rust/cocoindex/src/llm/mod.rs
rust/cocoindex/src/llm/ollama.rs
rust/cocoindex/src/llm/openai.rs
rust/cocoindex/src/ops/functions/extract_by_llm.rs

rust/cocoindex/src/llm/anthropic.rs

Lines changed: 8 additions & 7 deletions
@@ -2,7 +2,7 @@ use crate::prelude::*;
 use base64::prelude::*;
 
 use crate::llm::{
-    LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, OutputFormat,
+    GeneratedOutput, LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, OutputFormat,
     ToJsonSchemaOptions, detect_image_mime_type,
 };
 use anyhow::Context;
@@ -126,22 +126,21 @@ impl LlmGenerationClient for Client {
                 }
             }
         }
-        let text = if let Some(json) = extracted_json {
-            // Try strict JSON serialization first
-            serde_json::to_string(&json)?
+        let json_value = if let Some(json) = extracted_json {
+            json
         } else {
            // Fallback: try text if no tool output found
            match &mut resp_json["content"][0]["text"] {
                serde_json::Value::String(s) => {
                    // Try strict JSON parsing first
                    match utils::deser::from_json_str::<serde_json::Value>(s) {
-                        Ok(_) => std::mem::take(s),
+                        Ok(value) => value,
                        Err(e) => {
                            // Try permissive json5 parsing as fallback
                            match json5::from_str::<serde_json::Value>(s) {
                                Ok(value) => {
                                    println!("[Anthropic] Used permissive JSON5 parser for output");
-                                    serde_json::to_string(&value)?
+                                    value
                                }
                                Err(e2) => {
                                    return Err(anyhow::anyhow!(format!(
@@ -160,7 +159,9 @@ impl LlmGenerationClient for Client {
            }
        };
 
-        Ok(LlmGenerateResponse { text })
+        Ok(LlmGenerateResponse {
+            output: GeneratedOutput::Json(json_value),
+        })
    }
 
    fn json_schema_options(&self) -> ToJsonSchemaOptions {
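
The Anthropic fallback path above keeps its two-stage parse but now returns the parsed value instead of re-serializing it to a string. A free-standing sketch of that ladder (parse_llm_json is an illustrative name, not part of the commit, and serde_json::from_str stands in for the crate-internal utils::deser::from_json_str; the json5 crate is the one used above):

use serde_json::Value;

// Strict serde_json parse first; fall back to permissive json5, mirroring the
// Anthropic client above. Both error messages are surfaced if both fail.
fn parse_llm_json(s: &str) -> anyhow::Result<Value> {
    match serde_json::from_str::<Value>(s) {
        Ok(value) => Ok(value),
        Err(e) => match json5::from_str::<Value>(s) {
            Ok(value) => {
                println!("Used permissive JSON5 parser for output");
                Ok(value)
            }
            Err(e2) => Err(anyhow::anyhow!(
                "strict JSON parse failed ({e}); json5 fallback failed ({e2})"
            )),
        },
    }
}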

rust/cocoindex/src/llm/bedrock.rs

Lines changed: 20 additions & 5 deletions
@@ -2,7 +2,7 @@ use crate::prelude::*;
 use base64::prelude::*;
 
 use crate::llm::{
-    LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, OutputFormat,
+    GeneratedOutput, LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, OutputFormat,
     ToJsonSchemaOptions, detect_image_mime_type,
 };
 use anyhow::Context;
@@ -83,6 +83,7 @@ impl LlmGenerationClient for Client {
        }
 
        // Handle structured output using tool schema
+        let has_json_schema = request.output_format.is_some();
        if let Some(OutputFormat::JsonSchema { schema, name }) = request.output_format.as_ref() {
            let schema_json = serde_json::to_value(schema)?;
            payload["toolConfig"] = serde_json::json!({
@@ -134,7 +135,7 @@ impl LlmGenerationClient for Client {
        let message = &output["message"];
        let content = &message["content"];
 
-        let text = if let Some(content_array) = content.as_array() {
+        let generated_output = if let Some(content_array) = content.as_array() {
            // Look for tool use first (structured output)
            let mut extracted_json: Option<serde_json::Value> = None;
            for item in content_array {
@@ -148,7 +149,19 @@ impl LlmGenerationClient for Client {
 
            if let Some(json) = extracted_json {
                // Return the structured output as JSON
-                serde_json::to_string(&json)?
+                GeneratedOutput::Json(json)
+            } else if has_json_schema {
+                // If JSON schema was requested but no tool output found, try parsing text as JSON
+                let mut text_parts = Vec::new();
+                for item in content_array {
+                    if let Some(text) = item.get("text") {
+                        if let Some(text_str) = text.as_str() {
+                            text_parts.push(text_str);
+                        }
+                    }
+                }
+                let text = text_parts.join("");
+                GeneratedOutput::Json(serde_json::from_str(&text)?)
            } else {
                // Fall back to text content
                let mut text_parts = Vec::new();
@@ -159,13 +172,15 @@ impl LlmGenerationClient for Client {
                    }
                }
            }
-            text_parts.join("")
+            GeneratedOutput::Text(text_parts.join(""))
            }
        } else {
            return Err(anyhow::anyhow!("No content found in Bedrock response"));
        };
 
-        Ok(LlmGenerateResponse { text })
+        Ok(LlmGenerateResponse {
+            output: generated_output,
+        })
    }
 
    fn json_schema_options(&self) -> ToJsonSchemaOptions {
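
When a schema was requested but Bedrock returned no tool-use block, the new branch above stitches the text items together and parses the result as JSON, propagating any parse error via the ? operator. The gathering step as a stand-alone sketch (gather_text is an illustrative name, not part of the commit):

use serde_json::Value;

// Concatenate every "text" field found in a Bedrock-style content array.
// Items without a string "text" field are skipped, as in the client above.
fn gather_text(content_array: &[Value]) -> String {
    content_array
        .iter()
        .filter_map(|item| item.get("text").and_then(|t| t.as_str()))
        .collect::<Vec<_>>()
        .join("")
}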

rust/cocoindex/src/llm/gemini.rs

Lines changed: 19 additions & 4 deletions
@@ -1,8 +1,8 @@
 use crate::prelude::*;
 
 use crate::llm::{
-    LlmEmbeddingClient, LlmGenerateRequest, LlmGenerateResponse, LlmGenerationClient, OutputFormat,
-    ToJsonSchemaOptions, detect_image_mime_type,
+    GeneratedOutput, LlmEmbeddingClient, LlmGenerateRequest, LlmGenerateResponse,
+    LlmGenerationClient, OutputFormat, ToJsonSchemaOptions, detect_image_mime_type,
 };
 use base64::prelude::*;
 use google_cloud_aiplatform_v1 as vertexai;
@@ -134,6 +134,7 @@ impl LlmGenerationClient for AiStudioClient {
        }
 
        // If structured output is requested, add schema and responseMimeType
+        let has_json_schema = request.output_format.is_some();
        if let Some(OutputFormat::JsonSchema { schema, .. }) = &request.output_format {
            let schema_json = serde_json::to_value(schema)?;
            payload["generationConfig"] = serde_json::json!({
@@ -162,7 +163,13 @@
            _ => bail!("No text in response"),
        };
 
-        Ok(LlmGenerateResponse { text })
+        let output = if has_json_schema {
+            GeneratedOutput::Json(serde_json::from_str(&text)?)
+        } else {
+            GeneratedOutput::Text(text)
+        };
+
+        Ok(LlmGenerateResponse { output })
    }
 
    fn json_schema_options(&self) -> ToJsonSchemaOptions {
@@ -331,6 +338,7 @@ impl LlmGenerationClient for VertexAiClient {
        });
 
        // Compose generation config
+        let has_json_schema = request.output_format.is_some();
        let mut generation_config = None;
        if let Some(OutputFormat::JsonSchema { schema, .. }) = &request.output_format {
            let schema_json = serde_json::to_value(schema)?;
@@ -367,7 +375,14 @@
        else {
            bail!("No text in response");
        };
-        Ok(super::LlmGenerateResponse { text })
+
+        let output = if has_json_schema {
+            super::GeneratedOutput::Json(serde_json::from_str(&text)?)
+        } else {
+            super::GeneratedOutput::Text(text)
+        };
+
+        Ok(super::LlmGenerateResponse { output })
    }
 
    fn json_schema_options(&self) -> ToJsonSchemaOptions {
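
Both Gemini clients follow the same recipe, and the Ollama and OpenAI diffs below repeat it: record has_json_schema before the request is consumed, then branch once the reply text is in hand. Pulled out as a sketch (wrap_reply is an illustrative name; in the commit this logic is inlined in each generate()):

use crate::llm::{GeneratedOutput, LlmGenerateResponse};

// Shared wrap-up step: parse the provider's reply as JSON only when the
// caller asked for a JSON schema; otherwise pass the text through untouched.
fn wrap_reply(text: String, has_json_schema: bool) -> anyhow::Result<LlmGenerateResponse> {
    let output = if has_json_schema {
        GeneratedOutput::Json(serde_json::from_str(&text)?)
    } else {
        GeneratedOutput::Text(text)
    };
    Ok(LlmGenerateResponse { output })
}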

rust/cocoindex/src/llm/mod.rs

Lines changed: 7 additions & 1 deletion
@@ -74,9 +74,15 @@ pub struct LlmGenerateRequest<'a> {
     pub output_format: Option<OutputFormat<'a>>,
 }
 
+#[derive(Debug)]
+pub enum GeneratedOutput {
+    Json(serde_json::Value),
+    Text(String),
+}
+
 #[derive(Debug)]
 pub struct LlmGenerateResponse {
-    pub text: String,
+    pub output: GeneratedOutput,
 }
 
 #[async_trait]
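
A quick sanity sketch of the new shape (hypothetical test, not part of the commit):

#[test]
fn generated_output_holds_parsed_json() {
    let out = GeneratedOutput::Json(serde_json::json!({"ok": true}));
    match out {
        GeneratedOutput::Json(v) => assert_eq!(v["ok"], true),
        GeneratedOutput::Text(_) => panic!("expected the Json variant"),
    }
}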

rust/cocoindex/src/llm/ollama.rs

Lines changed: 9 additions & 3 deletions
@@ -89,6 +89,7 @@ impl LlmGenerationClient for Client {
        &self,
        request: super::LlmGenerateRequest<'req>,
    ) -> Result<super::LlmGenerateResponse> {
+        let has_json_schema = request.output_format.is_some();
        let req = OllamaRequest {
            model: request.model,
            prompt: request.user_prompt.as_ref(),
@@ -109,9 +110,14 @@
            .await
            .context("Ollama API error")?;
        let json: OllamaResponse = res.json().await?;
-        Ok(super::LlmGenerateResponse {
-            text: json.response,
-        })
+
+        let output = if has_json_schema {
+            super::GeneratedOutput::Json(serde_json::from_str(&json.response)?)
+        } else {
+            super::GeneratedOutput::Text(json.response)
+        };
+
+        Ok(super::LlmGenerateResponse { output })
    }
 
    fn json_schema_options(&self) -> super::ToJsonSchemaOptions {

rust/cocoindex/src/llm/openai.rs

Lines changed: 8 additions & 1 deletion
@@ -184,6 +184,7 @@ where
        &self,
        request: super::LlmGenerateRequest<'req>,
    ) -> Result<super::LlmGenerateResponse> {
+        let has_json_schema = request.output_format.is_some();
        let request = &request;
        let response = retryable::run(
            || async {
@@ -203,7 +204,13 @@
            .and_then(|choice| choice.message.content)
            .ok_or_else(|| anyhow::anyhow!("No response from OpenAI"))?;
 
-        Ok(super::LlmGenerateResponse { text })
+        let output = if has_json_schema {
+            super::GeneratedOutput::Json(serde_json::from_str(&text)?)
+        } else {
+            super::GeneratedOutput::Text(text)
+        };
+
+        Ok(super::LlmGenerateResponse { output })
    }
 
    fn json_schema_options(&self) -> super::ToJsonSchemaOptions {

rust/cocoindex/src/ops/functions/extract_by_llm.rs

Lines changed: 8 additions & 2 deletions
@@ -1,5 +1,6 @@
 use crate::llm::{
-    LlmGenerateRequest, LlmGenerationClient, LlmSpec, OutputFormat, new_llm_generation_client,
+    GeneratedOutput, LlmGenerateRequest, LlmGenerationClient, LlmSpec, OutputFormat,
+    new_llm_generation_client,
 };
 use crate::ops::sdk::*;
 use crate::prelude::*;
@@ -117,7 +118,12 @@ impl SimpleFunctionExecutor for Executor {
            }),
        };
        let res = self.client.generate(req).await?;
-        let json_value: serde_json::Value = utils::deser::from_json_str(res.text.as_str())?;
+        let json_value = match res.output {
+            GeneratedOutput::Json(json) => json,
+            GeneratedOutput::Text(text) => {
+                bail!("Expected JSON response but got text: {}", text)
+            }
+        };
        let value = self.value_extractor.extract_value(json_value)?;
        Ok(value)
    }
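
This caller previously re-parsed res.text; it now consumes the enum directly and bails on an unexpected Text variant. If more call sites appear, the match could move onto the enum itself; a hypothetical convenience, not in this commit:

impl GeneratedOutput {
    // Hypothetical helper: collapse the enum back to JSON, failing loudly on
    // plain text, exactly as extract_by_llm.rs does inline above.
    pub fn into_json(self) -> anyhow::Result<serde_json::Value> {
        match self {
            GeneratedOutput::Json(json) => Ok(json),
            GeneratedOutput::Text(text) => {
                anyhow::bail!("Expected JSON response but got text: {}", text)
            }
        }
    }
}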
