Skip to content

Commit 3506d7d

Browse files
authored
Fix structured-output handling for non-object JSON values
Preserve scalar, array, object, boolean, number, and null structured-output payloads across provider result handling and workflow judge validation. Update prompted-JSON wording to request a JSON value, normalize provider raw-payload metadata, and retry invalid structured-output judge responses with the original schema while keeping unsupported structured output fail-fast. Includes provider, workflow-envelope, and judge retry regression coverage plus workflow documentation updates.
1 parent: fa25c3d · commit: 3506d7d

17 files changed

Lines changed: 646 additions & 86 deletions

File tree

components/agent-session/src/psi/agent_session/workflow_judge.clj

Lines changed: 20 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -149,14 +149,26 @@
149149
:judge-output judge-output
150150
:judge-event judge-event
151151
:routing-result routing-result})
152-
{:judge-session-id judge-sid
153-
:judge-output judge-output
154-
:judge-event nil
155-
:routing-result (cond-> {:action :fail
156-
:reason :invalid-structured-output
157-
:output-key output-key}
158-
(or (:opts request-result) last-structured-output)
159-
(assoc :details {:structured-output (:structured-output structured-result)}))})))
152+
(if (< attempt max-judge-retries)
153+
(let [retry-result (if-let [opts (:opts request-result)]
154+
(turn-execution/execute-judge-turn!
155+
ctx judge-sid
156+
(judge-retry-feedback last-output expected-sigs)
157+
opts)
158+
(turn-execution/execute-judge-turn!
159+
ctx judge-sid
160+
(judge-retry-feedback last-output expected-sigs)))]
161+
(recur (inc attempt)
162+
(str/trim (:assistant-text retry-result))
163+
(:structured-output retry-result)))
164+
{:judge-session-id judge-sid
165+
:judge-output judge-output
166+
:judge-event nil
167+
:routing-result (cond-> {:action :fail
168+
:reason :invalid-structured-output
169+
:output-key output-key}
170+
(or (:opts request-result) last-structured-output)
171+
(assoc :details {:structured-output (:structured-output structured-result)}))}))))
160172
(let [routing-result (workflow-judge/evaluate-routing last-output routing-table
161173
current-step-id step-order step-runs)]
162174
(if (and (= :no-match (:action routing-result))

components/agent-session/test/psi/agent_session/workflow_judge_test.clj

Lines changed: 58 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -468,20 +468,58 @@
468468
(is (not (get-in @turn-opts* [:structured-output :require-provider-native?])))
469469
(is (= ["Return review JSON"] @turn-prompts*)))))))
470470

471+
(deftest execute-judge-invalid-structured-output-retry-then-succeeds-test
472+
;; Tests structured-output validation failures retry with the original opts and
473+
;; can recover to a valid routed judge result.
474+
(testing "structured judge output retries with structured opts and routes when retry is valid"
475+
(let [turns* (atom [])]
476+
(with-redefs [psi.session-persistence.core/messages-from-entries-in
477+
(fn [_ctx _sid] [])
478+
psi.workflow-runtime.turn-execution-contract/execute-judge-turn!
479+
(fn [_ctx sid text opts]
480+
(swap! turns* conj {:text text :opts opts})
481+
(if (= 1 (count @turns*))
482+
{:status :ok
483+
:session-id sid
484+
:assistant-text "APPROVED"}
485+
(let [ai-structured-output {:strategy :prompted-json
486+
:source :prompted-json/text
487+
:payload {"decision" "clear"
488+
"issues" []
489+
"confidence" 0.8}
490+
:raw-payload "{\"decision\":\"clear\",\"issues\":[],\"confidence\":0.8}"}]
491+
{:status :ok
492+
:session-id sid
493+
:assistant-text "{\"decision\":\"clear\",\"issues\":[],\"confidence\":0.8}"
494+
:structured-output ai-structured-output})))]
495+
(let [result (workflow-judge/execute-judge!
496+
(structured-judge-test-ctx) "parent-1" "actor-1"
497+
structured-review-judge-spec structured-review-routing-table
498+
{:current-step-id "step-3-review"
499+
:step-order step-order
500+
:step-runs structured-review-step-runs})]
501+
(is (= :clear (:judge-event result)))
502+
(is (= {:action :complete} (:routing-result result)))
503+
(is (= 2 (count @turns*)))
504+
(is (= (get-in (first @turns*) [:opts :structured-output])
505+
(get-in (second @turns*) [:opts :structured-output])))
506+
(is (re-find #"did not match any expected signal"
507+
(:text (second @turns*)))))))))
508+
471509
(deftest execute-judge-invalid-structured-output-fails-locally-test
472-
(testing "invalid structured judge output fails locally without prose routing or retry"
473-
(let [turn-prompts* (atom [])]
510+
(testing "invalid structured judge output retries and then fails locally without prose routing"
511+
(let [turns* (atom [])]
474512
(with-redefs [psi.session-persistence.core/messages-from-entries-in
475513
(fn [_ctx _sid] [])
476514
psi.workflow-runtime.turn-execution-contract/execute-judge-turn!
477515
(fn
478516
([_ctx sid text]
479-
(swap! turn-prompts* conj text)
517+
(swap! turns* conj {:text text :opts nil})
480518
{:status :ok
481519
:session-id sid
482520
:assistant-text "APPROVED"})
483-
([_ctx sid text _opts]
484-
(swap! turn-prompts* conj text)
521+
([_ctx sid text opts]
522+
(swap! turns* conj {:text text :opts opts})
485523
{:status :ok
486524
:session-id sid
487525
:assistant-text "APPROVED"}))]
@@ -490,7 +528,9 @@
490528
structured-review-judge-spec structured-review-routing-table
491529
{:current-step-id "step-3-review"
492530
:step-order step-order
493-
:step-runs structured-review-step-runs})]
531+
:step-runs structured-review-step-runs})
532+
turn-prompts (mapv :text @turns*)
533+
structured-output-opts (mapv #(get-in % [:opts :structured-output]) @turns*)]
494534
(is (nil? (:judge-event result)))
495535
(is (= {:action :fail
496536
:reason :invalid-structured-output
@@ -499,7 +539,18 @@
499539
(is (= :invalid
500540
(get-in result [:routing-result :details :structured-output :status])))
501541
(is (= :invalid (get-in result [:judge-output :review :structured-output :status])))
502-
(is (= ["Return review JSON"] @turn-prompts*)))))))
542+
(is (= 3 (count @turns*)))
543+
(is (= "Return review JSON" (first turn-prompts)))
544+
(is (every? #(re-find #"did not match any expected signal" %)
545+
(rest turn-prompts)))
546+
(is (every? some? structured-output-opts))
547+
(is (apply = structured-output-opts))
548+
(is (= (get-in (first @turns*) [:opts :structured-output :json-schema])
549+
{:type "object"
550+
:required ["decision" "issues" "confidence"]
551+
:properties {"decision" {:type "string"}
552+
"issues" {:type "array"}
553+
"confidence" {:type "number"}}})))))))
503554

504555
(deftest execute-judge-schema-valid-negative-decision-routes-test
505556
(testing "schema-valid negative decision drives the configured non-clear branch"

components/ai/src/psi/ai/providers/anthropic/structured_output.clj

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -48,12 +48,12 @@
4848

4949
(defn structured-output-result
5050
[strategy source raw-payload]
51-
(let [payload (structured-output/parse-json-object raw-payload)]
51+
(let [parse-result (structured-output/parse-json-value raw-payload)]
5252
(cond-> (assoc strategy
5353
:source source
5454
:raw-payload raw-payload)
55-
payload (assoc :payload payload)
56-
(not payload) (assoc :parse-error? true))))
55+
(:parsed? parse-result) (assoc :payload (:payload parse-result))
56+
(not parse-result) (assoc :parse-error? true))))
5757

5858
(defn emit-structured-result!
5959
[consume-fn strategy source raw-payload]

components/ai/src/psi/ai/providers/openai/chat_completions.clj

Lines changed: 22 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -369,19 +369,24 @@
369369
(resolve-chat-tool-index stream-state tool-call fallback-idx)
370370
tool-call))))
371371

372+
(defn- structured-output-result
373+
[strategy source raw-text]
374+
(when (contains? #{:provider-native :prompted-json} (:strategy strategy))
375+
(let [parse-result (structured-output/parse-json-value raw-text)]
376+
(cond-> (assoc strategy
377+
:source source
378+
:raw-text raw-text
379+
:raw-payload raw-text)
380+
(:parsed? parse-result) (assoc :payload (:payload parse-result))
381+
(not parse-result) (assoc :parse-error? true)))))
382+
372383
(defn- emit-structured-output-result!
373384
[stream-state consume-fn strategy source]
374-
(let [{:keys [structured-result-emitted? text-buffer]} stream-state
375-
raw-text @text-buffer
376-
payload (structured-output/parse-json-object raw-text)]
377-
(when (and (contains? #{:provider-native :prompted-json} (:strategy strategy))
378-
(compare-and-set! structured-result-emitted? false true))
379-
(consume-fn {:type :structured-output-result
380-
:structured-output (cond-> (assoc strategy
381-
:source source
382-
:raw-text raw-text)
383-
payload (assoc :payload payload
384-
:raw-payload payload))}))))
385+
(let [{:keys [structured-result-emitted? text-buffer]} stream-state]
386+
(when (compare-and-set! structured-result-emitted? false true)
387+
(when-let [result (structured-output-result strategy source @text-buffer)]
388+
(consume-fn {:type :structured-output-result
389+
:structured-output result})))))
385390

386391
(defn- finish-chat-chunk!
387392
[stream-state consume-fn model chunk choice strategy]
@@ -474,22 +479,19 @@
474479
(completions-usage-map model usage))
475480
logprobs (or (extract-openai-logprob-delta choice)
476481
(extract-llama-logprob-delta body))
477-
text (content/string-fragment (:content message))
478-
payload (when (contains? #{:provider-native :prompted-json} (:strategy strategy))
479-
(structured-output/parse-json-object text))]
482+
text (content/string-fragment (:content message))]
480483
(cond-> {:assistant-message (cond-> {:role "assistant"
481484
:content (completion-message->content message)
482485
:stop-reason stop-reason
483486
:timestamp (java.time.Instant/now)}
484487
(map? usage) (assoc :usage usage))
485488
:logprobs logprobs}
486489
strategy (assoc :structured-output
487-
(cond-> strategy
488-
payload (assoc :payload payload
489-
:raw-payload payload
490-
:source (if (= :provider-native (:strategy strategy))
491-
:openai/message-json
492-
:prompted-json/text))))))))
490+
(structured-output-result strategy
491+
(if (= :provider-native (:strategy strategy))
492+
:openai/message-json
493+
:prompted-json/text)
494+
text))))))
493495

494496
(defn execute-openai
495497
[conversation model options]

components/ai/src/psi/ai/providers/openai/codex_structured_output.clj

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -18,15 +18,16 @@
1818

1919
(defn structured-output-result
2020
[strategy source raw-payload]
21-
(if-let [{:keys [payload]} (structured-output/parse-json-value raw-payload)]
22-
(assoc strategy
23-
:source source
24-
:raw-payload raw-payload
25-
:payload payload)
26-
(assoc strategy
27-
:source source
28-
:raw-payload raw-payload
29-
:parse-error? true)))
21+
(let [parse-result (structured-output/parse-json-value raw-payload)]
22+
(if (:parsed? parse-result)
23+
(assoc strategy
24+
:source source
25+
:raw-payload raw-payload
26+
:payload (:payload parse-result))
27+
(assoc strategy
28+
:source source
29+
:raw-payload raw-payload
30+
:parse-error? true))))
3031

3132
(defn emit-structured-result!
3233
[consume-fn strategy source raw-payload]

components/ai/src/psi/ai/structured_output.clj

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -158,7 +158,7 @@
158158
"Return deterministic prompted-JSON fallback instructions for request."
159159
[request]
160160
(let [schema-text (json/generate-string (:json-schema request))]
161-
(str "\n\nStructured output required. Return exactly one JSON object matching "
161+
(str "\n\nStructured output required. Return exactly one JSON value matching "
162162
"the supplied JSON Schema. Do not wrap the JSON in Markdown fences, "
163163
"do not add prose, and do not emit extra top-level text.\n"
164164
"Name: " (structured-output-name request) "\n"

components/ai/test/psi/ai/providers/anthropic_structured_output_test.clj

Lines changed: 32 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
[psi.ai.conversation :as conv]
88
[psi.ai.models :as models]
99
[psi.ai.providers.anthropic :as anthropic]
10+
[psi.ai.providers.anthropic.structured-output]
1011
[psi.ai.structured-output :as structured-output])
1112
(:import [java.io ByteArrayInputStream]))
1213

@@ -330,15 +331,13 @@
330331
(is (= "{\"ok\":true}" (get-in result [:structured-output :raw-payload])))))
331332

332333
(deftest anthropic-json-schema-output-non-streaming-parse-failure-test
333-
;; Tests Anthropic JSON Schema native non-streaming invalid/non-object output
334-
;; preserves raw text and marks parse failure without a trusted payload.
334+
;; Tests Anthropic JSON Schema native non-streaming invalid output preserves
335+
;; raw text and marks parse failure without a trusted payload.
335336
(let [model (models/get-model :sonnet-4.6)
336337
convo (-> (conv/create "sys")
337338
(conv/add-user-message "Review this"))
338339
cases [{:label "invalid-json"
339-
:text "not json"}
340-
{:label "non-object-json"
341-
:text "[true]"}]]
340+
:text "not json"}]]
342341
(doseq [{:keys [label text]} cases]
343342
(let [body {:content [{:type "text" :text text}]
344343
:stop_reason "end_turn"
@@ -360,15 +359,13 @@
360359
(is (not (contains? (:structured-output result) :payload)) label)))))
361360

362361
(deftest anthropic-streaming-json-schema-output-parse-failure-test
363-
;; Tests Anthropic JSON Schema native streaming invalid/non-object output emits
364-
;; a parse-failure result with raw text and no trusted payload.
362+
;; Tests Anthropic JSON Schema native streaming invalid output emits a
363+
;; parse-failure result with raw text and no trusted payload.
365364
(let [model (models/get-model :sonnet-4.6)
366365
convo (-> (conv/create "sys")
367366
(conv/add-user-message "Review this"))
368367
cases [{:label "invalid-json"
369-
:text "not json"}
370-
{:label "non-object-json"
371-
:text "[true]"}]]
368+
:text "not json"}]]
372369
(doseq [{:keys [label text]} cases]
373370
(let [events (atom [])
374371
sse (str (sse-line "message_start"
@@ -404,6 +401,31 @@
404401
(is (true? (:parse-error? structured)) label)
405402
(is (not (contains? structured :payload)) label))))))
406403

404+
(deftest anthropic-structured-output-result-json-value-payloads-test
405+
;; Tests the shared Anthropic structured-output result helper preserves every
406+
;; valid JSON value, including present nil for JSON null.
407+
(let [strategy {:strategy :provider-native
408+
:native-mechanism :anthropic/json-schema-output}
409+
cases [{:label "string" :raw "\"DONE\"" :expected "DONE"}
410+
{:label "number" :raw "42" :expected 42}
411+
{:label "boolean" :raw "true" :expected true}
412+
{:label "array" :raw "[true]" :expected [true]}
413+
{:label "object" :raw "{\"ok\":true}" :expected {:ok true}}
414+
{:label "null" :raw "null" :expected nil}]]
415+
(doseq [{:keys [label raw expected]} cases
416+
source [:anthropic/json-schema-output :prompted-json/text]]
417+
(let [result (#'psi.ai.providers.anthropic.structured-output/structured-output-result
418+
(if (= :prompted-json/text source)
419+
{:strategy :prompted-json :fallback-used? true}
420+
strategy)
421+
source
422+
raw)]
423+
(is (= expected (:payload result)) label)
424+
(is (contains? result :payload) label)
425+
(is (not (:parse-error? result)) label)
426+
(is (= raw (:raw-payload result)) label)
427+
(is (= source (:source result)) label)))))
428+
407429
(deftest anthropic-streaming-json-schema-output-events-test
408430
;; Tests JSON Schema native streaming preserves text events and emits a parsed
409431
;; structured-output result sourced from ordinary assistant text.

0 commit comments

Comments
 (0)