fix(web): parse ask-question blocks as question-form alias (#1194) (#3053)

Accept <ask-question> as an alias for <question-form> and locate close
tags with a Unicode-safe scan so Turkish dotted-I prose before the tag
does not desync parser indices.
This commit is contained in:
吴杨帆 2026-05-27 14:34:36 +08:00 committed by GitHub
parent d0981a28b4
commit 9641c9e11c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 89 additions and 10 deletions

View file

@ -17,6 +17,11 @@
* }
* </question-form>
*
* `<ask-question>...</ask-question>` is accepted as an alias for
* `<question-form>`, so a model that drifts to the colloquial tag
* name still renders correctly instead of leaking raw markup into
* prose (see issue #1194).
*
* Splits a final assistant text payload into ordered segments (prose and
* forms) so AssistantMessage can render the form inline.
*/
@ -85,15 +90,19 @@ export type FormSegment =
| { kind: 'text'; text: string }
| { kind: 'form'; form: QuestionForm; raw: string };
const OPEN_RE = /<question-form\b([^>]*)>/i;
const CLOSE_TAG = '</question-form>';
// `question-form` is the canonical tag; `ask-question` is an alias the
// model occasionally drifts to (issue #1194). The close tag must match
// the open tag name, so each match captures the name and computes its
// own close-tag string. Treat the lookup case-insensitively at scan
// time so `<Question-Form>` and `<ASK-QUESTION>` still parse.
const OPEN_RE = /<(question-form|ask-question)\b([^>]*)>/i;
export function splitOnQuestionForms(input: string): FormSegment[] {
const out: FormSegment[] = [];
let cursor = 0;
// Scan repeatedly for <question-form> opens; for each, locate the
// matching close tag and try to parse the JSON body. Anything that
// doesn't parse cleanly stays in the prose stream.
// Scan repeatedly for question-form / ask-question opens; for each,
// locate the matching close tag and try to parse the JSON body.
// Anything that doesn't parse cleanly stays in the prose stream.
while (cursor < input.length) {
const slice = input.slice(cursor);
const m = OPEN_RE.exec(slice);
@ -101,9 +110,11 @@ export function splitOnQuestionForms(input: string): FormSegment[] {
out.push({ kind: 'text', text: slice });
break;
}
const tagName = (m[1] ?? 'question-form').toLowerCase();
const closeTag = `</${tagName}>`;
const openStart = cursor + m.index;
const openEnd = openStart + m[0].length;
const closeIdx = input.indexOf(CLOSE_TAG, openEnd);
const closeIdx = findCloseTag(input, openEnd, closeTag);
if (closeIdx === -1) {
// Unterminated — leave the rest as prose so we don't swallow it.
out.push({ kind: 'text', text: slice });
@ -113,19 +124,32 @@ export function splitOnQuestionForms(input: string): FormSegment[] {
out.push({ kind: 'text', text: input.slice(cursor, openStart) });
}
const body = input.slice(openEnd, closeIdx);
const attrs = parseAttrs(m[1] ?? '');
const attrs = parseAttrs(m[2] ?? '');
const form = tryParseForm(body, attrs);
const blockEnd = closeIdx + closeTag.length;
if (form) {
out.push({ kind: 'form', form, raw: input.slice(openStart, closeIdx + CLOSE_TAG.length) });
out.push({ kind: 'form', form, raw: input.slice(openStart, blockEnd) });
} else {
// Malformed — keep raw text so the user can still see it.
out.push({ kind: 'text', text: input.slice(openStart, closeIdx + CLOSE_TAG.length) });
out.push({ kind: 'text', text: input.slice(openStart, blockEnd) });
}
cursor = closeIdx + CLOSE_TAG.length;
cursor = blockEnd;
}
return out;
}
/**
 * Finds the first case-insensitive occurrence of `closeTag` in `input` at or
 * after `from`.
 *
 * Each candidate window is lowercased on its own instead of lowercasing the
 * whole input up front: `toLowerCase()` can change a string's length (for
 * example `İ` lowercases to two code units), so an index found in a
 * lowercased copy would not line up with the original `input`.
 *
 * @param input - Text to search.
 * @param from - Index to start scanning at. Negative, fractional, and NaN
 *   values are clamped the same way `String.prototype.indexOf` treats them.
 * @param closeTag - Closing tag to look for, e.g. `</ask-question>`.
 * @returns The index into `input` where the tag starts, or -1 if absent.
 */
function findCloseTag(input: string, from: number, closeTag: string): number {
  const closeLower = closeTag.toLowerCase();
  const tagLen = closeTag.length;
  const maxStart = input.length - tagLen;
  // A negative start would make `slice` count from the end of the string and
  // could report a "match" at a negative index, so clamp it to 0.
  const start = from > 0 ? Math.floor(from) : 0;
  for (let i = start; i <= maxStart; i++) {
    if (input.slice(i, i + tagLen).toLowerCase() === closeLower) {
      return i;
    }
  }
  return -1;
}
function parseAttrs(raw: string): Record<string, string> {
const re = /(\w+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
const out: Record<string, string> = {};

View file

@ -2,6 +2,13 @@ import { describe, expect, it } from 'vitest';
import { formatFormAnswers, splitOnQuestionForms } from '../../src/artifacts/question-form';
const VALID_BODY = `{
"questions": [
{ "id": "platform", "label": "Platform", "type": "radio",
"options": ["Mobile", "Desktop", "Responsive"], "required": true }
]
}`;
describe('splitOnQuestionForms', () => {
it('normalizes string and object question options', () => {
const input = [
@ -67,4 +74,52 @@ describe('splitOnQuestionForms', () => {
expect(text).toContain('- Primary surface: Mobile (iOS/Android) [value: mobile]');
});
it('parses the canonical <question-form> tag', () => {
const out = splitOnQuestionForms(`prose\n<question-form id="d" title="T">${VALID_BODY}</question-form>\nmore`);
expect(out.map((s) => s.kind)).toEqual(['text', 'form', 'text']);
if (out[1]?.kind === 'form') {
expect(out[1].form.id).toBe('d');
expect(out[1].form.questions).toHaveLength(1);
}
});
it('accepts <ask-question> as an alias for <question-form> (#1194)', () => {
const out = splitOnQuestionForms(`<ask-question id="brief" title="Quick brief">${VALID_BODY}</ask-question>`);
expect(out.map((s) => s.kind)).toEqual(['form']);
if (out[0]?.kind === 'form') {
expect(out[0].form.id).toBe('brief');
expect(out[0].form.title).toBe('Quick brief');
expect(out[0].form.questions[0]?.id).toBe('platform');
}
});
it('handles mixed casing on the alias (e.g. <Ask-Question>)', () => {
const out = splitOnQuestionForms(`<Ask-Question>${VALID_BODY}</Ask-Question>`);
expect(out.map((s) => s.kind)).toEqual(['form']);
});
// An opening <question-form> paired with </ask-question> has no matching
// close tag, so the input is expected to come back as one text segment
// rather than a parsed form.
it('does not close one tag with the other tag name', () => {
const out = splitOnQuestionForms(`<question-form>${VALID_BODY}</ask-question>`);
expect(out.map((s) => s.kind)).toEqual(['text']);
});
it('keeps malformed JSON bodies as raw text', () => {
const out = splitOnQuestionForms(`<ask-question>not json</ask-question>`);
expect(out.map((s) => s.kind)).toEqual(['text']);
});
it('keeps unterminated tags as prose without swallowing trailing text', () => {
const out = splitOnQuestionForms(`leading <ask-question>${VALID_BODY}`);
expect(out).toHaveLength(1);
expect(out[0]).toMatchObject({ kind: 'text' });
});
// Regression guard: `İ` (U+0130) lowercases to two UTF-16 code units, so a
// close-tag search done on a lowercased copy of the whole input would return
// indices shifted relative to the original string. The prose before the tag
// contains `İ` to make sure the form is still located and parsed.
it('finds close tags without Unicode index desync (#1194)', () => {
const out = splitOnQuestionForms(`prefix İ suffix<ask-question id="x">${VALID_BODY}</ask-question>`);
expect(out.map((s) => s.kind)).toEqual(['text', 'form']);
if (out[1]?.kind === 'form') {
expect(out[1].form.id).toBe('x');
}
});
});