mirror of
https://github.com/nexu-io/open-design.git
synced 2026-06-01 03:14:35 +07:00
Accept <ask-question> as an alias for <question-form> and locate close tags with a Unicode-safe scan so Turkish dotted-I prose before the tag does not desync parser indices.
This commit is contained in:
parent
d0981a28b4
commit
9641c9e11c
2 changed files with 89 additions and 10 deletions
|
|
@ -17,6 +17,11 @@
|
|||
* }
|
||||
* </question-form>
|
||||
*
|
||||
* `<ask-question>...</ask-question>` is accepted as an alias for
|
||||
* `<question-form>`, so a model that drifts to the colloquial tag
|
||||
* name still renders correctly instead of leaking raw markup into
|
||||
* prose (see issue #1194).
|
||||
*
|
||||
* Splits a final assistant text payload into ordered segments — prose +
|
||||
* forms — so AssistantMessage can render the form inline.
|
||||
*/
|
||||
|
|
@ -85,15 +90,19 @@ export type FormSegment =
|
|||
| { kind: 'text'; text: string }
|
||||
| { kind: 'form'; form: QuestionForm; raw: string };
|
||||
|
||||
const OPEN_RE = /<question-form\b([^>]*)>/i;
|
||||
const CLOSE_TAG = '</question-form>';
|
||||
// `question-form` is the canonical tag; `ask-question` is an alias the
|
||||
// model occasionally drifts to (issue #1194). The close tag must match
|
||||
// the open tag name, so each match captures the name and computes its
|
||||
// own close-tag string. Treat the lookup case-insensitively at scan
|
||||
// time so `<Question-Form>` and `<ASK-QUESTION>` still parse.
|
||||
const OPEN_RE = /<(question-form|ask-question)\b([^>]*)>/i;
|
||||
|
||||
export function splitOnQuestionForms(input: string): FormSegment[] {
|
||||
const out: FormSegment[] = [];
|
||||
let cursor = 0;
|
||||
// Scan repeatedly for <question-form> opens; for each, locate the
|
||||
// matching close tag and try to parse the JSON body. Anything that
|
||||
// doesn't parse cleanly stays in the prose stream.
|
||||
// Scan repeatedly for question-form / ask-question opens; for each,
|
||||
// locate the matching close tag and try to parse the JSON body.
|
||||
// Anything that doesn't parse cleanly stays in the prose stream.
|
||||
while (cursor < input.length) {
|
||||
const slice = input.slice(cursor);
|
||||
const m = OPEN_RE.exec(slice);
|
||||
|
|
@ -101,9 +110,11 @@ export function splitOnQuestionForms(input: string): FormSegment[] {
|
|||
out.push({ kind: 'text', text: slice });
|
||||
break;
|
||||
}
|
||||
const tagName = (m[1] ?? 'question-form').toLowerCase();
|
||||
const closeTag = `</${tagName}>`;
|
||||
const openStart = cursor + m.index;
|
||||
const openEnd = openStart + m[0].length;
|
||||
const closeIdx = input.indexOf(CLOSE_TAG, openEnd);
|
||||
const closeIdx = findCloseTag(input, openEnd, closeTag);
|
||||
if (closeIdx === -1) {
|
||||
// Unterminated — leave the rest as prose so we don't swallow it.
|
||||
out.push({ kind: 'text', text: slice });
|
||||
|
|
@ -113,19 +124,32 @@ export function splitOnQuestionForms(input: string): FormSegment[] {
|
|||
out.push({ kind: 'text', text: input.slice(cursor, openStart) });
|
||||
}
|
||||
const body = input.slice(openEnd, closeIdx);
|
||||
const attrs = parseAttrs(m[1] ?? '');
|
||||
const attrs = parseAttrs(m[2] ?? '');
|
||||
const form = tryParseForm(body, attrs);
|
||||
const blockEnd = closeIdx + closeTag.length;
|
||||
if (form) {
|
||||
out.push({ kind: 'form', form, raw: input.slice(openStart, closeIdx + CLOSE_TAG.length) });
|
||||
out.push({ kind: 'form', form, raw: input.slice(openStart, blockEnd) });
|
||||
} else {
|
||||
// Malformed — keep raw text so the user can still see it.
|
||||
out.push({ kind: 'text', text: input.slice(openStart, closeIdx + CLOSE_TAG.length) });
|
||||
out.push({ kind: 'text', text: input.slice(openStart, blockEnd) });
|
||||
}
|
||||
cursor = closeIdx + CLOSE_TAG.length;
|
||||
cursor = blockEnd;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
/**
 * Locate the first occurrence of `closeTag` in `input` at or after `from`,
 * comparing ASCII letters case-insensitively.
 *
 * The comparison is done code unit by code unit on the original string, so
 * the returned index is always a valid offset into `input`. Only `A`-`Z` are
 * folded; every other code unit must match exactly. This deliberately avoids
 * `String.prototype.toLowerCase`, which is Unicode-aware: it can change a
 * string's length (e.g. U+0130 lowercases to two code units) and maps
 * look-alikes such as U+212A KELVIN SIGN onto ASCII `k`, which would let a
 * non-ASCII character complete a tag name.
 *
 * @param input    Full text being scanned.
 * @param from     Offset to start scanning at; negative values are treated as 0.
 * @param closeTag Literal close tag to look for (e.g. `</question-form>`).
 * @returns Index of the match in `input`, or -1 when there is none.
 */
function findCloseTag(input: string, from: number, closeTag: string): number {
  // Fold only ASCII upper-case letters; leave every other code unit untouched.
  const foldAscii = (code: number): number =>
    code >= 0x41 && code <= 0x5a ? code + 0x20 : code;

  const tagLen = closeTag.length;
  // Fold the needle once up front instead of re-lowercasing per position.
  const needle: number[] = [];
  for (let k = 0; k < tagLen; k++) {
    needle.push(foldAscii(closeTag.charCodeAt(k)));
  }

  // Last index at which a full-length match could still start.
  const maxStart = input.length - tagLen;
  for (let i = Math.max(0, from); i <= maxStart; i++) {
    let k = 0;
    while (k < tagLen && foldAscii(input.charCodeAt(i + k)) === needle[k]) {
      k++;
    }
    if (k === tagLen) {
      return i;
    }
  }
  return -1;
}
|
||||
|
||||
function parseAttrs(raw: string): Record<string, string> {
|
||||
const re = /(\w+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
|
||||
const out: Record<string, string> = {};
|
||||
|
|
|
|||
|
|
@ -2,6 +2,13 @@ import { describe, expect, it } from 'vitest';
|
|||
|
||||
import { formatFormAnswers, splitOnQuestionForms } from '../../src/artifacts/question-form';
|
||||
|
||||
const VALID_BODY = `{
|
||||
"questions": [
|
||||
{ "id": "platform", "label": "Platform", "type": "radio",
|
||||
"options": ["Mobile", "Desktop", "Responsive"], "required": true }
|
||||
]
|
||||
}`;
|
||||
|
||||
describe('splitOnQuestionForms', () => {
|
||||
it('normalizes string and object question options', () => {
|
||||
const input = [
|
||||
|
|
@ -67,4 +74,52 @@ describe('splitOnQuestionForms', () => {
|
|||
|
||||
expect(text).toContain('- Primary surface: Mobile (iOS/Android) [value: mobile]');
|
||||
});
|
||||
|
||||
it('parses the canonical <question-form> tag', () => {
|
||||
const out = splitOnQuestionForms(`prose\n<question-form id="d" title="T">${VALID_BODY}</question-form>\nmore`);
|
||||
expect(out.map((s) => s.kind)).toEqual(['text', 'form', 'text']);
|
||||
if (out[1]?.kind === 'form') {
|
||||
expect(out[1].form.id).toBe('d');
|
||||
expect(out[1].form.questions).toHaveLength(1);
|
||||
}
|
||||
});
|
||||
|
||||
it('accepts <ask-question> as an alias for <question-form> (#1194)', () => {
|
||||
const out = splitOnQuestionForms(`<ask-question id="brief" title="Quick brief">${VALID_BODY}</ask-question>`);
|
||||
expect(out.map((s) => s.kind)).toEqual(['form']);
|
||||
if (out[0]?.kind === 'form') {
|
||||
expect(out[0].form.id).toBe('brief');
|
||||
expect(out[0].form.title).toBe('Quick brief');
|
||||
expect(out[0].form.questions[0]?.id).toBe('platform');
|
||||
}
|
||||
});
|
||||
|
||||
it('handles mixed casing on the alias (e.g. <Ask-Question>)', () => {
|
||||
const out = splitOnQuestionForms(`<Ask-Question>${VALID_BODY}</Ask-Question>`);
|
||||
expect(out.map((s) => s.kind)).toEqual(['form']);
|
||||
});
|
||||
|
||||
it('does not close one tag with the other tag name', () => {
|
||||
const out = splitOnQuestionForms(`<question-form>${VALID_BODY}</ask-question>`);
|
||||
expect(out.map((s) => s.kind)).toEqual(['text']);
|
||||
});
|
||||
|
||||
it('keeps malformed JSON bodies as raw text', () => {
|
||||
const out = splitOnQuestionForms(`<ask-question>not json</ask-question>`);
|
||||
expect(out.map((s) => s.kind)).toEqual(['text']);
|
||||
});
|
||||
|
||||
it('keeps unterminated tags as prose without swallowing trailing text', () => {
|
||||
const out = splitOnQuestionForms(`leading <ask-question>${VALID_BODY}`);
|
||||
expect(out).toHaveLength(1);
|
||||
expect(out[0]).toMatchObject({ kind: 'text' });
|
||||
});
|
||||
|
||||
it('finds close tags without Unicode index desync (#1194)', () => {
|
||||
const out = splitOnQuestionForms(`prefix İ suffix<ask-question id="x">${VALID_BODY}</ask-question>`);
|
||||
expect(out.map((s) => s.kind)).toEqual(['text', 'form']);
|
||||
if (out[1]?.kind === 'form') {
|
||||
expect(out[1].form.id).toBe('x');
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Reference in a new issue