From cb2ef653b2096b6a45a3387bad9b9cd1ab274bc1 Mon Sep 17 00:00:00 2001 From: opoojkk <33021407+opoojkk@users.noreply.github.com> Date: Mon, 25 May 2026 17:51:54 +0800 Subject: [PATCH 1/3] feat: add html-to-design-spec skill --- skills/html-to-design-spec/SKILL.md | 88 ++++++++ skills/html-to-design-spec/agents/openai.yaml | 4 + .../references/output-schemas.md | 194 ++++++++++++++++++ .../scripts/create_run_dir.py | 61 ++++++ 4 files changed, 347 insertions(+) create mode 100644 skills/html-to-design-spec/SKILL.md create mode 100644 skills/html-to-design-spec/agents/openai.yaml create mode 100644 skills/html-to-design-spec/references/output-schemas.md create mode 100755 skills/html-to-design-spec/scripts/create_run_dir.py diff --git a/skills/html-to-design-spec/SKILL.md b/skills/html-to-design-spec/SKILL.md new file mode 100644 index 000000000..199be05b4 --- /dev/null +++ b/skills/html-to-design-spec/SKILL.md @@ -0,0 +1,88 @@ +--- +name: html-to-design-spec +description: Convert Open Design prototypes, HTML design drafts, and generated web prototypes from tools like Lovable, v0, Bolt, or similar into design specifications and reconstruction-ready product intelligence for AI coding agents. Use when Codex needs to turn HTML or an interactive prototype into design specs, UI flow maps, visual style tokens, layout/component analysis, screenshots, route/modal/drawer/tab/dropdown detection, and assets such as pages.json, flows.json, graph.json, style.json, screenshots, prompts, and specs. +--- + +# HTML to Design Spec + +## Overview + +Turn an interactive web prototype or HTML design draft into design specifications and reconstruction-ready product intelligence. Explore the prototype with available browser or MCP tools, deduplicate UI states, capture evidence, and write outputs that preserve both interaction flow and visual style for reconstruction. + +## Output Location + +Create one result directory per analysis run inside the project being analyzed. + +- If the target is an HTML file, name the result directory from the file stem plus a timestamp, for example `index-20260524-143012`. +- If the target is a running app or route, name it from the app/folder/route plus a timestamp. +- Put these required outputs in that directory: `pages.json`, `flows.json`, `graph.json`, `style.json`, `screenshots/`, `prompts/`, and `specs/`. +- Prefer `scripts/create_run_dir.py` to create the directory skeleton. + +## Workflow + +1. Identify the prototype target. + - Accept local HTML files, local dev server URLs, deployed preview URLs, or a project directory with an obvious app entry. + - If a server is needed and not running, start it using the project's existing scripts. + - Record target URL, viewport sizes, timestamp, and tool/browser used. + +2. Discover navigation surface. + - Enumerate visible links, buttons, menus, nav items, tablists, form controls, route-like anchors, and clickable elements. + - Inspect router hints when available, such as `href`, framework route files, app menus, and client-side navigation state. + - Build an exploration queue of candidate interactions. + +3. Explore UI states. + - Visit each discovered page or route. + - Click meaningful controls to reveal modals, drawers, dropdowns, tabs, toasts, empty states, loading states, and form flows. + - For forms, try safe representative inputs only; do not submit destructive actions or external purchases. + - Stop paths that repeat an already-seen state or exceed a reasonable depth. + +4. Classify and deduplicate states. + - Classify every captured state as one of: `page`, `modal`, `drawer`, `tab`, `dropdown`, `toast`, `loading`, `empty`. + - Deduplicate states by normalized URL, visible text signature, role/ARIA structure, major bounding boxes, and screenshot similarity when available. + - Ignore hover-only states unless hover exposes a persistent menu or meaningful content. + - Normalize dynamic values such as dates, randomized IDs, avatars, counters, generated names, and timestamps. + +5. Capture evidence. + - Capture screenshots for full pages, modal/drawer/dropdown/tab states, notable component states, empty/loading states, and responsive layouts. + - Use at least desktop and mobile viewports unless the user narrows scope. + - Extract DOM structure, bounding boxes, computed styles, layout hierarchy, landmarks, accessible names, and component candidates for each important state. + +6. Extract visual style intelligence. + - Treat style reconstruction as a first-class goal, equal to flow reconstruction. + - Capture design tokens: color palette, typography, spacing scale, radii, shadows, borders, opacity, blur, elevation, icon style, imagery treatment, and motion/transition behavior. + - Preserve literal color values from source HTML, inline styles, CSS variables, stylesheets, and computed styles. When a color can be read as an explicit value, record that exact value in `style.json` and reconstruction prompts, and implement the exact value unless the target platform cannot represent it. + - Do not replace explicit source colors with visual approximations, blended results, semantic theme colors, or inferred glass/overlay composites. If both source and composited visual colors matter, record both and label which one is the implementation source value. + - Before implementation or prompt generation, classify every background token as exactly one of: `app-background`, `content-surface`, `interactive-surface`, `status-surface`, or `chrome-surface`. `style.json` must include this classification plus `sourceCssValue`, `computedValue`, and `nativeImplementationValue` for each background token. Use `nativeImplementationValue` when rebuilding native UI. + - Treat repeated surface roles as shared tokens across pages. Content sections, cards, lists, metric panels, search fields, and detail containers that represent the same `content-surface` role must share the same native implementation token. Do not let separate pages infer separate fills for the same role. + - Reserve native blur/material effects for `chrome-surface` roles such as navigation bars, tab bars, sheets, overlays, and browser/device chrome. Do not add native material or blur overlays to `content-surface` roles unless the prototype explicitly depends on translucent background sampling and the recorded `nativeImplementationValue` preserves the measured fill. + - For each major component, record size, density, alignment, grid/flex behavior, responsive changes, visual variants, states, and exact computed CSS values when available. + - Prefer semantic style descriptions backed by sampled computed values and screenshots; do not rely on generated utility class names alone. + - Compare screenshots across viewports to document layout breakpoints and mobile-specific styling. + +7. Build product intelligence. + - Write `pages.json` for page/state inventory. + - Write `flows.json` for user flows and interaction transitions. + - Write `graph.json` where nodes are UI states and edges are interactions. + - Write `style.json` for design tokens, component styling, responsive styling, and visual references. + - Write one Markdown spec per major page or flow in `specs/`. + - Write AI reconstruction prompts in `prompts/`, scoped by page, flow, and component family. + +See `references/output-schemas.md` for required JSON shapes and spec expectations. + +## Exploration Rules + +- Avoid infinite loops: cap interaction depth, track visited state signatures, and stop repeated transitions. +- Prefer semantic grouping over raw DOM depth when describing components. +- Treat visual evidence as source of truth when DOM names are generic or generated. +- Do not overfit to generated CSS class names from prototype tools. +- Preserve product meaning and presentation: labels, information architecture, hierarchy, affordances, validation behavior, visual style, spacing, typography, color, component density, and responsive behavior matter more than exact implementation details. +- Keep screenshots and JSON references stable with relative paths from the result directory. + +## Reconstruction Prompts + +Generate prompts that a coding agent can act on directly. + +- Include product goal, page role, responsive layout, component hierarchy, key interactions, states, data assumptions, and concrete visual style requirements. +- Reference screenshots and spec files by relative path. +- Avoid asking the implementation agent to inspect the original prototype unless explicitly allowed. +- Separate page prompts from shared component prompts when components repeat across pages. diff --git a/skills/html-to-design-spec/agents/openai.yaml b/skills/html-to-design-spec/agents/openai.yaml new file mode 100644 index 000000000..c946f6b00 --- /dev/null +++ b/skills/html-to-design-spec/agents/openai.yaml @@ -0,0 +1,4 @@ +interface: + display_name: "HTML to Design Spec" + short_description: "Convert HTML prototypes into design specs" + default_prompt: "Use $html-to-design-spec to convert this Open Design or HTML prototype into design specs and reconstruction assets." diff --git a/skills/html-to-design-spec/references/output-schemas.md b/skills/html-to-design-spec/references/output-schemas.md new file mode 100644 index 000000000..9fd6e010b --- /dev/null +++ b/skills/html-to-design-spec/references/output-schemas.md @@ -0,0 +1,194 @@ +# Prototype Intelligence Output Schemas + +Use these shapes as the minimum contract. Add fields when useful, but keep IDs stable across files. + +## Directory + +```text +/ +├── pages.json +├── flows.json +├── graph.json +├── style.json +├── screenshots/ +├── prompts/ +└── specs/ +``` + +## pages.json + +```json +{ + "metadata": { + "target": "http://localhost:3000", + "capturedAt": "2026-05-24T14:30:12+08:00", + "viewports": [ + { "name": "desktop", "width": 1440, "height": 1000 }, + { "name": "mobile", "width": 390, "height": 844 } + ], + "tooling": ["browser-mcp"] + }, + "pages": [ + { + "id": "page-home", + "type": "page", + "name": "Home", + "url": "/", + "routePattern": "/", + "summary": "Primary dashboard for ...", + "screenshots": ["screenshots/page-home-desktop.png"], + "states": ["state-home-empty", "state-home-loading"], + "layout": { + "landmarks": ["header", "main", "nav"], + "hierarchy": [], + "componentCandidates": [] + } + } + ], + "states": [ + { + "id": "state-settings-modal", + "type": "modal", + "parentPageId": "page-home", + "name": "Settings modal", + "trigger": "Click Settings", + "url": "/", + "screenshots": ["screenshots/state-settings-modal-desktop.png"], + "domSignature": "dialog:Settings|button:Save|button:Cancel", + "boundingBoxes": [], + "computedStyleSummary": { + "colors": [], + "typography": [], + "spacing": [] + }, + "content": { + "headings": [], + "labels": [], + "emptyState": null, + "loadingState": null + } + } + ] +} +``` + +## style.json + +```json +{ + "designTokens": { + "colors": [ + { "name": "primary", "value": "#2563eb", "usage": "Primary actions and active states" } + ], + "typography": [ + { "role": "heading-1", "fontFamily": "Inter", "fontSize": "32px", "fontWeight": 700, "lineHeight": "40px" } + ], + "spacing": ["4px", "8px", "12px", "16px"], + "radii": ["4px", "8px"], + "shadows": [], + "borders": [] + }, + "componentStyles": [ + { + "component": "Primary button", + "states": ["default", "hover", "disabled"], + "computedValues": { + "height": "40px", + "padding": "0 16px", + "background": "#2563eb", + "borderRadius": "8px" + }, + "screenshots": ["screenshots/component-primary-button.png"] + } + ], + "responsiveStyle": [ + { + "viewport": "mobile", + "changes": ["Navigation collapses into bottom bar", "Cards become single column"] + } + ], + "visualReferences": [ + { "stateId": "page-home", "screenshot": "screenshots/page-home-desktop.png" } + ] +} +``` + +## flows.json + +```json +{ + "flows": [ + { + "id": "flow-open-settings", + "name": "Open settings", + "summary": "User opens settings from the dashboard.", + "startStateId": "page-home", + "endStateId": "state-settings-modal", + "steps": [ + { + "from": "page-home", + "action": { + "type": "click", + "target": "Settings button", + "selectorHint": "button[aria-label='Settings']" + }, + "to": "state-settings-modal", + "observedResult": "Settings dialog appears over dashboard." + } + ] + } + ] +} +``` + +## graph.json + +```json +{ + "nodes": [ + { + "id": "page-home", + "type": "page", + "label": "Home", + "url": "/", + "screenshot": "screenshots/page-home-desktop.png" + } + ], + "edges": [ + { + "id": "edge-home-settings", + "from": "page-home", + "to": "state-settings-modal", + "interaction": "click", + "target": "Settings button", + "guard": null + } + ] +} +``` + +## Specs + +Create one Markdown file per major page, flow, or component family. Include: + +- Purpose and user intent +- Layout hierarchy +- Component inventory +- Interaction behavior +- State variants +- Responsive differences +- Visual style tokens inferred from computed styles and screenshots +- Component-level styling: colors, typography, spacing, radii, borders, shadows, density, and responsive variants +- Implementation notes and assumptions + +## Prompts + +Create implementation prompts that are standalone. Each prompt should include: + +- Target page/component/flow +- Source screenshots and spec references +- Required behavior +- Required visual style, including concrete design tokens from `style.json` +- Responsive requirements +- Data/model assumptions +- Explicit exclusions for prototype-only artifacts or generated class names diff --git a/skills/html-to-design-spec/scripts/create_run_dir.py b/skills/html-to-design-spec/scripts/create_run_dir.py new file mode 100755 index 000000000..0d5107a41 --- /dev/null +++ b/skills/html-to-design-spec/scripts/create_run_dir.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +"""Create a Prototype Intelligence run directory skeleton.""" + +from __future__ import annotations + +import argparse +import json +import re +from datetime import datetime +from pathlib import Path + + +def slugify(value: str) -> str: + value = value.strip().lower() + value = re.sub(r"\.[a-z0-9]+$", "", value) + value = re.sub(r"[^a-z0-9]+", "-", value) + value = value.strip("-") + return value or "prototype" + + +def main() -> int: + parser = argparse.ArgumentParser() + parser.add_argument("project_dir", help="Directory where the analysis result should be created.") + parser.add_argument("--name", help="HTML file stem, route, app name, or other run label.") + parser.add_argument("--target", help="Prototype URL or file path being analyzed.") + parser.add_argument("--timestamp", help="Timestamp suffix. Defaults to local YYYYMMDD-HHMMSS.") + args = parser.parse_args() + + project_dir = Path(args.project_dir).expanduser().resolve() + timestamp = args.timestamp or datetime.now().strftime("%Y%m%d-%H%M%S") + base_name = args.name or (Path(args.target).name if args.target else project_dir.name) + run_dir = project_dir / f"{slugify(base_name)}-{timestamp}" + + run_dir.mkdir(parents=True, exist_ok=False) + for child in ("screenshots", "prompts", "specs"): + (run_dir / child).mkdir() + + (run_dir / "pages.json").write_text( + json.dumps({"metadata": {"target": args.target}, "pages": [], "states": []}, indent=2) + "\n", + encoding="utf-8", + ) + (run_dir / "flows.json").write_text(json.dumps({"flows": []}, indent=2) + "\n", encoding="utf-8") + (run_dir / "graph.json").write_text(json.dumps({"nodes": [], "edges": []}, indent=2) + "\n", encoding="utf-8") + (run_dir / "style.json").write_text( + json.dumps({"designTokens": {}, "componentStyles": [], "responsiveStyle": [], "visualReferences": []}, indent=2) + "\n", + encoding="utf-8", + ) + + metadata = { + "target": args.target, + "createdAt": timestamp, + "runDirectory": str(run_dir), + "requiredOutputs": ["pages.json", "flows.json", "graph.json", "style.json", "screenshots/", "prompts/", "specs/"], + } + (run_dir / "manifest.json").write_text(json.dumps(metadata, indent=2) + "\n", encoding="utf-8") + print(run_dir) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 14768df9cbd8c87c27bed0c3f5e57f44e166adf9 Mon Sep 17 00:00:00 2001 From: opoojkk <33021407+opoojkk@users.noreply.github.com> Date: Thu, 28 May 2026 18:47:35 +0800 Subject: [PATCH 2/3] feat: add html-to-design-spec skill --- skills/html-to-design-spec/SKILL.md | 61 ++-- skills/html-to-design-spec/agents/openai.yaml | 2 +- .../references/output-schemas.md | 281 ++++++++++++++++-- .../scripts/create_run_dir.py | 77 ++++- 4 files changed, 362 insertions(+), 59 deletions(-) diff --git a/skills/html-to-design-spec/SKILL.md b/skills/html-to-design-spec/SKILL.md index 199be05b4..9cecdb9e9 100644 --- a/skills/html-to-design-spec/SKILL.md +++ b/skills/html-to-design-spec/SKILL.md @@ -1,6 +1,6 @@ --- name: html-to-design-spec -description: Convert Open Design prototypes, HTML design drafts, and generated web prototypes from tools like Lovable, v0, Bolt, or similar into design specifications and reconstruction-ready product intelligence for AI coding agents. Use when Codex needs to turn HTML or an interactive prototype into design specs, UI flow maps, visual style tokens, layout/component analysis, screenshots, route/modal/drawer/tab/dropdown detection, and assets such as pages.json, flows.json, graph.json, style.json, screenshots, prompts, and specs. +description: Convert HTML prototypes into state-linked design specs, screenshots, style tokens, flow maps, and reconstruction prompts. --- # HTML to Design Spec @@ -9,14 +9,25 @@ description: Convert Open Design prototypes, HTML design drafts, and generated w Turn an interactive web prototype or HTML design draft into design specifications and reconstruction-ready product intelligence. Explore the prototype with available browser or MCP tools, deduplicate UI states, capture evidence, and write outputs that preserve both interaction flow and visual style for reconstruction. +Treat every captured UI condition as a state with stable IDs. Pages, flows, graphs, screenshots, style tokens, specs, and prompts should all reference those state IDs so the handoff remains traceable. + ## Output Location Create one result directory per analysis run inside the project being analyzed. - If the target is an HTML file, name the result directory from the file stem plus a timestamp, for example `index-20260524-143012`. - If the target is a running app or route, name it from the app/folder/route plus a timestamp. -- Put these required outputs in that directory: `pages.json`, `flows.json`, `graph.json`, `style.json`, `screenshots/`, `prompts/`, and `specs/`. -- Prefer `scripts/create_run_dir.py` to create the directory skeleton. +- Default required outputs are `pages.json`, `style.json`, and `screenshots/`. Generate `flows.json`, `graph.json`, `prompts/`, and `specs/` when the prototype has multiple routes, meaningful interaction flows, repeated component families, or the user asks for reconstruction-ready output. +- Prefer `scripts/create_run_dir.py` to create the directory skeleton. The helper creates placeholders for both default and conditional outputs so later analysis can fill them when needed, and writes `manifest.json` with run metadata. +- Placeholder files or directories created by the helper do not count as generated analysis. Before finishing, update `manifest.json` so `generatedOutputs`, `placeholderOutputs`, and `omittedOutputs` accurately describe the final run. +- If the user supplies runtime constraints in the prompt or CLI invocation, such as `-p "..."`, follow them for this run and record them in `manifest.json`. Do not turn run-specific constraints into permanent skill rules. + +## Data Contract + +- Treat `pages.json` as the canonical source for UI state IDs, page IDs, state evidence, observations, discovered actions, and dedupe records. +- Treat `manifest.json` as run metadata and output status. If metadata appears in both `manifest.json` and `pages.json.metadata`, keep `target`, `viewports`, `tooling`, and runtime constraints consistent before finishing. +- `flows.json`, `graph.json`, `style.json`, `specs/`, and `prompts/` must only reference state IDs that exist in `pages.json.states[].id`. +- Record discovered but uncaptured interactions, unsafe actions, failed actions, blocked routes, and repeated candidates in `pages.json` observations, `discoveredActions`, or `dedupeLog` instead of dropping them. ## Workflow @@ -28,50 +39,64 @@ Create one result directory per analysis run inside the project being analyzed. 2. Discover navigation surface. - Enumerate visible links, buttons, menus, nav items, tablists, form controls, route-like anchors, and clickable elements. - Inspect router hints when available, such as `href`, framework route files, app menus, and client-side navigation state. - - Build an exploration queue of candidate interactions. + - Build an exploration queue of candidate interactions. Track action type, target label, selector hint, current state, preconditions, safety notes, and expected or observed result. + - Record actions that are skipped, unsafe, blocked, or outside scope in `pages.json` so the reconstruction handoff can distinguish unobserved behavior from missing behavior. 3. Explore UI states. - Visit each discovered page or route. - Click meaningful controls to reveal modals, drawers, dropdowns, tabs, toasts, empty states, loading states, and form flows. - - For forms, try safe representative inputs only; do not submit destructive actions or external purchases. - - Stop paths that repeat an already-seen state or exceed a reasonable depth. + - For forms, try safe representative inputs only. Do not trigger real login, payment, purchase, delete, email/message send, external API mutation, or file upload actions unless the user explicitly confirms that the target is mock/local and non-destructive. + - Stop paths that repeat an already-seen state or exceed the requested scope. If no scope is supplied, use bounded exploration that captures representative states without attempting exhaustive coverage. 4. Classify and deduplicate states. - Classify every captured state as one of: `page`, `modal`, `drawer`, `tab`, `dropdown`, `toast`, `loading`, `empty`. - - Deduplicate states by normalized URL, visible text signature, role/ARIA structure, major bounding boxes, and screenshot similarity when available. + - Deduplicate states by normalized URL, visible text signature, role/ARIA structure, major bounding boxes, and screenshot similarity when available. Record the state signature and dedupe reason when a candidate is skipped. - Ignore hover-only states unless hover exposes a persistent menu or meaningful content. - Normalize dynamic values such as dates, randomized IDs, avatars, counters, generated names, and timestamps. 5. Capture evidence. - Capture screenshots for full pages, modal/drawer/dropdown/tab states, notable component states, empty/loading states, and responsive layouts. - Use at least desktop and mobile viewports unless the user narrows scope. + - Treat viewport captures as evidence variants for the same state unless the UI meaning or available actions change. - Extract DOM structure, bounding boxes, computed styles, layout hierarchy, landmarks, accessible names, and component candidates for each important state. + - Preserve evidence links between screenshots, element or component IDs, selector hints, bounding boxes, source CSS values, computed styles, and sampled visual values. + - Record inaccessible, broken, empty, blocked, or non-responsive routes/actions as observations instead of silently dropping them. 6. Extract visual style intelligence. - Treat style reconstruction as a first-class goal, equal to flow reconstruction. - Capture design tokens: color palette, typography, spacing scale, radii, shadows, borders, opacity, blur, elevation, icon style, imagery treatment, and motion/transition behavior. - - Preserve literal color values from source HTML, inline styles, CSS variables, stylesheets, and computed styles. When a color can be read as an explicit value, record that exact value in `style.json` and reconstruction prompts, and implement the exact value unless the target platform cannot represent it. - - Do not replace explicit source colors with visual approximations, blended results, semantic theme colors, or inferred glass/overlay composites. If both source and composited visual colors matter, record both and label which one is the implementation source value. - - Before implementation or prompt generation, classify every background token as exactly one of: `app-background`, `content-surface`, `interactive-surface`, `status-surface`, or `chrome-surface`. `style.json` must include this classification plus `sourceCssValue`, `computedValue`, and `nativeImplementationValue` for each background token. Use `nativeImplementationValue` when rebuilding native UI. - - Treat repeated surface roles as shared tokens across pages. Content sections, cards, lists, metric panels, search fields, and detail containers that represent the same `content-surface` role must share the same native implementation token. Do not let separate pages infer separate fills for the same role. - - Reserve native blur/material effects for `chrome-surface` roles such as navigation bars, tab bars, sheets, overlays, and browser/device chrome. Do not add native material or blur overlays to `content-surface` roles unless the prototype explicitly depends on translucent background sampling and the recorded `nativeImplementationValue` preserves the measured fill. + - Preserve literal color values from source HTML, inline styles, CSS variables, stylesheets, and computed styles when they can be read. Record explicit source values before visual approximations, and add screenshot-sampled or composited values only as supporting evidence. + - Label style values by source type: `source-css`, `computed-style`, `screenshot-sample`, or `inferred`. Include source state IDs, viewport, selector/component hints, and confidence when available. + - Do not replace explicit source colors with visual approximations, blended results, semantic theme colors, or inferred glass/overlay composites. If both source and composited visual colors matter, record both and label which one is the implementation source value. If a value cannot be resolved because of CSS variables, filters, blend modes, canvas rendering, or image backgrounds, record the uncertainty instead of inventing precision. + - For major reusable background tokens, classify the role where clear: `app-background`, `content-surface`, `interactive-surface`, `status-surface`, or `chrome-surface`. If a token is decorative, mixed, image-based, or ambiguous, record `decorative-background`, `mixed`, or `unknown` instead of forcing a role. Include `sourceCssValue`, `computedValue`, `visualSampleValue`, `implementationValue`, and notes when available. + - Treat repeated surface roles as shared tokens across pages when evidence supports the match. Content sections, cards, lists, metric panels, search fields, and detail containers that represent the same `content-surface` role should share the same implementation token. + - When the reconstruction target is native UI, reserve native blur/material effects for `chrome-surface` roles such as navigation bars, tab bars, sheets, overlays, and browser/device chrome. Do not add native material or blur overlays to `content-surface` roles unless the prototype explicitly depends on translucent background sampling and the recorded implementation value preserves the measured fill. - For each major component, record size, density, alignment, grid/flex behavior, responsive changes, visual variants, states, and exact computed CSS values when available. + - Describe reusable structure as a hierarchy: page, region or section, component, element. Keep component names semantic even when source class names are generated. - Prefer semantic style descriptions backed by sampled computed values and screenshots; do not rely on generated utility class names alone. - Compare screenshots across viewports to document layout breakpoints and mobile-specific styling. 7. Build product intelligence. - Write `pages.json` for page/state inventory. - - Write `flows.json` for user flows and interaction transitions. - - Write `graph.json` where nodes are UI states and edges are interactions. + - Write `flows.json` for user flows and interaction transitions when the prototype has meaningful flows. + - Write `graph.json` where nodes are UI states and edges are interactions when graph structure helps reconstruction. - Write `style.json` for design tokens, component styling, responsive styling, and visual references. - - Write one Markdown spec per major page or flow in `specs/`. - - Write AI reconstruction prompts in `prompts/`, scoped by page, flow, and component family. + - Write one Markdown spec per major page or flow in `specs/` when reconstruction detail is needed. + - Write AI reconstruction prompts in `prompts/`, scoped by page, flow, and component family, when the user needs implementation-ready handoff. + - Prompts must be self-contained and should use only evidence from the run directory unless the user explicitly allows inspecting the original prototype again. -See `references/output-schemas.md` for required JSON shapes and spec expectations. +8. Validate the handoff. + - Confirm JSON files parse successfully. + - Confirm screenshot, spec, and prompt references use relative paths from the result directory and point to existing files. + - Confirm every flow step, graph edge, style token, spec, and prompt state reference points to an existing `pages.json.states[].id`. + - Confirm optional placeholder outputs are either filled or marked in `manifest.json` as placeholders or omitted for this run. + +See `references/output-schemas.md` for JSON shapes, optional output rules, and spec expectations. ## Exploration Rules - Avoid infinite loops: cap interaction depth, track visited state signatures, and stop repeated transitions. +- Follow runtime constraints from the user prompt or `-p` flags for scope, outputs, viewports, and exclusions. - Prefer semantic grouping over raw DOM depth when describing components. - Treat visual evidence as source of truth when DOM names are generic or generated. - Do not overfit to generated CSS class names from prototype tools. @@ -80,7 +105,7 @@ See `references/output-schemas.md` for required JSON shapes and spec expectation ## Reconstruction Prompts -Generate prompts that a coding agent can act on directly. +When implementation-ready handoff is needed, generate prompts that a coding agent can act on directly. - Include product goal, page role, responsive layout, component hierarchy, key interactions, states, data assumptions, and concrete visual style requirements. - Reference screenshots and spec files by relative path. diff --git a/skills/html-to-design-spec/agents/openai.yaml b/skills/html-to-design-spec/agents/openai.yaml index c946f6b00..a5af7aba0 100644 --- a/skills/html-to-design-spec/agents/openai.yaml +++ b/skills/html-to-design-spec/agents/openai.yaml @@ -1,4 +1,4 @@ interface: display_name: "HTML to Design Spec" short_description: "Convert HTML prototypes into design specs" - default_prompt: "Use $html-to-design-spec to convert this Open Design or HTML prototype into design specs and reconstruction assets." + default_prompt: "Use $html-to-design-spec to convert this Open Design or HTML prototype into pages.json, style.json, screenshots/, and optional flows.json, graph.json, prompts/, and specs/ when needed." diff --git a/skills/html-to-design-spec/references/output-schemas.md b/skills/html-to-design-spec/references/output-schemas.md index 9fd6e010b..8c51dd694 100644 --- a/skills/html-to-design-spec/references/output-schemas.md +++ b/skills/html-to-design-spec/references/output-schemas.md @@ -1,24 +1,28 @@ -# Prototype Intelligence Output Schemas +# HTML to Design Spec Output Schemas -Use these shapes as the minimum contract. Add fields when useful, but keep IDs stable across files. +Use these shapes as the minimum contract for files you produce. Add fields when useful, omit optional files when scope is narrow, but keep IDs stable across files. UI states are the canonical graph nodes: pages group states, flows transition between states, style tokens cite states as evidence, and prompts reference states and screenshots. `pages.json` is the canonical source for state IDs; other files must only reference state IDs listed in `pages.json.states[].id`. + +`scripts/create_run_dir.py` may create empty placeholders for optional outputs; leaving those placeholders empty is acceptable only when `manifest.json` records them as placeholders or omitted for the run. Scaffolded placeholder values may use `null` or empty arrays for metadata and evidence that has not been captured yet. Before finishing, update `generatedOutputs`, `placeholderOutputs`, and `omittedOutputs` in `manifest.json` so placeholder files are not mistaken for completed analysis. ## Directory ```text / ├── pages.json -├── flows.json -├── graph.json +├── flows.json # optional for single-state/simple captures +├── graph.json # optional when graph structure is useful ├── style.json ├── screenshots/ -├── prompts/ -└── specs/ +├── prompts/ # optional unless implementation prompts are requested +├── specs/ # optional unless reconstruction specs are requested +└── manifest.json # helper metadata when scripts/create_run_dir.py is used ``` ## pages.json ```json { + "schemaVersion": "html-to-design-spec/v1", "metadata": { "target": "http://localhost:3000", "capturedAt": "2026-05-24T14:30:12+08:00", @@ -26,8 +30,44 @@ Use these shapes as the minimum contract. Add fields when useful, but keep IDs s { "name": "desktop", "width": 1440, "height": 1000 }, { "name": "mobile", "width": 390, "height": 844 } ], - "tooling": ["browser-mcp"] + "tooling": ["browser-mcp"], + "runConstraints": ["Analyze dashboard and settings only"] }, + "discoveredActions": [ + { + "id": "action-home-settings", + "fromStateId": "state-home-default", + "type": "click", + "targetLabel": "Settings", + "selectorHint": "button[aria-label='Settings']", + "precondition": "Home default state is visible", + "safetyNote": null, + "status": "captured", + "resultStateId": "state-settings-modal", + "observedResult": "Settings dialog appears over dashboard." + }, + { + "id": "action-delete-account", + "fromStateId": "state-settings-modal", + "type": "click", + "targetLabel": "Delete account", + "selectorHint": "button:has-text('Delete account')", + "precondition": "Settings modal is visible", + "safetyNote": "Potential destructive account mutation.", + "status": "skipped-unsafe", + "resultStateId": null, + "observedResult": null + } + ], + "dedupeLog": [ + { + "candidateSignature": "url:/|dialog:Settings|button:Save|button:Cancel", + "matchedStateId": "state-settings-modal", + "reason": "Same URL, dialog text, role structure, and screenshot as an already captured state.", + "fromStateId": "state-home-default", + "actionId": "action-home-settings-repeat" + } + ], "pages": [ { "id": "page-home", @@ -37,48 +77,172 @@ Use these shapes as the minimum contract. Add fields when useful, but keep IDs s "routePattern": "/", "summary": "Primary dashboard for ...", "screenshots": ["screenshots/page-home-desktop.png"], - "states": ["state-home-empty", "state-home-loading"], + "states": ["state-home-default", "state-home-empty", "state-home-loading"], "layout": { "landmarks": ["header", "main", "nav"], - "hierarchy": [], - "componentCandidates": [] + "hierarchy": [ + { + "id": "region-dashboard-summary", + "type": "section", + "name": "Dashboard summary", + "children": ["component-kpi-card", "component-activity-list"] + } + ], + "componentCandidates": ["component-primary-button", "component-kpi-card"] } } ], "states": [ + { + "id": "state-home-default", + "type": "page", + "parentPageId": "page-home", + "parentStateId": null, + "name": "Home default", + "url": "/", + "routePattern": "/", + "trigger": null, + "action": null, + "stateSignature": "url:/|h1:Dashboard|nav:Home,Settings", + "dedupe": { + "status": "unique", + "matchedStateId": null, + "reason": null + }, + "screenshots": [ + { + "path": "screenshots/state-home-default-desktop.png", + "viewport": "desktop", + "width": 1440, + "height": 1000 + }, + { + "path": "screenshots/state-home-default-mobile.png", + "viewport": "mobile", + "width": 390, + "height": 844 + } + ], + "evidence": { + "domSignature": "main:Dashboard|button:Settings|nav:Home", + "landmarks": ["header", "main", "nav"], + "elements": [ + { + "id": "el-settings-button", + "role": "button", + "name": "Settings", + "selectorHint": "button[aria-label='Settings']", + "boundingBox": { "x": 1280, "y": 24, "width": 112, "height": 40 }, + "screenshot": "screenshots/state-home-default-desktop.png" + } + ], + "computedStyleSummary": { + "colors": [], + "typography": [], + "spacing": [] + } + }, + "content": { + "headings": ["Dashboard"], + "labels": ["Settings"], + "emptyState": null, + "loadingState": null + }, + "observations": [] + }, { "id": "state-settings-modal", "type": "modal", "parentPageId": "page-home", + "parentStateId": "state-home-default", "name": "Settings modal", "trigger": "Click Settings", + "action": { + "type": "click", + "targetLabel": "Settings", + "selectorHint": "button[aria-label='Settings']", + "inputValue": null, + "precondition": "Home default state is visible", + "safetyNote": null + }, "url": "/", - "screenshots": ["screenshots/state-settings-modal-desktop.png"], - "domSignature": "dialog:Settings|button:Save|button:Cancel", - "boundingBoxes": [], - "computedStyleSummary": { - "colors": [], - "typography": [], - "spacing": [] + "stateSignature": "url:/|dialog:Settings|button:Save|button:Cancel", + "dedupe": { + "status": "unique", + "matchedStateId": null, + "reason": null + }, + "screenshots": [ + { + "path": "screenshots/state-settings-modal-desktop.png", + "viewport": "desktop", + "width": 1440, + "height": 1000 + } + ], + "evidence": { + "domSignature": "dialog:Settings|button:Save|button:Cancel", + "landmarks": ["dialog"], + "elements": [], + "computedStyleSummary": { + "colors": [], + "typography": [], + "spacing": [] + } }, "content": { - "headings": [], - "labels": [], + "headings": ["Settings"], + "labels": ["Save", "Cancel"], "emptyState": null, "loadingState": null - } + }, + "observations": [] } ] } ``` +Use `discoveredActions[].status` values such as `queued`, `captured`, `skipped-deduped`, `skipped-unsafe`, `skipped-out-of-scope`, `failed`, or `blocked`. If a candidate is deduped, record the skipped candidate in `dedupeLog[]` and point `matchedStateId` to an existing state. If an action is unsafe, blocked, or outside scope, keep `resultStateId` null and explain why in `safetyNote` or `observedResult`. + ## style.json ```json { + "schemaVersion": "html-to-design-spec/v1", "designTokens": { "colors": [ - { "name": "primary", "value": "#2563eb", "usage": "Primary actions and active states" } + { + "name": "primary", + "value": "#2563eb", + "usage": "Primary actions and active states", + "sourceType": "source-css", + "sourceCssValue": "var(--color-primary)", + "computedValue": "rgb(37, 99, 235)", + "visualSampleValue": null, + "implementationValue": "#2563eb", + "sourceStateIds": ["state-home-default"], + "sourceElements": ["el-settings-button"], + "viewports": ["desktop", "mobile"], + "confidence": "high", + "notes": "" + } + ], + "backgroundTokens": [ + { + "name": "content-surface", + "role": "content-surface", + "sourceCssValue": "#ffffff", + "computedValue": "rgb(255, 255, 255)", + "visualSampleValue": null, + "implementationValue": "#ffffff", + "sourceType": "source-css", + "sourceStateIds": ["state-home-default"], + "sourceElements": ["component-kpi-card"], + "viewports": ["desktop", "mobile"], + "confidence": "high", + "usage": "Cards and main content panels", + "notes": "" + } ], "typography": [ { "role": "heading-1", "fontFamily": "Inter", "fontSize": "32px", "fontWeight": 700, "lineHeight": "40px" } @@ -98,6 +262,8 @@ Use these shapes as the minimum contract. Add fields when useful, but keep IDs s "background": "#2563eb", "borderRadius": "8px" }, + "sourceStateIds": ["state-home-default"], + "sourceElements": ["el-settings-button"], "screenshots": ["screenshots/component-primary-button.png"] } ], @@ -108,29 +274,35 @@ Use these shapes as the minimum contract. Add fields when useful, but keep IDs s } ], "visualReferences": [ - { "stateId": "page-home", "screenshot": "screenshots/page-home-desktop.png" } + { "stateId": "state-home-default", "screenshot": "screenshots/state-home-default-desktop.png" } ] } ``` +Use `app-background`, `content-surface`, `interactive-surface`, `status-surface`, or `chrome-surface` for `backgroundTokens[].role` when the role is clear. Use `decorative-background`, `mixed`, or `unknown` when the token is image-based, layered, ambiguous, or not reusable. + ## flows.json ```json { + "schemaVersion": "html-to-design-spec/v1", "flows": [ { "id": "flow-open-settings", "name": "Open settings", "summary": "User opens settings from the dashboard.", - "startStateId": "page-home", + "startStateId": "state-home-default", "endStateId": "state-settings-modal", "steps": [ { - "from": "page-home", + "from": "state-home-default", "action": { "type": "click", - "target": "Settings button", - "selectorHint": "button[aria-label='Settings']" + "targetLabel": "Settings", + "selectorHint": "button[aria-label='Settings']", + "inputValue": null, + "precondition": "Home default state is visible", + "safetyNote": null }, "to": "state-settings-modal", "observedResult": "Settings dialog appears over dashboard." @@ -145,31 +317,72 @@ Use these shapes as the minimum contract. Add fields when useful, but keep IDs s ```json { + "schemaVersion": "html-to-design-spec/v1", "nodes": [ { - "id": "page-home", + "id": "state-home-default", "type": "page", "label": "Home", "url": "/", - "screenshot": "screenshots/page-home-desktop.png" + "pageId": "page-home", + "screenshot": "screenshots/state-home-default-desktop.png" + }, + { + "id": "state-settings-modal", + "type": "modal", + "label": "Settings modal", + "url": "/", + "pageId": "page-home", + "screenshot": "screenshots/state-settings-modal-desktop.png" } ], "edges": [ { "id": "edge-home-settings", - "from": "page-home", + "from": "state-home-default", "to": "state-settings-modal", - "interaction": "click", - "target": "Settings button", + "action": { + "type": "click", + "targetLabel": "Settings", + "selectorHint": "button[aria-label='Settings']" + }, "guard": null } ] } ``` +## manifest.json + +`manifest.json` is helper metadata created by `scripts/create_run_dir.py`. It is not required when creating a run directory manually. + +If `manifest.json` exists, keep `target`, `viewports`, `tooling`, and `runConstraints` consistent with `pages.json.metadata`. At the end of the run, `generatedOutputs` should list only files or directories with completed analysis, `placeholderOutputs` should list scaffolded outputs left intentionally empty, and `omittedOutputs` should list optional outputs that were not created or were explicitly removed because they are not applicable. If the helper created an empty optional file or directory, keep it in `placeholderOutputs` rather than `omittedOutputs`. + +```json +{ + "schemaVersion": "html-to-design-spec/v1", + "target": "http://localhost:3000", + "createdAt": "20260524-143012", + "createdAtIso": "2026-05-24T14:30:12+08:00", + "runDirectory": "/path/to/project/index-20260524-143012", + "runConstraints": ["Analyze dashboard and settings only"], + "viewports": [ + { "name": "desktop", "width": 1440, "height": 1000 }, + { "name": "mobile", "width": 390, "height": 844 } + ], + "tooling": ["browser-mcp"], + "defaultOutputs": ["pages.json", "style.json", "screenshots/"], + "conditionalOutputs": ["flows.json", "graph.json", "prompts/", "specs/"], + "generatedOutputs": ["pages.json", "style.json", "screenshots/"], + "placeholderOutputs": ["flows.json", "graph.json", "prompts/", "specs/"], + "omittedOutputs": [], + "validationStatus": "pending" +} +``` + ## Specs -Create one Markdown file per major page, flow, or component family. Include: +When reconstruction specs are needed, create one Markdown file per major page, flow, or component family. Include: - Purpose and user intent - Layout hierarchy @@ -179,11 +392,12 @@ Create one Markdown file per major page, flow, or component family. Include: - Responsive differences - Visual style tokens inferred from computed styles and screenshots - Component-level styling: colors, typography, spacing, radii, borders, shadows, density, and responsive variants +- Source state IDs, screenshots, and evidence references for important claims - Implementation notes and assumptions ## Prompts -Create implementation prompts that are standalone. Each prompt should include: +When implementation prompts are needed, create standalone prompts. Each prompt should include: - Target page/component/flow - Source screenshots and spec references @@ -191,4 +405,5 @@ Create implementation prompts that are standalone. Each prompt should include: - Required visual style, including concrete design tokens from `style.json` - Responsive requirements - Data/model assumptions +- The evidence scope: use files from this run directory unless the user explicitly allows inspecting the source prototype again - Explicit exclusions for prototype-only artifacts or generated class names diff --git a/skills/html-to-design-spec/scripts/create_run_dir.py b/skills/html-to-design-spec/scripts/create_run_dir.py index 0d5107a41..252bebebe 100755 --- a/skills/html-to-design-spec/scripts/create_run_dir.py +++ b/skills/html-to-design-spec/scripts/create_run_dir.py @@ -1,5 +1,5 @@ #!/usr/bin/env python3 -"""Create a Prototype Intelligence run directory skeleton.""" +"""Create a html-to-design-spec run directory skeleton.""" from __future__ import annotations @@ -24,10 +24,19 @@ def main() -> int: parser.add_argument("--name", help="HTML file stem, route, app name, or other run label.") parser.add_argument("--target", help="Prototype URL or file path being analyzed.") parser.add_argument("--timestamp", help="Timestamp suffix. Defaults to local YYYYMMDD-HHMMSS.") + parser.add_argument( + "-p", + "--prompt-constraint", + action="append", + default=[], + help="Runtime constraint from the user prompt. May be provided multiple times.", + ) args = parser.parse_args() project_dir = Path(args.project_dir).expanduser().resolve() - timestamp = args.timestamp or datetime.now().strftime("%Y%m%d-%H%M%S") + now = datetime.now().astimezone() + timestamp = args.timestamp or now.strftime("%Y%m%d-%H%M%S") + created_at_iso = now.isoformat(timespec="seconds") base_name = args.name or (Path(args.target).name if args.target else project_dir.name) run_dir = project_dir / f"{slugify(base_name)}-{timestamp}" @@ -35,22 +44,76 @@ def main() -> int: for child in ("screenshots", "prompts", "specs"): (run_dir / child).mkdir() + pages = { + "schemaVersion": "html-to-design-spec/v1", + "metadata": { + "target": args.target, + "capturedAt": None, + "viewports": [], + "tooling": [], + "runConstraints": args.prompt_constraint, + }, + "discoveredActions": [], + "dedupeLog": [], + "pages": [], + "states": [], + } + style = { + "schemaVersion": "html-to-design-spec/v1", + "designTokens": { + "colors": [], + "backgroundTokens": [], + "typography": [], + "spacing": [], + "radii": [], + "shadows": [], + "borders": [], + }, + "componentStyles": [], + "responsiveStyle": [], + "visualReferences": [], + } + (run_dir / "pages.json").write_text( - json.dumps({"metadata": {"target": args.target}, "pages": [], "states": []}, indent=2) + "\n", + json.dumps(pages, indent=2) + "\n", + encoding="utf-8", + ) + (run_dir / "flows.json").write_text( + json.dumps({"schemaVersion": "html-to-design-spec/v1", "flows": []}, indent=2) + "\n", + encoding="utf-8", + ) + (run_dir / "graph.json").write_text( + json.dumps({"schemaVersion": "html-to-design-spec/v1", "nodes": [], "edges": []}, indent=2) + "\n", encoding="utf-8", ) - (run_dir / "flows.json").write_text(json.dumps({"flows": []}, indent=2) + "\n", encoding="utf-8") - (run_dir / "graph.json").write_text(json.dumps({"nodes": [], "edges": []}, indent=2) + "\n", encoding="utf-8") (run_dir / "style.json").write_text( - json.dumps({"designTokens": {}, "componentStyles": [], "responsiveStyle": [], "visualReferences": []}, indent=2) + "\n", + json.dumps(style, indent=2) + "\n", encoding="utf-8", ) metadata = { + "schemaVersion": "html-to-design-spec/v1", "target": args.target, "createdAt": timestamp, + "createdAtIso": created_at_iso, "runDirectory": str(run_dir), - "requiredOutputs": ["pages.json", "flows.json", "graph.json", "style.json", "screenshots/", "prompts/", "specs/"], + "runConstraints": args.prompt_constraint, + "viewports": [], + "tooling": [], + "defaultOutputs": ["pages.json", "style.json", "screenshots/"], + "conditionalOutputs": ["flows.json", "graph.json", "prompts/", "specs/"], + "generatedOutputs": [], + "placeholderOutputs": [ + "pages.json", + "style.json", + "screenshots/", + "flows.json", + "graph.json", + "prompts/", + "specs/", + ], + "omittedOutputs": [], + "validationStatus": "pending", } (run_dir / "manifest.json").write_text(json.dumps(metadata, indent=2) + "\n", encoding="utf-8") print(run_dir) From 4d2feb20f300d4eb74efd3a0d407cff163f93820 Mon Sep 17 00:00:00 2001 From: opoojkk <33021407+opoojkk@users.noreply.github.com> Date: Thu, 28 May 2026 18:59:37 +0800 Subject: [PATCH 3/3] Fix manifest timestamp consistency for overridden run timestamps --- skills/html-to-design-spec/scripts/create_run_dir.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/skills/html-to-design-spec/scripts/create_run_dir.py b/skills/html-to-design-spec/scripts/create_run_dir.py index 252bebebe..86fbe5658 100755 --- a/skills/html-to-design-spec/scripts/create_run_dir.py +++ b/skills/html-to-design-spec/scripts/create_run_dir.py @@ -18,6 +18,16 @@ def slugify(value: str) -> str: return value or "prototype" +def timestamp_to_iso(timestamp: str, local_timezone) -> str | None: + for fmt in ("%Y%m%d-%H%M%S", "%Y%m%d%H%M%S"): + try: + parsed = datetime.strptime(timestamp, fmt) + except ValueError: + continue + return parsed.replace(tzinfo=local_timezone).isoformat(timespec="seconds") + return None + + def main() -> int: parser = argparse.ArgumentParser() parser.add_argument("project_dir", help="Directory where the analysis result should be created.") @@ -36,7 +46,7 @@ def main() -> int: project_dir = Path(args.project_dir).expanduser().resolve() now = datetime.now().astimezone() timestamp = args.timestamp or now.strftime("%Y%m%d-%H%M%S") - created_at_iso = now.isoformat(timespec="seconds") + created_at_iso = timestamp_to_iso(timestamp, now.tzinfo) if args.timestamp else now.isoformat(timespec="seconds") base_name = args.name or (Path(args.target).name if args.target else project_dir.name) run_dir = project_dir / f"{slugify(base_name)}-{timestamp}"