{
    "$schema": "https://baselinelabs.ai/schemas/tools-outcomes.v1.json",
    "version": 1,
    "updated": "2026-05-06",
    "_doc": "Single source of truth for the Roadmap page. Edit this file to add/remove/restate tools. Page auto-orders rows to minimise mapping crossings — leave order unsorted, the page handles it.",
    "states": [
        "shipped",
        "gap"
    ],
    "leans": [
        "seo",
        "geo",
        "both"
    ],
    "tiers": [
        "outer",
        "inner",
        "custom"
    ],
    "_doc_states": "shipped = live in prod, gap = identified need with no concrete plan yet. Items with no state value are on the build queue (the implicit default).",
    "_doc_tiers": "outer = no site access (we act from outside, e.g. MarkupSchema, plugins, PR). inner = limited site access (SSH or MCP agent inside an existing site). custom = full ownership (we built it). Tiers are NESTED — inner implies outer, custom implies inner+outer.",
    "_doc_problems": "Business-level concerns that audits diagnose. Mostly framing — fewer, broader, customer-readable. Each problem maps to one or more audits.",
    "problems": [
        {
            "id": "site-not-machine",
            "name": "Site doesn't speak machine",
            "band": "hard",
            "lean": "seo",
            "notes": "Schema is missing or wrong; metadata is generic; crawlers can't extract structured info.",
            "description": "Most sites were built for humans only. Schema.org markup is missing, malformed, or limited to the bare minimum. Metadata is generic boilerplate — the same title pattern across 500 pages, no per-page descriptions, no Open Graph data. The result: search engines and AI engines can't reliably extract the structured information they need to feature, summarise, or rank the site well.\n\nThis problem cuts across SEO and GEO — every machine consumer of the web relies on these signals.",
            "tags": [
                "foundational",
                "well-understood",
                "high-leverage"
            ],
            "metrics": {
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Almost every non-enterprise site has at least one of these issues"
                },
                "urgency": {
                    "value": 7,
                    "label": "Customer urgency",
                    "note": "Customers don't realise it until they see the audit"
                }
            },
            "seen_in": [
                "The whole SEO industry has been here for 15 years"
            ]
        },
        {
            "id": "ai-cant-find",
            "name": "AI can't find or cite us",
            "band": "hard",
            "lean": "geo",
            "notes": "AI engines don't surface the brand for the queries that should belong to us.",
            "description": "AI engines (ChatGPT, Claude, Gemini, Perplexity) don't surface the brand for the queries that should naturally belong to it. Either the brand isn't indexed at all, or it's indexed but consistently outranked by competitors, or it's mentioned as an aside instead of as the canonical answer.\n\nUsually a compound problem — bot access, schema, entity graph, and content quality all contribute. The audits below diagnose which layer is failing.",
            "metrics": {
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Almost universal — even shipped brands rarely have full AI coverage"
                },
                "urgency": {
                    "value": 9,
                    "label": "Customer urgency",
                    "note": "Once they see they're invisible, this becomes the top-of-mind concern"
                }
            },
            "tags": [
                "foundational",
                "compound",
                "high-leverage"
            ]
        },
        {
            "id": "reputation-drift",
            "name": "Reputation goes unmanaged",
            "band": "hard",
            "lean": "both",
            "notes": "Reviews and listings drift; ratings sag; nobody owns the response loop.",
            "description": "Reviews and listings drift over time. Star ratings sag because nobody asks happy customers; bad reviews go unanswered; Google Business Profile information rots; directory listings reference a defunct phone number.\n\nThe AI engines pick this up — sentiment-laden language and outdated facts feed straight into how the brand is summarised. Untreated reputation drift becomes the brand's narrative.",
            "metrics": {
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Local businesses much more affected than B2B SaaS"
                },
                "urgency": {
                    "value": 6,
                    "label": "Customer urgency",
                    "note": "Slow burn — invisible until a crisis"
                }
            },
            "tags": [
                "ongoing",
                "human-process"
            ]
        },
        {
            "id": "ai-wrong-brand",
            "name": "AI gets the brand wrong",
            "band": "soft",
            "lean": "geo",
            "notes": "AI summaries describe the business inaccurately or with stale framing.",
            "description": "AI summaries describe the business with stale framing, factual errors, or competitor-borrowed language. The model's training corpus is too old, or it's leaning on third-party sources (Reddit, an old press release, a competitor comparison page) instead of the brand's own current site.\n\nThe fix isn't 'update your About page' — it's making the brand's own current story the most retrievable surface, with the right structured signals so the model trusts it over older sources.",
            "metrics": {
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Any brand older than 12 months has this to some degree"
                },
                "urgency": {
                    "value": 7,
                    "label": "Customer urgency",
                    "note": "Founders care a lot when they see the wrong version of their story"
                }
            },
            "tags": [
                "narrative",
                "corrective"
            ]
        },
        {
            "id": "competitors-air",
            "name": "Competitors get the airtime",
            "band": "soft",
            "lean": "both",
            "notes": "AI and search keep recommending competitors instead of us.",
            "description": "When AI is asked about the category, it recommends competitors first (or only). The brand might be objectively better or first, but the model has more retrievable signal about the competitor — more reviews, more mentions, more comparison content, more schema.\n\nThis is the GEO equivalent of losing the SERP for a category keyword.",
            "metrics": {
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Almost everyone except category leaders"
                },
                "urgency": {
                    "value": 9,
                    "label": "Customer urgency",
                    "note": "Direct revenue threat — visible in lost deals"
                }
            },
            "tags": [
                "competitive",
                "revenue-threat"
            ]
        },
        {
            "id": "story-doesnt-stick",
            "name": "Story doesn't stick",
            "band": "soft",
            "lean": "geo",
            "notes": "Known for the wrong things; weak category-level mindshare.",
            "description": "AI knows the brand exists but associates it with the wrong things, or with nothing at all at the category level. Asked 'what is X for' the model gives a vague generic answer instead of the positioning the brand has worked to establish.\n\nThis is positioning + content depth — being known for something requires repeated, structured, high-signal coverage of the topic across the site and across third-party surfaces.",
            "metrics": {
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Hits brands without strong content motion the hardest"
                },
                "urgency": {
                    "value": 5,
                    "label": "Customer urgency",
                    "note": "Long-game problem; not many founders ask for it directly"
                }
            },
            "tags": [
                "positioning",
                "long-game"
            ]
        },
        {
            "id": "ai-bots-blocked",
            "name": "AI bots are silently blocked",
            "band": "hard",
            "lean": "geo",
            "notes": "robots.txt or WAF/CDN rules accidentally block GPTBot, ClaudeBot, PerplexityBot — site is invisible to AI by default.",
            "description": "The single most common cause of zero AI visibility. Either robots.txt explicitly disallows AI user-agents (often added by a previous SEO consultant 'to protect content'), or — more insidiously — robots.txt allows them but the CDN's bot-management ruleset (Cloudflare Super Bot Fight Mode, AWS WAF AI-scrapers managed rule, Fastly bot mitigation) silently blocks them at the edge.\n\nIndustry research suggests roughly a quarter of B2B sites have at least one of these issues and don't know about it. The brand could have perfect schema, perfect content, perfect llms.txt — and still be 100% invisible in AI answers because retrieval bots literally cannot fetch the pages.\n\nThis is the GEO equivalent of 'is the site live'. It should be the first check of every super audit.",
            "tags": [
                "foundational",
                "high-leverage",
                "binary",
                "underserved"
            ],
            "metrics": {
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "~27% of B2B sites have at least one accidental block (Mersel/ziptie research)"
                },
                "urgency": {
                    "value": 10,
                    "label": "Customer urgency",
                    "note": "Total invisibility — once they see the result, fixing it is non-negotiable"
                }
            },
            "seen_in": [
                "RankScale crawlability module",
                "Mersel AI bot checker",
                "Cloudflare AI Crawl Control dashboard",
                "BotDetector.io"
            ]
        },
        {
            "id": "no-machine-corpus",
            "name": "No clean machine-readable corpus",
            "band": "hard",
            "lean": "geo",
            "notes": "No llms.txt, no llms-full.txt, no Markdown surface for AI retrieval to work with.",
            "description": "Modern AI crawlers prefer clean, plain-text or Markdown representations of a site over rendered HTML soup. The llms.txt convention (and the fuller llms-full.txt) is the emerging standard — Mintlify auto-generates it, Rank Math 2026 ships native support, GitBook and Fern bake it in.\n\nWithout this corpus, AI engines fall back on whatever they can scrape from rendered HTML — which on JS-heavy sites is often partial or wrong. The brand's actual story never reaches the model.\n\nThe gap nobody has filled: continuously-regenerated llms.txt driven by a real entity model, not a one-time dump.",
            "tags": [
                "emerging-standard",
                "foundational",
                "GPU-leverageable"
            ],
            "metrics": {
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Almost no non-docs sites have it; even docs sites often have stale versions"
                },
                "urgency": {
                    "value": 6,
                    "label": "Customer urgency",
                    "note": "Customers don't know to ask for it yet — education problem"
                }
            },
            "seen_in": [
                "Mintlify (auto)",
                "Rank Math 2026",
                "GitBook",
                "Fern",
                "llmstxthub.com"
            ]
        }
    ],
    "outcomes": [
        {
            "id": "traffic",
            "name": "More traffic",
            "band": "hard",
            "icon": "ph-trend-up"
        },
        {
            "id": "rank",
            "name": "Better rank",
            "band": "hard",
            "icon": "ph-ranking"
        },
        {
            "id": "reputation",
            "name": "Stronger reputation",
            "band": "soft",
            "icon": "ph-star"
        },
        {
            "id": "conversion",
            "name": "Higher conversion",
            "band": "soft",
            "icon": "ph-handshake"
        }
    ],
    "audits": [
        {
            "id": "schema-audit",
            "name": "Schema audit",
            "band": "hard",
            "lean": "seo",
            "state": "shipped",
            "notes": "detects missing or invalid JSON-LD",
            "description": "Crawls the site, parses every page, and reports on missing, malformed, or thin schema.org markup. Flags Organization, Product, FAQ, and Article schemas in particular since these have outsized impact on rich-snippet eligibility and AI answer extraction.\n\nRuns on the existing scraper infrastructure — no new infra needed. Outputs a per-page table plus an aggregate score.",
            "tags": [
                "self-serve",
                "scraping-required",
                "deterministic"
            ],
            "metrics": {
                "ease": {
                    "value": 8,
                    "label": "Ease of implementation",
                    "note": "Schema spec is well-defined and stable"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Every commerce/content site has schema concerns"
                },
                "stealability": {
                    "value": 9,
                    "label": "Stealability",
                    "note": "Yoast, RankMath, Ahrefs all do this — patterns are public"
                }
            },
            "links": [
                {
                    "label": "Live tool",
                    "url": "https://markupschema.com/schema-generator"
                },
                {
                    "label": "Source",
                    "url": "https://github.com/Volcanex/baselinelabs/tree/main/markupschema"
                }
            ],
            "owner": "gabriel",
            "estimate": "Shipped — original ~3 weeks",
            "requirements": "Scraper pool only. No GPU needed for the audit itself; GPU is reserved for inference-based generation.",
            "depends_on": [
                "scraper-infra"
            ],
            "seen_in": [
                "Yoast",
                "RankMath",
                "Ahrefs",
                "Schema App",
                "Merkle Schema Markup Generator"
            ]
        },
        {
            "id": "metadata-health",
            "name": "Metadata health",
            "band": "hard",
            "lean": "seo",
            "notes": "title, description, OG, canonical",
            "description": "Crawls every page and reports on the metadata layer: title tag length and uniqueness, meta description presence and quality, Open Graph tags for social previews, Twitter card tags, canonical URLs, hreflang for international sites, and robots directives. Flags duplicate boilerplate (the same title pattern across the whole site) and pages with no description at all.\n\nMetadata is the cheapest improvement available — small per-page edits, large search and AI surface impact.",
            "metrics": {
                "ease": {
                    "value": 9,
                    "label": "Ease of implementation",
                    "note": "Pure HTML parsing; existing scraper handles it"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Every site has at least some pages with weak metadata"
                },
                "stealability": {
                    "value": 10,
                    "label": "Stealability",
                    "note": "Yoast, Screaming Frog, Sitebulb — completely commoditised"
                }
            },
            "tags": [
                "self-serve",
                "deterministic",
                "foundational"
            ],
            "estimate": "~2 weeks for v1; the analysis is the work, not the parsing",
            "requirements": "Scraper pool. HTML parser. Good defaults for \"what is healthy\".",
            "depends_on": [
                "scraper-infra"
            ],
            "seen_in": [
                "Yoast",
                "Screaming Frog",
                "Sitebulb",
                "Ahrefs Site Audit",
                "Semrush Site Audit"
            ]
        },
        {
            "id": "tech-seo-crawl",
            "name": "Technical SEO crawl",
            "band": "hard",
            "lean": "seo",
            "notes": "broken links, indexability, perf",
            "description": "Full technical crawl — broken internal links, broken external links, redirect chains, indexability problems (noindex, blocked by robots.txt, blocked by canonical), Core Web Vitals, mobile usability, mixed content, hreflang errors. The grown-up version of metadata-health.\n\nWidely commoditised; the differentiator here is integration with the rest of the audit suite, not the technical crawl itself.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Crawler scaling is the hard part; analysis logic is well-known"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Every site has technical issues"
                },
                "stealability": {
                    "value": 10,
                    "label": "Stealability",
                    "note": "Most mature category in SEO tooling — pick a reference"
                }
            },
            "tags": [
                "self-serve",
                "deterministic",
                "commoditised"
            ],
            "estimate": "~6–8 weeks for v1",
            "requirements": "Scaled crawler with rate-limit awareness. Lighthouse-style perf analysis. Indexability rule engine.",
            "depends_on": [
                "scraper-infra"
            ],
            "seen_in": [
                "Screaming Frog",
                "Sitebulb",
                "Ahrefs Site Audit",
                "Semrush Site Audit",
                "OnCrawl",
                "JetOctopus"
            ]
        },
        {
            "id": "ai-citation",
            "name": "AI citation presence",
            "band": "hard",
            "lean": "geo",
            "notes": "who cites the brand in AI answers",
            "description": "Tracks who cites the brand in AI answers — which prompts trigger a citation, which engines cite, what proportion of answers include the brand vs competitors, what surface the citation is built on (own site, Reddit, Wikipedia, news article).\n\nClose cousin of brand-scan, focused specifically on citation events rather than overall sentiment/coverage.",
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Multi-engine querying + citation extraction per engine"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Every brand wants to know if they are cited"
                },
                "stealability": {
                    "value": 5,
                    "label": "Stealability",
                    "note": "Profound and Athena have shown the playbook"
                }
            },
            "tags": [
                "multi-engine",
                "core-geo"
            ],
            "estimate": "~4–6 weeks for v1",
            "requirements": "Multi-engine API access. Citation extractor per engine (the engines all return citations differently).",
            "depends_on": [
                "multi-engine-router"
            ],
            "seen_in": [
                "Profound",
                "AthenaHQ",
                "Otterly",
                "Peec AI"
            ]
        },
        {
            "id": "review-landscape",
            "name": "Review landscape",
            "band": "hard",
            "lean": "both",
            "state": "gap",
            "notes": "Google Business Profile, listings, ratings",
            "description": "Maps the brand's review surface — Google Business Profile star rating + recency, Trustpilot/G2/Capterra/Yelp/industry-specific, response rates, sentiment trends. Finds the listings the brand has lost track of.\n\nLess about generating reviews and more about knowing where the rep currently lives, so the fix layer can target the right surfaces.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Per-platform scraping; some have APIs, some require careful crawling"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Local + B2B SaaS most relevant; B2C eCom less so"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "BrightLocal, Birdeye, GatherUp all do the local angle"
                }
            },
            "tags": [
                "cross-platform",
                "monitoring"
            ],
            "estimate": "~6–8 weeks for v1 covering the top 5 platforms",
            "requirements": "Per-platform integrations. GBP API (manual setup per customer). Cached scraping for the rest.",
            "depends_on": [
                "scraper-infra"
            ],
            "seen_in": [
                "BrightLocal",
                "Birdeye",
                "GatherUp",
                "Reputation.com",
                "Whitespark"
            ]
        },
        {
            "id": "entity-graph",
            "name": "Entity / KG audit",
            "band": "hard",
            "lean": "geo",
            "state": "gap",
            "notes": "Wikidata, sameAs, organisation graph",
            "description": "Builds and validates the brand's entity graph — Organization, Person (founders, leadership), Product, Service nodes, with sameAs links to Wikidata, LinkedIn, Crunchbase, Companies House, etc. Detects gaps (no LinkedIn link), staleness (Crunchbase founder list out of date), and inconsistencies (different founder name on Wikidata vs the site).\n\nBecomes the substrate for MarkupSchema's per-page output and feeds the Wikidata audit.",
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Cross-source reconciliation is the work"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Mid-market brands particularly weak here"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "Schema App and WordLift have ontology products; not many do automatic discovery"
                }
            },
            "tags": [
                "knowledge-graph",
                "differentiator",
                "platform-foundation"
            ],
            "estimate": "~10–14 weeks for v1",
            "requirements": "Wikidata SPARQL. LinkedIn / Crunchbase scraping (compliance-aware). Schema graph storage.",
            "depends_on": [
                "scraper-infra"
            ],
            "seen_in": [
                "Schema App Entity Hub",
                "WordLift",
                "InLinks"
            ]
        },
        {
            "id": "brand-scan",
            "name": "Brand scan",
            "band": "soft",
            "lean": "geo",
            "state": "shipped",
            "notes": "what AI says about the brand today",
            "description": "Asks AI engines (ChatGPT, Claude, Gemini, Perplexity) what they know about the brand across 50–200 prompts derived from the business profile. Returns sentiment, framing accuracy, competitor co-mentions, and citation surface.\n\nThis is the headline GEO product — the thing customers come for. Outputs both a per-engine breakdown and an aggregate brand-understanding score.",
            "tags": [
                "self-serve",
                "ai-powered",
                "multi-engine"
            ],
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Multi-engine orchestration plus a sentiment pipeline"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Every brand should care; few currently do"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "GEO is new — fewer reference implementations to copy"
                }
            },
            "links": [
                {
                    "label": "Live tool",
                    "url": "https://baselinelabs.ai/brand-reports"
                }
            ],
            "owner": "gabriel",
            "estimate": "Shipped — ongoing prompt-template improvements",
            "requirements": "API access to ChatGPT, Claude, Gemini, Perplexity. Credit budget per scan (50–200 prompts × N engines). Sentiment classifier.",
            "depends_on": [
                "multi-engine-router"
            ],
            "seen_in": [
                "Profound",
                "Otterly.AI",
                "Athena (early)"
            ]
        },
        {
            "id": "ai-summary",
            "name": "AI summary layer",
            "band": "soft",
            "lean": "geo",
            "state": "shipped",
            "notes": "visibility reports across engines",
            "description": "Turns the raw outputs of brand-scan, ai-citation, and visibility-search into a narrative summary — what the AI engines collectively think the brand is, who it's compared to, what story they tell. Used as the headline of every customer-facing report.\n\nThis is where the soft audit pipeline pays off — the synthesis layer the customer actually reads.",
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Mostly an LLM prompt over the structured outputs"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Every customer wants 'what does this all mean'"
                },
                "stealability": {
                    "value": 6,
                    "label": "Stealability",
                    "note": "Profound and Otterly do executive summaries; the prompt is the moat"
                }
            },
            "tags": [
                "ai-powered",
                "synthesis",
                "headline-product"
            ],
            "estimate": "Shipped — ongoing prompt iteration",
            "requirements": "LLM access. Structured inputs from brand-scan + ai-citation + visibility-search.",
            "depends_on": [
                "brand-scan"
            ],
            "seen_in": [
                "Profound",
                "Otterly",
                "Peec AI"
            ]
        },
        {
            "id": "share-of-voice",
            "name": "Share of voice",
            "band": "soft",
            "lean": "seo",
            "notes": "competitor presence in target prompts",
            "description": "Across a target prompt set, what % of citations / mentions / recommendations land on the brand vs each named competitor. Tracks trend over time so customers can see whether they're gaining or losing ground.\n\nThe single most repurposable chart in any sales deck — 'we now lead in 7 of 12 category prompts'.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Builds on the brand-scan + visibility-search infrastructure"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Standard ask once customers grasp GEO"
                },
                "stealability": {
                    "value": 6,
                    "label": "Stealability",
                    "note": "SoV is a standard analytics primitive; AI angle is newer"
                }
            },
            "tags": [
                "competitive",
                "time-series"
            ],
            "estimate": "~4–6 weeks for v1",
            "requirements": "Brand-scan + visibility-search outputs. Competitor list (per customer). Trend storage.",
            "depends_on": [
                "brand-scan",
                "visibility-search"
            ],
            "seen_in": [
                "Profound (share of voice)",
                "AthenaHQ",
                "Brand24"
            ]
        },
        {
            "id": "narrative-mapping",
            "name": "Narrative mapping",
            "band": "soft",
            "lean": "geo",
            "notes": "what story AI tells about the category",
            "description": "Across thousands of AI responses about the brand and the category, clusters the recurring narratives — 'X is positioned as the cheap option', 'X is associated with developer tools', 'X is mentioned alongside Y and Z'. Surfaces drift between the brand's stated positioning and how AI actually frames it.\n\nThe diagnostic that most often produces a 'we need to rewrite our category page' moment.",
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Clustering on AI responses; needs an embedding model"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Brands with active positioning concerns; founders love it"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "Few competitors do narrative clustering specifically"
                }
            },
            "tags": [
                "ai-powered",
                "positioning",
                "differentiator"
            ],
            "estimate": "~6–8 weeks for v1",
            "requirements": "GPU pool (embeddings). Brand-scan output as input. Clustering pipeline.",
            "depends_on": [
                "brand-scan",
                "gpu-pool"
            ],
            "seen_in": [
                "Brandwatch (narrative analytics)",
                "Quid",
                "Sprinklr Insights"
            ]
        },
        {
            "id": "competitor-recommend",
            "name": "Competitor recommend.",
            "band": "soft",
            "lean": "both",
            "notes": "who AI suggests instead",
            "description": "When the brand isn't recommended, who is? Per prompt: which competitor took the slot, what was the rationale the AI gave, what surface was the recommendation built on. Helps prioritise which competitors are eating the brand's lunch and on what dimensions.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Builds on brand-scan; needs competitor-aware prompt design"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Almost every brand cares about this once they see it"
                },
                "stealability": {
                    "value": 5,
                    "label": "Stealability",
                    "note": "Profound and Peec do competitor analysis; rationale extraction is rarer"
                }
            },
            "tags": [
                "competitive",
                "ai-powered"
            ],
            "estimate": "~4–6 weeks for v1",
            "requirements": "Brand-scan + competitor list per customer. Rationale extractor (LLM).",
            "depends_on": [
                "brand-scan"
            ],
            "seen_in": [
                "Profound",
                "Peec AI",
                "AthenaHQ"
            ]
        },
        {
            "id": "semantic-presence",
            "name": "Semantic presence",
            "band": "soft",
            "lean": "geo",
            "state": "gap",
            "notes": "category-level association strength",
            "description": "Measures category-level association strength — when AI is asked open-ended questions about the category (not the brand), how often does the brand surface? The leading indicator of whether the brand is becoming the default answer in its space.\n\nThe metric category leaders track most carefully; the metric challengers find most painful.",
            "metrics": {
                "ease": {
                    "value": 4,
                    "label": "Ease of implementation",
                    "note": "Hard — defining the prompt set per category is the work"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Most relevant for brands with category ambitions"
                },
                "stealability": {
                    "value": 3,
                    "label": "Stealability",
                    "note": "Nobody productizes category-level brand association well"
                }
            },
            "tags": [
                "differentiator",
                "long-game"
            ],
            "estimate": "~10–14 weeks for v1; the prompt set is the moat",
            "requirements": "Brand-scan infra. Per-category prompt template library (curated). Calibration set per industry.",
            "depends_on": [
                "brand-scan"
            ],
            "seen_in": [
                "Profound (category share)",
                "AthenaHQ (category presence)"
            ]
        },
        {
            "id": "visibility-search",
            "name": "Visibility search",
            "band": "hard",
            "lean": "geo",
            "state": "shipped",
            "notes": "tracks rank across Google, ChatGPT, Perplexity",
            "description": "Tracks where the brand ranks for a curated keyword set across both classic search engines (Google) and AI engines (ChatGPT, Perplexity, with Claude and Gemini on the way). Each scheduled run captures position, the engine's answer text where applicable, and citation surface.\n\nBridges the SEO and GEO worlds: customers see 'rank #3 on Google, #8 on ChatGPT' for the same query, which is the most useful single comparison in the whole platform. The AI summary layer sits on top of this for soft synthesis — that part is how onboarding reports get their narrative.",
            "tags": [
                "self-serve",
                "multi-engine",
                "scheduled",
                "headline-product"
            ],
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Multi-engine orchestration plus result parsing per engine"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Every brand wants to know where they rank — for SEO and GEO"
                },
                "stealability": {
                    "value": 6,
                    "label": "Stealability",
                    "note": "Semrush/Ahrefs do classic SERP; Profound/Peec/Athena do AI rank — combining both is less common"
                }
            },
            "links": [
                {
                    "label": "Live tool",
                    "url": "https://baselinelabs.ai/brand-templates"
                }
            ],
            "owner": "gabriel",
            "estimate": "Shipped — ongoing engine coverage",
            "requirements": "API access to Google SERP (via SerpApi or DataForSEO), ChatGPT, Perplexity. Scheduled job runner. Position-extraction logic per engine.",
            "depends_on": [
                "multi-engine-router",
                "scheduler"
            ],
            "seen_in": [
                "Semrush Position Tracking",
                "Ahrefs Rank Tracker + Brand Radar",
                "AccuRanker",
                "Profound",
                "Peec AI",
                "AthenaHQ",
                "SE Visible",
                "Surfer AI Tracker"
            ]
        },
        {
            "id": "ai-bot-access-audit",
            "name": "AI bot access audit",
            "band": "hard",
            "lean": "geo",
            "notes": "tests robots.txt + WAF/CDN against 28+ AI user-agents",
            "description": "Cross-checks robots.txt against actual HTTP fetches with spoofed AI user-agents (GPTBot, OAI-SearchBot, ChatGPT-User, ClaudeBot, Claude-User, Claude-SearchBot, PerplexityBot, Google-Extended, CCBot, Bytespider, Meta-ExternalAgent, AppleBot-Extended, Amazonbot, Cohere-ai, YouBot, etc.). Detects the silent-block scenario where robots.txt allows but the CDN's bot-management layer rejects.\n\nThe single highest-ROI audit Baseline could ship — binary outcome, fixable, and the entire GEO market is sleeping on it as a productized check. Should be a free public lead-gen tool AND the headline finding of every super audit.",
            "tags": [
                "high-leverage",
                "binary",
                "lead-gen",
                "underserved",
                "trojan-horse"
            ],
            "metrics": {
                "ease": {
                    "value": 8,
                    "label": "Ease of implementation",
                    "note": "Scraper already exists; just need to spoof user-agents and parse robots.txt"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "~27% of B2B sites have at least one block"
                },
                "stealability": {
                    "value": 9,
                    "label": "Stealability",
                    "note": "RankScale and Mersel published the patterns; nobody owns the productized version"
                }
            },
            "owner": "unassigned",
            "estimate": "~4–6 weeks for v1 including robots.txt parser + 28 user-agent fetch matrix + Cloudflare/AWS/Fastly heuristics",
            "requirements": "Scraper pool (already have). robots.txt parser. List of published AI bot IP ranges + reverse-DNS verification (don't trust user-agent alone). Cloudflare-specific test: detect Super Bot Fight Mode / AI Crawl Control via response signatures.",
            "depends_on": [
                "scraper-infra"
            ],
            "seen_in": [
                "RankScale",
                "Mersel.ai",
                "BotDetector.io",
                "Cloudflare AI Crawl Control",
                "MRS Digital checker"
            ]
        },
        {
            "id": "agent-analytics",
            "name": "Agent analytics",
            "band": "hard",
            "lean": "geo",
            "notes": "server-log AI crawler attribution via CDN integrations",
            "description": "Ingests server logs from the customer's CDN (Vercel one-click, Cloudflare Logpush, Fastly, AWS CloudFront via Firehose, Netlify, Akamai) and attributes traffic by AI bot. Distinguishes retrieval bots (OAI-SearchBot, PerplexityBot, ClaudeBot/Claude-SearchBot — these matter for visibility) from training bots (CCBot, GPTBot, Google-Extended — these matter for inclusion in future model versions).\n\nThis is Profound's killer feature and the most defensible GEO data moat available to a small team. Once a customer connects their CDN, the data flywheel starts and switching costs go up dramatically.\n\nVerifies bot identity via published IP ranges + reverse-DNS, not user-agent strings (which are routinely spoofed by malicious bots).",
            "tags": [
                "data-moat",
                "stickiness",
                "high-leverage",
                "CDN-integration"
            ],
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Per-CDN integrations; Vercel and Cloudflare are easiest, Akamai is hardest"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Every site with AI traffic benefits — even brands that don't know they have AI traffic yet"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Profound has proven the pattern; CDN log APIs are documented"
                }
            },
            "owner": "unassigned",
            "estimate": "~10–14 weeks for v1 across Vercel + Cloudflare; +3–4 weeks per additional CDN",
            "requirements": "Per-CDN log ingestion pipelines. Bot identity verifier (IP range list + reverse DNS). Time-series storage. Cost-aware sampling for high-traffic sites.",
            "depends_on": [],
            "seen_in": [
                "Profound Agent Analytics",
                "Cloudflare AI Crawl Control dashboards"
            ]
        },
        {
            "id": "citation-attribution",
            "name": "Citation attribution",
            "band": "soft",
            "lean": "geo",
            "notes": "links AI citations back to the page/paragraph/schema that earned them",
            "description": "Closes the loop nobody else has closed. When a brand is cited in a ChatGPT or Perplexity answer, the engine scrapes the cited URL, extracts the paragraph(s) the model is leaning on, matches them against the customer's own pages and against third-party sources (Reddit threads, Wikipedia, YouTube transcripts), and reports: 'ChatGPT cited [page X / Reddit thread Y / Wikipedia paragraph Z] when answering [prompt]. The signal it weighted appears to be [stats / quotation / definition / FAQ schema / freshness].'\n\nThis turns the brand scan from 'you got 23 mentions' into 'you got 23 mentions and here's exactly what to write more of'. It is the bridge between the soft audit and any actionable fix.",
            "tags": [
                "differentiator",
                "closes-the-loop",
                "ai-powered"
            ],
            "metrics": {
                "ease": {
                    "value": 4,
                    "label": "Ease of implementation",
                    "note": "Hard — requires citation extraction, source scraping, paragraph matching, signal classification"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Every brand wants to know what's working — currently impossible to answer"
                },
                "stealability": {
                    "value": 3,
                    "label": "Stealability",
                    "note": "Nobody has shipped this; closest analogues are Profound's source domain reports and Surfer AI Tracker prompt transparency"
                }
            },
            "owner": "unassigned",
            "estimate": "~6 months for v1; this is a flagship build",
            "requirements": "Brand scan output as input. Scraper for citing URLs (have it). Embedding model for paragraph matching. Signal classifier (stats/quotes/definitions/FAQ schema/recency). May need GPU for embeddings at scale.",
            "depends_on": [
                "brand-scan",
                "scraper-infra",
                "gpu-pool"
            ],
            "seen_in": [
                "Surfer AI Tracker (prompt transparency)",
                "Profound (source domain breakdown)"
            ]
        },
        {
            "id": "llms-txt-audit",
            "name": "llms.txt audit",
            "band": "hard",
            "lean": "geo",
            "notes": "presence + spec compliance + drift detection",
            "description": "Checks for llms.txt and llms-full.txt presence at the canonical /llms.txt and /llms-full.txt paths. Validates against the Mintlify-stewarded spec. Detects staleness (when site content has changed but llms.txt hasn't). Compares against the entity graph and flags missing canonical pages.\n\nA cheap, high-signal audit that pairs naturally with the schema audit and feeds the llms.txt push fix.",
            "tags": [
                "self-serve",
                "deterministic",
                "scraping-required"
            ],
            "metrics": {
                "ease": {
                    "value": 9,
                    "label": "Ease of implementation",
                    "note": "Trivial — fetch, parse, compare"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Almost no non-docs sites have it"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Otterly, RankScale, Pixelmojo, MRS Digital all check it"
                }
            },
            "owner": "unassigned",
            "estimate": "~1–2 weeks for v1",
            "requirements": "Scraper. Spec parser. (Pairs cleanly with schema audit infra.)",
            "depends_on": [
                "scraper-infra"
            ],
            "seen_in": [
                "Otterly GEO Audit",
                "RankScale",
                "Pixelmojo",
                "MRS Digital",
                "llmstxthub.com"
            ]
        },
        {
            "id": "ai-citation-source-map",
            "name": "AI citation source map",
            "band": "soft",
            "lean": "geo",
            "notes": "Reddit / YouTube / Wikipedia citation surface for the brand",
            "description": "Maps where AI engines pull brand information from when they aren't pulling from the brand's own site. Reddit accounts for around 10–40% of LLM citations depending on the engine and study window; YouTube has been overtaking Reddit through 2026; Wikipedia/Wikidata is foundational for most engines.\n\nFor most brands, the brand's own site is only 5–10% of AI source coverage (Profound's published figure). Knowing which subreddits, channels, and Wikipedia articles AI is leaning on tells the brand exactly where to engage, contribute, or correct — and which third-party surfaces to monitor for crisis signals.",
            "tags": [
                "differentiator",
                "third-party-aware"
            ],
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Reddit/YouTube/Wikipedia all have decent APIs; the analysis is the work"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Every brand benefits; few realise it"
                },
                "stealability": {
                    "value": 5,
                    "label": "Stealability",
                    "note": "Profound surfaces source domains; nobody breaks down by community-specific structure"
                }
            },
            "owner": "unassigned",
            "estimate": "~6–8 weeks for v1 covering Reddit + YouTube + Wikipedia",
            "requirements": "Reddit API, YouTube Data API, Wikipedia/Wikidata API. Sentiment classifier (already have). Ranking heuristic for engagement priority.",
            "depends_on": [
                "brand-scan"
            ],
            "seen_in": [
                "Brand24",
                "Brandwatch",
                "Profound source-domain reports"
            ]
        },
        {
            "id": "wikidata-audit",
            "name": "Wikidata / KG audit",
            "band": "hard",
            "lean": "geo",
            "notes": "Q-ID presence, sameAs alignment, edit briefs",
            "description": "Detects whether the brand has a Wikidata Q-ID and a Knowledge Panel. Cross-checks the schema graph's sameAs links against Wikidata reality. Surfaces inconsistencies (e.g. founder name on the site differs from Wikidata, address out of date, missing Crunchbase or LinkedIn linkage).\n\nProduces an edit brief — never auto-edits, since paid editing violates Wikimedia's terms of use. The brief is something a brand's PR or comms team can hand to a contracted Wikipedia editor or use to update Wikidata themselves with proper conflict-of-interest disclosure.\n\nProductizing this is genuinely novel — current solutions are high-touch services (Kalicube, Reputation X, Schema App's Entity Hub).",
            "tags": [
                "differentiator",
                "knowledge-graph",
                "compliance-aware"
            ],
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Wikidata SPARQL is well-documented; Knowledge Panel detection is the harder part"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Mid-market brands often have weak/no entity coverage"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "No SaaS competitor productizes this — services only"
                }
            },
            "owner": "unassigned",
            "estimate": "~6–10 weeks for v1",
            "requirements": "Wikidata SPARQL endpoint. Knowledge Panel detection (rendered Google SERP scrape or DataForSEO). Schema graph (depends on entity-graph audit). Edit-brief template.",
            "depends_on": [
                "entity-graph"
            ],
            "seen_in": [
                "Kalicube",
                "Schema App Entity Hub",
                "Wikibusines (service)"
            ]
        },
        {
            "id": "geo-content-score",
            "name": "GEO content scorer",
            "band": "hard",
            "lean": "geo",
            "notes": "scores pages on AI-citation correlated factors",
            "description": "Scores a page on the factors that the published academic research (Princeton/Georgia Tech KDD 2024 GEO study) found correlate with AI citation: statistic density (+41% visibility), quotation density (+37%), definitional clarity, comparative explicitness, recency, authorship credentials, and structured-data presence.\n\nDifferent from Surfer/Clearscope/MarketMuse — those reverse-engineer the SERP and tell you what to add to rank on Google. This one reverse-engineers AI answers and tells you what to add to be cited. Same idea, different optimization target.",
            "tags": [
                "differentiator",
                "research-grounded",
                "ai-powered"
            ],
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Each factor is its own classifier; the scoring rubric is the work"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Every content team wants this"
                },
                "stealability": {
                    "value": 5,
                    "label": "Stealability",
                    "note": "SERP-trained content scoring is commoditized; AI-citation-trained scoring is open"
                }
            },
            "owner": "unassigned",
            "estimate": "~10–14 weeks for v1",
            "requirements": "Scraper. Per-factor classifiers (some via GPU, some via heuristics). Citation-correlation calibration set.",
            "depends_on": [
                "scraper-infra",
                "gpu-pool"
            ],
            "seen_in": [
                "Surfer SEO (SERP-trained equivalent)",
                "Clearscope",
                "MarketMuse",
                "Frase",
                "Princeton/GT KDD 2024 GEO research paper"
            ]
        },
        {
            "id": "site-pulse",
            "name": "Site Pulse",
            "band": "hard",
            "lean": "both",
            "state": "shipped",
            "notes": "per-page technical scan; modular checks across SEO + GEO surfaces",
            "description": "Crawls the site and runs a configurable bundle of deterministic modules — schema validity, structured-data coverage, link health, robots/sitemap/llms.txt presence, indexability, render parity, and more. Each module produces pass/warn/fail/info findings; results aggregate to per-category scores plus an overall site score.\n\nUnlike Schema audit which focuses on JSON-LD, Site Pulse covers the wider machine-readability surface — the technical hygiene that determines whether AI engines can crawl, parse, and trust the site.",
            "tags": [
                "self-serve",
                "scraping-required",
                "deterministic",
                "modular"
            ],
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Module pattern lets new checks slot in cheaply"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Every site has at least a few hygiene gaps"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Plenty of crawl-and-rule SEO tools; the GEO modules are newer"
                }
            },
            "links": [
                {
                    "label": "Live tool",
                    "url": "https://baselinelabs.ai/site-pulse-reports"
                }
            ],
            "owner": "gabriel",
            "estimate": "Shipped — Phase 2 modules ongoing",
            "requirements": "Scraper pool. Module rule packs. Optional JS render for SPA-heavy sites.",
            "depends_on": [
                "scraper-infra"
            ]
        }
    ],
    "fixes": [
        {
            "id": "markupschema",
            "name": "MarkupSchema",
            "band": "hard",
            "lean": "seo",
            "tiers": [
                "outer"
            ],
            "state": "shipped",
            "bundles": [
                "MarkupSchema"
            ],
            "notes": "inject schema from outside",
            "description": "Generates schema.org JSON-LD for any URL using a Qwen3-VL fine-tune. Operates entirely from outside the site — the customer drops a script tag or syncs via the API and we serve the schema as a side-channel.\n\nThe outer-tier flagship: works on any site without code access, which is the lowest-friction wedge into the SEO toolchain market.",
            "tags": [
                "self-serve",
                "ai-powered",
                "open-api",
                "outer-tier",
                "billable-api"
            ],
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Inference is the hard part; UI is straightforward"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Every site benefits from better schema"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Many tools do schema gen — but ours is generative VL, most are template-based"
                }
            },
            "links": [
                {
                    "label": "Product",
                    "url": "https://markupschema.com"
                },
                {
                    "label": "Generator",
                    "url": "https://markupschema.com/schema-generator"
                },
                {
                    "label": "Source",
                    "url": "https://github.com/Volcanex/baselinelabs/tree/main/markupschema"
                }
            ],
            "owner": "gabriel",
            "estimate": "Shipped — ongoing inference cost optimisation",
            "requirements": "GPU pool (Vast.ai workers running Qwen3-VL-4B-fp8). 1.5–17s inference per page. Scraper for HTML extraction. R2 for cleaned HTML + JSON-LD outputs.",
            "depends_on": [
                "scraper-infra",
                "gpu-pool"
            ],
            "seen_in": [
                "Schema App",
                "Merkle generators",
                "WordLift (pricier, ontology-heavy)"
            ]
        },
        {
            "id": "ssh-agent",
            "name": "SSH agent",
            "band": "hard",
            "lean": "seo",
            "tiers": [
                "outer",
                "inner"
            ],
            "notes": "edit head/meta via SSH",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Edits the customer's site directly via SSH. Reads the head, updates metadata, injects schema, fixes obvious technical SEO issues. Operates with a per-customer service account and a strict allowlist of file/path patterns.\n\nThe inner-tier workhorse for sites where the customer is willing to give shell access but doesn't want to maintain the changes themselves.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "SSH is a solved problem; safety guardrails are the work"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Self-hosted sites and infra-savvy customers"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Many SEO consultants do this manually; the SaaS layer is rare"
                }
            },
            "tags": [
                "inner-tier",
                "managed"
            ],
            "estimate": "~6–8 weeks for v1 with safety controls",
            "requirements": "SSH key management, per-customer service account, file/path allowlist, dry-run mode, audit log.",
            "depends_on": [],
            "seen_in": [
                "SearchPilot (testing)",
                "Pretty Links (single-site WP)"
            ]
        },
        {
            "id": "mcp-code-agent",
            "name": "MCP code agent",
            "band": "hard",
            "lean": "seo",
            "tiers": [
                "outer",
                "inner"
            ],
            "notes": "structured codebase edits via MCP",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Connects to the customer's repo via the Model Context Protocol — opens PRs that fix metadata, add schema, restructure content, normalise file naming. Customer reviews and merges; we never push directly.\n\nHigher-trust than SSH because changes are explicit and reviewable. The right inner-tier fit for engineering-led customers.",
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "MCP integration + per-stack code understanding"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "B2B SaaS, dev tools, ai-native companies"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "MCP is brand-new; nobody productized this for SEO yet"
                }
            },
            "tags": [
                "inner-tier",
                "mcp",
                "differentiator",
                "pr-based"
            ],
            "estimate": "~10–14 weeks for v1",
            "requirements": "MCP server. Per-stack codemod templates (Next.js, Astro, Hugo, etc.). PR generation. Code review LLM.",
            "depends_on": [],
            "seen_in": [
                "nobody — MCP is too new"
            ]
        },
        {
            "id": "wp-plugin",
            "name": "WordPress plugin",
            "band": "hard",
            "lean": "seo",
            "tiers": [
                "outer"
            ],
            "notes": "drop-in for the WP majority",
            "description": "A WordPress plugin that wraps MarkupSchema and other outer-tier fixes. Customer installs the plugin, links their Baseline account, and structured data + metadata are continuously kept in sync.\n\nWordPress is ~40% of the web — this is the highest-leverage distribution channel for outer-tier fixes that don't require code access.",
            "tags": [
                "distribution",
                "self-serve",
                "wordpress"
            ],
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "WP plugin SDK is mature; review process is the main delay"
                },
                "prevalence": {
                    "value": 10,
                    "label": "Industry prevalence",
                    "note": "WP runs 40%+ of the web"
                },
                "stealability": {
                    "value": 9,
                    "label": "Stealability",
                    "note": "Yoast and RankMath have set the playbook for years"
                }
            },
            "owner": "unassigned",
            "estimate": "~3–4 weeks for v1 + WP review cycle",
            "requirements": "WordPress dev account, plugin review submission. Existing MarkupSchema API as the backend. PHP knowledge or ChatGPT-assisted PHP for the plugin shell.",
            "depends_on": [
                "markupschema"
            ],
            "seen_in": [
                "Yoast SEO",
                "RankMath",
                "All in One SEO",
                "Schema Pro"
            ],
            "bundles": [
                "GEO Suite"
            ],
            "parent_id": "markupschema"
        },
        {
            "id": "cf-worker",
            "name": "Cloudflare worker",
            "band": "hard",
            "lean": "seo",
            "tiers": [
                "outer"
            ],
            "notes": "edge-injected fixes",
            "bundles": [
                "MarkupSchema",
                "GEO Suite"
            ],
            "description": "Cloudflare Worker that intercepts every page response and injects schema, metadata, and bot-allowlist responses at the edge. Zero changes to the origin. Customer adds Baseline as a Worker route for their domain and we handle everything from there.\n\nMost flexible outer-tier surface — Workers can do almost anything, including the bot-mirror moonshot eventually.",
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Workers SDK is mature; per-customer routing is the operational work"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Cloudflare-fronted sites only — but that is a lot"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Sloth.cloud and edgeseo.pro published the patterns"
                }
            },
            "tags": [
                "outer-tier",
                "edge",
                "cloudflare"
            ],
            "estimate": "~6–8 weeks for v1",
            "requirements": "Cloudflare Workers account, per-customer Worker deploy pipeline, schema/metadata generation upstream.",
            "depends_on": [],
            "seen_in": [
                "Sloth.cloud",
                "edgeseo.pro"
            ],
            "parent_id": "markupschema"
        },
        {
            "id": "llms-txt-push",
            "name": "llms.txt push",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "publish llms.txt + sitemap to AI crawlers",
            "bundles": [
                "GEO Suite"
            ],
            "description": "When the customer can't host llms.txt themselves (often the case on hosted CMS platforms), we host it on Baseline's CDN and they CNAME llms.baselinelabs.ai → their domain or use the MarkupSchema delivery channel.\n\nOuter-tier delivery for the llms-txt-generate fix.",
            "metrics": {
                "ease": {
                    "value": 8,
                    "label": "Ease of implementation",
                    "note": "CDN routing + CNAME setup"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Sites on closed CMSes that can't edit root paths"
                },
                "stealability": {
                    "value": 6,
                    "label": "Stealability",
                    "note": "CDN routing patterns are well-known"
                }
            },
            "tags": [
                "outer-tier"
            ],
            "estimate": "~3–4 weeks for v1",
            "requirements": "CDN with CNAME support. Per-customer routing config. llms-txt-generate as upstream.",
            "depends_on": [
                "llms-txt-generate"
            ],
            "seen_in": [
                "MarkupSchema delivery infra (own analogue)"
            ]
        },
        {
            "id": "site-llms-txt",
            "name": "Site llms.txt",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer",
                "inner"
            ],
            "notes": "host llms.txt on the site directly (vs. hosted by Baseline)",
            "description": "When the customer has enough access to host llms.txt at their own root path (rather than relying on Baseline's CDN delivery), this fix pushes the generated file directly. WordPress plugin / SSH / MCP agent variants all deliver to the same end-state.",
            "depends_on": [
                "llms-txt-generate"
            ],
            "seen_in": [
                "Rank Math 2026",
                "Website LLMs.txt WP plugin",
                "Drupal recipe",
                "Shopify (via theme edit)"
            ],
            "bundles": [
                "GEO Suite"
            ],
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Per-CMS variants (WP plugin, SSH, MCP) — once one ships the rest are templating"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Sites where customer can host root paths"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Rank Math 2026, Website LLMs.txt WP plugin"
                }
            },
            "tags": [
                "inner-tier"
            ],
            "estimate": "~2 weeks per CMS variant",
            "requirements": "Per-CMS write access (plugin / SSH / MCP). llms-txt-generate as the upstream content source.",
            "owner": "unassigned"
        },
        {
            "id": "ai-first-arch",
            "name": "AI-first arch",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "custom"
            ],
            "notes": "site shaped for AI crawlers from day one",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Custom-tier rebuild with AI consumption as the primary design constraint. Server-rendered HTML at every URL, complete schema graph, llms-full.txt as a first-class surface, semantic URL structure, content modelled around prompts the brand wants to win.\n\nWhat 'a site built for AI' actually looks like in practice. Until customers ask for it, this is more of a reference architecture we publish than a product line.",
            "metrics": {
                "ease": {
                    "value": 3,
                    "label": "Ease of implementation",
                    "note": "Real architecture work; only sells with custom-tier"
                },
                "prevalence": {
                    "value": 4,
                    "label": "Industry prevalence",
                    "note": "Edge case today, default in 5 years"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "Reference architectures emerging; no productised version"
                }
            },
            "tags": [
                "custom-tier",
                "reference-architecture",
                "long-bet"
            ],
            "estimate": "Project-based; sold with site-build",
            "requirements": "Site-build engagement. Entity graph as substrate. GPU pool for content generation if needed.",
            "depends_on": [
                "entity-graph-build"
            ],
            "seen_in": [
                "Mintlify (docs only)",
                "Vercel AI templates"
            ]
        },
        {
            "id": "citation-build",
            "name": "Citation building",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "state": "gap",
            "notes": "PR + source-targeting campaigns",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Active campaign to earn AI-relevant citations — guest posts, third-party listings, expert quotes, podcast appearances, Wikipedia/Wikidata edit campaigns. Targets the source surfaces the AI citation source map identifies as high-leverage.\n\nManaged service for now; the analytics layer is the productizable bit.",
            "metrics": {
                "ease": {
                    "value": 4,
                    "label": "Ease of implementation",
                    "note": "Mostly people work; the targeting layer is the productisable bit"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Brands with budget for PR, content, partnerships"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Every digital PR firm does this"
                }
            },
            "tags": [
                "outer-tier",
                "managed-service"
            ],
            "estimate": "Per-engagement; offered as add-on",
            "requirements": "Outreach infrastructure (CRM, mail). AI citation source map for targeting. PR or content team.",
            "depends_on": [
                "ai-citation-source-map"
            ],
            "seen_in": [
                "Muck Rack",
                "Help A B2B Writer",
                "Featured",
                "Digital PR agencies"
            ]
        },
        {
            "id": "review-ops",
            "name": "Review / listing ops",
            "band": "hard",
            "lean": "both",
            "tiers": [
                "outer"
            ],
            "state": "gap",
            "notes": "response workflows, listing sync",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Workflow for managing the review surface — scheduled prompts to happy customers (post-purchase, post-resolution), templated responses with brand-tone tuning, listing-sync agent for keeping the same NAP across directories, sentiment alerting for crisis signals.\n\nPaired with the review landscape audit.",
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Per-platform integrations are the work"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Local + B2B SaaS particularly"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Birdeye, Reputation.com, Podium all do this"
                }
            },
            "tags": [
                "outer-tier",
                "workflow",
                "cross-platform"
            ],
            "estimate": "~10–12 weeks for v1 covering top 5 platforms",
            "requirements": "Review-landscape audit. Per-platform writeable APIs (GBP, Trustpilot, etc.). Tone-tuning prompts.",
            "depends_on": [
                "review-landscape"
            ],
            "seen_in": [
                "Birdeye",
                "Reputation.com",
                "Podium",
                "GatherUp"
            ]
        },
        {
            "id": "pr-reviews",
            "name": "PR & reviews",
            "band": "soft",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "earned mentions in trusted sources",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Earned mentions in trusted third-party sources. Identifies the publications, blogs, podcasts, and review sites that the AI citation source map shows are influencing the brand's visibility. Generates pitch drafts and tracks placements.\n\nGuard-railed against spam — Muck Rack-style volume warnings.",
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Pitch generation is LLM; the targeting layer is the work"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Almost every brand wants more earned mentions"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Plenty of PR tools to learn from"
                }
            },
            "tags": [
                "outer-tier",
                "earned-media"
            ],
            "estimate": "~6–8 weeks for v1",
            "requirements": "AI citation source map. LLM for pitch generation. Per-publication outreach hygiene rules.",
            "depends_on": [
                "ai-citation-source-map"
            ],
            "seen_in": [
                "Muck Rack",
                "Help A Reporter Out (HARO)",
                "Featured"
            ]
        },
        {
            "id": "reply-mgmt",
            "name": "Reply mgmt",
            "band": "soft",
            "lean": "geo",
            "tiers": [
                "outer",
                "inner"
            ],
            "notes": "respond to reviews and Q&A",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Inner-tier reply workflow — direct in-platform responses to reviews, AMAs on Reddit, comments on YouTube. Tone-tuned to brand voice. Volume-limited so customers don't trip platform anti-spam.\n\nMore intrusive than pr-reviews because it speaks AS the brand; needs explicit per-platform sign-off.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Per-platform writeable APIs; tone tuning per brand"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "B2C and local more than B2B"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Every social management tool has reply features"
                }
            },
            "tags": [
                "inner-tier",
                "platform-writeable"
            ],
            "estimate": "~8–10 weeks for v1",
            "requirements": "Per-platform credentials with reply scope. Brand voice prompt. Volume guardrails.",
            "depends_on": [
                "review-landscape"
            ],
            "seen_in": [
                "Sprout Social",
                "Hootsuite",
                "Birdeye Engage"
            ]
        },
        {
            "id": "owned-channel",
            "name": "Owned channel",
            "band": "soft",
            "lean": "geo",
            "tiers": [
                "outer",
                "inner",
                "custom"
            ],
            "notes": "build a channel we control",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Builds an owned community for the brand — a forum, a Slack/Discord, a dedicated subreddit, a podcast. Custom-tier because it requires sustained effort and brand commitment. Long-term moat against competitors and against AI sentiment drift (because the brand controls the corpus).",
            "metrics": {
                "ease": {
                    "value": 3,
                    "label": "Ease of implementation",
                    "note": "Mostly people effort, not engineering"
                },
                "prevalence": {
                    "value": 5,
                    "label": "Industry prevalence",
                    "note": "Brands with strong identity and budget"
                },
                "stealability": {
                    "value": 6,
                    "label": "Stealability",
                    "note": "Every community-led brand has done some version"
                }
            },
            "tags": [
                "custom-tier",
                "long-game",
                "people-led"
            ],
            "estimate": "Per-engagement; multi-month ramp",
            "requirements": "Community manager. Platform choice. Content engine. Long-term commitment.",
            "depends_on": [],
            "seen_in": [
                "Notion community",
                "Webflow forums",
                "Cloudflare community"
            ]
        },
        {
            "id": "narrative-ops",
            "name": "Narrative ops",
            "band": "soft",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "consistent positioning across surfaces",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Keeps the brand's positioning consistent across every surface that AI sees — site, social, third-party press, knowledge graph entries. Cross-surface drift is a primary cause of AI mis-describing the brand; this fix actively reconciles.\n\nPaired with narrative-mapping.",
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Cross-surface monitoring + diff alerting + correction workflow"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Brands with multi-surface presence"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "Few productize cross-surface narrative consistency"
                }
            },
            "tags": [
                "outer-tier",
                "differentiator"
            ],
            "estimate": "~8–10 weeks for v1",
            "requirements": "Narrative-mapping audit. Cross-surface scrapers. Drift detection.",
            "depends_on": [
                "narrative-mapping"
            ],
            "seen_in": [
                "Brandwatch (loose analogue)",
                "Sprinklr"
            ]
        },
        {
            "id": "editorial-cal",
            "name": "Editorial calendar",
            "band": "soft",
            "lean": "geo",
            "tiers": [
                "outer",
                "inner"
            ],
            "notes": "scheduled content ops",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Shared editorial calendar that pulls from the audit pipeline — what topics need depth (story-doesnt-stick), what surfaces need fresh content (visibility-search drops), what narratives need reinforcing (narrative-mapping). Plus reminders for evergreen pieces to re-publish.\n\nInner-tier because it integrates with the customer's CMS to schedule posts.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Standard editorial tooling + audit-driven recommendations"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Any brand with a content team"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Airtable, Notion, ContentCal — all done"
                }
            },
            "tags": [
                "inner-tier",
                "workflow"
            ],
            "estimate": "~6–8 weeks for v1",
            "requirements": "CMS integrations (WP, Webflow, Sanity, Contentful). Audit pipeline outputs.",
            "depends_on": [],
            "seen_in": [
                "Airtable",
                "Notion",
                "CoSchedule",
                "StoryChief"
            ]
        },
        {
            "id": "brand-site",
            "name": "Brand site",
            "band": "soft",
            "lean": "geo",
            "tiers": [
                "custom"
            ],
            "notes": "rebuild the brand surface",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Custom-tier rebuild of the brand's primary marketing site, with audit findings baked in from day one and ongoing maintenance under our control. Often the right call when the existing site is so weak that outer-tier fixes can't compensate.",
            "metrics": {
                "ease": {
                    "value": 3,
                    "label": "Ease of implementation",
                    "note": "Real engineering project per customer"
                },
                "prevalence": {
                    "value": 4,
                    "label": "Industry prevalence",
                    "note": "High-value customers only"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Every web agency does this"
                }
            },
            "tags": [
                "custom-tier",
                "high-touch"
            ],
            "estimate": "Project-based",
            "requirements": "Engineering, design, content. Hosting infra. Same as site-build.",
            "depends_on": [],
            "seen_in": [
                "Every digital agency"
            ]
        },
        {
            "id": "ad-copy-gen",
            "name": "Ad copy gen",
            "band": "soft",
            "lean": "seo",
            "tiers": [
                "outer"
            ],
            "notes": "AI-generated ad variants",
            "bundles": [
                "GEO Suite"
            ],
            "description": "AI-generated ad copy variants for Google, Meta, LinkedIn, with structured ABC testing and feedback loops to whichever variants convert. Driven by the audit pipeline so the copy reflects what AI actually responds to about the brand.",
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "LLM generation + ad platform APIs"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Anyone running paid"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Every AI copywriting tool does this"
                }
            },
            "tags": [
                "outer-tier",
                "paid-channels"
            ],
            "estimate": "~6–8 weeks for v1",
            "requirements": "Ad platform API access (Google Ads, Meta, LinkedIn). LLM. Conversion tracking.",
            "depends_on": [],
            "seen_in": [
                "Jasper",
                "Anyword",
                "Copy.ai",
                "Smartly.io"
            ]
        },
        {
            "id": "landing-pages",
            "name": "Landing pages",
            "band": "soft",
            "lean": "seo",
            "tiers": [
                "outer",
                "inner"
            ],
            "notes": "targeted intent landings",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Inner-tier — drops a few targeted landing pages onto the customer's site, optimised for specific intents that the audit pipeline identifies as opportunities. Schema-rich, llms.txt-aware, ready to convert.",
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Templating + per-CMS integration"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Almost everyone needs more landing pages"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Unbounce, Instapage, Webflow"
                }
            },
            "tags": [
                "inner-tier",
                "conversion"
            ],
            "estimate": "~8–10 weeks for v1",
            "requirements": "Per-CMS deploy paths. Landing-page template library. A/B framework.",
            "depends_on": [],
            "seen_in": [
                "Unbounce",
                "Instapage",
                "Webflow",
                "Carrd"
            ]
        },
        {
            "id": "funnel-design",
            "name": "Funnel design",
            "band": "soft",
            "lean": "seo",
            "tiers": [
                "custom"
            ],
            "notes": "end-to-end conversion funnel",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Custom-tier conversion funnel work — landing pages, email sequences, retargeting flows, attribution. Sold as a project; not a SaaS product line. Typically the next step for customers we already build sites for.",
            "metrics": {
                "ease": {
                    "value": 4,
                    "label": "Ease of implementation",
                    "note": "Conversion analysis + creative + execution"
                },
                "prevalence": {
                    "value": 5,
                    "label": "Industry prevalence",
                    "note": "High-value customers"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Every CRO agency does this"
                }
            },
            "tags": [
                "custom-tier",
                "high-touch"
            ],
            "estimate": "Project-based",
            "requirements": "CRO expertise, design, dev capacity, analytics setup.",
            "depends_on": [],
            "seen_in": [
                "ConversionXL",
                "Speero",
                "Conversion Rate Experts"
            ]
        },
        {
            "id": "keyword-strat",
            "name": "Keyword strategy",
            "band": "soft",
            "lean": "seo",
            "tiers": [
                "outer"
            ],
            "notes": "what to rank for and why",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Strategic keyword set covering both classic SERP intent and AI prompt patterns. The substrate for every audit, scoring, and content fix downstream — wrong keyword set, wrong everything.\n\nProductised version of what every SEO consultant delivers in week one.",
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Keyword research APIs + AI prompt patterns"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Foundational for any SEO/GEO engagement"
                },
                "stealability": {
                    "value": 9,
                    "label": "Stealability",
                    "note": "Ahrefs, Semrush, Moz — fully commoditised"
                }
            },
            "tags": [
                "outer-tier",
                "foundational"
            ],
            "estimate": "~3–4 weeks for v1",
            "requirements": "DataForSEO or Ahrefs API. Per-industry templates. AI prompt-pattern library.",
            "depends_on": [],
            "seen_in": [
                "Ahrefs",
                "Semrush",
                "Moz",
                "KeywordsEverywhere"
            ]
        },
        {
            "id": "content-plan",
            "name": "Content plan",
            "band": "soft",
            "lean": "seo",
            "tiers": [
                "outer",
                "inner"
            ],
            "notes": "content map aligned to keywords",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Inner-tier — turns the keyword strategy and audit findings into a concrete content plan. What pages to write, in what order, with what structure, against what prompts. Plugs into the editorial calendar.",
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Synthesis layer over existing audit + keyword outputs"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Standard for content-led brands"
                },
                "stealability": {
                    "value": 7,
                    "label": "Stealability",
                    "note": "Every content tool does this"
                }
            },
            "tags": [
                "inner-tier",
                "planning"
            ],
            "estimate": "~4–6 weeks for v1",
            "requirements": "Keyword strategy. Audit pipeline. Output spec aligned with editorial calendar format.",
            "depends_on": [
                "keyword-strat"
            ],
            "seen_in": [
                "Frase",
                "MarketMuse",
                "Clearscope",
                "Surfer"
            ]
        },
        {
            "id": "topic-authority",
            "name": "Topic authority",
            "band": "soft",
            "lean": "seo",
            "tiers": [
                "outer",
                "inner",
                "custom"
            ],
            "notes": "long-form depth across the cluster",
            "bundles": [
                "GEO Suite"
            ],
            "description": "Custom-tier long-form content depth — multi-page topic clusters, original research, signature pieces designed to be cited. The fix that addresses 'story doesn't stick' at root: be the place AI quotes when answering category-level questions.",
            "metrics": {
                "ease": {
                    "value": 4,
                    "label": "Ease of implementation",
                    "note": "Real content production; not standardisable"
                },
                "prevalence": {
                    "value": 6,
                    "label": "Industry prevalence",
                    "note": "Brands with content ambition"
                },
                "stealability": {
                    "value": 6,
                    "label": "Stealability",
                    "note": "Every content marketing agency does some version"
                }
            },
            "tags": [
                "custom-tier",
                "long-game"
            ],
            "estimate": "Per-engagement; multi-month",
            "requirements": "Content team. Subject expertise. Original research budget.",
            "depends_on": [
                "content-plan"
            ],
            "seen_in": [
                "Foundation Inc.",
                "Animalz",
                "Grow and Convert"
            ]
        },
        {
            "id": "ai-bot-allowlist-fix",
            "name": "AI bot allowlist fix",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "one-click Cloudflare worker / robots.txt fix for blocked AI bots",
            "description": "Pairs with the AI bot access audit. When the audit detects a block, this fix generates a ready-to-deploy Cloudflare Worker (or Vercel edge function, or robots.txt patch) that allowlists the major retrieval bots while leaving training-bot policy under the customer's control.\n\nDistinguishes retrieval bots (must allow for visibility) from training bots (customer's choice — content licensing question). Outputs Sloth-style worker code or a plain robots.txt diff.\n\nThe defining outer-tier GEO fix. Should be the first fix prospects see in the funnel.",
            "tags": [
                "self-serve",
                "outer-tier",
                "lead-gen",
                "high-leverage"
            ],
            "bundles": [
                "MarkupSchema",
                "GEO Suite"
            ],
            "metrics": {
                "ease": {
                    "value": 8,
                    "label": "Ease of implementation",
                    "note": "Code generation only; deployment is the customer's lift"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "~27% of B2B sites need this"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Sloth.cloud and edgeseo.pro have published the patterns"
                }
            },
            "owner": "unassigned",
            "estimate": "~3–4 weeks alongside the AI bot access audit",
            "requirements": "AI bot access audit must run first. Cloudflare Worker template. Vercel edge function template. robots.txt patch generator.",
            "depends_on": [
                "ai-bot-access-audit"
            ],
            "seen_in": [
                "Sloth.cloud",
                "edgeseo.pro",
                "Cloudflare AI Crawl Control"
            ]
        },
        {
            "id": "ai-bot-mirror",
            "name": "AI bot mirror (edge)",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "state": "gap",
            "notes": "static, schema-rich shadow site served only to AI bots",
            "description": "MOONSHOT. A Cloudflare Worker (or Fastly Compute, Vercel edge function) that detects retrieval-bot user-agents (verified by IP) and serves a parallel, statically-rendered, schema-rich, llms-full.txt-anchored Markdown view of the customer's site. Human visitors and Googlebot see the original site untouched.\n\nThe mirror is regenerated continuously by the GPU pool from the customer's primary content. For JS-heavy sites (Shopify, Webflow, dynamic Next.js apps) this is the difference between zero AI visibility and full AI visibility — and the customer never has to touch their main codebase.\n\nThis is the logical end-state of the outer tier: full AI optimization with literally no site access. Nothing in the market does this today.",
            "tags": [
                "moonshot",
                "differentiator",
                "GPU-leverageable",
                "outer-tier"
            ],
            "bundles": [
                "GEO Suite"
            ],
            "metrics": {
                "ease": {
                    "value": 2,
                    "label": "Ease of implementation",
                    "note": "Hard — content extraction + continuous regen + edge routing + bot verification"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Massive value for any JS-heavy site"
                },
                "stealability": {
                    "value": 2,
                    "label": "Stealability",
                    "note": "Nobody is doing this — no reference implementation"
                }
            },
            "owner": "unassigned",
            "estimate": "12–18 months; flagship moonshot",
            "requirements": "Scraper. GPU pool. Edge worker deploy pipeline. Content-to-Markdown transformer. Schema graph. Bot identity verifier (IP + reverse DNS). May need to provide R2-hosted mirrors that the worker proxies to.",
            "depends_on": [
                "scraper-infra",
                "gpu-pool",
                "entity-graph"
            ],
            "seen_in": [
                "Nobody — would be category-defining"
            ]
        },
        {
            "id": "entity-graph-build",
            "name": "Entity graph build",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "continuous Organization + Person + Product + sameAs graph",
            "description": "Generalises MarkupSchema from per-page JSON-LD into a continuously-maintained entity graph for the brand. Links Organization → Person (founders, leadership) → Product → FAQ → Article → sameAs (Wikidata Q-ID, LinkedIn, Crunchbase, Companies House, SRA, etc.).\n\nVersioned, so customers can roll back. Served from the GPU pool, deployable as JSON-LD via the existing MarkupSchema delivery channel — no new infra needed on the customer side.\n\nBecomes the identity layer of the super audit and the substrate for the Wikidata audit, the AI bot mirror, and citation attribution.",
            "tags": [
                "differentiator",
                "GPU-leverageable",
                "outer-tier",
                "platform-foundation"
            ],
            "bundles": [
                "MarkupSchema",
                "GEO Suite"
            ],
            "metrics": {
                "ease": {
                    "value": 5,
                    "label": "Ease of implementation",
                    "note": "Extends MarkupSchema; harder than per-page JSON-LD because of cross-page consistency"
                },
                "prevalence": {
                    "value": 8,
                    "label": "Industry prevalence",
                    "note": "Every brand benefits"
                },
                "stealability": {
                    "value": 5,
                    "label": "Stealability",
                    "note": "Schema App and WordLift have ontology products; this would be more automatic"
                }
            },
            "owner": "unassigned",
            "estimate": "~12–16 weeks for v1",
            "requirements": "MarkupSchema infra. Scraper. GPU pool. Wikidata API. Versioned storage (R2 + index).",
            "depends_on": [
                "markupschema",
                "scraper-infra",
                "gpu-pool"
            ],
            "seen_in": [
                "Schema App Entity Hub",
                "WordLift",
                "InLinks"
            ]
        },
        {
            "id": "llms-txt-generate",
            "name": "llms.txt generate",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "GPU-generated llms.txt + llms-full.txt, regenerated continuously",
            "description": "Generates and continuously refreshes llms.txt and llms-full.txt for the customer's site, driven by the entity graph rather than a one-time crawl. Hosted on Baseline's CDN with a CNAME or served back via the MarkupSchema delivery channel for sites without enough access to host it themselves.\n\nDifferentiated from the bunch of one-shot generators (Firecrawl, dotenv's llmstxt CLI, WP plugins) by being entity-aware and continuously-regenerated. Pairs with the llms.txt audit and the entity graph build.",
            "tags": [
                "self-serve",
                "outer-tier",
                "GPU-leverageable"
            ],
            "bundles": [
                "GEO Suite"
            ],
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Generation is straightforward; continuous regen is the lift"
                },
                "prevalence": {
                    "value": 9,
                    "label": "Industry prevalence",
                    "note": "Almost no sites have it"
                },
                "stealability": {
                    "value": 8,
                    "label": "Stealability",
                    "note": "Many one-shot tools to learn from"
                }
            },
            "owner": "unassigned",
            "estimate": "~6–8 weeks for v1",
            "requirements": "Scraper. GPU pool. R2 hosting. CDN delivery. Optional CNAME setup for customers who want to serve from their own domain.",
            "depends_on": [
                "scraper-infra",
                "gpu-pool",
                "entity-graph-build"
            ],
            "seen_in": [
                "Mintlify (auto, docs only)",
                "Firecrawl",
                "dotenv llmstxt CLI",
                "Website LLMs.txt WP plugin",
                "GitBook",
                "Fern"
            ]
        },
        {
            "id": "wikidata-edit-brief",
            "name": "Wikidata edit brief",
            "band": "hard",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "produce a Wikidata/Wikipedia edit brief, not auto-edit",
            "description": "Pairs with the Wikidata audit. Produces a structured edit brief identifying claims to add, properties to update, and sameAs links to establish — formatted for a contracted Wikipedia editor or for the customer's own use with proper conflict-of-interest disclosure.\n\nNever auto-edits. Paid editing violates Wikimedia's terms of use; the product line is assistive only. This is the compliance-aware way to productize a service category that's been pure consultancy until now.",
            "tags": [
                "differentiator",
                "compliance-aware",
                "outer-tier"
            ],
            "bundles": [
                "GEO Suite"
            ],
            "metrics": {
                "ease": {
                    "value": 7,
                    "label": "Ease of implementation",
                    "note": "Mostly templating once the audit runs"
                },
                "prevalence": {
                    "value": 5,
                    "label": "Industry prevalence",
                    "note": "Mid-market brands; less relevant for tiny brands"
                },
                "stealability": {
                    "value": 4,
                    "label": "Stealability",
                    "note": "Services exist; productized version doesn't"
                }
            },
            "owner": "unassigned",
            "estimate": "~3–4 weeks once Wikidata audit ships",
            "requirements": "Wikidata audit must run first. Brief template. Conflict-of-interest disclosure templates.",
            "depends_on": [
                "wikidata-audit"
            ],
            "seen_in": [
                "Kalicube",
                "Reputation X",
                "Wikibusines (services)"
            ]
        },
        {
            "id": "reddit-engagement",
            "name": "Reddit / YouTube engagement plan",
            "band": "soft",
            "lean": "geo",
            "tiers": [
                "outer"
            ],
            "notes": "where to engage in third-party communities AI is citing",
            "description": "Pairs with the AI citation source map. For each high-citation third-party surface (subreddit, YouTube channel, niche forum), produces an engagement recommendation: which threads to join, what content to publish, what voice to use, what NOT to do.\n\nGuard-railed — Muck Rack's per-pitch volume warnings are the right model. Stops customers from spamming and getting banned, which would actively harm AI visibility.",
            "tags": [
                "differentiator",
                "third-party-aware",
                "outer-tier"
            ],
            "bundles": [
                "GEO Suite"
            ],
            "metrics": {
                "ease": {
                    "value": 6,
                    "label": "Ease of implementation",
                    "note": "Engagement recommendations are mostly LLM-generated; guardrails are the work"
                },
                "prevalence": {
                    "value": 7,
                    "label": "Industry prevalence",
                    "note": "Almost every brand could benefit"
                },
                "stealability": {
                    "value": 5,
                    "label": "Stealability",
                    "note": "Brand24/Mention recommend monitoring; engagement-targeting is rarer"
                }
            },
            "owner": "unassigned",
            "estimate": "~6–8 weeks for v1",
            "requirements": "AI citation source map must run first. LLM for recommendation generation. Guardrail rules per platform.",
            "depends_on": [
                "ai-citation-source-map"
            ],
            "seen_in": [
                "Brand24 alerts",
                "Muck Rack pitch tools",
                "Sprinklr"
            ]
        }
    ],
    "_doc_mappings": "Edges in the canonical flow problem→audit→fix→outcome. strength = primary | secondary. Page draws primary thicker. Order doesn't matter; the auto-layout reorders to minimise crossings.",
    "mappings": [
        {
            "from": "site-not-machine",
            "to": "schema-audit",
            "strength": "primary"
        },
        {
            "from": "site-not-machine",
            "to": "metadata-health",
            "strength": "primary"
        },
        {
            "from": "site-not-machine",
            "to": "tech-seo-crawl",
            "strength": "primary"
        },
        {
            "from": "ai-cant-find",
            "to": "ai-citation",
            "strength": "primary"
        },
        {
            "from": "ai-cant-find",
            "to": "entity-graph",
            "strength": "primary"
        },
        {
            "from": "reputation-drift",
            "to": "review-landscape",
            "strength": "primary"
        },
        {
            "from": "ai-wrong-brand",
            "to": "brand-scan",
            "strength": "primary"
        },
        {
            "from": "ai-wrong-brand",
            "to": "ai-summary",
            "strength": "primary"
        },
        {
            "from": "competitors-air",
            "to": "share-of-voice",
            "strength": "primary"
        },
        {
            "from": "competitors-air",
            "to": "competitor-recommend",
            "strength": "primary"
        },
        {
            "from": "story-doesnt-stick",
            "to": "narrative-mapping",
            "strength": "primary"
        },
        {
            "from": "story-doesnt-stick",
            "to": "semantic-presence",
            "strength": "primary"
        },
        {
            "from": "schema-audit",
            "to": "markupschema",
            "strength": "primary"
        },
        {
            "from": "schema-audit",
            "to": "wp-plugin",
            "strength": "secondary"
        },
        {
            "from": "schema-audit",
            "to": "cf-worker",
            "strength": "secondary"
        },
        {
            "from": "metadata-health",
            "to": "ssh-agent",
            "strength": "primary"
        },
        {
            "from": "metadata-health",
            "to": "mcp-code-agent",
            "strength": "secondary"
        },
        {
            "from": "tech-seo-crawl",
            "to": "mcp-code-agent",
            "strength": "primary"
        },
        {
            "from": "ai-citation",
            "to": "llms-txt-push",
            "strength": "primary"
        },
        {
            "from": "ai-citation",
            "to": "site-llms-txt",
            "strength": "secondary"
        },
        {
            "from": "ai-citation",
            "to": "ai-first-arch",
            "strength": "secondary"
        },
        {
            "from": "review-landscape",
            "to": "review-ops",
            "strength": "primary"
        },
        {
            "from": "entity-graph",
            "to": "citation-build",
            "strength": "secondary"
        },
        {
            "from": "entity-graph",
            "to": "ai-first-arch",
            "strength": "secondary"
        },
        {
            "from": "brand-scan",
            "to": "pr-reviews",
            "strength": "primary"
        },
        {
            "from": "brand-scan",
            "to": "reply-mgmt",
            "strength": "secondary"
        },
        {
            "from": "ai-summary",
            "to": "narrative-ops",
            "strength": "primary"
        },
        {
            "from": "ai-summary",
            "to": "editorial-cal",
            "strength": "secondary"
        },
        {
            "from": "share-of-voice",
            "to": "ad-copy-gen",
            "strength": "primary"
        },
        {
            "from": "share-of-voice",
            "to": "landing-pages",
            "strength": "secondary"
        },
        {
            "from": "share-of-voice",
            "to": "funnel-design",
            "strength": "secondary"
        },
        {
            "from": "narrative-mapping",
            "to": "keyword-strat",
            "strength": "primary"
        },
        {
            "from": "narrative-mapping",
            "to": "content-plan",
            "strength": "secondary"
        },
        {
            "from": "narrative-mapping",
            "to": "topic-authority",
            "strength": "secondary"
        },
        {
            "from": "competitor-recommend",
            "to": "narrative-ops",
            "strength": "primary"
        },
        {
            "from": "competitor-recommend",
            "to": "owned-channel",
            "strength": "secondary"
        },
        {
            "from": "semantic-presence",
            "to": "topic-authority",
            "strength": "primary"
        },
        {
            "from": "semantic-presence",
            "to": "brand-site",
            "strength": "secondary"
        },
        {
            "from": "markupschema",
            "to": "rank",
            "strength": "primary"
        },
        {
            "from": "markupschema",
            "to": "traffic",
            "strength": "secondary"
        },
        {
            "from": "ssh-agent",
            "to": "rank",
            "strength": "primary"
        },
        {
            "from": "mcp-code-agent",
            "to": "rank",
            "strength": "primary"
        },
        {
            "from": "wp-plugin",
            "to": "rank",
            "strength": "primary"
        },
        {
            "from": "cf-worker",
            "to": "traffic",
            "strength": "primary"
        },
        {
            "from": "llms-txt-push",
            "to": "traffic",
            "strength": "primary"
        },
        {
            "from": "site-llms-txt",
            "to": "traffic",
            "strength": "primary"
        },
        {
            "from": "ai-first-arch",
            "to": "traffic",
            "strength": "primary"
        },
        {
            "from": "citation-build",
            "to": "rank",
            "strength": "primary"
        },
        {
            "from": "review-ops",
            "to": "rank",
            "strength": "primary"
        },
        {
            "from": "pr-reviews",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "reply-mgmt",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "owned-channel",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "narrative-ops",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "editorial-cal",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "brand-site",
            "to": "reputation",
            "strength": "secondary"
        },
        {
            "from": "brand-site",
            "to": "conversion",
            "strength": "primary"
        },
        {
            "from": "ad-copy-gen",
            "to": "conversion",
            "strength": "primary"
        },
        {
            "from": "landing-pages",
            "to": "conversion",
            "strength": "primary"
        },
        {
            "from": "funnel-design",
            "to": "conversion",
            "strength": "primary"
        },
        {
            "from": "keyword-strat",
            "to": "conversion",
            "strength": "secondary"
        },
        {
            "from": "content-plan",
            "to": "reputation",
            "strength": "secondary"
        },
        {
            "from": "topic-authority",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "ai-bots-blocked",
            "to": "ai-bot-access-audit",
            "strength": "primary"
        },
        {
            "from": "ai-bots-blocked",
            "to": "agent-analytics",
            "strength": "secondary"
        },
        {
            "from": "no-machine-corpus",
            "to": "llms-txt-audit",
            "strength": "primary"
        },
        {
            "from": "no-machine-corpus",
            "to": "schema-audit",
            "strength": "secondary"
        },
        {
            "from": "ai-cant-find",
            "to": "visibility-search",
            "strength": "primary"
        },
        {
            "from": "ai-cant-find",
            "to": "ai-bot-access-audit",
            "strength": "secondary"
        },
        {
            "from": "ai-cant-find",
            "to": "agent-analytics",
            "strength": "primary"
        },
        {
            "from": "ai-cant-find",
            "to": "llms-txt-audit",
            "strength": "secondary"
        },
        {
            "from": "ai-cant-find",
            "to": "wikidata-audit",
            "strength": "primary"
        },
        {
            "from": "ai-wrong-brand",
            "to": "citation-attribution",
            "strength": "primary"
        },
        {
            "from": "ai-wrong-brand",
            "to": "ai-citation-source-map",
            "strength": "primary"
        },
        {
            "from": "competitors-air",
            "to": "visibility-search",
            "strength": "secondary"
        },
        {
            "from": "competitors-air",
            "to": "ai-citation-source-map",
            "strength": "secondary"
        },
        {
            "from": "story-doesnt-stick",
            "to": "geo-content-score",
            "strength": "primary"
        },
        {
            "from": "reputation-drift",
            "to": "ai-citation-source-map",
            "strength": "secondary"
        },
        {
            "from": "ai-bot-access-audit",
            "to": "ai-bot-allowlist-fix",
            "strength": "primary"
        },
        {
            "from": "agent-analytics",
            "to": "ai-bot-allowlist-fix",
            "strength": "primary"
        },
        {
            "from": "agent-analytics",
            "to": "ai-bot-mirror",
            "strength": "secondary"
        },
        {
            "from": "citation-attribution",
            "to": "reddit-engagement",
            "strength": "secondary"
        },
        {
            "from": "llms-txt-audit",
            "to": "llms-txt-generate",
            "strength": "primary"
        },
        {
            "from": "llms-txt-audit",
            "to": "site-llms-txt",
            "strength": "secondary"
        },
        {
            "from": "ai-citation-source-map",
            "to": "reddit-engagement",
            "strength": "primary"
        },
        {
            "from": "ai-citation-source-map",
            "to": "narrative-ops",
            "strength": "secondary"
        },
        {
            "from": "wikidata-audit",
            "to": "wikidata-edit-brief",
            "strength": "primary"
        },
        {
            "from": "wikidata-audit",
            "to": "entity-graph-build",
            "strength": "secondary"
        },
        {
            "from": "entity-graph",
            "to": "entity-graph-build",
            "strength": "primary"
        },
        {
            "from": "schema-audit",
            "to": "entity-graph-build",
            "strength": "secondary"
        },
        {
            "from": "ai-bot-allowlist-fix",
            "to": "traffic",
            "strength": "primary"
        },
        {
            "from": "ai-bot-mirror",
            "to": "traffic",
            "strength": "primary"
        },
        {
            "from": "ai-bot-mirror",
            "to": "rank",
            "strength": "secondary"
        },
        {
            "from": "entity-graph-build",
            "to": "rank",
            "strength": "primary"
        },
        {
            "from": "entity-graph-build",
            "to": "reputation",
            "strength": "secondary"
        },
        {
            "from": "llms-txt-generate",
            "to": "traffic",
            "strength": "primary"
        },
        {
            "from": "wikidata-edit-brief",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "wikidata-edit-brief",
            "to": "rank",
            "strength": "secondary"
        },
        {
            "from": "reddit-engagement",
            "to": "reputation",
            "strength": "primary"
        },
        {
            "from": "reddit-engagement",
            "to": "traffic",
            "strength": "secondary"
        },
        {
            "from": "visibility-search",
            "to": "keyword-strat",
            "strength": "primary"
        },
        {
            "from": "visibility-search",
            "to": "content-plan",
            "strength": "secondary"
        },
        {
            "from": "visibility-search",
            "to": "ad-copy-gen",
            "strength": "secondary"
        },
        {
            "from": "geo-content-score",
            "to": "content-plan",
            "strength": "primary"
        },
        {
            "from": "geo-content-score",
            "to": "topic-authority",
            "strength": "secondary"
        },
        {
            "from": "geo-content-score",
            "to": "keyword-strat",
            "strength": "secondary"
        },
        {
            "from": "citation-attribution",
            "to": "content-plan",
            "strength": "primary"
        },
        {
            "from": "citation-attribution",
            "to": "topic-authority",
            "strength": "secondary"
        },
        {
            "from": "review-landscape",
            "to": "reply-mgmt",
            "strength": "secondary"
        },
        {
            "from": "tech-seo-crawl",
            "to": "ai-first-arch",
            "strength": "secondary"
        },
        {
            "from": "site-not-machine",
            "to": "site-pulse",
            "strength": "primary"
        },
        {
            "from": "no-machine-corpus",
            "to": "site-pulse",
            "strength": "primary"
        },
        {
            "from": "ai-bots-blocked",
            "to": "site-pulse",
            "strength": "secondary"
        },
        {
            "from": "site-pulse",
            "to": "markupschema",
            "strength": "secondary"
        },
        {
            "from": "site-pulse",
            "to": "llms-txt-generate",
            "strength": "secondary"
        }
    ],
    "_doc_parent_id": "parent_id (optional, on fixes only): when set, this fix is a child / delivery variant of the named parent fix. Children render indented under their parent and auto-ordering keeps them grouped. Parents can have their own mappings; children have their own too. Hover on a parent traces all children's chains as well."
}
