{
  "scenario": "ria-compliance",
  "scenario_name": "RIA Reg S-P Compliance Review",
  "scenario_description": "40-document privacy-disclosure audit pack",
  "date": "2026-05-06",
  "stack_id": "langchain_gpt4o",
  "stack_name": "LangChain + GPT-4o",
  "is_simulated": true,
  "simulation_note": "Competitor framework orchestration is not modeled in this run. The identical underlying model (gpt-4o-2024-11-20) is used with each stack's typical system prompt to establish baseline model-level performance. Actual competitor performance varies based on framework overhead, multi-agent coordination latency, and framework-specific prompt engineering.",
  "parameters": {
    "model": "gpt-4o-2024-11-20",
    "temperature": 0,
    "max_tokens": 1024,
    "n_runs": 10,
    "n_items": 40,
    "eval_rubric": "scripts/benchmarks/eval/rubrics/ria-compliance.js",
    "hardware": "single Node.js process, sequential runs"
  },
  "metrics": {
    "accuracy_pct": {
      "mean": 73.14897816198042,
      "stdev": 1.6539517805735917
    },
    "hallucination_rate_pct": {
      "mean": 10,
      "stdev": 0
    },
    "citation_grounding_pct": {
      "mean": 57.056202966344735,
      "stdev": 1.819346958630951
    },
    "mean_latency_ms": {
      "mean": 3087.1880273788183,
      "stdev": 220.1611679432561
    },
    "mean_token_cost_usd": {
      "mean": 0.0023745721150080154,
      "stdev": 0.00004774220233028965
    },
    "total_wall_clock_s": 1234.8752109515274,
    "n_runs": 10,
    "n_items": 40
  },
  "runs_summary": [
    {
      "run_index": 0,
      "accuracy_pct": 71.63943141170462,
      "hallucination_count": 4,
      "mean_latency_ms": 2644.28917890392,
      "total_token_cost_usd": 0.09183705817158604
    },
    {
      "run_index": 1,
      "accuracy_pct": 72.85155347575909,
      "hallucination_count": 4,
      "mean_latency_ms": 2983.4583281716596,
      "total_token_cost_usd": 0.09376062742897849
    },
    {
      "run_index": 2,
      "accuracy_pct": 72.49995396605044,
      "hallucination_count": 4,
      "mean_latency_ms": 3011.276765460869,
      "total_token_cost_usd": 0.09465806332540191
    },
    {
      "run_index": 3,
      "accuracy_pct": 71.92419272867899,
      "hallucination_count": 4,
      "mean_latency_ms": 3330.2263138671856,
      "total_token_cost_usd": 0.09696910053382558
    },
    {
      "run_index": 4,
      "accuracy_pct": 71.88214120635888,
      "hallucination_count": 4,
      "mean_latency_ms": 3020.796841073453,
      "total_token_cost_usd": 0.09677621995094328
    },
    {
      "run_index": 5,
      "accuracy_pct": 76.26007878122604,
      "hallucination_count": 4,
      "mean_latency_ms": 3537.718282942384,
      "total_token_cost_usd": 0.09477552119802071
    },
    {
      "run_index": 6,
      "accuracy_pct": 71.73886526289095,
      "hallucination_count": 4,
      "mean_latency_ms": 3036.7474742516297,
      "total_token_cost_usd": 0.09200055769109097
    },
    {
      "run_index": 7,
      "accuracy_pct": 76.27569423899813,
      "hallucination_count": 4,
      "mean_latency_ms": 3069.569845178109,
      "total_token_cost_usd": 0.09493763435684562
    },
    {
      "run_index": 8,
      "accuracy_pct": 73.28395482023885,
      "hallucination_count": 4,
      "mean_latency_ms": 3100.0710400936437,
      "total_token_cost_usd": 0.09669039682291203
    },
    {
      "run_index": 9,
      "accuracy_pct": 73.13391572789835,
      "hallucination_count": 4,
      "mean_latency_ms": 3137.7262038453277,
      "total_token_cost_usd": 0.09742366652360153
    }
  ],
  "reproduce": {
    "command": "node scripts/benchmarks/run-all.js --scenario=ria-compliance --stack=langchain_gpt4o",
    "requirements": "OPENAI_API_KEY env var required",
    "repo": "https://github.com/Polsia-Inc/octomind"
  },
  "signed_at": "2026-05-06T16:02:51.853Z",
  "signer_version": "1.0",
  "key_hint": "embedded",
  "signature": "hmac-sha256:4b36db2976169f171d091e277ea7bfb71b89b673b982e325d8319e1cd6211bdd"
}