{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/494f2bf84f0e5dd2/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "HELM",
    "predicate": "introduced_in_paper",
    "object": "Holistic Evaluation of Language Models (Liang et al., Stanford CRFM 2022-11-16)",
    "confidence": 1,
    "sources": [
      {
        "url": "https://arxiv.org/abs/2211.09110",
        "title": "Holistic Evaluation of Language Models",
        "publisher": "arXiv",
        "publishedDate": "2022-11-16",
        "accessedDate": "2026-05-16",
        "type": "preprint",
        "excerpt": "HELM evaluates 30 prominent language models on 42 scenarios, measuring 7 metrics (accuracy, calibration, robustness, fairness, bias, toxicity, efficiency) across each scenario."
      },
      {
        "url": "https://crfm.stanford.edu/helm/latest/",
        "title": "HELM — Stanford CRFM Leaderboard",
        "publisher": "Stanford CRFM",
        "publishedDate": "2022-11-16",
        "accessedDate": "2026-05-16",
        "type": "benchmark"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "helm",
      "stanford-crfm",
      "benchmark",
      "holistic-evaluation",
      "foundational",
      "2022"
    ],
    "id": "494f2bf84f0e5dd2",
    "statement": "HELM introduced in paper: Holistic Evaluation of Language Models (Liang et al., Stanford CRFM 2022-11-16)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-19T00:00:00.000Z",
    "signature": "4e2ce13078f537eaa825dac3d4a572e37e7cb0bae23d66737de14a3f77c7cb52"
  },
  "citedAs": "HELM introduced in paper: Holistic Evaluation of Language Models (Liang et al., Stanford CRFM 2022-11-16). — SourceScore Claim 494f2bf84f0e5dd2 (verified 2026-05-16, signed 4e2ce130…). https://sourcescore.org/claims/494f2bf84f0e5dd2/"
}