{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/428d754e7c651be6/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "MMLU benchmark",
    "predicate": "introduced_in_paper",
    "object": "Measuring Massive Multitask Language Understanding (Hendrycks et al., 2020)",
    "confidence": 1,
    "sources": [
      {
        "url": "https://arxiv.org/abs/2009.03300",
        "title": "Measuring Massive Multitask Language Understanding",
        "publisher": "arXiv (Hendrycks et al.)",
        "publishedDate": "2020-09-07",
        "accessedDate": "2026-05-16",
        "type": "preprint",
        "excerpt": "We propose a new test to measure a text model's multitask accuracy. The test covers 57 tasks including elementary mathematics, US history, computer science, law, and more."
      },
      {
        "url": "https://openreview.net/forum?id=d7KBjmI3GmQ",
        "title": "Measuring Massive Multitask Language Understanding (ICLR 2021)",
        "publisher": "OpenReview / ICLR",
        "publishedDate": "2021-05-04",
        "accessedDate": "2026-05-16",
        "type": "peer-reviewed"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "mmlu",
      "benchmark",
      "hendrycks",
      "2020",
      "iclr",
      "evaluation"
    ],
    "id": "428d754e7c651be6",
    "statement": "MMLU benchmark introduced in paper: Measuring Massive Multitask Language Understanding (Hendrycks et al., 2020)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-16T00:00:00.000Z",
    "signature": "c8776f336d93c9ce35bdcc4247d7d90b6f4a42ebc36c9670cf7d5797b8e87758"
  },
  "citedAs": "MMLU benchmark introduced in paper: Measuring Massive Multitask Language Understanding (Hendrycks et al., 2020). — SourceScore Claim 428d754e7c651be6 (verified 2026-05-16, signed c8776f33…). https://sourcescore.org/claims/428d754e7c651be6/"
}