{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/2df92e0b0e4c891b/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "MMLU-Pro benchmark",
    "predicate": "introduced_in_paper",
    "object": "MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark (Wang et al., 2024)",
    "confidence": 0.92,
    "sources": [
      {
        "url": "https://arxiv.org/abs/2406.01574",
        "title": "MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark",
        "publisher": "arXiv (Yubo Wang et al. — TIGER-Lab)",
        "publishedDate": "2024-06-03",
        "accessedDate": "2026-05-31",
        "type": "preprint",
        "excerpt": "This paper introduces MMLU-Pro, an enhanced dataset designed to extend the mostly knowledge-driven MMLU benchmark by integrating more challenging, reasoning-focused questions"
      },
      {
        "url": "https://github.com/TIGER-AI-Lab/MMLU-Pro",
        "title": "MMLU-Pro official repository (NeurIPS 2024)",
        "publisher": "TIGER-AI-Lab",
        "publishedDate": "2024-06-03",
        "accessedDate": "2026-05-31",
        "type": "github-release"
      },
      {
        "url": "https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro",
        "title": "MMLU-Pro dataset card",
        "publisher": "Hugging Face",
        "accessedDate": "2026-05-31",
        "type": "model-card"
      }
    ],
    "publishedAt": "2026-05-31T00:00:00Z",
    "lastVerified": "2026-05-31",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "mmlu-pro",
      "benchmark",
      "evaluation",
      "reasoning",
      "wang",
      "2024"
    ],
    "id": "2df92e0b0e4c891b",
    "statement": "MMLU-Pro benchmark introduced in paper: MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark (Wang et al., 2024)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-31T00:00:00.000Z",
    "signature": "2d2a1273d8eb9c2c41ca7c406ba6457cc51eec390366942eb1ea55c2d3ca8ec3"
  },
  "citedAs": "MMLU-Pro benchmark introduced in paper: MMLU-Pro: A More Robust and Challenging Multi-Task Language Understanding Benchmark (Wang et al., 2024). — SourceScore Claim 2df92e0b0e4c891b (verified 2026-05-31, signed 2d2a1273…). https://sourcescore.org/claims/2df92e0b0e4c891b/"
}