{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/b3f34e83dd0c53b9/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "HellaSwag benchmark",
    "predicate": "introduced_in_paper",
    "object": "HellaSwag: Can a Machine Really Finish Your Sentence? (Zellers et al., 2019)",
    "confidence": 0.92,
    "sources": [
      {
        "url": "https://arxiv.org/abs/1905.07830",
        "title": "HellaSwag: Can a Machine Really Finish Your Sentence?",
        "publisher": "arXiv (Zellers, Holtzman, Bisk, Farhadi, Choi — UW + Allen AI)",
        "publishedDate": "2019-05-19",
        "accessedDate": "2026-05-31",
        "type": "preprint",
        "excerpt": "In this paper, we show that commonsense inference still proves difficult for even state-of-the-art models, by presenting HellaSwag, a new challenge dataset."
      },
      {
        "url": "https://github.com/rowanz/hellaswag",
        "title": "HellaSwag dataset repository",
        "publisher": "Rowan Zellers (rowanz)",
        "publishedDate": "2019-05-19",
        "accessedDate": "2026-05-31",
        "type": "github-release"
      },
      {
        "url": "https://huggingface.co/papers/1905.07830",
        "title": "HellaSwag (Hugging Face Papers)",
        "publisher": "Hugging Face",
        "accessedDate": "2026-05-31",
        "type": "docs"
      }
    ],
    "publishedAt": "2026-05-31T00:00:00Z",
    "lastVerified": "2026-05-31",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "hellaswag",
      "benchmark",
      "evaluation",
      "commonsense",
      "nli",
      "zellers",
      "2019"
    ],
    "id": "b3f34e83dd0c53b9",
    "statement": "HellaSwag benchmark introduced in paper: HellaSwag: Can a Machine Really Finish Your Sentence? (Zellers et al., 2019)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-31T00:00:00.000Z",
    "signature": "2c93e86015c67f88ed6703f55e1e47ca27c282611774471310cd7cee1480fbd0"
  },
  "citedAs": "HellaSwag benchmark introduced in paper: HellaSwag: Can a Machine Really Finish Your Sentence? (Zellers et al., 2019). — SourceScore Claim b3f34e83dd0c53b9 (verified 2026-05-31, signed 2c93e860…). https://sourcescore.org/claims/b3f34e83dd0c53b9/"
}