{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/71ec42731d2c9e0c/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "HumanEval benchmark",
    "predicate": "introduced_in_paper",
    "object": "Evaluating Large Language Models Trained on Code (Chen et al., 2021)",
    "confidence": 1,
    "sources": [
      {
        "url": "https://arxiv.org/abs/2107.03374",
        "title": "Evaluating Large Language Models Trained on Code",
        "publisher": "arXiv (Chen et al., OpenAI)",
        "publishedDate": "2021-07-07",
        "accessedDate": "2026-05-16",
        "type": "preprint",
        "excerpt": "We introduce Codex, a GPT language model fine-tuned on publicly available code from GitHub, and study its Python code-writing capabilities."
      },
      {
        "url": "https://github.com/openai/human-eval",
        "title": "openai/human-eval repository",
        "publisher": "OpenAI",
        "publishedDate": "2021-07-07",
        "accessedDate": "2026-05-16",
        "type": "github-release"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "humaneval",
      "benchmark",
      "codex",
      "openai",
      "chen",
      "2021",
      "code-generation"
    ],
    "id": "71ec42731d2c9e0c",
    "statement": "HumanEval benchmark introduced in paper: Evaluating Large Language Models Trained on Code (Chen et al., 2021)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-16T00:00:00.000Z",
    "signature": "314b578ecb09b84dbc742766fe02b03ef6c54971bad2c5ef25a28d6fcbdf20a7"
  },
  "citedAs": "HumanEval benchmark introduced in paper: Evaluating Large Language Models Trained on Code (Chen et al., 2021). — SourceScore Claim 71ec42731d2c9e0c (verified 2026-05-16, signed 314b578e…). https://sourcescore.org/claims/71ec42731d2c9e0c/"
}