{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/f73e50d63643df21/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "Group Relative Policy Optimization (GRPO)",
    "predicate": "introduced_in_paper",
    "object": "DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models (Shao et al., 2024)",
    "confidence": 0.92,
    "sources": [
      {
        "url": "https://arxiv.org/abs/2402.03300",
        "title": "DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models",
        "publisher": "arXiv (Shao, Wang, Zhu, Xu, Song, Bi, Zhang, Zhang, Li, Wu, Guo — DeepSeek AI)",
        "publishedDate": "2024-02-05",
        "accessedDate": "2026-05-31",
        "type": "preprint",
        "excerpt": "We introduce Group Relative Policy Optimization (GRPO), a variant of Proximal Policy Optimization (PPO),"
      },
      {
        "url": "https://github.com/deepseek-ai/DeepSeek-Math",
        "title": "DeepSeek-Math reference implementation",
        "publisher": "DeepSeek AI",
        "publishedDate": "2024-02-05",
        "accessedDate": "2026-05-31",
        "type": "github-release"
      },
      {
        "url": "https://huggingface.co/papers/2402.03300",
        "title": "DeepSeekMath (Hugging Face Papers)",
        "publisher": "Hugging Face",
        "accessedDate": "2026-05-31",
        "type": "docs"
      }
    ],
    "publishedAt": "2026-05-31T00:00:00Z",
    "lastVerified": "2026-05-31",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "grpo",
      "group-relative-policy-optimization",
      "deepseekmath",
      "reinforcement-learning",
      "rlhf",
      "reasoning",
      "shao",
      "2024",
      "deepseek"
    ],
    "id": "f73e50d63643df21",
    "statement": "Group Relative Policy Optimization (GRPO) introduced in paper: DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models (Shao et al., 2024)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-31T00:00:00.000Z",
    "signature": "6dd89785d8b9923d13f62fe1a4707f272ac12cab7ac934b6db6b7fd1afd218ce"
  },
  "citedAs": "Group Relative Policy Optimization (GRPO) introduced in paper: DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models (Shao et al., 2024). — SourceScore Claim f73e50d63643df21 (verified 2026-05-31, signed 6dd89785…). https://sourcescore.org/claims/f73e50d63643df21/"
}