{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/a3e691683a4577af/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "Direct Preference Optimization (DPO)",
    "predicate": "introduced_in_paper",
    "object": "Direct Preference Optimization: Your Language Model is Secretly a Reward Model (Rafailov et al., 2023)",
    "confidence": 1,
    "sources": [
      {
        "url": "https://arxiv.org/abs/2305.18290",
        "title": "Direct Preference Optimization: Your Language Model is Secretly a Reward Model",
        "publisher": "arXiv (Rafailov, Sharma, Mitchell, Ermon, Manning, Finn)",
        "publishedDate": "2023-05-29",
        "accessedDate": "2026-05-16",
        "type": "preprint",
        "excerpt": "In this paper, we introduce a new parameterization of the reward model in RLHF that enables extraction of the corresponding optimal policy in closed form, allowing us to solve the standard RLHF problem with only a simple classification loss."
      },
      {
        "url": "https://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html",
        "title": "Direct Preference Optimization (NeurIPS 2023 proceedings)",
        "publisher": "NeurIPS Foundation",
        "publishedDate": "2023-12-10",
        "accessedDate": "2026-05-16",
        "type": "peer-reviewed"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "dpo",
      "alignment",
      "foundational",
      "rafailov",
      "2023",
      "nips",
      "stanford"
    ],
    "id": "a3e691683a4577af",
    "statement": "Direct Preference Optimization (DPO) introduced in paper: Direct Preference Optimization: Your Language Model is Secretly a Reward Model (Rafailov et al., 2023)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-16T00:00:00.000Z",
    "signature": "6a5c1788062e32d01b2708858e9d50a21afe43a0bca0dbe50ab3eff1496da667"
  },
  "citedAs": "Direct Preference Optimization (DPO) introduced in paper: Direct Preference Optimization: Your Language Model is Secretly a Reward Model (Rafailov et al., 2023). — SourceScore Claim a3e691683a4577af (verified 2026-05-16, signed 6a5c1788…). https://sourcescore.org/claims/a3e691683a4577af/"
}