{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/67866330cd60e54d/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "Reinforcement Learning from Human Feedback (RLHF)",
    "predicate": "introduced_in_paper",
    "object": "Deep Reinforcement Learning from Human Preferences (Christiano et al., 2017)",
    "confidence": 1,
    "sources": [
      {
        "url": "https://arxiv.org/abs/1706.03741",
        "title": "Deep Reinforcement Learning from Human Preferences",
        "publisher": "arXiv (Christiano, Leike, Brown, Martic, Legg, Amodei)",
        "publishedDate": "2017-06-12",
        "accessedDate": "2026-05-16",
        "type": "preprint",
        "excerpt": "For sophisticated reinforcement learning (RL) systems to interact usefully with real-world environments, we need to communicate complex goals to these systems. … We explore goals defined in terms of (non-expert) human preferences between pairs of trajectory segments."
      },
      {
        "url": "https://papers.nips.cc/paper/2017/hash/d5e2c0adad503c91f91df240d0cd4e49-Abstract.html",
        "title": "Deep RL from Human Preferences (NeurIPS 2017 proceedings)",
        "publisher": "NeurIPS Foundation",
        "publishedDate": "2017-12-04",
        "accessedDate": "2026-05-16",
        "type": "peer-reviewed"
      },
      {
        "url": "https://openai.com/research/learning-from-human-preferences",
        "title": "Learning from human preferences",
        "publisher": "OpenAI",
        "publishedDate": "2017-06-13",
        "accessedDate": "2026-05-16",
        "type": "official-blog"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "rlhf",
      "alignment",
      "foundational",
      "christiano",
      "2017",
      "nips"
    ],
    "id": "67866330cd60e54d",
    "statement": "Reinforcement Learning from Human Feedback (RLHF) introduced in paper: Deep Reinforcement Learning from Human Preferences (Christiano et al., 2017)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-16T00:00:00.000Z",
    "signature": "0740e1cea2ff63c13bf48c6be4e113a66bef3a52c5b6c1610feb08494530b885"
  },
  "citedAs": "Reinforcement Learning from Human Feedback (RLHF) introduced in paper: Deep Reinforcement Learning from Human Preferences (Christiano et al., 2017). — SourceScore Claim 67866330cd60e54d (verified 2026-05-16, signed 0740e1ce…). https://sourcescore.org/claims/67866330cd60e54d/"
}