{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/ea8b7be3a49101be/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "RedPajama dataset",
    "predicate": "released_on",
    "object": "2023-04-17",
    "confidence": 0.95,
    "sources": [
      {
        "url": "https://www.together.ai/blog/redpajama",
        "title": "RedPajama: An Open Source Recipe to Reproduce LLaMA training dataset",
        "publisher": "Together AI",
        "publishedDate": "2023-04-17",
        "accessedDate": "2026-05-16",
        "type": "official-blog",
        "excerpt": "Today, we release RedPajama, a project to create leading open-source models, starts by reproducing LLaMA training dataset of over 1.2 trillion tokens."
      },
      {
        "url": "https://github.com/togethercomputer/RedPajama-Data",
        "title": "togethercomputer/RedPajama-Data — GitHub",
        "publisher": "Together",
        "publishedDate": "2023-04-17",
        "accessedDate": "2026-05-16",
        "type": "github-release"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "redpajama",
      "dataset",
      "pretraining",
      "together",
      "2023",
      "open-source"
    ],
    "id": "ea8b7be3a49101be",
    "statement": "RedPajama dataset released on: 2023-04-17."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-16T00:00:00.000Z",
    "signature": "2983bbdbf68be45213c849a98aafd12c3956e882b37b30872e25dae494215b8d"
  },
  "citedAs": "RedPajama dataset released on: 2023-04-17. — SourceScore Claim ea8b7be3a49101be (verified 2026-05-16, signed 2983bbdb…). https://sourcescore.org/claims/ea8b7be3a49101be/"
}