{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/0d24c97977ebd744/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "C4 (Colossal Clean Crawled Corpus)",
    "predicate": "introduced_in_paper",
    "object": "Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer (Raffel et al., 2019)",
    "confidence": 1,
    "sources": [
      {
        "url": "https://arxiv.org/abs/1910.10683",
        "title": "Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer",
        "publisher": "arXiv (Raffel et al.)",
        "publishedDate": "2019-10-23",
        "accessedDate": "2026-05-16",
        "type": "preprint",
        "excerpt": "We call the resulting dataset the 'Colossal Clean Crawled Corpus' (or C4 for short)."
      },
      {
        "url": "https://www.tensorflow.org/datasets/catalog/c4",
        "title": "c4 — TensorFlow Datasets catalog",
        "publisher": "Google / TensorFlow",
        "accessedDate": "2026-05-16",
        "type": "docs"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "c4",
      "dataset",
      "pretraining",
      "google",
      "2019"
    ],
    "id": "0d24c97977ebd744",
    "statement": "C4 (Colossal Clean Crawled Corpus) introduced in paper: Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer (Raffel et al., 2019)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-16T00:00:00.000Z",
    "signature": "bdfe0946f6a182bf318196d19fe1ea26e74e2830bfea4f13fd3324e34348b604"
  },
  "citedAs": "C4 (Colossal Clean Crawled Corpus) introduced in paper: Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer (Raffel et al., 2019). — SourceScore Claim 0d24c97977ebd744 (verified 2026-05-16, signed bdfe0946…). https://sourcescore.org/claims/0d24c97977ebd744/"
}