{
  "apiVersion": "v1",
  "methodology": "https://sourcescore.org/methodology/",
  "canonical": "https://sourcescore.org/claims/0d47bb8eb637a2e4/",
  "claim": {
    "vertical": "ai-ml",
    "subject": "SentencePiece tokenizer",
    "predicate": "introduced_in_paper",
    "object": "SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing (Kudo & Richardson, 2018)",
    "confidence": 1,
    "sources": [
      {
        "url": "https://arxiv.org/abs/1808.06226",
        "title": "SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing",
        "publisher": "arXiv (Kudo, Richardson)",
        "publishedDate": "2018-08-19",
        "accessedDate": "2026-05-16",
        "type": "preprint",
        "excerpt": "This paper describes SentencePiece, a language-independent subword tokenizer and detokenizer designed for Neural-based text processing, including Neural Machine Translation."
      },
      {
        "url": "https://github.com/google/sentencepiece",
        "title": "google/sentencepiece — official implementation",
        "publisher": "Google",
        "publishedDate": "2018-08-19",
        "accessedDate": "2026-05-16",
        "type": "github-release"
      }
    ],
    "publishedAt": "2026-05-16T00:00:00Z",
    "lastVerified": "2026-05-16",
    "methodologyVersion": "veritas-v0.1",
    "tags": [
      "sentencepiece",
      "tokenization",
      "google",
      "foundational",
      "2018"
    ],
    "id": "0d47bb8eb637a2e4",
    "statement": "SentencePiece tokenizer introduced in paper: SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing (Kudo & Richardson, 2018)."
  },
  "signature": {
    "algorithm": "HMAC-SHA256",
    "signedBy": "did:web:sourcescore.org",
    "signedAt": "2026-05-16T00:00:00.000Z",
    "signature": "bc0f1a2101f2ef31530133d8f9ff51d53763b173ec766f4d78c5baf53cb2aa0f"
  },
  "citedAs": "SentencePiece tokenizer introduced in paper: SentencePiece: A simple and language independent subword tokenizer and detokenizer for Neural Text Processing (Kudo & Richardson, 2018). — SourceScore Claim 0d47bb8eb637a2e4 (verified 2026-05-16, signed bc0f1a21…). https://sourcescore.org/claims/0d47bb8eb637a2e4/"
}