Fix docstring, type annotation for private method (#19)

This commit is contained in:
Arvid Lunnemark 2023-01-19 14:51:15 -08:00 committed by GitHub
parent 40d9b1f14e
commit cf385cada0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -104,7 +104,7 @@ class Encoding:
This is equivalent to `encode_batch(text, disallowed_special=())` (but slightly faster).
```
->>> enc.encode_batch(["hello world", "goodbye world"])
+>>> enc.encode_ordinary_batch(["hello world", "goodbye world"])
[[31373, 995], [11274, 16390, 995]]
```
"""
@@ -285,7 +285,7 @@ class Encoding:
text_or_bytes = text_or_bytes.encode("utf-8")
return self._core_bpe.encode_single_piece(text_or_bytes)
-def _encode_only_native_bpe(self, text: str) -> list[str]:
+def _encode_only_native_bpe(self, text: str) -> list[int]:
"""Encodes a string into tokens, but do regex splitting in Python."""
_unused_pat = regex.compile(self._pat_str)
ret = []