diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
deleted file mode 100644
index cd0cddf..0000000
--- a/.github/workflows/build_wheels.yml
+++ /dev/null
@@ -1,83 +0,0 @@
-name: Build wheels
-
-on: [push, pull_request, workflow_dispatch]
-
-concurrency:
- group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
- cancel-in-progress: true
-
-jobs:
- build_wheels:
- name: py${{ matrix.python-version }} on ${{ matrix.os }}
- runs-on: ${{ matrix.os }}
- strategy:
- fail-fast: false
- matrix:
- # cibuildwheel builds linux wheels inside a manylinux container
- # it also takes care of procuring the correct python version for us
- os: [ubuntu-latest, windows-latest, macos-latest]
- python-version: [38, 39, 310, 311]
-
- steps:
- - uses: actions/checkout@v3
-
- - uses: pypa/cibuildwheel@v2.11.3
- env:
- CIBW_BUILD: "cp${{ matrix.python-version}}-*"
-
- - uses: actions/upload-artifact@v3
- with:
- name: dist
- path: ./wheelhouse/*.whl
-
- build_wheels_aarch64:
- name: py${{ matrix.python-version }} on ${{ matrix.os }} (aarch64)
- runs-on: ${{ matrix.os }}
- strategy:
- fail-fast: false
- matrix:
- os: [ubuntu-latest]
- python-version: [38, 39, 310, 311]
-
- steps:
- - uses: actions/checkout@v3
-
- name: Set up QEMU
- uses: docker/setup-qemu-action@v2
- with:
- platforms: arm64
-
- - name: Build wheels
- uses: pypa/cibuildwheel@v2.11.3
- env:
- CIBW_BUILD: "cp${{ matrix.python-version}}-*"
- CIBW_ARCHS: aarch64
- CIBW_BUILD_VERBOSITY: 3
- # https://github.com/rust-lang/cargo/issues/10583
- CIBW_ENVIRONMENT_LINUX: PATH="$PATH:$HOME/.cargo/bin" CARGO_NET_GIT_FETCH_WITH_CLI=true
- - uses: actions/upload-artifact@v3
- with:
- name: dist
- path: ./wheelhouse/*.whl
-
- build_sdist:
- name: sdist
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
- name: Install Python
- with:
- python-version: "3.9"
- - name: Run check-manifest
- run: |
- pip install check-manifest
- check-manifest -v
- - name: Build sdist
- run: |
- pip install --upgrade build
- python -m build --sdist
- - uses: actions/upload-artifact@v3
- with:
- name: dist
- path: ./dist/*.tar.gz
diff --git a/CHANGELOG.md b/CHANGELOG.md
deleted file mode 100644
index d0365b8..0000000
--- a/CHANGELOG.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# Changelog
-
-This is the changelog for the open source version of tiktoken.
-
-## [v0.3.2]
-- Add encoding for GPT-4
-
-## [v0.3.1]
-- Build aarch64 wheels
-- Make `blobfile` an optional dependency
-
-Thank you to @messense for the environment variable that makes cargo not OOM under emulation!
-
-## [v0.3.0]
-- Improve performance by 5-20%; thank you to @nistath!
-- Add `gpt-3.5-turbo` models to `encoding_for_model`
-- Add prefix matching to `encoding_for_model` to better support future model versions
-- Fix a bug in the README instructions on extending tiktoken
-- Update the set of available encodings
-- Add packaging metadata
-
-## [v0.2.0]
-- Add ``tiktoken.encoding_for_model`` to get the encoding for a specific model
-- Improve portability of caching logic
-
-Thank you to @fritzo, @arvid220u, @khanhvu207, @henriktorget for various small corrections
-
-## [v0.1.2]
-- Avoid use of `blobfile` for public files
-- Add support for Python 3.8
-- Add py.typed
-- Improve the public tests
-
-## [v0.1.1]
-- Initial release
diff --git a/Cargo.toml b/Cargo.toml
index 07182cd..fc3cddb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -5,17 +5,14 @@ edition = "2021"
rust-version = "1.57.0"
[lib]
-name = "_tiktoken"
-crate-type = ["cdylib"]
+name = "tiktoken"
[dependencies]
-pyo3 = { version = "0.17.3", features = ["extension-module"] }
-
-# tiktoken dependencies
-fancy-regex = "0.10.0"
+fancy-regex = "0.11.0"
regex = "1.7.0"
rustc-hash = "1.1.0"
bstr = "1.0.1"
+anyhow = "1.0.70"
[profile.release]
incremental = true
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index 7f25b27..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,8 +0,0 @@
-include *.svg
-include *.toml
-include *.md
-include Makefile
-global-include py.typed
-recursive-include scripts *.py
-recursive-include tests *.py
-recursive-include src *.rs
diff --git a/perf.svg b/perf.svg
deleted file mode 100644
index 723036c..0000000
--- a/perf.svg
+++ /dev/null
@@ -1,374 +0,0 @@
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 739d295..0000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,41 +0,0 @@
-[project]
-name = "tiktoken"
-version = "0.3.2"
-description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models"
-readme = "README.md"
-license = {file = "LICENSE"}
-authors = [{name = "Shantanu Jain"}, {email = "shantanu@openai.com"}]
-dependencies = ["regex>=2022.1.18", "requests>=2.26.0"]
-optional-dependencies = {blobfile = ["blobfile>=2"]}
-requires-python = ">=3.8"
-
-[project.urls]
-homepage = "https://github.com/openai/tiktoken"
-repository = "https://github.com/openai/tiktoken"
-changelog = "https://github.com/openai/tiktoken/blob/main/CHANGELOG.md"
-
-[build-system]
-build-backend = "setuptools.build_meta"
-requires = ["setuptools>=62.4", "wheel", "setuptools-rust>=1.5.2"]
-
-[tool.cibuildwheel]
-build-frontend = "build"
-build-verbosity = 1
-
-linux.before-all = "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y"
-linux.environment = { PATH = "$PATH:$HOME/.cargo/bin" }
-macos.before-all = "rustup target add aarch64-apple-darwin"
-
-skip = [
- "*-manylinux_i686",
- "*-musllinux_i686",
- "*-win32",
-]
-macos.archs = ["x86_64", "arm64"]
-# When cross-compiling on Intel, it is not possible to test arm64 wheels.
-# Warnings will be silenced with the following CIBW_TEST_SKIP
-test-skip = "*-macosx_arm64"
-
-before-test = "pip install pytest"
-test-command = "pytest {project}/tests"
-
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
deleted file mode 100644
index 4d679fa..0000000
--- a/scripts/benchmark.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import base64
-import functools
-import gzip
-import json
-import os
-import random
-import time
-from typing import Any, cast
-
-import blobfile
-
-import tiktoken
-
-
-def benchmark_batch(documents: list[str]) -> None:
- num_threads = int(os.environ["RAYON_NUM_THREADS"])
- num_bytes = sum(map(len, map(str.encode, documents)))
- print(f"num_threads: {num_threads}, num_bytes: {num_bytes}")
-
- enc = tiktoken.get_encoding("gpt2")
- enc.encode("warmup")
-
- start = time.perf_counter_ns()
- enc.encode_ordinary_batch(documents, num_threads=num_threads)
- end = time.perf_counter_ns()
- print(f"tiktoken \t{num_bytes / (end - start) * 1e9} bytes / s")
-
- import transformers
-
- hf_enc = cast(Any, transformers).GPT2TokenizerFast.from_pretrained("gpt2")
- hf_enc.model_max_length = 1e30 # silence!
- hf_enc.encode("warmup")
-
- start = time.perf_counter_ns()
- hf_enc(documents)
- end = time.perf_counter_ns()
- print(f"huggingface \t{num_bytes / (end - start) * 1e9} bytes / s")
-
-
diff --git a/scripts/redact.py b/scripts/redact.py
deleted file mode 100644
index d82db32..0000000
--- a/scripts/redact.py
+++ /dev/null
@@ -1,67 +0,0 @@
-import argparse
-import re
-import subprocess
-from pathlib import Path
-
-
-def redact_file(path: Path, dry_run: bool) -> None:
- if not path.exists() or path.is_dir():
- return
-
- text = path.read_text()
- if not text:
- return
-
- first_line = text.splitlines()[0]
- if "redact" in first_line:
- if not dry_run:
- path.unlink()
- print(f"Deleted {path}")
- return
-
- pattern = "|".join(
- re.escape(x)
- for x in [
- "# ===== redact-beg =====\n",
- "# ===== redact-end =====\n",
- "\n",
- "\n",
- ]
- )
-
- if re.search(pattern, text):
- redacted_text = "".join(re.split(pattern, text)[::2])
- if not dry_run:
- path.write_text(redacted_text)
- print(f"Redacted {path}")
- return
-
- print(f"Skipped {path}")
-
-
-def redact(dry_run: bool) -> None:
- tiktoken_root = Path(__file__).parent.parent
- assert tiktoken_root.name == "tiktoken"
- assert (tiktoken_root / "pyproject.toml").exists()
-
- try:
- output = subprocess.check_output(["git", "ls-files"], cwd=tiktoken_root, text=True)
- paths = [Path(p) for p in output.splitlines()]
- except subprocess.CalledProcessError:
- paths = list(tiktoken_root.glob("**/*"))
-
- for path in paths:
- redact_file(path, dry_run=dry_run)
-
-
-def main() -> None:
- parser = argparse.ArgumentParser()
- parser.add_argument("--dry-run", type=lambda x: not x or x[0].lower() != "f", default=True)
- args = parser.parse_args()
- redact(args.dry_run)
- if args.dry_run:
- print("Dry run, use --dry-run=false to actually redact files")
-
-
-if __name__ == "__main__":
- main()
diff --git a/setup.py b/setup.py
deleted file mode 100644
index a22e8e5..0000000
--- a/setup.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from setuptools import setup
-from setuptools_rust import Binding, RustExtension
-
-setup(
- name="tiktoken",
- rust_extensions=[
- RustExtension(
- "tiktoken._tiktoken",
- binding=Binding.PyO3,
- # Between our use of editable installs and wanting to use Rust for performance sensitive
- # code, it makes sense to just always use --release
- debug=False,
- )
- ],
- package_data={"tiktoken": ["py.typed"]},
- packages=["tiktoken", "tiktoken_ext"],
- zip_safe=False,
-)
diff --git a/src/lib.rs b/src/lib.rs
index 70009d2..d202a4b 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,10 +5,6 @@ use std::collections::HashSet;
use std::thread;
use fancy_regex::Regex;
-use pyo3::exceptions;
-use pyo3::prelude::*;
-use pyo3::types::{PyBytes, PyList, PyTuple};
-use pyo3::PyResult;
use rustc_hash::FxHashMap as HashMap;
fn _byte_pair_merge(
@@ -169,7 +165,6 @@ fn hash_current_thread() -> usize {
}
const MAX_NUM_THREADS: usize = 128;
-#[pyclass]
struct CoreBPE {
encoder: HashMap<Vec<u8>, usize>,
special_tokens_encoder: HashMap<String, usize>,
@@ -192,19 +187,96 @@ impl CoreBPE {
&self.special_regex_tls[hash_current_thread() % MAX_NUM_THREADS]
}
- fn _decode_native(&self, tokens: &[usize]) -> Vec<u8> {
- let mut ret = Vec::with_capacity(tokens.len() * 2);
- for token in tokens {
- let token_bytes = self
- .decoder
- .get(token)
- .unwrap_or_else(|| &self.special_tokens_decoder[token]);
- ret.extend(token_bytes);
+ fn _increase_last_piece_token_len(
+ &self,
+ tokens: Vec<usize>,
+ mut last_piece_token_len: usize,
+ ) -> (Vec<usize>, usize) {
+ // Unfortunately, the locations where our regex splits can be unstable.
+ // For the purposes of determining unstable tokens, unstable regex splitting
+ // is only a problem if a split that was present disappears, since this can
+ // lead to merging of tokens otherwise thought to be stable.
+ // cl100k_base makes our life hard by including the \s*[\r\n]+
+ // pattern. This can e.g. cause "\n" + " " to become "\n \n".
+ // Here is a quick and dirty fix:
+ {
+ let token_is_all_space = |token| {
+ self.decoder
+ .get(token)
+ .map(|token_bytes| {
+ token_bytes
+ .iter()
+ .rev()
+ .all(|&b| [b' ', b'\n', b'\t'].contains(&b))
+ })
+ .unwrap_or(false)
+ };
+ if last_piece_token_len > 0
+ && token_is_all_space(&tokens[tokens.len() - last_piece_token_len])
+ {
+ while (last_piece_token_len < tokens.len())
+ && token_is_all_space(&tokens[tokens.len() - last_piece_token_len - 1])
+ {
+ last_piece_token_len += 1;
+ }
+ }
}
- ret
+ debug_assert!(last_piece_token_len <= tokens.len());
+
+ (tokens, last_piece_token_len)
+ }
+}
+
+impl CoreBPE {
+ pub fn new(
+ encoder: HashMap<Vec<u8>, usize>,
+ special_tokens_encoder: HashMap<String, usize>,
+ pattern: &str,
+ ) -> anyhow::Result<Self> {
+ let regex = Regex::new(pattern)
+ .map_err(|e| anyhow::anyhow!("Invalid regex: {}", e.to_string()))?;
+
+ let special_regex = {
+ let _parts = special_tokens_encoder
+ .keys()
+ .map(|s| fancy_regex::escape(s))
+ .collect::<Vec<_>>();
+ Regex::new(&_parts.join("|"))
+ .map_err(|e| anyhow::anyhow!("Invalid regex: {}", e.to_string()))?
+ };
+
+ let decoder: HashMap<usize, Vec<u8>> =
+ encoder.iter().map(|(k, v)| (*v, k.clone())).collect();
+
+ assert!(encoder.len() == decoder.len());
+
+ let special_tokens_decoder: HashMap<usize, Vec<u8>> = special_tokens_encoder
+ .iter()
+ .map(|(k, v)| (*v, k.as_bytes().to_vec()))
+ .collect();
+
+ // Clone because I don't know how to tell Rust I'm not going to change the map
+ let mut sorted_token_bytes: Vec<Vec<u8>> = encoder.keys().cloned().collect();
+ sorted_token_bytes.sort();
+
+ Ok(CoreBPE {
+ encoder,
+ special_tokens_encoder,
+ decoder,
+ special_tokens_decoder,
+ regex_tls: (0..MAX_NUM_THREADS).map(|_| regex.clone()).collect(),
+ special_regex_tls: (0..MAX_NUM_THREADS)
+ .map(|_| special_regex.clone())
+ .collect(),
+ sorted_token_bytes,
+ })
}
- fn _encode_ordinary_native(&self, text: &str) -> Vec<usize> {
+ // ====================
+ // Encoding
+ // ====================
+
+ pub fn encode_ordinary(&self, text: &str) -> Vec<usize> {
// This is the core of the encoding logic; the other functions in here
// just make things complicated :-)
let regex = self._get_tl_regex();
@@ -220,7 +292,7 @@ impl CoreBPE {
ret
}
- fn _encode_native(&self, text: &str, allowed_special: &HashSet<&str>) -> (Vec<usize>, usize) {
+ pub fn encode(&self, text: &str, allowed_special: HashSet<&str>) -> (Vec<usize>, usize) {
let special_regex = self._get_tl_special_regex();
let regex = self._get_tl_regex();
let mut ret = vec![];
@@ -276,51 +348,37 @@ impl CoreBPE {
(ret, last_piece_token_len)
}
- fn _increase_last_piece_token_len(
- &self,
- tokens: Vec<usize>,
- mut last_piece_token_len: usize,
- ) -> (Vec<usize>, usize) {
- // Unfortunately, the locations where our regex splits can be unstable.
- // For the purposes of determining unstable tokens, unstable regex splitting
- // is only a problem if a split that was present disappears, since this can
- // lead to merging of tokens otherwise thought to be stable.
- // cl100k_base makes our life hard by including the \s*[\r\n]+
- // pattern. This can e.g. cause "\n" + " " to become "\n \n".
- // Here is a quick and dirty fix:
- {
- let token_is_all_space = |token| {
- self.decoder
- .get(token)
- .map(|token_bytes| {
- token_bytes
- .iter()
- .rev()
- .all(|&b| [b' ', b'\n', b'\t'].contains(&b))
- })
- .unwrap_or(false)
- };
- if last_piece_token_len > 0
- && token_is_all_space(&tokens[tokens.len() - last_piece_token_len])
- {
- while (last_piece_token_len < tokens.len())
- && token_is_all_space(&tokens[tokens.len() - last_piece_token_len - 1])
- {
- last_piece_token_len += 1;
+ fn _encode_bytes(&self, bytes: &[u8]) -> Vec<usize> {
+ match std::str::from_utf8(bytes) {
+ Ok(text) => self.encode_ordinary(text),
+ Err(e) => {
+ let text = unsafe { std::str::from_utf8_unchecked(&bytes[..e.valid_up_to()]) };
+ let (tokens, last_piece_token_len) = self.encode(text, HashSet::new());
+ let (mut tokens, last_piece_token_len) =
+ self._increase_last_piece_token_len(tokens, last_piece_token_len);
+ if !tokens.is_empty() && last_piece_token_len > 0 {
+ // Lop off the tokens from the last piece and run BPE on the remaining bytes
+ // Somewhat niche, but this may not be correct if we'd have had a regex
+ // split between the valid UTF-8 and the invalid bytes, which is why this
+ // method is private
+ let mut unstable_bytes =
+ self.decode_bytes(&tokens[tokens.len() - last_piece_token_len..]);
+ unstable_bytes.extend_from_slice(&bytes[e.valid_up_to()..]);
+
+ tokens.truncate(tokens.len() - last_piece_token_len);
+ tokens.extend(byte_pair_encode(&unstable_bytes, &self.encoder));
}
+ tokens
}
}
- debug_assert!(last_piece_token_len <= tokens.len());
-
- (tokens, last_piece_token_len)
}
- fn _encode_unstable_native(
+ pub fn encode_with_unstable(
&self,
text: &str,
- allowed_special: &HashSet<&str>,
+ allowed_special: HashSet<&str>,
) -> (Vec<usize>, HashSet<Vec<usize>>) {
- let (tokens, last_piece_token_len) = self._encode_native(text, allowed_special);
+ let (tokens, last_piece_token_len) = self.encode(text, allowed_special);
if last_piece_token_len == 0 {
// If last_piece_token_len is zero, the last token was a special token and we have
// no unstable bytes
@@ -329,7 +387,7 @@ impl CoreBPE {
let (mut tokens, last_piece_token_len) =
self._increase_last_piece_token_len(tokens, last_piece_token_len);
- let unstable_bytes = self._decode_native(&tokens[tokens.len() - last_piece_token_len..]);
+ let unstable_bytes = self.decode_bytes(&tokens[tokens.len() - last_piece_token_len..]);
tokens.truncate(tokens.len() - last_piece_token_len);
// TODO: we should try harder to find additional stable tokens
@@ -377,7 +435,7 @@ impl CoreBPE {
// So convert to UTF-8 and do regex splitting.
// E.g. with cl100k_base " !" gets split to " " + " !",
// but byte_pair_encode(" !") != byte_pair_encode(" ")
- Ok(s) => self._encode_ordinary_native(s),
+ Ok(s) => self.encode_ordinary(s),
// Technically, whether or not this arm is correct depends on whether there
// would be a regex split before the UTF-8 truncation point.
@@ -430,108 +488,8 @@ impl CoreBPE {
(tokens, completions)
}
-}
-#[pymethods]
-impl CoreBPE {
- #[new]
- fn new(
- encoder: HashMap<Vec<u8>, usize>,
- special_tokens_encoder: HashMap<String, usize>,
- pattern: &str,
- ) -> PyResult<Self> {
- let regex = Regex::new(pattern)
- .map_err(|e| PyErr::new::<exceptions::PyValueError, _>(e.to_string()))?;
-
- let special_regex = {
- let _parts = special_tokens_encoder
- .keys()
- .map(|s| fancy_regex::escape(s))
- .collect::<Vec<_>>();
- Regex::new(&_parts.join("|"))
- .map_err(|e| PyErr::new::<exceptions::PyValueError, _>(e.to_string()))?
- };
-
- let decoder: HashMap<usize, Vec<u8>> =
- encoder.iter().map(|(k, v)| (*v, k.clone())).collect();
-
- assert!(encoder.len() == decoder.len());
-
- let special_tokens_decoder: HashMap<usize, Vec<u8>> = special_tokens_encoder
- .iter()
- .map(|(k, v)| (*v, k.as_bytes().to_vec()))
- .collect();
-
- // Clone because I don't know how to tell Rust I'm not going to change the map
- let mut sorted_token_bytes: Vec<Vec<u8>> = encoder.keys().cloned().collect();
- sorted_token_bytes.sort();
-
- Ok(CoreBPE {
- encoder,
- special_tokens_encoder,
- decoder,
- special_tokens_decoder,
- regex_tls: (0..MAX_NUM_THREADS).map(|_| regex.clone()).collect(),
- special_regex_tls: (0..MAX_NUM_THREADS)
- .map(|_| special_regex.clone())
- .collect(),
- sorted_token_bytes,
- })
- }
-
- // ====================
- // Encoding
- // ====================
-
- fn encode_ordinary(&self, py: Python, text: &str) -> Vec<usize> {
- py.allow_threads(|| self._encode_ordinary_native(text))
- }
-
- fn encode(&self, py: Python, text: &str, allowed_special: HashSet<&str>) -> Vec<usize> {
- py.allow_threads(|| self._encode_native(text, &allowed_special).0)
- }
-
- fn _encode_bytes(&self, py: Python, bytes: &[u8]) -> Vec<usize> {
- py.allow_threads(|| {
- match std::str::from_utf8(bytes) {
- Ok(text) => self._encode_ordinary_native(text),
- Err(e) => {
- let text = unsafe { std::str::from_utf8_unchecked(&bytes[..e.valid_up_to()]) };
- let (tokens, last_piece_token_len) = self._encode_native(text, &HashSet::new());
- let (mut tokens, last_piece_token_len) =
- self._increase_last_piece_token_len(tokens, last_piece_token_len);
- if !tokens.is_empty() && last_piece_token_len > 0 {
- // Lop off the tokens from the last piece and run BPE on the remaining bytes
- // Somewhat niche, but this may not be correct if we'd have had a regex
- // split between the valid UTF-8 and the invalid bytes, which is why this
- // method is private
- let mut unstable_bytes =
- self._decode_native(&tokens[tokens.len() - last_piece_token_len..]);
- unstable_bytes.extend_from_slice(&bytes[e.valid_up_to()..]);
-
- tokens.truncate(tokens.len() - last_piece_token_len);
- tokens.extend(byte_pair_encode(&unstable_bytes, &self.encoder));
- }
- tokens
- }
- }
- })
- }
-
- fn encode_with_unstable(
- &self,
- py: Python,
- text: &str,
- allowed_special: HashSet<&str>,
- ) -> Py<PyTuple> {
- let (tokens, completions) =
- py.allow_threads(|| self._encode_unstable_native(text, &allowed_special));
- let py_completions =
- PyList::new(py, completions.iter().map(|seq| PyList::new(py, &seq[..])));
- (tokens, py_completions).into_py(py)
- }
-
- fn encode_single_token(&self, piece: &[u8]) -> PyResult<usize> {
+ pub fn encode_single_token(&self, piece: &[u8]) -> anyhow::Result<usize> {
if let Some(token) = self.encoder.get(piece).copied() {
return Ok(token);
}
@@ -540,10 +498,10 @@ impl CoreBPE {
return Ok(token);
}
}
- Err(PyErr::new::<exceptions::PyKeyError, _>(piece.to_owned()))
+ Err(anyhow::anyhow!("Piece {:?} not found", piece))
}
- fn encode_single_piece(&self, piece: &[u8]) -> Vec<usize> {
+ pub fn encode_single_piece(&self, piece: &[u8]) -> Vec<usize> {
if let Some(token) = self.encoder.get(piece) {
return vec![*token];
}
@@ -554,39 +512,37 @@ impl CoreBPE {
// Decoding
// ====================
- fn decode_bytes(&self, py: Python, tokens: Vec<usize>) -> Py<PyBytes> {
- let bytes = py.allow_threads(|| self._decode_native(&tokens));
- PyBytes::new(py, &bytes).into()
+ pub fn decode_bytes(&self, tokens: &[usize]) -> Vec<u8> {
+ let mut ret = Vec::with_capacity(tokens.len() * 2);
+ for token in tokens {
+ let token_bytes = self
+ .decoder
+ .get(token)
+ .unwrap_or_else(|| &self.special_tokens_decoder[token]);
+ ret.extend(token_bytes);
+ }
+ ret
}
- fn decode_single_token_bytes(&self, py: Python, token: usize) -> PyResult<Py<PyBytes>> {
+ pub fn decode_single_token_bytes(&self, token: usize) -> anyhow::Result<&Vec<u8>> {
if let Some(bytes) = self.decoder.get(&token) {
- return Ok(PyBytes::new(py, bytes).into());
+ return Ok(bytes);
}
if let Some(bytes) = self.special_tokens_decoder.get(&token) {
- return Ok(PyBytes::new(py, bytes).into());
+ return Ok(bytes);
}
- Err(PyErr::new::<exceptions::PyKeyError, _>(token.to_string()))
+ Err(anyhow::anyhow!("Token {} not found", token))
}
// ====================
// Miscellaneous
// ====================
- fn token_byte_values(&self, py: Python) -> Vec<Py<PyBytes>> {
- self.sorted_token_bytes
- .iter()
- .map(|x| PyBytes::new(py, x).into())
- .collect()
+ pub fn token_byte_values(&self) -> &Vec<Vec<u8>> {
+ &self.sorted_token_bytes
}
}
-#[pymodule]
-fn _tiktoken(_py: Python, m: &PyModule) -> PyResult<()> {
- m.add_class::<CoreBPE>()?;
- Ok(())
-}
-
#[cfg(test)]
mod tests {
use rustc_hash::FxHashMap as HashMap;
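
The net effect of the src/lib.rs changes above is that CoreBPE becomes a plain Rust type: construction goes through `CoreBPE::new`, and encoding/decoding through `encode_ordinary`, `encode`, and `decode_bytes`, returning ordinary Rust values and `anyhow` errors instead of PyO3 objects. The following is a minimal sketch of how that surface could be exercised from inside the crate (for example alongside the existing tests module); the toy single-byte vocabulary, the `<|endoftext|>` rank of 256, and the gpt2-style split pattern are illustrative assumptions, not part of this diff.

#[test]
fn roundtrip_with_toy_vocab() {
    use std::collections::HashSet;
    use rustc_hash::FxHashMap as HashMap;
    use crate::CoreBPE;

    // Toy vocabulary: every single byte is its own token, so encoding always
    // bottoms out at byte level; real callers would load ranks from a .tiktoken file.
    let encoder: HashMap<Vec<u8>, usize> =
        (0..=255u8).map(|b| (vec![b], b as usize)).collect();
    let mut special_tokens_encoder: HashMap<String, usize> = HashMap::default();
    special_tokens_encoder.insert("<|endoftext|>".to_string(), 256);

    let pattern = r"'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+";
    let bpe = CoreBPE::new(encoder, special_tokens_encoder, pattern).unwrap();

    // Ordinary text round-trips through encode_ordinary / decode_bytes.
    let tokens = bpe.encode_ordinary("hello world");
    assert_eq!(bpe.decode_bytes(&tokens), b"hello world".to_vec());

    // Special tokens are only emitted when explicitly allowed.
    let allowed: HashSet<&str> = ["<|endoftext|>"].into_iter().collect();
    let (tokens, _last_piece_token_len) = bpe.encode("hello <|endoftext|>", allowed);
    assert_eq!(tokens.last().copied(), Some(256));
}
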
diff --git a/tests/test_simple_public.py b/tests/test_simple_public.py
deleted file mode 100644
index 8458c12..0000000
--- a/tests/test_simple_public.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import subprocess
-import sys
-
-import tiktoken
-
-
-def test_simple():
- # Note that there are more actual tests, they're just not currently public :-)
- enc = tiktoken.get_encoding("gpt2")
- assert enc.encode("hello world") == [31373, 995]
- assert enc.decode([31373, 995]) == "hello world"
- assert enc.encode("hello <|endoftext|>", allowed_special="all") == [31373, 220, 50256]
-
- enc = tiktoken.get_encoding("cl100k_base")
- assert enc.encode("hello world") == [15339, 1917]
- assert enc.decode([15339, 1917]) == "hello world"
- assert enc.encode("hello <|endoftext|>", allowed_special="all") == [15339, 220, 100257]
-
- for enc_name in tiktoken.list_encoding_names():
- enc = tiktoken.get_encoding(enc_name)
- for token in range(10_000):
- assert enc.encode_single_token(enc.decode_single_token_bytes(token)) == token
-
-
-def test_encoding_for_model():
- enc = tiktoken.encoding_for_model("gpt2")
- assert enc.name == "gpt2"
- enc = tiktoken.encoding_for_model("text-davinci-003")
- assert enc.name == "p50k_base"
- enc = tiktoken.encoding_for_model("text-davinci-edit-001")
- assert enc.name == "p50k_edit"
- enc = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
- assert enc.name == "cl100k_base"
-
-
-def test_optional_blobfile_dependency():
- prog = """
-import tiktoken
-import sys
-assert "blobfile" not in sys.modules
-"""
- subprocess.check_call([sys.executable, "-c", prog])
diff --git a/tiktoken/__init__.py b/tiktoken/__init__.py
deleted file mode 100644
index 9ad09a3..0000000
--- a/tiktoken/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .core import Encoding as Encoding
-from .model import encoding_for_model as encoding_for_model
-from .registry import get_encoding as get_encoding
-from .registry import list_encoding_names as list_encoding_names
diff --git a/tiktoken/core.py b/tiktoken/core.py
deleted file mode 100644
index 05613aa..0000000
--- a/tiktoken/core.py
+++ /dev/null
@@ -1,329 +0,0 @@
-from __future__ import annotations
-
-import functools
-from concurrent.futures import ThreadPoolExecutor
-from typing import AbstractSet, Collection, Literal, NoReturn, Optional, Union
-
-import regex
-
-from tiktoken import _tiktoken
-
-
-class Encoding:
- def __init__(
- self,
- name: str,
- *,
- pat_str: str,
- mergeable_ranks: dict[bytes, int],
- special_tokens: dict[str, int],
- explicit_n_vocab: Optional[int] = None,
- ):
- """Creates an Encoding object.
-
- See openai_public.py for examples of how to construct an Encoding object.
-
- Args:
- name: The name of the encoding. It should be clear from the name of the encoding
- what behaviour to expect, in particular, encodings with different special tokens
- should have different names.
- pat_str: A regex pattern string that is used to split the input text.
- mergeable_ranks: A dictionary mapping mergeable token bytes to their ranks. The ranks
- must correspond to merge priority.
- special_tokens: A dictionary mapping special token strings to their token values.
- explicit_n_vocab: The number of tokens in the vocabulary. If provided, it is checked
- that the number of mergeable tokens and special tokens is equal to this number.
- """
- self.name = name
-
- self._pat_str = pat_str
- self._mergeable_ranks = mergeable_ranks
- self._special_tokens = special_tokens
-
- self.max_token_value = max(
- max(mergeable_ranks.values()), max(special_tokens.values(), default=0)
- )
- if explicit_n_vocab:
- assert len(mergeable_ranks) + len(special_tokens) == explicit_n_vocab
- assert self.max_token_value == explicit_n_vocab - 1
-
- self._core_bpe = _tiktoken.CoreBPE(mergeable_ranks, special_tokens, pat_str)
-
- def __repr__(self) -> str:
- return f""
-
- # ====================
- # Encoding
- # ====================
-
- def encode_ordinary(self, text: str) -> list[int]:
- """Encodes a string into tokens, ignoring special tokens.
-
- This is equivalent to `encode(text, disallowed_special=())` (but slightly faster).
-
- ```
- >>> enc.encode_ordinary("hello world")
- [31373, 995]
- """
- return self._core_bpe.encode_ordinary(text)
-
- def encode(
- self,
- text: str,
- *,
- allowed_special: Union[Literal["all"], AbstractSet[str]] = set(), # noqa: B006
- disallowed_special: Union[Literal["all"], Collection[str]] = "all",
- ) -> list[int]:
- """Encodes a string into tokens.
-
- Special tokens are artificial tokens used to unlock capabilities from a model,
- such as fill-in-the-middle. So we want to be careful about accidentally encoding special
- tokens, since they can be used to trick a model into doing something we don't want it to do.
-
- Hence, by default, encode will raise an error if it encounters text that corresponds
- to a special token. This can be controlled on a per-token level using the `allowed_special`
- and `disallowed_special` parameters. In particular:
- - Setting `disallowed_special` to () will prevent this function from raising errors and
- cause all text corresponding to special tokens to be encoded as natural text.
- - Setting `allowed_special` to "all" will cause this function to treat all text
- corresponding to special tokens to be encoded as special tokens.
-
- ```
- >>> enc.encode("hello world")
- [31373, 995]
- >>> enc.encode("<|endoftext|>", allowed_special={"<|endoftext|>"})
- [50256]
- >>> enc.encode("<|endoftext|>", allowed_special="all")
- [50256]
- >>> enc.encode("<|endoftext|>")
- # Raises ValueError
- >>> enc.encode("<|endoftext|>", disallowed_special=())
- [27, 91, 437, 1659, 5239, 91, 29]
- ```
- """
- if allowed_special == "all":
- allowed_special = self.special_tokens_set
- if disallowed_special == "all":
- disallowed_special = self.special_tokens_set - allowed_special
- if disallowed_special:
- if not isinstance(disallowed_special, frozenset):
- disallowed_special = frozenset(disallowed_special)
- if match := _special_token_regex(disallowed_special).search(text):
- raise_disallowed_special_token(match.group())
-
- return self._core_bpe.encode(text, allowed_special)
-
- def encode_ordinary_batch(self, text: list[str], *, num_threads: int = 8) -> list[list[int]]:
- """Encodes a list of strings into tokens, in parallel, ignoring special tokens.
-
- This is equivalent to `encode_batch(text, disallowed_special=())` (but slightly faster).
-
- ```
- >>> enc.encode_ordinary_batch(["hello world", "goodbye world"])
- [[31373, 995], [11274, 16390, 995]]
- ```
- """
- encoder = functools.partial(self.encode_ordinary)
- with ThreadPoolExecutor(num_threads) as e:
- return list(e.map(encoder, text))
-
- def encode_batch(
- self,
- text: list[str],
- *,
- num_threads: int = 8,
- allowed_special: Union[Literal["all"], AbstractSet[str]] = set(), # noqa: B006
- disallowed_special: Union[Literal["all"], Collection[str]] = "all",
- ) -> list[list[int]]:
- """Encodes a list of strings into tokens, in parallel.
-
- See `encode` for more details on `allowed_special` and `disallowed_special`.
-
- ```
- >>> enc.encode_batch(["hello world", "goodbye world"])
- [[31373, 995], [11274, 16390, 995]]
- ```
- """
- if allowed_special == "all":
- allowed_special = self.special_tokens_set
- if disallowed_special == "all":
- disallowed_special = self.special_tokens_set - allowed_special
- if not isinstance(disallowed_special, frozenset):
- disallowed_special = frozenset(disallowed_special)
-
- encoder = functools.partial(
- self.encode, allowed_special=allowed_special, disallowed_special=disallowed_special
- )
- with ThreadPoolExecutor(num_threads) as e:
- return list(e.map(encoder, text))
-
- def encode_with_unstable(
- self,
- text: str,
- *,
- allowed_special: Union[Literal["all"], AbstractSet[str]] = set(), # noqa: B006
- disallowed_special: Union[Literal["all"], Collection[str]] = "all",
- ) -> tuple[list[int], list[list[int]]]:
- """Encodes a string into stable tokens and possible completion sequences.
-
- Note that the stable tokens will only represent a substring of `text`.
-
- See `encode` for more details on `allowed_special` and `disallowed_special`.
-
- This API should itself be considered unstable.
-
- ```
- >>> enc.encode_with_unstable("hello fanta")
- ([31373], [(277, 4910), (5113, 265), ..., (8842,)])
-
- >>> text = "..."
- >>> stable_tokens, completions = enc.encode_with_unstable(text)
- >>> assert text.encode().startswith(enc.decode_bytes(stable_tokens))
- >>> assert all(enc.decode_bytes(stable_tokens + seq).startswith(text.encode()) for seq in completions)
- ```
- """
- if allowed_special == "all":
- allowed_special = self.special_tokens_set
- if disallowed_special == "all":
- disallowed_special = self.special_tokens_set - allowed_special
- if disallowed_special:
- if not isinstance(disallowed_special, frozenset):
- disallowed_special = frozenset(disallowed_special)
- if match := _special_token_regex(disallowed_special).search(text):
- raise_disallowed_special_token(match.group())
-
- return self._core_bpe.encode_with_unstable(text, allowed_special)
-
- def encode_single_token(self, text_or_bytes: Union[str, bytes]) -> int:
- """Encodes text corresponding to a single token to its token value.
-
- NOTE: this will encode all special tokens.
-
- Raises `KeyError` if the token is not in the vocabulary.
-
- ```
- >>> enc.encode_single_token("hello")
- 31373
- ```
- """
- if isinstance(text_or_bytes, str):
- text_or_bytes = text_or_bytes.encode("utf-8")
- return self._core_bpe.encode_single_token(text_or_bytes)
-
- # ====================
- # Decoding
- # ====================
-
- def decode_bytes(self, tokens: list[int]) -> bytes:
- """Decodes a list of tokens into bytes.
-
- ```
- >>> enc.decode_bytes([31373, 995])
- b'hello world'
- ```
- """
- return self._core_bpe.decode_bytes(tokens)
-
- def decode(self, tokens: list[int], errors: str = "replace") -> str:
- """Decodes a list of tokens into a string.
-
- WARNING: the default behaviour of this function is lossy, since decoded bytes are not
- guaranteed to be valid UTF-8. You can control this behaviour using the `errors` parameter,
- for instance, setting `errors=strict`.
-
- ```
- >>> enc.decode([31373, 995])
- 'hello world'
- ```
- """
- return self._core_bpe.decode_bytes(tokens).decode("utf-8", errors=errors)
-
- def decode_single_token_bytes(self, token: int) -> bytes:
- """Decodes a token into bytes.
-
- NOTE: this will decode all special tokens.
-
- Raises `KeyError` if the token is not in the vocabulary.
-
- ```
- >>> enc.decode_single_token_bytes(31373)
- b'hello'
- ```
- """
- return self._core_bpe.decode_single_token_bytes(token)
-
- def decode_tokens_bytes(self, tokens: list[int]) -> list[bytes]:
- """Decodes a list of tokens into a list of bytes.
-
- Useful for visualising tokenisation.
- >>> enc.decode_tokens_bytes([31373, 995])
- [b'hello', b' world']
- """
- return [self.decode_single_token_bytes(token) for token in tokens]
-
- # ====================
- # Miscellaneous
- # ====================
-
- def token_byte_values(self) -> list[bytes]:
- """Returns the list of all token byte values."""
- return self._core_bpe.token_byte_values()
-
- @property
- def eot_token(self) -> int:
- return self._special_tokens["<|endoftext|>"]
-
- @functools.cached_property
- def special_tokens_set(self) -> set[str]:
- return set(self._special_tokens.keys())
-
- @property
- def n_vocab(self) -> int:
- """For backwards compatibility. Prefer to use `enc.max_token_value + 1`."""
- return self.max_token_value + 1
-
- # ====================
- # Private
- # ====================
-
- def _encode_single_piece(self, text_or_bytes: Union[str, bytes]) -> list[int]:
- """Encodes text corresponding to bytes without a regex split.
-
- NOTE: this will not encode any special tokens.
-
- ```
- >>> enc.encode_single_piece("helloqqqq")
- [31373, 38227, 38227]
- ```
- """
- if isinstance(text_or_bytes, str):
- text_or_bytes = text_or_bytes.encode("utf-8")
- return self._core_bpe.encode_single_piece(text_or_bytes)
-
- def _encode_only_native_bpe(self, text: str) -> list[int]:
- """Encodes a string into tokens, but do regex splitting in Python."""
- _unused_pat = regex.compile(self._pat_str)
- ret = []
- for piece in regex.findall(_unused_pat, text):
- ret.extend(self._core_bpe.encode_single_piece(piece))
- return ret
-
- def _encode_bytes(self, text: bytes) -> list[int]:
- return self._core_bpe._encode_bytes(text)
-
-
-@functools.lru_cache(maxsize=128)
-def _special_token_regex(tokens: frozenset[str]) -> "regex.Pattern[str]":
- inner = "|".join(regex.escape(token) for token in tokens)
- return regex.compile(f"({inner})")
-
-
-def raise_disallowed_special_token(token: str) -> NoReturn:
- raise ValueError(
- f"Encountered text corresponding to disallowed special token {token!r}.\n"
- "If you want this text to be encoded as a special token, "
- f"pass it to `allowed_special`, e.g. `allowed_special={{{token!r}, ...}}`.\n"
- f"If you want this text to be encoded as normal text, disable the check for this token "
- f"by passing `disallowed_special=(enc.special_tokens_set - {{{token!r}}})`.\n"
- "To disable this check for all special tokens, pass `disallowed_special=()`.\n"
- )
diff --git a/tiktoken/load.py b/tiktoken/load.py
deleted file mode 100644
index 4a49ae4..0000000
--- a/tiktoken/load.py
+++ /dev/null
@@ -1,118 +0,0 @@
-from __future__ import annotations
-
-import base64
-import hashlib
-import json
-import os
-import tempfile
-import uuid
-
-import requests
-
-
-def read_file(blobpath: str) -> bytes:
- if not blobpath.startswith("http://") and not blobpath.startswith("https://"):
- try:
- import blobfile
- except ImportError:
- raise ImportError(
- "blobfile is not installed. Please install it by running `pip install blobfile`."
- )
- with blobfile.BlobFile(blobpath, "rb") as f:
- return f.read()
- # avoiding blobfile for public files helps avoid auth issues, like MFA prompts
- return requests.get(blobpath).content
-
-
-def read_file_cached(blobpath: str) -> bytes:
- if "TIKTOKEN_CACHE_DIR" in os.environ:
- cache_dir = os.environ["TIKTOKEN_CACHE_DIR"]
- elif "DATA_GYM_CACHE_DIR" in os.environ:
- cache_dir = os.environ["DATA_GYM_CACHE_DIR"]
- else:
- cache_dir = os.path.join(tempfile.gettempdir(), "data-gym-cache")
-
- if cache_dir == "":
- # disable caching
- return read_file(blobpath)
-
- cache_key = hashlib.sha1(blobpath.encode()).hexdigest()
-
- cache_path = os.path.join(cache_dir, cache_key)
- if os.path.exists(cache_path):
- with open(cache_path, "rb") as f:
- return f.read()
-
- contents = read_file(blobpath)
-
- os.makedirs(cache_dir, exist_ok=True)
- tmp_filename = cache_path + "." + str(uuid.uuid4()) + ".tmp"
- with open(tmp_filename, "wb") as f:
- f.write(contents)
- os.rename(tmp_filename, cache_path)
-
- return contents
-
-
-def data_gym_to_mergeable_bpe_ranks(
- vocab_bpe_file: str, encoder_json_file: str
-) -> dict[bytes, int]:
- # NB: do not add caching to this function
- rank_to_intbyte = [b for b in range(2**8) if chr(b).isprintable() and chr(b) != " "]
-
- data_gym_byte_to_byte = {chr(b): b for b in rank_to_intbyte}
- n = 0
- for b in range(2**8):
- if b not in rank_to_intbyte:
- rank_to_intbyte.append(b)
- data_gym_byte_to_byte[chr(2**8 + n)] = b
- n += 1
- assert len(rank_to_intbyte) == 2**8
-
- # vocab_bpe contains the merges along with associated ranks
- vocab_bpe_contents = read_file_cached(vocab_bpe_file).decode()
- bpe_merges = [tuple(merge_str.split()) for merge_str in vocab_bpe_contents.split("\n")[1:-1]]
-
- def decode_data_gym(value: str) -> bytes:
- return bytes(data_gym_byte_to_byte[b] for b in value)
-
- # add the single byte tokens
- bpe_ranks = {bytes([b]): i for i, b in enumerate(rank_to_intbyte)}
- # add the merged tokens
- n = len(bpe_ranks)
- for first, second in bpe_merges:
- bpe_ranks[decode_data_gym(first) + decode_data_gym(second)] = n
- n += 1
-
- # check that the encoder file matches the merges file
- # this sanity check is important since tiktoken assumes that ranks are ordered the same
- # as merge priority
- encoder_json = json.loads(read_file_cached(encoder_json_file))
- encoder_json_loaded = {decode_data_gym(k): v for k, v in encoder_json.items()}
- # drop these two special tokens if present, since they're not mergeable bpe tokens
- encoder_json_loaded.pop(b"<|endoftext|>", None)
- encoder_json_loaded.pop(b"<|startoftext|>", None)
- assert bpe_ranks == encoder_json_loaded
-
- return bpe_ranks
-
-
-def dump_tiktoken_bpe(bpe_ranks: dict[bytes, int], tiktoken_bpe_file: str) -> None:
- try:
- import blobfile
- except ImportError:
- raise ImportError(
- "blobfile is not installed. Please install it by running `pip install blobfile`."
- )
- with blobfile.BlobFile(tiktoken_bpe_file, "wb") as f:
- for token, rank in sorted(bpe_ranks.items(), key=lambda x: x[1]):
- f.write(base64.b64encode(token) + b" " + str(rank).encode() + b"\n")
-
-
-def load_tiktoken_bpe(tiktoken_bpe_file: str) -> dict[bytes, int]:
- # NB: do not add caching to this function
- contents = read_file_cached(tiktoken_bpe_file)
- return {
- base64.b64decode(token): int(rank)
- for token, rank in (line.split() for line in contents.splitlines() if line)
- }
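
As a point of reference for porting the loader to Rust, `dump_tiktoken_bpe` and `load_tiktoken_bpe` above pin down the on-disk `.tiktoken` format: one base64-encoded token, a single space, and a decimal rank per line. Below is a hypothetical Rust reader for that format, not part of this diff; it assumes a `base64` crate dependency (the 0.13-style `base64::decode` free function), which the Cargo.toml in this diff does not declare, and the helper name is illustrative.

// Hypothetical helper mirroring load_tiktoken_bpe: parse "<base64 token> <decimal rank>"
// lines into mergeable ranks. Assumes base64 = "0.13" in addition to the anyhow dependency.
use rustc_hash::FxHashMap as HashMap;

fn parse_tiktoken_bpe(contents: &str) -> anyhow::Result<HashMap<Vec<u8>, usize>> {
    let mut ranks: HashMap<Vec<u8>, usize> = HashMap::default();
    for line in contents.lines().filter(|l| !l.is_empty()) {
        let (token_b64, rank) = line
            .split_once(' ')
            .ok_or_else(|| anyhow::anyhow!("malformed rank line: {:?}", line))?;
        ranks.insert(base64::decode(token_b64)?, rank.parse::<usize>()?);
    }
    Ok(ranks)
}
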
diff --git a/tiktoken/model.py b/tiktoken/model.py
deleted file mode 100644
index b8af787..0000000
--- a/tiktoken/model.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import annotations
-
-from .core import Encoding
-from .registry import get_encoding
-
-# TODO: these will likely be replaced by an API endpoint
-MODEL_PREFIX_TO_ENCODING: dict[str, str] = {
- # chat
- "gpt-4-": "cl100k_base", # e.g., gpt-4-0314, etc., plus gpt-4-32k
- "gpt-3.5-turbo-": "cl100k_base", # e.g, gpt-3.5-turbo-0301, -0401, etc.
-}
-
-MODEL_TO_ENCODING: dict[str, str] = {
- # chat
- "gpt-4": "cl100k_base",
- "gpt-3.5-turbo": "cl100k_base",
- # text
- "text-davinci-003": "p50k_base",
- "text-davinci-002": "p50k_base",
- "text-davinci-001": "r50k_base",
- "text-curie-001": "r50k_base",
- "text-babbage-001": "r50k_base",
- "text-ada-001": "r50k_base",
- "davinci": "r50k_base",
- "curie": "r50k_base",
- "babbage": "r50k_base",
- "ada": "r50k_base",
- # code
- "code-davinci-002": "p50k_base",
- "code-davinci-001": "p50k_base",
- "code-cushman-002": "p50k_base",
- "code-cushman-001": "p50k_base",
- "davinci-codex": "p50k_base",
- "cushman-codex": "p50k_base",
- # edit
- "text-davinci-edit-001": "p50k_edit",
- "code-davinci-edit-001": "p50k_edit",
- # embeddings
- "text-embedding-ada-002": "cl100k_base",
- # old embeddings
- "text-similarity-davinci-001": "r50k_base",
- "text-similarity-curie-001": "r50k_base",
- "text-similarity-babbage-001": "r50k_base",
- "text-similarity-ada-001": "r50k_base",
- "text-search-davinci-doc-001": "r50k_base",
- "text-search-curie-doc-001": "r50k_base",
- "text-search-babbage-doc-001": "r50k_base",
- "text-search-ada-doc-001": "r50k_base",
- "code-search-babbage-code-001": "r50k_base",
- "code-search-ada-code-001": "r50k_base",
- # open source
- "gpt2": "gpt2",
-}
-
-
-def encoding_for_model(model_name: str) -> Encoding:
- """Returns the encoding used by a model."""
- encoding_name = None
- if model_name in MODEL_TO_ENCODING:
- encoding_name = MODEL_TO_ENCODING[model_name]
- else:
- # Check if the model matches a known prefix
- # Prefix matching avoids needing library updates for every model version release
- # Note that this can match on non-existent models (e.g., gpt-3.5-turbo-FAKE)
- for model_prefix, model_encoding_name in MODEL_PREFIX_TO_ENCODING.items():
- if model_name.startswith(model_prefix):
- return get_encoding(model_encoding_name)
-
- if encoding_name is None:
- raise KeyError(
- f"Could not automatically map {model_name} to a tokeniser. "
- "Please use `tiktok.get_encoding` to explicitly get the tokeniser you expect."
- ) from None
-
- return get_encoding(encoding_name)
diff --git a/tiktoken/py.typed b/tiktoken/py.typed
deleted file mode 100644
index e69de29..0000000
diff --git a/tiktoken/registry.py b/tiktoken/registry.py
deleted file mode 100644
index 52d8ec2..0000000
--- a/tiktoken/registry.py
+++ /dev/null
@@ -1,73 +0,0 @@
-from __future__ import annotations
-
-import importlib
-import pkgutil
-import threading
-from typing import Any, Callable, Optional
-
-import tiktoken_ext
-
-from tiktoken.core import Encoding
-
-_lock = threading.RLock()
-ENCODINGS: dict[str, Encoding] = {}
-ENCODING_CONSTRUCTORS: Optional[dict[str, Callable[[], dict[str, Any]]]] = None
-
-
-def _find_constructors() -> None:
- global ENCODING_CONSTRUCTORS
- with _lock:
- if ENCODING_CONSTRUCTORS is not None:
- return
- ENCODING_CONSTRUCTORS = {}
-
- # tiktoken_ext is a namespace package
- # submodules inside tiktoken_ext will be inspected for ENCODING_CONSTRUCTORS attributes
- # - we use namespace package pattern so `pkgutil.iter_modules` is fast
- # - it's a separate top-level package because namespace subpackages of non-namespace
- # packages don't quite do what you want with editable installs
- plugin_mods = pkgutil.iter_modules(tiktoken_ext.__path__, tiktoken_ext.__name__ + ".")
-
- for _, mod_name, _ in plugin_mods:
- mod = importlib.import_module(mod_name)
- try:
- constructors = mod.ENCODING_CONSTRUCTORS
- except AttributeError as e:
- raise ValueError(
- f"tiktoken plugin {mod_name} does not define ENCODING_CONSTRUCTORS"
- ) from e
- for enc_name, constructor in constructors.items():
- if enc_name in ENCODING_CONSTRUCTORS:
- raise ValueError(
- f"Duplicate encoding name {enc_name} in tiktoken plugin {mod_name}"
- )
- ENCODING_CONSTRUCTORS[enc_name] = constructor
-
-
-def get_encoding(encoding_name: str) -> Encoding:
- if encoding_name in ENCODINGS:
- return ENCODINGS[encoding_name]
-
- with _lock:
- if encoding_name in ENCODINGS:
- return ENCODINGS[encoding_name]
-
- if ENCODING_CONSTRUCTORS is None:
- _find_constructors()
- assert ENCODING_CONSTRUCTORS is not None
-
- if encoding_name not in ENCODING_CONSTRUCTORS:
- raise ValueError(f"Unknown encoding {encoding_name}")
-
- constructor = ENCODING_CONSTRUCTORS[encoding_name]
- enc = Encoding(**constructor())
- ENCODINGS[encoding_name] = enc
- return enc
-
-
-def list_encoding_names() -> list[str]:
- with _lock:
- if ENCODING_CONSTRUCTORS is None:
- _find_constructors()
- assert ENCODING_CONSTRUCTORS is not None
- return list(ENCODING_CONSTRUCTORS)
diff --git a/tiktoken_ext/openai_public.py b/tiktoken_ext/openai_public.py
deleted file mode 100644
index 16a6ec5..0000000
--- a/tiktoken_ext/openai_public.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from tiktoken.load import data_gym_to_mergeable_bpe_ranks, load_tiktoken_bpe
-
-ENDOFTEXT = "<|endoftext|>"
-FIM_PREFIX = "<|fim_prefix|>"
-FIM_MIDDLE = "<|fim_middle|>"
-FIM_SUFFIX = "<|fim_suffix|>"
-ENDOFPROMPT = "<|endofprompt|>"
-
-
-def gpt2():
- mergeable_ranks = data_gym_to_mergeable_bpe_ranks(
- vocab_bpe_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/vocab.bpe",
- encoder_json_file="https://openaipublic.blob.core.windows.net/gpt-2/encodings/main/encoder.json",
- )
- return {
- "name": "gpt2",
- "explicit_n_vocab": 50257,
- "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
- "mergeable_ranks": mergeable_ranks,
- "special_tokens": {"<|endoftext|>": 50256},
- }
-
-
-def r50k_base():
- mergeable_ranks = load_tiktoken_bpe(
- "https://openaipublic.blob.core.windows.net/encodings/r50k_base.tiktoken"
- )
- return {
- "name": "r50k_base",
- "explicit_n_vocab": 50257,
- "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
- "mergeable_ranks": mergeable_ranks,
- "special_tokens": {ENDOFTEXT: 50256},
- }
-
-
-def p50k_base():
- mergeable_ranks = load_tiktoken_bpe(
- "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken"
- )
- return {
- "name": "p50k_base",
- "explicit_n_vocab": 50281,
- "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
- "mergeable_ranks": mergeable_ranks,
- "special_tokens": {ENDOFTEXT: 50256},
- }
-
-
-def p50k_edit():
- mergeable_ranks = load_tiktoken_bpe(
- "https://openaipublic.blob.core.windows.net/encodings/p50k_base.tiktoken"
- )
- special_tokens = {ENDOFTEXT: 50256, FIM_PREFIX: 50281, FIM_MIDDLE: 50282, FIM_SUFFIX: 50283}
- return {
- "name": "p50k_edit",
- "pat_str": r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""",
- "mergeable_ranks": mergeable_ranks,
- "special_tokens": special_tokens,
- }
-
-
-def cl100k_base():
- mergeable_ranks = load_tiktoken_bpe(
- "https://openaipublic.blob.core.windows.net/encodings/cl100k_base.tiktoken"
- )
- special_tokens = {
- ENDOFTEXT: 100257,
- FIM_PREFIX: 100258,
- FIM_MIDDLE: 100259,
- FIM_SUFFIX: 100260,
- ENDOFPROMPT: 100276,
- }
- return {
- "name": "cl100k_base",
- "pat_str": r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}{1,3}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+""",
- "mergeable_ranks": mergeable_ranks,
- "special_tokens": special_tokens,
- }
-
-
-ENCODING_CONSTRUCTORS = {
- "gpt2": gpt2,
- "r50k_base": r50k_base,
- "p50k_base": p50k_base,
- "p50k_edit": p50k_edit,
- "cl100k_base": cl100k_base,
-}