From a57b1df80eb3a40f2ff2d323e29ccb77ad8c64f4 Mon Sep 17 00:00:00 2001 From: Gabriel Tofvesson Date: Wed, 22 Mar 2023 00:08:52 +0100 Subject: [PATCH] Add token counter --- .gitmodules | 3 ++ Cargo.lock | 153 ++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 1 + tiktoken | 1 + 4 files changed, 158 insertions(+) create mode 160000 tiktoken diff --git a/.gitmodules b/.gitmodules index 984fd01..c10af2e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "OpenAI-Rust"] path = OpenAI-Rust url = git@github.com:GabrielTofvesson/OpenAI-Rust.git +[submodule "tiktoken"] + path = tiktoken + url = git@github.com:GabrielTofvesson/tiktoken.git diff --git a/Cargo.lock b/Cargo.lock index 152a21a..36f0fde 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,15 @@ dependencies = [ "opaque-debug", ] +[[package]] +name = "aho-corasick" +version = "0.7.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac" +dependencies = [ + "memchr", +] + [[package]] name = "alsa" version = "0.7.0" @@ -92,6 +101,21 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -107,6 +131,18 @@ dependencies = [ "generic-array", ] +[[package]] +name = "bstr" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d4260bcc2e8fc9df1eac4919a720effeb63a3f0952f5bf4944adfa18897f09" +dependencies = [ + "memchr", + "once_cell", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.12.0" @@ -452,6 +488,16 @@ dependencies = [ "libc", ] +[[package]] +name = "fancy-regex" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0678ab2d46fa5195aaf59ad034c083d351377d4af57f3e073c074d0da3e3c766" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "fastrand" version = "1.9.0" @@ -730,6 +776,12 @@ dependencies = [ "hashbrown", ] +[[package]] +name = "indoc" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" + [[package]] name = "instant" version = "0.1.12" @@ -893,6 +945,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "mime" version = "0.3.16" @@ -1255,6 +1316,66 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "pyo3" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "268be0c73583c183f2b14052337465768c07726936a260f480f0857cb95ba543" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28fcd1e73f06ec85bf3280c48c67e731d8290ad3d730f8be9dc07946923005c8" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f6cb136e222e49115b3c51c32792886defbfb0adead26a688142b346a0b9ffc" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94144a1266e236b1c932682136dc35a9dee8d3589728f68130c7c3861ef96b28" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.17.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8df9be978a2d2f0cdebabb03206ed73b11314701a5bfe71b0d753b81997777f" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "quote" version = "1.0.26" @@ -1291,9 +1412,17 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733" dependencies = [ + "aho-corasick", + "memchr", "regex-syntax", ] +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" + [[package]] name = "regex-syntax" version = "0.6.28" @@ -1615,6 +1744,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "target-lexicon" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ae9980cab1db3fceee2f6c6f643d5d8de2997c58ee8d25fb0cc8a9e9e7348e5" + [[package]] name = "tempfile" version = "3.4.0" @@ -1648,6 +1783,17 @@ dependencies = [ "syn 2.0.0", ] +[[package]] +name = "tiktoken" +version = "0.3.2" +dependencies = [ + "bstr", + "fancy-regex", + "pyo3", + "regex", + "rustc-hash", +] + [[package]] name = "time" version = "0.3.20" @@ -1819,6 +1965,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unindent" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" + [[package]] name = "url" version = "2.3.1" @@ -1850,6 +2002,7 @@ dependencies = [ "elevenlabs_rs", "openai_rs", "rodio", + "tiktoken", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 93f7106..8ccca3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ edition = "2021" anyhow = "1.0.70" openai_rs = { path = "./OpenAI-Rust" } elevenlabs_rs = { path = "./elevenlabs-api-rust" } +tiktoken = { path = "./tiktoken" } tokio = "1.26.0" rodio = "0.17.1" diff --git a/tiktoken b/tiktoken new file mode 160000 index 0000000..82facf9 --- /dev/null +++ b/tiktoken @@ -0,0 +1 @@ +Subproject commit 82facf911fe41f39f13565be73899eab1e1761ef