95 lines
3.4 KiB
Rust
95 lines
3.4 KiB
Rust
use elevenlabs_rs::{elevenlabs_api::ElevenLabsAPI, model::{tts::TTSMessage, voice::VoiceSettings}};
|
|
use openai_rs::{context::Context, chat::{ChatMessage, Role, ChatHistoryBuilder}, transcription::{TranscriptionRequestBuilder, AudioFile}, translation::TranslationRequestBuilder};
|
|
use tokio::{fs::File, io::AsyncWriteExt};
|
|
|
|
/// Reads the file at `name` and returns its contents with leading and trailing
/// whitespace removed.
///
/// # Errors
/// Returns an error if the file cannot be read into a `String`; the error
/// carries the offending path as context so the caller can tell which file
/// was missing or unreadable.
fn get_file(name: &str) -> anyhow::Result<String> {
    // `anyhow::Context` is invoked by path rather than imported because the
    // bare name `Context` is already taken in this file by
    // `openai_rs::context::Context`.
    let contents = anyhow::Context::with_context(std::fs::read_to_string(name), || {
        format!("failed to read `{}`", name)
    })?;
    Ok(contents.trim().to_owned())
}
|
|
|
|
fn get_openai() -> anyhow::Result<Context> {
|
|
Ok(Context::new(get_file("openai.key")?))
|
|
}
|
|
|
|
fn get_elevenlabs() -> anyhow::Result<ElevenLabsAPI> {
|
|
Ok(ElevenLabsAPI::new(get_file("elevenlabs.key")?))
|
|
}
|
|
|
|
/// Voice identifier passed to `ElevenLabsAPI::generate_tts` when a reply is synthesized.
const VOICE_ID: &str = "u339B6b9cariBZ7Vw3q4";
|
|
/// Path of the audio file that `main` opens and hands to `transform_prompt`.
const INPUT_FILE: &str = "input_prompt.mp3";
|
|
|
|
async fn transform_prompt(openai: &Context, prompt: File) -> anyhow::Result<String> {
|
|
Ok(openai.create_translation(TranslationRequestBuilder::default().prompt("[English]").model("whisper-1").file(AudioFile::MP3(prompt)).build()?).await?.text)
|
|
}
|
|
|
|
async fn generate_response(openai: &Context, elevenlabs: &ElevenLabsAPI, history: &mut Vec<ChatMessage>) {
|
|
let response = openai.create_chat_completion(
|
|
ChatHistoryBuilder::default()
|
|
.messages(history.clone())
|
|
.model("gpt-3.5-turbo")
|
|
.build()
|
|
.unwrap()
|
|
).await;
|
|
|
|
if let Ok(mut response) = response {
|
|
let response = response.choices.remove(0).message;
|
|
let tts = elevenlabs.generate_tts(VOICE_ID.to_owned(), TTSMessage::new(response.content.clone(), VoiceSettings {
|
|
stability: 0.5,
|
|
similarity_boost: 0.75,
|
|
}));
|
|
history.push(response);
|
|
|
|
let tts = tts.await;
|
|
if let Ok(tts) = tts {
|
|
File::create("response.wav").await.unwrap().write_all(tts.audio()[0].as_slice()).await.unwrap();
|
|
} else {
|
|
println!("{:?}", tts);
|
|
}
|
|
} else {
|
|
println!("{:?}", response);
|
|
}
|
|
let response = openai.create_chat_completion(
|
|
ChatHistoryBuilder::default()
|
|
.messages(history.clone())
|
|
.model("gpt-3.5-turbo")
|
|
.build()
|
|
.unwrap()
|
|
).await;
|
|
|
|
if let Ok(mut response) = response {
|
|
let response = response.choices.remove(0).message;
|
|
let tts = elevenlabs.generate_tts(VOICE_ID.to_owned(), TTSMessage::new(response.content.clone(), VoiceSettings {
|
|
stability: 0.5,
|
|
similarity_boost: 0.75,
|
|
}));
|
|
history.push(response);
|
|
|
|
let tts = tts.await;
|
|
if let Ok(tts) = tts {
|
|
File::create("response.wav").await.unwrap().write_all(tts.audio()[0].as_slice()).await.unwrap();
|
|
} else {
|
|
println!("{:?}", tts);
|
|
}
|
|
} else {
|
|
println!("{:?}", response);
|
|
}
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() {
|
|
let openai = get_openai().unwrap();
|
|
let elevenlabs = get_elevenlabs().unwrap();
|
|
|
|
// Start of chat
|
|
let mut history: Vec<ChatMessage> = Vec::new();
|
|
history.push(ChatMessage::new(Role::System, "You are a voice assistant; Give helpful, accurate and concise responses. Your name is Jarvis. You are currently only capable of responding to prompts."));
|
|
|
|
let response = transform_prompt(&openai, File::open(INPUT_FILE).await.unwrap()).await;
|
|
|
|
if let Ok(response) = response {
|
|
history.push(ChatMessage::new(Role::User, response));
|
|
generate_response(&openai, &elevenlabs, &mut history).await;
|
|
} else {
|
|
println!("{:?}", response);
|
|
}
|
|
}
|