Add support for katakana English and new audio query types in VOICEVOX client

tuna2134 · tuna2134 · commit 5d0e8e23d810 · 2025-08-16T14:22:28.000Z
diff --git a/vvclient/client.py b/vvclient/client.py
@@ -56,6 +56,8 @@ async def create_audio_query(
         params = {"text": text, "speaker": speaker}
         if core_version:
             params["core_version"] = core_version
+        if not enable_katakana_english:
+            params["enable_katakana_english"] = "false"
         return AudioQuery(self.http, await self.http.create_audio_query(params))
 
     async def fetch_engine_version(self) -> str:
diff --git a/vvclient/http.py b/vvclient/http.py
@@ -6,6 +6,7 @@
 
 from .errors import NotFoundError, HTTPException
 from .types import AudioQueryType
+from .types.sing import AudioQuery as SingAudioQueryType, RequestPostAudioQuery
 
 
 class Route:
@@ -46,6 +47,25 @@ async def create_audio_query(
     ) -> AudioQueryType:
         return await self.request(Route("POST", "/audio_query"), params=params)
 
+    async def create_sing_audio_query(
+        self, params: Dict[str, Union[str, int]], payload: RequestPostAudioQuery
+    ) -> SingAudioQueryType:
+        """POST a sing audio query and return the result of the request.
+
+        Args:
+            params: Query-string parameters sent with the request.
+            payload: Request body holding the notes to be sung.
+
+        Returns:
+            Whatever ``self.request`` returns for this call.
+        """
+        # ``payload`` used to be accepted but never sent, so the engine got
+        # an empty body. NOTE(review): assumes ``request`` forwards ``json``
+        # to the HTTP session, and the VOICEVOX ENGINE API appears to name
+        # this endpoint ``/sing_frame_audio_query`` -- confirm both.
+        return await self.request(
+            Route("POST", "/sing_audio_query"), params=params, json=payload
+        )
+
     async def synthesis(
         self, params: Dict[str, Union[str, int]], audio_query: AudioQueryType
     ) -> bytes:
diff --git a/vvclient/types/sing/__init__.py b/vvclient/types/sing/__init__.py
@@ -0,0 +1 @@
+from .audio_query import *
diff --git a/vvclient/types/sing/audio_query.py b/vvclient/types/sing/audio_query.py
@@ -0,0 +1,43 @@
+from typing import List, Optional, TypedDict
+
+
+class Note(TypedDict):
+    """A single note of the score sent in a sing audio query request."""
+
+    id: str
+    # ``None`` marks a rest, which carries no pitch.
+    key: Optional[int]
+    # NOTE(review): presumably the note length counted in frames -- confirm.
+    frame_length: int
+    lyric: str
+
+
+class RequestPostAudioQuery(TypedDict):
+    """Request body for creating a sing audio query."""
+
+    notes: List[Note]
+
+
+class Phoneme(TypedDict):
+    """One phoneme entry inside a sing audio query response."""
+
+    phoneme: str
+    # NOTE(review): presumably the phoneme length counted in frames -- confirm.
+    frame_length: int
+    # NOTE(review): presumably the ``Note.id`` of the originating note -- confirm.
+    note_id: str
+
+
+class AudioQuery(TypedDict):
+    """Response body of a sing audio query.
+
+    The engine reports ``f0``, ``volume`` and ``volumeScale`` as JSON numbers
+    with fractional parts, so they are typed ``float`` rather than ``int``.
+    """
+
+    f0: List[float]
+    volume: List[float]
+    phonemes: List[Phoneme]
+    volumeScale: float
+    outputSamplingRate: int
+    outputStereo: bool