README.md aktualisiert

This commit is contained in:
2026-01-28 16:57:54 +00:00
parent d3ed9d2e97
commit 4e791c19dc

154
README.md
View File

@@ -219,144 +219,28 @@ arecord -D plughw:1,0 --format S16_LE --rate 16000 --channels 1 --duration 5 tes
# ausfuehren mit
python3 test_simple.py test_mono.wav
#### test_simple.py
# vorbereitungen
```
#!/usr/bin/env python3
# Transcribe a WAV file with Vosk and print the recognition results.
#
# Usage: python3 test_simple.py <file.wav>
# The file must be mono, 2 bytes per sample, uncompressed PCM; anything else
# is rejected with an error message and exit status 1.
import wave
import sys

from vosk import Model, KaldiRecognizer, SetLogLevel

# You can set log level to -1 to disable debug messages
SetLogLevel(0)

# The context manager closes the WAV file on every exit path, including the
# early sys.exit() below.
with wave.open(sys.argv[1], "rb") as wf:
    if wf.getnchannels() != 1 or wf.getsampwidth() != 2 or wf.getcomptype() != "NONE":
        print("Audio file must be WAV format mono PCM.")
        sys.exit(1)

    model = Model("model")  # lang="en-us")
    # You can also init model by name or with a folder path
    # model = Model(model_name="vosk-model-en-us-0.21")
    # model = Model("models/en")

    # The recognizer is configured with the sample rate of the input file.
    rec = KaldiRecognizer(model, wf.getframerate())
    rec.SetWords(True)
    rec.SetPartialWords(True)

    # Feed the audio in chunks of 4000 frames until the file is exhausted.
    while True:
        data = wf.readframes(4000)
        if not data:
            break
        if rec.AcceptWaveform(data):
            print(rec.Result())
        else:
            print(rec.PartialResult())

    print(rec.FinalResult())
mkdir ~/vosk-models
```
### microphone.py
Dort alle `*.py`-Dateien und die per `chmod 755` ausführbar gemachte `start_voice.sh` aus diesem Repo einfügen.
Damit die Spracherkennung ohne weitere Interaktion gestartet wird, muss außerdem per `sudo nano /etc/systemd/system/voicerec.service` ein Service angelegt und wie folgt befüllt werden:
```
#!/usr/bin/env python3
[Unit]
Description=starten der services fuer die spracherkennung
Wants=network-online.target
After=network-online.target
# prerequisites: as described in https://alphacephei.com/vosk/install and also python module `sounddevice` (simply run command `pip install sounddevice`)
# Example usage using Dutch (nl) recognition model: `python test_microphone.py -m nl`
# For more help run: `python test_microphone.py -h`
[Service]
Type=simple
User=pi
Restart=always
RestartSec=10
StartLimitBurst=10000
WorkingDirectory=/home/pi
ExecStart=/home/pi/vosk-models/start_voice.sh
import argparse
import queue
import sys
import sounddevice as sd
from vosk import Model, KaldiRecognizer
# Hand-off queue for raw audio: callback() puts each captured block here and
# the recognition loop takes the blocks out again with q.get().
q = queue.Queue()
def int_or_str(text):
    """Helper function for argument parsing.

    Used as the ``type=`` converter of the ``--device`` option, which accepts
    either a numeric ID or a substring.

    Args:
        text: The raw command-line value.

    Returns:
        ``int(text)`` when the value parses as an integer, otherwise ``text``
        unchanged.
    """
    try:
        return int(text)
    except ValueError:
        return text
def callback(indata, frames, time, status):
    """This is called (from a separate thread) for each audio block.

    Args:
        indata: The captured audio block; it is copied into a ``bytes``
            object before being queued.
        frames: Unused here.
        time: Unused here.
        status: Printed to stderr when truthy.
    """
    if status:
        print(status, file=sys.stderr)
    # Queue a bytes copy of the block for the consumer reading from q.
    q.put(bytes(indata))
# Stage 1: a minimal parser that only knows -l/--list-devices. Using
# parse_known_args() lets the device list be printed and the program exit
# before any of the other options are parsed.
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
    "-l", "--list-devices", action="store_true",
    help="show list of audio devices and exit")
args, remaining = parser.parse_known_args()
if args.list_devices:
    print(sd.query_devices())
    parser.exit(0)

# Stage 2: the full parser. It inherits -l from stage 1 via parents= and
# parses only the arguments that stage 1 did not consume.
parser = argparse.ArgumentParser(
    description=__doc__,
    formatter_class=argparse.RawDescriptionHelpFormatter,
    parents=[parser])
parser.add_argument(
    "-f", "--filename", type=str, metavar="FILENAME",
    help="audio file to store recording to")
parser.add_argument(
    "-d", "--device", type=int_or_str,
    help="input device (numeric ID or substring)")
parser.add_argument(
    "-r", "--samplerate", type=int, help="sampling rate")
# The help text names "de" because that is the language the script loads
# when -m/--model is omitted.
parser.add_argument(
    "-m", "--model", type=str,
    help="language model; e.g. en-us, fr, nl; default is de")
args = parser.parse_args(remaining)
# Main recognition loop: capture microphone audio, feed it to Vosk, print the
# results and react to the spoken keywords. Runs until Ctrl+C or an error.

# Initialised before the try so the finally-clause can always test it, even
# when an exception is raised before the dump file is opened.
dump_fn = None
try:
    if args.samplerate is None:
        device_info = sd.query_devices(args.device, "input")
        # soundfile expects an int, sounddevice provides a float:
        args.samplerate = int(device_info["default_samplerate"])

    # German ("de") is the recognition language when -m/--model is not given.
    model = Model(lang="de" if args.model is None else args.model)

    # Optional dump of the captured audio, written as received.
    if args.filename:
        dump_fn = open(args.filename, "wb")

    with sd.RawInputStream(samplerate=args.samplerate, blocksize=8000,
                           device=args.device, dtype="int16", channels=1,
                           callback=callback):
        print("#" * 80)
        print("Press Ctrl+C to stop the recording")
        print("#" * 80)

        rec = KaldiRecognizer(model, args.samplerate)
        while True:
            data = q.get()
            if rec.AcceptWaveform(data):
                print(rec.Result())
            else:
                partial_text = rec.PartialResult()
                print(partial_text)
                # Keyword check lives in this branch because the partial
                # result is only available here.
                # NOTE(review): these are plain substring tests on the raw
                # result string, so "an"/"aus" also match inside longer
                # words - confirm whether word-level matching is wanted.
                if "licht" in partial_text and "hex" in partial_text:
                    if "an" in partial_text:
                        print("SCHALTER EIN!")
                    if "aus" in partial_text:
                        print("SCHALTER AUS!")
            if dump_fn is not None:
                dump_fn.write(data)
except KeyboardInterrupt:
    print("\nDone")
    parser.exit(0)
except Exception as e:
    # The text is passed as parser.exit()'s first argument (the exit status),
    # exactly as before.
    parser.exit(type(e).__name__ + ": " + str(e))
finally:
    # Close the dump file on every exit path; parser.exit() raises SystemExit,
    # so this still runs after the handlers above.
    if dump_fn is not None:
        dump_fn.close()
[Install]
WantedBy=multi-user.target
EOF
```