Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import datetime 

2from typing import Optional 

3import deepspeech 

4import enum 

5import logging 

6import numpy as np 

7import os 

8 

9from askbob.speech.listener.listener import UtteranceService 

10 

11 

class TranscriptionEvent(enum.Enum):
    """Events emitted while transcribing: the start and end of a spoken utterance."""
    START_UTTERANCE = enum.auto()  # first audio frame of an utterance was received
    END_UTTERANCE = enum.auto()    # the utterance finished and a transcript is available

16 

17 

class Transcriber:
    """The transcriber performs speech-to-text on captured utterances spoken by the user."""

    def __init__(self, model: str, scorer: str, us: UtteranceService, save_path: Optional[str] = None):
        """Initialises the transcriber.

        Args:
            model (str): The path to the DeepSpeech model.
            scorer (str): The path to an external scorer ("" disables it).
            us (UtteranceService): The utterance service supplying audio frames.
            save_path (str, optional): Directory in which to save a WAV recording
                of each transcribed utterance. Defaults to None (nothing saved).
        """
        # Load the DeepSpeech model
        self.model = self.init_deepspeech(model, scorer)

        # Utterance service
        self.us = us
        self.save_path = save_path

    def init_deepspeech(self, model_path: str, scorer_path: str = "") -> deepspeech.Model:
        """Initialises the DeepSpeech model.

        Args:
            model_path (str): The path to the DeepSpeech model.
            scorer_path (str, optional): The path to an external scorer. Defaults to "".

        Returns:
            deepspeech.Model: The DeepSpeech model.
        """

        # BUG FIX: this previously logged scorer_path; the message describes the
        # model, and the scorer is logged separately below.
        logging.info("Initialising DeepSpeech model: %s", model_path)

        model = deepspeech.Model(model_path)
        if scorer_path:
            logging.info("Enabling the external scorer: %s", scorer_path)
            model.enableExternalScorer(scorer_path)

        return model

    def transcribe(self):
        """Transcribes spoken words.

        Yields:
            TranscriptionEvent: Whether the utterance has started or ended.
            str: The transcribed phrase spoken by the user
                (None for START_UTTERANCE events).
        """

        # Without an utterance service there is nothing to transcribe.
        if not self.us:
            return

        if self.save_path:
            os.makedirs(self.save_path, exist_ok=True)

        stream_context = self.model.createStream()
        wav_data = bytearray()
        last_event = None
        try:
            for utterance in self.us.utterances():
                if utterance is not None:
                    # Emit START_UTTERANCE only on the first frame of an
                    # utterance, not on every subsequent audio chunk.
                    if last_event != TranscriptionEvent.START_UTTERANCE:
                        logging.debug("Utterance started.")
                        last_event = TranscriptionEvent.START_UTTERANCE
                        yield last_event, None

                    stream_context.feedAudioContent(
                        np.frombuffer(utterance, np.int16))

                    if self.save_path:
                        wav_data.extend(utterance)
                else:
                    # A None frame marks the end of the current utterance.
                    # BUG FIX: corrected the "Utterence" typo in this message.
                    logging.debug("Utterance ended.")

                    text = stream_context.finishStream()
                    if text and self.save_path:
                        self.us.write_wav(os.path.join(self.save_path, datetime.datetime.now().strftime(
                            "%Y-%m-%d_%H-%M-%S - " + text + ".wav")), wav_data)

                    last_event = TranscriptionEvent.END_UTTERANCE
                    yield last_event, text

                    if self.save_path:
                        wav_data = bytearray()

                    # Begin a fresh stream for the next utterance.
                    stream_context = self.model.createStream()
        except KeyboardInterrupt:
            return