Implementation of speech recognition code based on Python

Time:2020-9-21

This article shows how to implement voice-input recognition in Python. The example code is walked through in detail and should be a useful reference for study or work; readers who need it can follow along.

1、 Introduction

1. The first step is to save the recording locally

2. Call Baidu speech recognition SDK

Note: Baidu speech recognition places requirements on the audio source: the bit rate must be 256 kbps (16 kHz sample rate × 16-bit samples × 1 channel).

2、 Code

#Install necessary library
pip install baidu-aip  #Baidu SDK
pip install pyaudio
import wave
import pyaudio
from aip import AipSpeech

def record():
    """Record 8 seconds of microphone audio and save it as ``output.wav``.

    The capture settings (mono, 16 kHz, 16-bit samples) give the 256 kbps
    bit rate the Baidu recognizer asks for: 16000 * 16 * 1 = 256000 bit/s.

    The input stream and the PyAudio instance are released even when
    reading from the device fails, and the WAV file is closed even when
    writing fails.
    """
    # Number of frames fetched per read call
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    # To satisfy Baidu, the following two parameters must be set so that
    # the bit rate is 256 kbps
    CHANNELS = 1
    RATE = 16000
    # Recording time, in seconds
    RECORD_SECONDS = 8
    # The name of the file to write to
    WAVE_OUTPUT_FILENAME = "output.wav"

    # Create the PyAudio object
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive,
        # instead of calling into it after terminate()
        sample_width = p.get_sample_size(FORMAT)

        # Open the input data stream
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            # Read whole chunks until RECORD_SECONDS of audio is captured
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            print("* done recording")
        finally:
            # Stop and close the stream even if a read failed
            stream.stop_stream()
            stream.close()
    finally:
        # Always release the audio backend
        p.terminate()

    # Write the recording; the context manager closes the file on any exit
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
def ASR():
    """Record a clip with ``record()`` and print Baidu's recognition response.

    The audio is read back from ``output.wav`` and sent as 16 kHz WAV data;
    the raw response object returned by the client is printed unchanged.
    """
    # Capture fresh audio into output.wav
    record()

    # Your appid AK SK
    app_id = '****'
    api_key = '****'
    secret_key = '****'
    speech_client = AipSpeech(app_id, api_key, secret_key)

    # Load the recorded file as raw bytes
    with open('output.wav', 'rb') as audio_file:
        audio_bytes = audio_file.read()

    # Identify the local file
    # NOTE(review): dev_pid presumably selects the recognition model; confirm
    # the meaning of 1536 against the Baidu documentation
    options = {'dev_pid': 1536}
    response = speech_client.asr(audio_bytes, 'wav', 16000, options)

    print(response)
# Script entry point: run a single record-and-recognize cycle
if __name__ == "__main__":
    ASR()

3、 Voice command control program

import wave
import pyaudio
from aip import AipSpeech
import win32api

def record():
    """Record 8 seconds of microphone audio and save it as ``output.wav``.

    The capture settings (mono, 16 kHz, 16-bit samples) give the 256 kbps
    bit rate the Baidu recognizer asks for: 16000 * 16 * 1 = 256000 bit/s.

    The input stream and the PyAudio instance are released even when
    reading from the device fails, and the WAV file is closed even when
    writing fails.
    """
    # Number of frames fetched per read call
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    # To satisfy Baidu, the following two parameters must be set so that
    # the bit rate is 256 kbps
    CHANNELS = 1
    RATE = 16000
    # Recording time, in seconds
    RECORD_SECONDS = 8
    # The name of the file to write to
    WAVE_OUTPUT_FILENAME = "output.wav"

    # Create the PyAudio object
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive,
        # instead of calling into it after terminate()
        sample_width = p.get_sample_size(FORMAT)

        # Open the input data stream
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("* recording")
            # Read whole chunks until RECORD_SECONDS of audio is captured
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            print("* done recording")
        finally:
            # Stop and close the stream even if a read failed
            stream.stop_stream()
            stream.close()
    finally:
        # Always release the audio backend
        p.terminate()

    # Write the recording; the context manager closes the file on any exit
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
def ASR():
    """Record a clip, send it to Baidu speech recognition, return the outcome.

    Returns:
        The first entry of the response's "result" list when the response's
        "err_no" is missing or falsy; otherwise the "err_no" value itself.
    """
    # Capture fresh audio into output.wav
    record()

    # Your appid AK SK
    app_id = '****'
    api_key = '****'
    secret_key = '****'
    speech_client = AipSpeech(app_id, api_key, secret_key)

    # Load the recorded file as raw bytes
    with open('output.wav', 'rb') as audio_file:
        audio_bytes = audio_file.read()

    # Identify the local file
    # NOTE(review): dev_pid presumably selects the recognition model; confirm
    # the meaning of 1536 against the Baidu documentation
    options = {'dev_pid': 1536}
    response = speech_client.asr(audio_bytes, 'wav', 16000, options)

    # A truthy err_no signals failure and is handed back to the caller as-is
    error_code = response.get("err_no")
    if error_code:
        return error_code
    return response.get("result")[0]

def control(order):
    """Launch the program mapped to a recognized voice command.

    Args:
        order: The recognized command text. It must equal one of the keys
            of the command table below exactly.

    Prints "voice command failed" when the command is not in the table.
    """
    # The program path corresponding to each command.
    # NOTE(review): the keys must match the recognizer's output verbatim.
    # They are reproduced here as they appear in the (translated) article;
    # the original phrases were presumably Chinese -- confirm before use.
    # The paths are machine-specific examples and must be adapted.
    open_order = {
        "open QQ": r"C:\Program Files (x86)\Tencent\QQ\Bin\QQScLauncher.exe",
        "Open NOTEPAD": r"D:\Notepad++\Notepad++.exe",
        "open browser": r"C:\Users\ffm11\AppData\Roaming\360se6\Application\360se.exe",
    }
    res = open_order.get(order)
    if res:
        # win32api.ShellExecute(hwnd, op, file, params, dir, bShow):
        #   hwnd   - parent window handle, used as the owner of any error
        #            message box shown during the call (0 = none)
        #   op     - operation to perform: "open" runs the program or opens
        #            the file/folder, "print" prints the file, "explore"
        #            browses the folder
        #   file   - the file to open, program to execute, or folder to browse
        #   params - command-line parameters when `file` is an executable,
        #            otherwise empty
        #   dir    - working directory for the launched program
        #   bShow  - initial display mode of the program window when `file`
        #            is an executable; 1 is SW_SHOWNORMAL
        # Common display-mode constants:
        #   SW_HIDE            hide the window and activate another window
        #   SW_MINIMIZE        minimize the window and activate another window
        #   SW_RESTORE         show at original size and position, activated
        #   SW_SHOW            show at current size and position, activated
        #   SW_SHOWMAXIMIZED   maximize the window and activate it
        #   SW_SHOWMINIMIZED   minimize the window and activate it
        #   SW_SHOWMINNOACTIVE minimize without changing the active window
        #   SW_SHOWNA          show at current size and position without
        #                      changing the active window
        #   SW_SHOWNOACTIVATE  show at most recent size and position without
        #                      changing the active window
        win32api.ShellExecute(0, 'open', res, '', '', 1)
    else:
        print("voice command failed")

if __name__ == '__main__':
    order = ASR()
    # ASR() returns the recognized text on success but the numeric err_no on
    # failure; only text has rstrip(), so check the type before stripping
    # the trailing full stop the recognizer appends.
    if isinstance(order, str):
        control(order.rstrip("。"))
    else:
        print("speech recognition failed, err_no:", order)

That is the whole content of this article. I hope it helps with your study, and I hope you will continue to support DevelopPaper.