You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ORPA-pyOpenRPA/Sources/pyOpenRPA/Robot/Audio.py

237 lines
9.9 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import pyaudio
from pydub import AudioSegment
import threading
import wave
import time
from pyOpenRPA.Utils import Text
import os
def DeviceMicrophoneIndex():
"""L-,W+: Выполнить поиск устройства, с помощью которого можно будет выполнить захват c микрофона.
"""
p = pyaudio.PyAudio()
lDeviceInfoDict = p.get_default_input_device_info()
lDefaultIndexInt = lDeviceInfoDict["index"]
return lDefaultIndexInt
def DeviceSystemSoundIndex():
"""L-,W+: Выполнить поиск устройства, с помощью которого можно будет выполнить захват аудио, которое поступает из приложений. Например: аудиоконференции Zoom, whatsapp, telegram и т.д.
"""
p = pyaudio.PyAudio()
inInputBool = True
inIsLoopbackBool = True
if inInputBool == True:
lDeviceInfoDict = p.get_default_output_device_info()
lDefaultIndexInt = lDeviceInfoDict["index"]
lDefaultNameStr = lDeviceInfoDict["name"]
lCatchIndexInt = None
lCatchDiffRatioFloat = 0.0
for lItemDict in DeviceListGet():
lCompareBool = False
if lItemDict["MaxOutputChannelsInt"]>0:
if inIsLoopbackBool==True and lItemDict["HostApiStr"]=="Windows WASAPI": lCompareBool = True
elif inIsLoopbackBool==False: lCompareBool = True
if lCompareBool == True:
lDiffRationFloat = Text.SimilarityNoCase(in1Str=lDefaultNameStr, in2Str=lItemDict["NameStr"])
if lDiffRationFloat> lCatchDiffRatioFloat:
lCatchDiffRatioFloat = lDiffRationFloat
lCatchIndexInt=lItemDict["IndexInt"]
else:
lDeviceInfoDict = p.get_default_output_device_info()
lDefaultIndexInt = lDeviceInfoDict["index"]
lDefaultNameStr = lDeviceInfoDict["name"]
lCatchIndexInt = None
lCatchDiffRatioFloat = 0.0
for lItemDict in DeviceListGet():
lCompareBool = False
if lItemDict["MaxInputChannelsInt"]>0:
if inIsLoopbackBool==True and lItemDict["HostApiStr"]=="Windows WASAPI": lCompareBool = True
elif inIsLoopbackBool==False: lCompareBool = True
if lCompareBool == True:
lDiffRationFloat = Text.SimilarityNoCase(in1Str=lDefaultNameStr, in2Str=lItemDict["NameStr"])
if lDiffRationFloat> lCatchDiffRatioFloat: lCatchIndexInt=lItemDict["IndexInt"]
return lCatchIndexInt
def DeviceListGet():
"""L-,W+: Вернуть список аудио устройст (входящих и исходящих, микрофонов и динамиков).
from pyOpenRPA.Robot import Audio
Audio.DeviceListGet()
:return: [{"IndexInt":1, "NameStr": "",
"HostApiInt": 0, "HostApiStr": "MME"|"Windows WASAPI"|"Windows WDM-KS",
"MaxInputChannelsInt": 0, "MaxOutputChannelsInt": 0,
"DefaultSampleRateFloat": 44100.0
},...]
:rtype: list
"""
l_result = []
p = pyaudio.PyAudio()
for i in range(0, p.get_device_count()):
l_info = p.get_device_info_by_index(i)
l_info_dict = {
"IndexInt":l_info["index"],
"NameStr": l_info["name"],
"MaxInputChannelsInt": l_info["maxInputChannels"],
"MaxOutputChannelsInt": l_info["maxOutputChannels"],
"HostApiInt": l_info["hostApi"],
"DefaultSampleRateFloat": l_info["defaultSampleRate"],
"HostApiStr": p.get_host_api_info_by_index(l_info["hostApi"])["name"] #"MME"|"Windows WASAPI"|"Windows WDM-KS"
}
l_result.append(l_info_dict)
return l_result
class Recorder:
mT=[]
mStatusStr = "0_READY" # "0_READY", "1_RECORDING"
mAudio = None
mCaptureThread = None
mStream = None
mDeviceInt = None
mChannelCountInt = None
mFramesInt = 512
mRecordedFramesList = []
mSampleRateInt = None
mSampleSizeInt = None
mCaptureBool = True
mFolderPathStr = None
mFileNameStr = None
mFileFormatStr = None
mFileAvailableChunkInt = None
mFileNameList=None
mChunkSecFloat = None
mChunkSilentSecFloat = None
mStartSecFloat = None
mStartChunkSecFloat = None
mDurationSecFloat = None
mThresholdInt = 500
mSilentLastCheckTimeFloat = None
mIsMicrophoneBool=None
def __init__(self, inDeviceInt=None):
self.mDeviceInt = inDeviceInt
if inDeviceInt == None: inDeviceInt = DeviceSystemSoundIndex()
self.mDeviceInt = inDeviceInt
if DeviceListGet()[inDeviceInt]["MaxInputChannelsInt"]>0: self.mIsMicrophoneBool = True
else: self.mIsMicrophoneBool = False
def StatusGet(self):
return self.mStatusStr
def CaptureStart(self, inFolderPathStr="",inFileNameStr = "out", inFileFormatStr = "mp3", inDurationSecFloat = None, inChunkSecFloat = 300.0, inChunkSilentSecFloat = 10.0):
# CHECK AUX.mp3
self.mFileNameList=[]
self.mRecordedFramesList=[]
self.mStatusStr = "1_RECORDING"
if inChunkSecFloat != None or inChunkSilentSecFloat != None: self.mFileAvailableChunkInt = 0
self.mDurationSecFloat = inDurationSecFloat
self.mChunkSecFloat = inChunkSecFloat
self.mChunkSilentSecFloat = inChunkSilentSecFloat
self.mSilentLastCheckTimeFloat=time.time()
self.mFolderPathStr = inFolderPathStr
self.mFileNameStr = inFileNameStr
self.mFileFormatStr = inFileFormatStr
self.mAudio = pyaudio.PyAudio()
self.mSampleSizeInt = self.mAudio.get_sample_size(pyaudio.paInt16)
lDeviceInfoDict = self.mAudio.get_device_info_by_index(self.mDeviceInt)
#Open stream
self.mSampleRateInt = int(lDeviceInfoDict["defaultSampleRate"])
self.mChannelCountInt = lDeviceInfoDict["maxInputChannels"] if (lDeviceInfoDict["maxOutputChannels"] < lDeviceInfoDict["maxInputChannels"]) else lDeviceInfoDict["maxOutputChannels"]
self.mStream = self.mAudio.open(format = pyaudio.paInt16,
channels = self.mChannelCountInt,
rate = self.mSampleRateInt,
input = True,
frames_per_buffer = self.mFramesInt,
input_device_index = lDeviceInfoDict["index"],
as_loopback = not self.mIsMicrophoneBool)
self.mCaptureThread = threading.Thread(target=self.__Capture__)
self.mStartSecFloat = time.time()
self.mStartChunkSecFloat = self.mStartSecFloat
self.mCaptureThread.start()
def __Capture__(self):
while self.mCaptureBool==True:
self.mRecordedFramesList.append(self.mStream.read(self.mFramesInt))
self.__TriggerCheck__()
def CaptureStop(self, inWaitStream=True):
self.mCaptureBool=False
if inWaitStream == True: self.mCaptureThread.join()
self.mStream.stop_stream()
self.mStream.close()
#Close module
self.mAudio.terminate()
self.CaptureChunk()
self.mStatusStr = "0_READY"
def CaptureChunk(self):
lFileNameStr = self.mFileNameStr
if self.mFileAvailableChunkInt!=None:
lFileNameStr+=f"_{self.mFileAvailableChunkInt:05}"
self.mFileAvailableChunkInt = self.mFileAvailableChunkInt + 1
# Advanced usage, if you have raw audio data:
sound = AudioSegment(
# raw audio data (bytes)
data=b''.join(self.mRecordedFramesList),
# 2 byte (16 bit) samples
sample_width=self.mSampleSizeInt,
# 44.1 kHz frame rate
frame_rate=self.mSampleRateInt,
# stereo
channels=self.mChannelCountInt
)
sound.export(os.path.join(self.mFolderPathStr,f"{lFileNameStr}.{self.mFileFormatStr}"), format=f"{self.mFileFormatStr}")
self.mFileNameList.append(f"{lFileNameStr}.{self.mFileFormatStr}")
self.mRecordedFramesList = []
self.mStartChunkSecFloat = time.time()
def FileListGet(self):
return self.mFileNameList
def FileLastGet(self):
return self.mFileNameList[-1]
def __Callback__(self, inDefList):
pass
def __CallbackIsSilent__(self):
pass
def __CallbackIsChunked__(self):
pass
def __CallbackIsStopped__(self):
pass
def IsSilent(self, inLastSecFloat=None):
"Returns 'True' if below the 'silent' threshold"
self.mSilentLastCheckTimeFloat = time.time()
if inLastSecFloat == None: inLastSecFloat = self.mChunkSilentSecFloat
lFrameLenInt = int(self.mSampleSizeInt*inLastSecFloat)
if lFrameLenInt<len(self.mRecordedFramesList): lData = self.mRecordedFramesList[-lFrameLenInt:]
else: lData = self.mRecordedFramesList
return max(lData) < self.mThresholdInt
def __TriggerCheck__(self):
"""L-,W+: Контроль записи / остановки аудио по следующим критериям:
- Общая длительность,
- Максимальная длительность части,
- Максимальная длит тишины (часть),
- Максимальная длительность тишины (остановка),
"""
# Проверка по длине записи (CHUNK)
if self.mChunkSecFloat != None and time.time() - self.mStartChunkSecFloat > self.mChunkSecFloat: self.CaptureChunk()
# Остановка записи по максимальной длине
if self.mDurationSecFloat != None and time.time() - self.mStartSecFloat > self.mDurationSecFloat: self.CaptureStop(inWaitStream=False)
# Проверка тишины
#if self.mChunkSilentSecFloat != None and time.time() - self.mSilentLastCheckTimeFloat and self.IsSilent(): self.mT.append("ТИШИНА!!")