"""Audio device discovery and capture helpers (pyOpenRPA.Robot.Audio, work in progress).

The ``L-,W+`` prefix in the docstrings below is the platform marker used by
the original source and is kept verbatim.
"""
import threading
import wave

# pyaudio / pydub are only needed once a device is actually opened or a
# non-WAV file is exported, so a missing package is reported at that point
# instead of breaking ``import`` of this module.
try:
    import pyaudio
except ImportError:
    pyaudio = None

try:
    from pydub import AudioSegment
except ImportError:
    AudioSegment = None


def _PyAudioModuleGet():
    """Return the imported ``pyaudio`` module or raise ImportError if it is missing."""
    if pyaudio is None:
        raise ImportError("pyaudio is required to work with audio devices")
    return pyaudio


def DeviceSystemSoundSearchIndex():
    """L-,W+: Search for the device that can capture the audio produced by
    applications (for example Zoom, WhatsApp or Telegram conference calls).

    Not implemented yet.
    """
    pass


def DeviceListGet():
    """L-,W+: Return the list of audio devices (inputs and outputs, microphones and speakers).

    from pyOpenRPA.Robot import Audio
    Audio.DeviceListGet()

    :return: [{"IndexInt":1, "NameStr": "",
        "HostApiInt": 0, "HostApiStr": "MME"|"Windows WASAPI"|"Windows WDM-KS",
        "MaxInputChannelsInt": 0, "MaxOutputChannelsInt": 0,
        "DefaultSampleRateFloat": 44100.0
    },...]
    :rtype: list
    :raises ImportError: if pyaudio is not installed
    """
    lPyAudio = _PyAudioModuleGet().PyAudio()
    try:
        lResultList = []
        for lIndexInt in range(lPyAudio.get_device_count()):
            lInfoDict = lPyAudio.get_device_info_by_index(lIndexInt)
            lHostApiDict = lPyAudio.get_host_api_info_by_index(lInfoDict["hostApi"])
            lResultList.append({
                "IndexInt": lInfoDict["index"],
                "NameStr": lInfoDict["name"],
                "MaxInputChannelsInt": lInfoDict["maxInputChannels"],
                "MaxOutputChannelsInt": lInfoDict["maxOutputChannels"],
                "HostApiInt": lInfoDict["hostApi"],
                "DefaultSampleRateFloat": lInfoDict["defaultSampleRate"],
                "HostApiStr": lHostApiDict["name"],  # "MME"|"Windows WASAPI"|"Windows WDM-KS"
            })
        return lResultList
    finally:
        # The PyAudio instance is only needed for the enumeration above.
        lPyAudio.terminate()


class Recorder:
    """Capture 16-bit audio from one PyAudio device on a background thread.

    ``CaptureStart`` opens the device and starts the reader thread,
    ``CaptureStop`` stops it and writes everything recorded so far to
    ``<mFileNameStr>.<mFileFormatStr>`` via ``CaptureChunk``.
    """

    mStatusStr = "0_READY"   # not updated by any method yet
    mAudio = None            # PyAudio instance, created per capture in CaptureStart
    mCaptureThread = None    # reader thread while a capture is running
    mStream = None           # input stream opened in CaptureStart

    mDeviceInt = None        # PyAudio device index to record from
    mChannelCountInt = None  # filled in CaptureStart from the device info
    mFramesInt = 512         # frames read per stream.read() call
    mRecordedFramesList = None  # per-instance list of raw byte chunks (set in __init__)
    mUseLoopbackBool = True  # forwarded to PyAudio.open(as_loopback=...)
    mSampleRateInt = None    # filled in CaptureStart from the device info
    mSampleSizeInt = 2       # bytes per sample of the paInt16 format used for the stream

    mCaptureBool = True      # reader loop runs while this is True
    # The original default "aux" is a reserved device name on Windows (also
    # with an extension), so it cannot be used as a file name there.
    mFileNameStr = "audio"
    mFileFormatStr = "mp3"

    def __init__(self, inDeviceInt=None):
        """Create a recorder bound to a device.

        :param inDeviceInt: PyAudio device index (see ``DeviceListGet``), defaults to None
        """
        self.mDeviceInt = inDeviceInt
        # A fresh list per instance: a class-level list would be shared by
        # every Recorder and mix their recordings.
        self.mRecordedFramesList = []

    def CaptureStart(self):
        """Open the configured device and start recording in a background thread.

        :raises ValueError: if no device index was given
        :raises ImportError: if pyaudio is not installed
        """
        if self.mDeviceInt is None:
            raise ValueError("inDeviceInt is not set: pass a device index from DeviceListGet()")
        lPyAudioModule = _PyAudioModuleGet()
        # One PyAudio instance per capture: __Capture__ terminates it when the
        # recording ends, so it cannot be reused for the next one.
        self.mAudio = lPyAudioModule.PyAudio()
        try:
            lDeviceInfoDict = self.mAudio.get_device_info_by_index(self.mDeviceInt)
            self.mSampleRateInt = int(lDeviceInfoDict["defaultSampleRate"])
            self.mChannelCountInt = max(lDeviceInfoDict["maxInputChannels"],
                                        lDeviceInfoDict["maxOutputChannels"])
            # NOTE(review): as_loopback is not a parameter of upstream PyAudio;
            # presumably a WASAPI-loopback build is expected - confirm.
            self.mStream = self.mAudio.open(format=lPyAudioModule.paInt16,
                                            channels=self.mChannelCountInt,
                                            rate=self.mSampleRateInt,
                                            input=True,
                                            frames_per_buffer=self.mFramesInt,
                                            input_device_index=lDeviceInfoDict["index"],
                                            as_loopback=self.mUseLoopbackBool)
        except Exception:
            # Do not leak the PortAudio handle when the device cannot be opened.
            self.mAudio.terminate()
            self.mAudio = None
            raise
        # Re-arm the loop flag so the recorder can be started again after a stop.
        self.mCaptureBool = True
        self.mCaptureThread = threading.Thread(target=self.__Capture__)
        self.mCaptureThread.start()

    def __Capture__(self):
        """Thread body: read chunks from the stream until ``mCaptureBool`` is cleared."""
        try:
            while self.mCaptureBool:
                self.mRecordedFramesList.append(self.mStream.read(self.mFramesInt))
        finally:
            # Release the stream and PortAudio even if read() raised.
            self.mStream.stop_stream()
            self.mStream.close()
            self.mAudio.terminate()

    def CaptureStop(self):
        """Stop the reader thread, wait for it and save the recording.

        Does nothing except clearing the flag when no capture is running.
        """
        self.mCaptureBool = False
        if self.mCaptureThread is None:
            return
        self.mCaptureThread.join()
        # Forget the thread so a repeated stop does not overwrite the file
        # with an empty recording.
        self.mCaptureThread = None
        self.CaptureChunk()

    def CaptureChunk(self):
        """Write the recorded frames to ``<mFileNameStr>.<mFileFormatStr>`` and clear the buffer.

        "wav" is written with the standard ``wave`` module; any other format
        is exported through pydub.

        :raises ImportError: if a non-WAV format is requested and pydub is not installed
        """
        lFilePathStr = f"{self.mFileNameStr}.{self.mFileFormatStr}"
        lDataBytes = b"".join(self.mRecordedFramesList)
        if self.mFileFormatStr.lower() == "wav":
            with wave.open(lFilePathStr, "wb") as lWaveFile:
                lWaveFile.setnchannels(self.mChannelCountInt)
                lWaveFile.setsampwidth(self.mSampleSizeInt)
                lWaveFile.setframerate(self.mSampleRateInt)
                lWaveFile.writeframes(lDataBytes)
        else:
            if AudioSegment is None:
                raise ImportError("pydub is required to export non-WAV audio")
            lSound = AudioSegment(data=lDataBytes,
                                  sample_width=self.mSampleSizeInt,
                                  frame_rate=self.mSampleRateInt,
                                  channels=self.mChannelCountInt)
            # export() returns the open output file object; close it so the
            # handle is released.
            lSound.export(lFilePathStr, format=self.mFileFormatStr).close()
        self.mRecordedFramesList = []

    def FileListGet(self):
        """Not implemented yet."""
        pass

    def FileLastGet(self):
        """Not implemented yet."""
        pass

    def __Callback__(self, inDefList):
        """Not implemented yet."""
        pass

    def __CallbackIsSilent__(self):
        """Not implemented yet."""
        pass

    def __CallbackIsChunked__(self):
        """Not implemented yet."""
        pass

    def __CallbackIsStopped__(self):
        """Not implemented yet."""
        pass

    def __TriggerCenter__(self):
        """L-,W+: Control recording / stopping of the audio by the following criteria:
        - total duration,
        - maximum duration of a chunk,
        - maximum duration of silence (chunk),
        - maximum duration of silence (stop).

        Not implemented yet.
        """
        pass
os.path.abspath(\"../../Sources\")) # FOR LINUX\n", + "import time" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "123\n" + ] + } + ], + "source": [ + "print(123)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pyOpenRPA.Robot import Audio\n", + "lRec = Audio.Recorder(inDeviceInt=10)\n", + "lRec.CaptureStart()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n", + "done2\n", + "CaptureChunk 1\n" + ] + } + ], + "source": [ + "lRec.CaptureStop()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Available devices:\n", + "\n", + "0\n", + "0: \t Ïåðåíàçíà÷åíèå çâóêîâûõ óñòð. - Input \n", + " \t MME \n", + "\n", + "0\n", + "1: \t Ìèêðîôîí (Realtek High Definiti \n", + " \t MME \n", + "\n", + "0\n", + "2: \t Ìèêðîôîí (EpocCam Camera Audio) \n", + " \t MME \n", + "\n", + "0\n", + "3: \t Ìèêðîôîí (USB PnP Sound Device) \n", + " \t MME \n", + "\n", + "0\n", + "4: \t Ïåðåíàçíà÷åíèå çâóêîâûõ óñòð. 
- Output \n", + " \t MME \n", + "\n", + "0\n", + "5: \t Äèíàìèêè (USB PnP Sound Device) \n", + " \t MME \n", + "\n", + "0\n", + "6: \t 2 - NEC LCD1703M (AMD High Defi \n", + " \t MME \n", + "\n", + "0\n", + "7: \t Äèíàìèêè (Realtek High Definiti \n", + " \t MME \n", + "\n", + "1\n", + "8: \t 2 - NEC LCD1703M (AMD High Definition Audio Device) \n", + " \t Windows WASAPI \n", + "\n", + "1\n", + "9: \t Динамики (USB PnP Sound Device) \n", + " \t Windows WASAPI \n", + "\n", + "1\n", + "10: \t Динамики (Realtek High Definition Audio) \n", + " \t Windows WASAPI \n", + "\n", + "1\n", + "11: \t Микрофон (EpocCam Camera Audio) \n", + " \t Windows WASAPI \n", + "\n", + "1\n", + "12: \t Микрофон (Realtek High Definition Audio) \n", + " \t Windows WASAPI \n", + "\n", + "1\n", + "13: \t Микрофон (USB PnP Sound Device) \n", + " \t Windows WASAPI \n", + "\n", + "2\n", + "14: \t Стерео микшер (Realtek HD Audio Stereo input) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "15: \t Микрофон (Realtek HD Audio Mic input) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "16: \t Speakers (Realtek HD Audio output) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "17: \t Лин. 
вход (Realtek HD Audio Line input) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "18: \t Output (AMD HD Audio HDMI out #1) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "19: \t Динамики (USB PnP Sound Device) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "20: \t Микрофон (USB PnP Sound Device) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "21: \t MIDI (EpocCam Audio) \n", + " \t Windows WDM-KS \n", + "\n", + "2\n", + "22: \t Output (EpocCam Audio) \n", + " \t Windows WDM-KS \n", + "\n" + ] + } + ], + "source": [ + "print (textcolors.blue + \"Available devices:\\n\" + textcolors.end)\n", + "for i in range(0, p.get_device_count()):\n", + " info = p.get_device_info_by_index(i)\n", + " print(info[\"hostApi\"])\n", + " print (textcolors.green + str(info[\"index\"]) + textcolors.end + \": \\t %s \\n \\t %s \\n\" % (info[\"name\"], p.get_host_api_info_by_index(info[\"hostApi\"])[\"name\"]))" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "class textcolors:\n", + " if not os.name == 'nt':\n", + " blue = '\\033[94m'\n", + " green = '\\033[92m'\n", + " warning = '\\033[93m'\n", + " fail = '\\033[91m'\n", + " end = '\\033[0m'\n", + " else:\n", + " blue = ''\n", + " green = ''\n", + " warning = ''\n", + " fail = ''\n", + " end = ''" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting...\n", + "End.\n" + ] + }, + { + "data": { + "text/plain": [ + "<_io.BufferedRandom name='out.mp3'>" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pyaudio\n", + "defaultframes = 512\n", + "recorded_frames = []\n", + "device_info = {}\n", + "useloopback = True\n", + "#Use module\n", + "p = pyaudio.PyAudio()\n", + "\n", + "recordtime = 15\n", + "device_id = 10\n", + "device_info = p.get_device_info_by_index(device_id)\n", + "#Open 
stream\n", + "channelcount = device_info[\"maxInputChannels\"] if (device_info[\"maxOutputChannels\"] < device_info[\"maxInputChannels\"]) else device_info[\"maxOutputChannels\"]\n", + "stream = p.open(format = pyaudio.paInt16,\n", + " channels = channelcount,\n", + " rate = int(device_info[\"defaultSampleRate\"]),\n", + " input = True,\n", + " frames_per_buffer = defaultframes,\n", + " input_device_index = device_info[\"index\"],\n", + " as_loopback = useloopback)\n", + "\n", + "#Start Recording\n", + "print (\"Starting...\")\n", + "\n", + "for i in range(0, int(int(device_info[\"defaultSampleRate\"]) / defaultframes * recordtime)):\n", + " recorded_frames.append(stream.read(defaultframes))\n", + "\n", + "print (\"End.\")\n", + "#Stop Recording\n", + "\n", + "stream.stop_stream()\n", + "stream.close()\n", + "\n", + "#Close module\n", + "p.terminate()\n", + "\n", + "filename = \"out.wav\"\n", + "from pydub import AudioSegment\n", + "# Advanced usage, if you have raw audio data:\n", + "sound = AudioSegment(\n", + " # raw audio data (bytes)\n", + " data=b''.join(recorded_frames),\n", + " # 2 byte (16 bit) samples\n", + " sample_width=p.get_sample_size(pyaudio.paInt16),\n", + " # 44.1 kHz frame rate\n", + " frame_rate=int(device_info[\"defaultSampleRate\"]),\n", + " # stereo\n", + " channels=channelcount\n", + ")\n", + "sound.export(\"out.mp3\", format=\"mp3\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import difflib\n", + "def similarity_no_case(s1, s2):\n", + " normalized1 = s1.lower()\n", + " normalized2 = s2.lower()\n", + " matcher = difflib.SequenceMatcher(None, normalized1, normalized2)\n", + " return matcher.ratio()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.36363636363636365" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"similarity(\"sadsdasd\",\"sadfsdfd \")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.2" + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/changelog.md b/changelog.md index fc90d98d..e402e2a2 100755 --- a/changelog.md +++ b/changelog.md @@ -18,7 +18,8 @@ AGT - AGENT - - Появился модуль захвата звука с микрофонов и с приложений (pyOpenRPA.Robot.Audio) - ОБЩЕЕ - - Jupyter: запуск из других дисков, отличных от C:// -- - Utils: Функции подготовки файлов/ директорий +- - Utils: Функции подготовки файлов / директорий +- - Utils: String - similarity [1.3.0] - ПОРТИРОВАНО НА LINUX (Ubuntu, Debian, Astra), адаптация функций