diff --git a/Elementary/Audio/AudioManager.cs b/Elementary/Audio/AudioManager.cs index 1f797c3..2f21483 100644 --- a/Elementary/Audio/AudioManager.cs +++ b/Elementary/Audio/AudioManager.cs @@ -5,7 +5,6 @@ using Discord; using Discord.Audio; using Discord.WebSocket; using Elementary.Dictionary; -using ManagedBass; using Microsoft.Extensions.DependencyInjection; using NAudio.Wave; using NLog; @@ -21,12 +20,14 @@ public class AudioManager private SozaiAPI _sozaiAPI; private VoicevoxAPI _voicevoxAPI; + private Ytdlp _ytdlp; private AudioConverter _audioConverter; private PlaybackQueue _playbackQueue; private EmojiDictionary _emojiDictionary; private DictionaryDB _dictionaryDB; + private Mecab _mecab; // private AudioMixer _audioMixer; private ILogger _logger; @@ -34,15 +35,17 @@ public class AudioManager public bool isConnected; public AudioManager(IServiceProvider services, DiscordSocketClient client, SozaiAPI sozaiApi, - VoicevoxAPI voicevoxApi, EmojiDictionary emojiDictionary, DictionaryDB dictionaryDB) + VoicevoxAPI voicevoxApi, EmojiDictionary emojiDictionary, DictionaryDB dictionaryDB, Mecab mecab, Ytdlp ytdlp) { _services = services; _client = client; _sozaiAPI = sozaiApi; _voicevoxAPI = voicevoxApi; + _ytdlp = ytdlp; _audioConverter = new(); _emojiDictionary = emojiDictionary; _dictionaryDB = dictionaryDB; + _mecab = mecab; _logger = LogManager.GetCurrentClassLogger(); } @@ -88,6 +91,28 @@ public class AudioManager // _audioMixer.AddStream(wave); await wave.CopyToAsync(_audioStream); // GC.Collect(); + + // try + // { + // await wave.CopyToAsync(_audioStream); + // } + // catch (Exception e) + // { + // _logger.Log(LogLevel.Error, e); + // } + // finally + // { + // await _audioStream.DisposeAsync(); + // } + } + + public async Task PlayYoutube(string url) + { + var stream = await _ytdlp.GetStream(url); + + await using var wave = _audioConverter.CreateStreamFromStream(stream, 0.1f); + // _audioMixer.AddStream(wave); + await wave.CopyToAsync(_audioStream); } public async Task PlayText(string text) @@ -98,25 +123,42 @@ public class AudioManager text = Regex.Replace(text, @"<:[\w]+:[\d]+>", m => m.Value.Split(":")[1]); // <:emoji:123456789> -> emoji text = Regex.Replace(text, @"", m => m.Value.Split(":")[1]); // <:emoji:123456789> -> emoji - - text = _dictionaryDB.Replace(text); - text = _emojiDictionary.Replace(text); + text = text.Replace("~", "ー"); float volume = 0.12f; Stream? stream = await _sozaiAPI.GetAudioStream(text); if (stream == null) { + text = _dictionaryDB.Replace(text); + + text = _emojiDictionary.Replace(text); + + text = await _mecab.ParseToKana(text); + stream = await _voicevoxAPI.Speak(text); volume = 0.8f; if (stream == null) return; } - + await using var wave = _audioConverter.CreateStreamFromStream(stream, volume); // _audioMixer.AddStream(wave); await wave.CopyToAsync(_audioStream); // GC.Collect(); + + // try + // { + // await wave.CopyToAsync(_audioStream); + // } + // catch (Exception e) + // { + // _logger.Log(LogLevel.Error, e); + // } + // finally + // { + // await _audioStream.DisposeAsync(); + // } } public async Task StopAudio() diff --git a/Elementary/Audio/AudioMixer.cs b/Elementary/Audio/AudioMixer.cs index f17180f..d418b9c 100644 --- a/Elementary/Audio/AudioMixer.cs +++ b/Elementary/Audio/AudioMixer.cs @@ -1,80 +1,80 @@ -using ManagedBass; -using ManagedBass.Mix; - -namespace Elementary.Audio; - -public class AudioMixer -{ - private readonly int _mixerStream; - private MemoryStream?[] _inputStreams; - private Stream _outStream; - private object _lock = new(); - - public AudioMixer(Stream outStream) - { - _outStream = outStream; - Bass.Init(-1, 44100, DeviceInitFlags.NoSpeakerAssignment, IntPtr.Zero); - _mixerStream = BassMix.CreateMixerStream(44100, 2, BassFlags.Float | BassFlags.MixerNonStop); - _inputStreams = new MemoryStream[10]; - - Bass.ChannelPlay(_mixerStream); - } - - /// - /// Add a stream to the mixer. - /// wait for the stream to finish playing - /// - /// - /// - public void AddStream(Stream stream, float volume = 1.0f) - { - Console.WriteLine("Adding stream to mixer"); - byte[] pcmBytes; - using (MemoryStream ms = new()) - { - stream.CopyTo(ms); - pcmBytes = ms.ToArray(); - } - - lock (_lock) - { - for (var i = 0; i < _inputStreams.Length; i++) - { - if (_inputStreams[i] == null) - { - _inputStreams[i] = new MemoryStream(pcmBytes); - break; - } - } - } - - var channel = Bass.CreateStream(pcmBytes, 0, pcmBytes.Length, BassFlags.Float); - Bass.ChannelSetAttribute(channel, ChannelAttribute.Volume, volume); - BassMix.MixerAddChannel(_mixerStream, channel, BassFlags.Default); - - int length = Bass.ChannelGetData(_mixerStream, new byte[4096], 4096); - byte[] buffer = new byte[length]; - length = Bass.ChannelGetData(_mixerStream, buffer, length); - _outStream.Write(buffer, 0, length); - } - - public void Stop() - { - Bass.ChannelStop(_mixerStream); - } - - public void Dispose() - { - lock (_lock) - { - foreach (var stream in _inputStreams) - { - stream?.Dispose(); - } - } - - _outStream.Flush(); - Bass.StreamFree(_mixerStream); - Bass.Free(); - } -} \ No newline at end of file +// using ManagedBass; +// using ManagedBass.Mix; +// +// namespace Elementary.Audio; +// +// public class AudioMixer +// { +// private readonly int _mixerStream; +// private MemoryStream?[] _inputStreams; +// private Stream _outStream; +// private object _lock = new(); +// +// public AudioMixer(Stream outStream) +// { +// _outStream = outStream; +// Bass.Init(-1, 44100, DeviceInitFlags.NoSpeakerAssignment, IntPtr.Zero); +// _mixerStream = BassMix.CreateMixerStream(44100, 2, BassFlags.Float | BassFlags.MixerNonStop); +// _inputStreams = new MemoryStream[10]; +// +// Bass.ChannelPlay(_mixerStream); +// } +// +// /// +// /// Add a stream to the mixer. +// /// wait for the stream to finish playing +// /// +// /// +// /// +// public void AddStream(Stream stream, float volume = 1.0f) +// { +// Console.WriteLine("Adding stream to mixer"); +// byte[] pcmBytes; +// using (MemoryStream ms = new()) +// { +// stream.CopyTo(ms); +// pcmBytes = ms.ToArray(); +// } +// +// lock (_lock) +// { +// for (var i = 0; i < _inputStreams.Length; i++) +// { +// if (_inputStreams[i] == null) +// { +// _inputStreams[i] = new MemoryStream(pcmBytes); +// break; +// } +// } +// } +// +// var channel = Bass.CreateStream(pcmBytes, 0, pcmBytes.Length, BassFlags.Float); +// Bass.ChannelSetAttribute(channel, ChannelAttribute.Volume, volume); +// BassMix.MixerAddChannel(_mixerStream, channel, BassFlags.Default); +// +// int length = Bass.ChannelGetData(_mixerStream, new byte[4096], 4096); +// byte[] buffer = new byte[length]; +// length = Bass.ChannelGetData(_mixerStream, buffer, length); +// _outStream.Write(buffer, 0, length); +// } +// +// public void Stop() +// { +// Bass.ChannelStop(_mixerStream); +// } +// +// public void Dispose() +// { +// lock (_lock) +// { +// foreach (var stream in _inputStreams) +// { +// stream?.Dispose(); +// } +// } +// +// _outStream.Flush(); +// Bass.StreamFree(_mixerStream); +// Bass.Free(); +// } +// } \ No newline at end of file diff --git a/Elementary/Audio/MessageHandler.cs b/Elementary/Audio/MessageHandler.cs index 365fdff..ed3f2e6 100644 --- a/Elementary/Audio/MessageHandler.cs +++ b/Elementary/Audio/MessageHandler.cs @@ -7,19 +7,24 @@ public class MessageHandler { // private IAudioClient _audioClient; private PlaybackQueue _playbackQueue; - + public MessageHandler(PlaybackQueue playbackQueue) { _playbackQueue = playbackQueue; } - - + + public async Task HandleMessage(SocketMessage message) { - await _playbackQueue.Enqueue(new PlaybackJob() + List lines = message.Content.Split("\n").ToList(); + + foreach (var line in lines) { - Type = JobType.Text, - Text = message.Content - }); + await _playbackQueue.Enqueue(new PlaybackJob() + { + Type = JobType.Text, + Text = line + }); + } } } \ No newline at end of file diff --git a/Elementary/Audio/SozaiAPI.cs b/Elementary/Audio/SozaiAPI.cs index ad77ae0..1f9268d 100644 --- a/Elementary/Audio/SozaiAPI.cs +++ b/Elementary/Audio/SozaiAPI.cs @@ -1,4 +1,5 @@ using System.Text.Json; +using Microsoft.Extensions.Caching.Memory; using NLog; namespace Elementary.Audio; @@ -9,6 +10,8 @@ public class SozaiAPI private Asset[] Assets; + private MemoryCache _cache; + private class Asset { /// @@ -29,6 +32,7 @@ public class SozaiAPI { _client = new HttpClient(); _logger = LogManager.GetCurrentClassLogger(); + _cache = new MemoryCache(new MemoryCacheOptions()); } public async Task Setup(string url) @@ -47,10 +51,17 @@ public class SozaiAPI if (asset == null) return null; _logger.Info($"Requested {asset.names[0]}"); + + if (_cache.TryGetValue(asset.url, out Stream? stream)) + { + _logger.Info($"Cache hit {asset.url}"); + return stream; + } var response = await _client.GetAsync(asset.url); _logger.Info($"Got response {response.StatusCode}"); - var stream = await response.Content.ReadAsStreamAsync(); + stream = await response.Content.ReadAsStreamAsync(); + _cache.Set(asset.url, stream); return stream; } } \ No newline at end of file diff --git a/Elementary/Audio/VoicevoxAPI.cs b/Elementary/Audio/VoicevoxAPI.cs index 01e4c5e..6674231 100644 --- a/Elementary/Audio/VoicevoxAPI.cs +++ b/Elementary/Audio/VoicevoxAPI.cs @@ -1,5 +1,6 @@ using System.Net.Http.Json; using System.Text; +using Microsoft.Extensions.Caching.Memory; using NLog; namespace Elementary.Audio; @@ -9,6 +10,7 @@ public class VoicevoxAPI private UriBuilder _APIRootUrl; private HttpClient _client; private ILogger _logger; + private MemoryCache _cache; public async Task Setup(string url) { @@ -17,6 +19,7 @@ public class VoicevoxAPI _APIRootUrl = new UriBuilder($"{_url.Scheme}://{_url.Host}:{_url.Port}"); _client = new HttpClient(); _logger = LogManager.GetCurrentClassLogger(); + _cache = new MemoryCache(new MemoryCacheOptions()); } /// @@ -28,11 +31,18 @@ public class VoicevoxAPI public async Task Speak(string text, string speaker = "47") { _logger.Info($"Requested TTS {text}"); + + if (_cache.TryGetValue(text, out Stream? stream)) + { + _logger.Info($"Cache hit {text}"); + return stream; + } var query = await GetAudioQuery(text, speaker); if (query == null) return null; - var stream = await GetAudioStream(query, speaker); + stream = await GetAudioStream(query, speaker); if (stream == null) return null; + _cache.Set(text, stream); return stream; } diff --git a/Elementary/Audio/Ytdlp.cs b/Elementary/Audio/Ytdlp.cs new file mode 100644 index 0000000..d2a6451 --- /dev/null +++ b/Elementary/Audio/Ytdlp.cs @@ -0,0 +1,39 @@ +using System.Diagnostics; +using System.Text; +using NLog; + +namespace Elementary.Audio; + +public class Ytdlp +{ + private readonly ILogger _logger; + + public Ytdlp() + { + _logger = LogManager.GetCurrentClassLogger(); + } + + public async Task GetStream(string url) + { + var process = new Process + { + StartInfo = + { + FileName = "yt-dlp", + Arguments = $"-f bestaudio -o - {url}", + RedirectStandardOutput = true, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true + }, + EnableRaisingEvents = true + }; + + var stream = new MemoryStream(); + process.Start(); + process.StandardOutput.BaseStream.CopyTo(stream); + process.WaitForExit(); + stream.Position = 0; + return stream; + } +} \ No newline at end of file diff --git a/Elementary/Commands/MessageCommands.cs b/Elementary/Commands/MessageCommands.cs index 495ba2d..d10f65a 100644 --- a/Elementary/Commands/MessageCommands.cs +++ b/Elementary/Commands/MessageCommands.cs @@ -112,4 +112,11 @@ public class MessageCommands : ModuleBase result += "```"; return ReplyAsync(result); } + + [Command("ytdlp", RunMode = RunMode.Async)] + [Summary("Play sound from Youtube URL.")] + public async Task YtdlpAsync([Summary("Youtube URL")] string url) + { + await _audioManager.PlayYoutube(url); + } } \ No newline at end of file diff --git a/Elementary/Config/Configuration.cs b/Elementary/Config/Configuration.cs index 6dc7d72..1b320c3 100644 --- a/Elementary/Config/Configuration.cs +++ b/Elementary/Config/Configuration.cs @@ -10,6 +10,13 @@ public class AppSettings public SozaiSettings SozaiSettings { get; set; } public EmojiSettings EmojiSettings { get; set; } public VoicevoxSettings VoicevoxSettings { get; set; } + public MecabSettings MecabSettings { get; set; } +} + +public class MecabSettings +{ + public bool enabled { get; set; } + public string? dictionaryPath { get; set; } } public class VoicevoxSettings diff --git a/Elementary/Dictionary/Mecab.cs b/Elementary/Dictionary/Mecab.cs new file mode 100644 index 0000000..ba79090 --- /dev/null +++ b/Elementary/Dictionary/Mecab.cs @@ -0,0 +1,67 @@ +using System.Diagnostics; +using System.Text; +using System.Text.RegularExpressions; + +namespace Elementary.Dictionary; + +public class Mecab +{ + private string? _dictionaryPath; + private bool _enabled; + + public async Task Setup(bool enabled, string? dictionaryPath) + { + _dictionaryPath = dictionaryPath; + _enabled = enabled; + await Task.CompletedTask; + } + + public async Task ParseToKana(string text) + { + if (!_enabled) + { + return text; + } + + // "Apple PencilあああiPad" -> "Apple Pencil iPad" + var englishWords = Regex.Matches(text, @"[a-zA-Z]+").Select(m => m.Value).ToList(); + var englishWordsString = string.Join(" ", englishWords); + + String args = $"-Oyomi"; + if (_dictionaryPath != null) + { + args += $" -d {_dictionaryPath}"; + } + + var process = new Process + { + StartInfo = + { + FileName = "mecab", + Arguments = args, + RedirectStandardInput = true, + RedirectStandardOutput = true, + StandardOutputEncoding = Encoding.UTF8, + StandardInputEncoding = Encoding.UTF8, + RedirectStandardError = true, + UseShellExecute = false, + CreateNoWindow = true + } + }; + process.Start(); + await process.StandardInput.WriteLineAsync(englishWordsString); + await process.StandardInput.FlushAsync(); + process.StandardInput.Close(); + var result = await process.StandardOutput.ReadToEndAsync(); + process.WaitForExit(); + + // replace english words with kana + var kanaWords = result.Split(" ").Select(m => m.Trim()).ToList(); + for (var i = 0; i < englishWords.Count; i++) + { + text = text.Replace(englishWords[i], kanaWords[i]); + } + + return text; + } +} \ No newline at end of file diff --git a/Elementary/Elementary.csproj b/Elementary/Elementary.csproj index 65146ad..e9ecbd4 100644 --- a/Elementary/Elementary.csproj +++ b/Elementary/Elementary.csproj @@ -17,12 +17,10 @@ - - - + diff --git a/Elementary/Program.cs b/Elementary/Program.cs index 5de73fa..fd04652 100644 --- a/Elementary/Program.cs +++ b/Elementary/Program.cs @@ -7,7 +7,6 @@ using Discord.WebSocket; using Elementary.Audio; using Elementary.Commands; using Elementary.Dictionary; -using ManagedBass; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using NAudio.Wave; @@ -30,6 +29,8 @@ public class Program private VoicevoxAPI _voicevoxAPI; private EmojiDictionary _emojiDictionary; private DictionaryDB _dictionaryDB; + private Mecab _mecab; + private Ytdlp _ytdlp; public static async Task Main(string[] args) { @@ -73,15 +74,19 @@ public class Program .AddSingleton() .AddSingleton() .AddSingleton() + .AddSingleton() + .AddSingleton() .BuildServiceProvider(); _logger = LogManager.GetCurrentClassLogger(); _sozaiAPI = _services.GetRequiredService(); _voicevoxAPI = _services.GetRequiredService(); + _ytdlp = _services.GetRequiredService(); _emojiDictionary = _services.GetRequiredService(); _dictionaryDB = _services.GetRequiredService(); + _mecab = _services.GetRequiredService(); _handler = new(_client, _commands, _services, _services.GetRequiredService(), _services.GetRequiredService()); @@ -96,6 +101,8 @@ public class Program await _voicevoxAPI.Setup(configuration.AppSettings.VoicevoxSettings.Url); await _emojiDictionary.Setup(configuration.AppSettings.EmojiSettings.DictionaryPath); await _dictionaryDB.Setup(); + await _mecab.Setup(configuration.AppSettings.MecabSettings.enabled, + configuration.AppSettings.MecabSettings.dictionaryPath); await _client.LoginAsync(TokenType.Bot, configuration.AppSettings.DiscordSettings.Token); await _client.StartAsync(); await _client.SetActivityAsync(new Game("!join")); diff --git a/Elementary/appsettings.json b/Elementary/appsettings.json index 94071a3..5809a29 100644 --- a/Elementary/appsettings.json +++ b/Elementary/appsettings.json @@ -18,6 +18,9 @@ }, "EmojiSettings": { "DictionaryPath": "emoji-ja\\data\\emoji_ja.json" + }, + "MecabSettings": { + "Enabled": true } } } \ No newline at end of file