import {
  SpeechConfig,
  SpeechSynthesizer,
  SpeakerAudioDestination,
  AudioConfig,
  SpeechSynthesisOutputFormat,
  AudioOutputStream,
} from "microsoft-cognitiveservices-speech-sdk";

import { PitchShifter } from "soundtouchjs";

/**
 * Abbreviations that `Speech.speak` rewrites as hyphen-separated letters
 * (for example "AUM" becomes "A-U-M") before the text is synthesized.
 *
 * The entries are joined into a single regex alternation, so a longer
 * abbreviation must stay ahead of any shorter one it contains
 * ("UGWAF" before "GWAF").
 */
export const Abbreviation = [
  "UGWAF",
  "GWAF",
  "SOF",
  "GCMF",
  "COP",
  "AUM",
];

/**
 * Contract shared by the text-to-speech implementations in this file.
 */
export interface ISpeech {
  /** Whether speech is currently being played (or is about to be). */
  isSpeaking: boolean;

  /** Whether synthesis of the most recent request has completed. */
  isCompleted: boolean;

  /**
   * Selects the synthesis language.
   *
   * @param lang Language tag, e.g. "zh-TW".
   */
  setLanguage(lang: string): void;

  /**
   * Selects the voice.
   *
   * @param sexual Voice selector; `Speech` treats "male" (case-insensitive)
   *   as the male voice and anything else as its default voice.
   */
  setVoiceName(sexual?: string): void;

  /**
   * Speaks the given text.
   *
   * @param text Text to speak.
   * @returns A promise for the request.
   */
  speak(text: string): Promise<void>;

  /** Stops the current playback, if any. */
  cancel(): void;

  /** Releases the resources held by the implementation. */
  close(): void;
}

/**
 * Browser text-to-speech player.
 *
 * Text is synthesized to MP3 with the Azure Speech SDK, decoded with the Web
 * Audio API and played through a soundtouchjs `PitchShifter` so the tempo can
 * be raised without changing the pitch.
 */
export class Speech implements ISpeech {
  private speechConfig?: SpeechConfig;
  private audioConfig?: AudioConfig;
  private synthesizer?: SpeechSynthesizer;
  /** Never assigned in this class; kept so `close()` stays a safe no-op. */
  private speakerAudioDestination?: SpeakerAudioDestination;
  private context: AudioContext;
  private source?: AudioBufferSourceNode;
  private gainNode: GainNode;
  /**
   * Player of the most recent utterance. Typed through `InstanceType` so the
   * declaration works whether or not soundtouchjs ships type definitions.
   */
  private shifter?: InstanceType<typeof PitchShifter>;
  /** True from the moment audio is accepted for playback until it ends. */
  isSpeaking = false;
  /** True once the synthesizer reports the current request as completed. */
  isCompleted = false;
  /** Playback tempo handed to the `PitchShifter`; falsy values mean 1.15. */
  playRate = 1.15;

  /**
   * @param token Authorization token for the speech service.
   * @param region Region of the speech service.
   * @param language Synthesis language, "zh-TW" by default.
   */
  constructor(token: string, region: string, language = "zh-TW") {
    const AudioContextCtor: typeof AudioContext =
      window.AudioContext ||
      (window as Window & { webkitAudioContext: typeof AudioContext })
        .webkitAudioContext;
    this.context = new AudioContextCtor();

    // One-sample source wired to the output. It is never started here.
    // NOTE(review): looks like the remains of an iOS audio-unlock trick;
    // confirm before removing.
    const silence = this.context.createBuffer(1, 1, 22050);
    this.source = this.context.createBufferSource();
    this.source.buffer = silence;
    this.source.connect(this.context.destination);
    this.gainNode = this.context.createGain();

    // The regular player (AudioConfig.fromSpeakerOutput with a
    // SpeakerAudioDestination) did not work on iOS and its onAudioEnd
    // callback did not fire, so the audio is sent to a stream instead and
    // played through the AudioContext.
    this.audioConfig = AudioConfig.fromStreamOutput(
      AudioOutputStream.createPullStream()
    );

    this.speechConfig = SpeechConfig.fromAuthorizationToken(token, region);
    this.speechConfig.speechSynthesisLanguage = language;
    this.speechConfig.speechSynthesisOutputFormat =
      SpeechSynthesisOutputFormat.Audio24Khz48KBitRateMonoMp3;
    this.synthesizer = this.createSynthesizer(this.speechConfig);
  }

  /**
   * Changes the synthesis language on the speech configuration.
   *
   * NOTE(review): unlike `setVoiceName`, this does not rebuild the
   * synthesizer; confirm that the SDK picks the change up for an existing
   * synthesizer.
   *
   * @param lang Language tag, e.g. "zh-TW".
   */
  setLanguage(lang: string) {
    if (this.speechConfig) {
      this.speechConfig.speechSynthesisLanguage = lang;
    }
  }

  /**
   * Selects the voice and rebuilds the synthesizer with it.
   *
   * NOTE(review): the previous synthesizer is replaced without being closed.
   * It shares `audioConfig` with the new one, so confirm the SDK's disposal
   * behaviour before closing it here.
   *
   * @param sexual "male" (case-insensitive) selects the male voice; any
   *   other value, including none, selects the default voice.
   */
  setVoiceName(sexual?: string) {
    if (!this.speechConfig) {
      return;
    }

    this.speechConfig.speechSynthesisVoiceName =
      sexual?.toLowerCase() === "male"
        ? "zh-TW-YunJheNeural"
        : /** Default */ "zh-TW-HsiaoChenNeural";

    this.synthesizer = this.createSynthesizer(this.speechConfig);
  }

  /**
   * Synthesizes `text` and starts playing it.
   *
   * The promise settles on every path:
   * - resolves immediately when something is already being spoken;
   * - resolves once playback has been started (not when it has finished;
   *   watch `isSpeaking` for that);
   * - resolves when the service returned no audio and no error;
   * - rejects when the instance has been closed, when the service reports an
   *   error, or when decoding/playing the audio fails.
   *
   * @param text Text to speak; it is normalized before synthesis.
   */
  speak(text: string): Promise<void> {
    // The executor is deliberately not async: a synchronous throw from the
    // SDK then rejects this promise instead of getting lost.
    return new Promise<void>((resolve, reject) => {
      if (this.isSpeaking) {
        console.log("on speaking.");
        resolve();
        return;
      }

      const synthesizer = this.synthesizer;
      if (!synthesizer) {
        reject(new Error("Speech synthesizer is closed."));
        return;
      }

      const parsedText = Speech.normalizeText(text);
      this.isCompleted = false;

      synthesizer.speakTextAsync(
        parsedText,
        (result) => {
          // A cancelled synthesis arrives here without audio data.
          if (!result.audioData?.byteLength) {
            if (result.errorDetails) {
              reject(new Error(result.errorDetails));
            } else {
              resolve();
            }
            return;
          }

          this.play(result.audioData).then(resolve, reject);
        },
        reject
      );
    });
  }

  /** Marks playback as stopped and disconnects the current player. */
  cancel() {
    this.isSpeaking = false;
    return this.shifter?.disconnect(this.gainNode);
  }

  /**
   * Closes the synthesizer. `speak()` rejects afterwards until
   * `setVoiceName()` builds a new synthesizer.
   */
  close() {
    this.speakerAudioDestination?.close();
    this.synthesizer?.close();

    this.synthesizer = undefined;
  }

  /** Builds a synthesizer that flags `isCompleted` when synthesis is done. */
  private createSynthesizer(speechConfig: SpeechConfig): SpeechSynthesizer {
    const synthesizer = new SpeechSynthesizer(speechConfig, this.audioConfig);
    synthesizer.synthesisCompleted = () => {
      this.isCompleted = true;
    };
    return synthesizer;
  }

  /**
   * Decodes the synthesized audio and starts playing it. Does nothing when
   * another utterance took over in the meantime.
   */
  private async play(audioData: ArrayBuffer): Promise<void> {
    if (this.isSpeaking) {
      console.log("> on speaking.");
      return;
    }
    this.isSpeaking = true;

    try {
      const buffer = await this.context.decodeAudioData(audioData);

      this.gainNode.gain.value = 1;
      // Play through PitchShifter; its last argument runs when playback ends.
      this.shifter = new PitchShifter(this.context, buffer, 16384, () => {
        this.isSpeaking = false;
      });
      this.shifter.tempo = this.playRate || 1.15;
      this.shifter.pitch = 1;
      this.shifter.connect(this.gainNode);
      this.gainNode.connect(this.context.destination);

      await this.context.resume();
    } catch (error: unknown) {
      // Without this reset a single failure would block every later speak().
      this.isSpeaking = false;
      throw error;
    }
  }

  /**
   * Rewrites text so that it is read out as intended:
   * - markdown links `[label](url)` are reduced to their label;
   * - thousands separators are removed from USD/TWD amounts;
   * - known abbreviations are split into hyphen-separated letters;
   * - "2年"/"二年" not preceded by a digit becomes "兩年";
   * - "行業" and "軋空" are replaced with "航業" and "嘎空".
   *
   * @param text Raw text.
   * @param abbreviations Abbreviations to spell out; longer entries must
   *   precede the shorter ones they contain.
   */
  private static normalizeText(
    text: string,
    abbreviations: readonly string[] = Abbreviation
  ): string {
    const abbreviationPattern = new RegExp(
      `(${abbreviations
        .map((abbreviation) =>
          abbreviation.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")
        )
        .join("|")})`,
      "g"
    );

    return (
      text
        // The URL part stops at its own closing parenthesis (one nested
        // level is allowed) so that text between two links is preserved.
        .replace(/\[([^\[]+)\]\((?:[^()\r\n]|\([^()\r\n]*\))*\)/g, " $1 ")
        .replace(
          /((USD|TWD)\s)(([1-9]\d{0,2}(,\d{3})*)|0)?(\.\d{1,2})?/g,
          (amount) => amount.replace(/,/g, "")
        )
        .replace(abbreviationPattern, (abbreviation) =>
          abbreviation.split("").join("-")
        )
        .replace(/(\D)(2|二)\s?年/g, "$1兩年")
        .replace(/行業/g, "航業")
        .replace(/軋空/g, "嘎空")
    );
  }
}

/**
 * `ISpeech` stand-in that produces no audio: every method is a no-op except
 * `speak`, which writes the text to the console.
 */
export class ServerSpeech implements ISpeech {
  /** Stays `false`; nothing in this class changes it. */
  isSpeaking = false;
  /** Stays `false`; nothing in this class changes it. */
  isCompleted = false;

  /** Ignores the language. */
  setLanguage(lang: string): void {
    // Intentionally empty.
  }

  /** Ignores the voice selector. */
  setVoiceName(sexual?: string): void {
    // Intentionally empty.
  }

  /**
   * Logs the text instead of speaking it.
   *
   * @param text Text that would have been spoken.
   */
  async speak(text: string): Promise<void> {
    console.log(text);
  }

  /** Nothing to cancel. */
  cancel(): void {
    // Intentionally empty.
  }

  /** Nothing to release. */
  close(): void {
    // Intentionally empty.
  }
}
