import React, { useEffect, useRef, useState } from 'react'
import type { Boundary } from '../../models/speechTypes'
import {
  SpeechSynthesisPonyfillType,
  SpeechSynthesisUtterance
} from '@davi-ai/web-speech-cognitive-services-davi'
import type { SpeechSynthesisEventProps } from '@davi-ai/web-speech-cognitive-services-davi'
import { useRetorik } from '../Contexts/RetorikContext'
import { useSpeech } from '../Contexts/SpeechContext'
import { useSpeechStore, setBoundaryData } from '../Contexts/speechStore'
import LoaderCallToAction from '../Loader/LoaderCallToAction'
import { audioFiles } from '../../utils/audioFiles'
import IndexedDbManager from './IndexedDbManager'
import { useLocaleStore } from '../Contexts/localeStore'
import { RecognitionState } from '../../models/enums'

/**
 * Props accepted by the RetorikSpeech component.
 */
interface RetorikSpeechProps {
  /** Speech synthesis ponyfill used to synthesize utterances and to cancel an ongoing synthesis */
  ponyfill?: SpeechSynthesisPonyfillType
  /** Called when the current utterance ends, once the app is available or the loader has been closed */
  onEnd?: () => void
  /** Called when the current (main) utterance reports a synthesis error */
  onError?: () => void
  /** Called when playback starts, once the app is available or the loader has been closed */
  onStart?: () => void
  /** Utterance to speak now; a falsy value stops whatever is currently playing */
  utterance: SpeechSynthesisUtterance | null
  /** Upcoming utterance, synthesized ahead of time so its audio can be reused when it becomes the current one */
  nextUtterance: SpeechSynthesisUtterance | null
  /** While false (and the loader is not closed), both audio elements are forced muted and the call-to-action loader is rendered */
  appAvailable: boolean
}

/**
 * Result of the ahead-of-time synthesis of the next utterance.
 */
interface NextUtteranceData {
  /** Audio data handed back by the synthesizer for the next utterance */
  speechData: ArrayBuffer
  /** Non-viseme boundary events collected while synthesizing the next utterance */
  boundaries: Array<Boundary>
  /** Viseme events collected while synthesizing the next utterance */
  visemes: Array<Boundary>
}

// Module-level IndexedDB manager, shared by every RetorikSpeech instance, used to read / write cached speech data
const DBManager = new IndexedDbManager()
// Matches any "<...>" sequence without nested angle brackets; used to strip tags from utterance text when building cache keys
const tagsRemoverRegex = /<[^<>]+>/g

const RetorikSpeech = ({
  ponyfill,
  onEnd,
  onError,
  onStart,
  utterance,
  nextUtterance,
  appAvailable
}: RetorikSpeechProps): JSX.Element => {
  // Current locale, first part of the speech cache key
  const locale = useLocaleStore((state) => state.locale)
  const {
    loaderClosed,
    setLoaderClosed,
    configuration: { enableSpeechCaching, speechRecognitionOptions }
  } = useRetorik()
  // Selected voice; its name is the second part of the speech cache key
  const { voice } = useSpeech()
  const muted = useSpeechStore((state) => state.muted)
  const activeRecognitionState = useSpeechStore(
    (state) => state.activeRecognitionState
  )
  // Boundaries known so far for the utterance being played (mirrored into the speech store through setBoundaryData)
  const boundaryRef = useRef<Array<Boundary> | undefined | null>(null)
  // Audio element playing the utterance audio
  const audioRef = useRef<HTMLAudioElement>(null)
  // Audio element playing the speech recognition activation sound
  const speechRecognitionAudioRef = useRef<HTMLAudioElement>(null)
  // Result of the initial IndexedDB check; the cache is only used when it is truthy
  const [DBReady, setDBReady] = useState<boolean>(false)
  // Set to true when audio data is handed to the audio element; while true, stopping does not cancel the synthesizer
  const [playingBlob, setPlayingBlob] = useState<boolean>(false)

  // Pre-synthesized audio / boundaries / visemes of the next utterance, null when nothing is ready
  const [nextUtteranceData, setNextUtteranceData] =
    useState<NextUtteranceData | null>(null)
  // Accumulators filled by the next utterance's event handlers until its audio data arrives
  const nextUtteranceBoundariesRef = useRef<Array<Boundary>>([])
  const nextUtteranceVisemesRef = useRef<Array<Boundary>>([])

  // Pending timer used to signal the end of an utterance that produced no audio
  const emptyTextUtteranceRef = useRef<NodeJS.Timeout | null>(null)

  // Check the IndexedDB once on mount and keep the result
  useEffect(() => {
    DBManager.checkDB().then((result) => setDBReady(result))
  }, [])

  /**
   * Stop whatever is currently being spoken.
   * Does nothing until the app is available or the loader has been closed.
   */
  const stopCurrentPlaying = (): void => {
    const speechAllowed = appAvailable || loaderClosed
    if (!speechAllowed) {
      return
    }

    // When no blob is being played, the synthesizer may still be working: cancel it
    if (!playingBlob && ponyfill) {
      ponyfill.speechSynthesis.cancel()
    }

    resetNextUtteranceData()

    // Pause the audio element if it is playing, and signal the end of the utterance
    const audioElement = audioRef.current
    if (audioElement && !audioElement.paused) {
      audioElement.pause()
      handleEnd()
    }
  }

  /**
   * Attach the error / completion / boundary / viseme handlers to an utterance.
   *
   * For the main utterance, boundaries are appended to boundaryRef and pushed
   * to the speech store; for the next (pre-synthesized) utterance, boundaries
   * and visemes are accumulated in the dedicated "next utterance" refs.
   *
   * @param tempUtterance Utterance receiving the handlers (mutated in place)
   * @param isMainUtterance True when the utterance is the one being played now
   * @returns The same utterance, with its handlers set
   */
  const attachEvents = (
    tempUtterance: SpeechSynthesisUtterance,
    isMainUtterance?: boolean
  ): SpeechSynthesisUtterance => {
    // Convert a raw synthesis event into a Boundary record
    const toBoundary = (event: SpeechSynthesisEventProps): Boundary => ({
      word: event.name,
      startTime: event.elapsedTime,
      endTime: event.elapsedTime + event.duration,
      boundaryType: event.boundaryType
    })

    // Append a boundary to the storage matching the kind of utterance
    const storeBoundary = (boundary: Boundary): void => {
      if (!isMainUtterance) {
        nextUtteranceBoundariesRef.current = [
          ...nextUtteranceBoundariesRef.current,
          boundary
        ]
        return
      }

      // Main utterance: keep the ref and the store in sync
      const knownBoundaries = boundaryRef?.current || []
      boundaryRef.current = [...knownBoundaries, boundary]
      setBoundaryData([...knownBoundaries, boundary])
    }

    tempUtterance.onerror = (event): void => {
      if (isMainUtterance) {
        handleError(event)
      } else {
        setNextUtteranceData(null)
      }
    }

    // Synthesis over: add a final marker after the last received boundary
    tempUtterance.onsynthesiscompleted = (): void => {
      storeBoundary({
        word: '',
        startTime: 0,
        endTime: 0,
        boundaryType: 'EndBoundary'
      })
    }

    tempUtterance.onboundary = (event: SpeechSynthesisEventProps): void => {
      // Visemes have their own handler
      if (event.boundaryType === 'Viseme') {
        return
      }

      storeBoundary(toBoundary(event))
    }

    tempUtterance.onviseme = (event: SpeechSynthesisEventProps) => {
      const viseme = toBoundary(event)

      // Visemes of the main utterance are not used at the moment
      if (isMainUtterance) {
        return
      }

      nextUtteranceVisemesRef.current = [
        ...nextUtteranceVisemesRef.current,
        viseme
      ]
    }

    return tempUtterance
  }

  /**
   * Callback receiving the audio data synthesized for the next utterance.
   * Stores it with the boundaries / visemes collected so far, or stores null
   * when no audio data was received, then clears the accumulators.
   */
  const processNextData = (data: ArrayBuffer) => {
    const preparedData: NextUtteranceData | null = data?.byteLength
      ? {
          speechData: data,
          boundaries: nextUtteranceBoundariesRef.current,
          visemes: nextUtteranceVisemesRef.current
        }
      : null

    setNextUtteranceData(preparedData)

    // Clear the boundary / viseme accumulators, keeping the state that was just set
    resetNextUtteranceData(true)
  }

  /**
   * Clear the boundary / viseme accumulators of the next utterance.
   * @param keepArrayBufferData When truthy, the stored next utterance data is left untouched
   */
  const resetNextUtteranceData = (keepArrayBufferData?: boolean): void => {
    if (!keepArrayBufferData) {
      setNextUtteranceData(null)
    }

    nextUtteranceBoundariesRef.current = []
    nextUtteranceVisemesRef.current = []
  }

  // React to a change of the current utterance:
  //  - no utterance, or no ponyfill / audio element: stop what is currently playing
  //  - utterance without text: signal start then end without playing anything
  //  - utterance with text: play the pre-synthesized data if any, otherwise go through cache / synthesis
  // NOTE(review): the dependency array only lists `utterance`, so every other value read here
  // (ponyfill, nextUtteranceData, ...) is the one captured by the render in which `utterance` changed.
  useEffect(() => {
    if (utterance) {
      if (utterance.text) {
        if (ponyfill && audioRef?.current) {
          if (nextUtteranceData) {
            // Audio already synthesized ahead of time: publish its boundaries and play it without caching it
            boundaryRef.current = nextUtteranceData.boundaries
            setBoundaryData(nextUtteranceData.boundaries)
            processData(nextUtteranceData.speechData, true)
          } else {
            // Play utterance either by asking for a speech synthesis, or by retrieving data from IndexedDB
            playUtterance(attachEvents(utterance, true))
          }
        } else {
          stopCurrentPlaying()
        }
      } else {
        handleEmptyTextUtterance()
      }
    } else {
      stopCurrentPlaying()
    }

    // Cleanup (next utterance change or unmount): stop playback and drop the pending "empty utterance" timer
    return (): void => {
      stopCurrentPlaying()
      emptyTextUtteranceRef?.current &&
        clearTimeout(emptyTextUtteranceRef.current)
    }
  }, [utterance])

  // Synthesize the next utterance ahead of time whenever it changes; the resulting audio is received by processNextData.
  // Without a next utterance text or a ponyfill, any previously prepared data is dropped.
  useEffect(() => {
    if (nextUtterance?.text && ponyfill) {
      // No second argument: handlers are attached in "next utterance" mode
      const tempUtterance = attachEvents(nextUtterance)
      ponyfill.speechSynthesis.synthesizeAndGetArrayData(
        tempUtterance,
        processNextData
      )
    } else {
      resetNextUtteranceData()
    }
  }, [nextUtterance])

  /**
   * Play the given utterance, using cached audio when available.
   *
   * When the IndexedDB check succeeded and speech caching is not disabled, the
   * cache is looked up with a key made of the locale, the voice name and the
   * first 50 characters of the tag-stripped text. On a hit, the stored
   * boundaries are published and the stored audio is played. In every other
   * case the ponyfill is asked to synthesize the utterance and to hand the
   * audio data to processData.
   *
   * A failing cache lookup is logged and treated as a cache miss. The caller
   * does not await this function, so letting the rejection escape would leave
   * an unhandled promise rejection and the utterance would never be played.
   *
   * @param utt Utterance to play, with its event handlers already attached
   */
  const playUtterance = async (
    utt: SpeechSynthesisUtterance
  ): Promise<void> => {
    if (DBReady && enableSpeechCaching !== false && utt) {
      const cacheKey = `${locale}.${voice?.name}.${utt.text
        .replace(tagsRemoverRegex, '')
        .substring(0, 50)}`

      // Only the lookup is guarded, so an error thrown while playing cached data is not mistaken for a cache miss
      const readCachedSpeech = async () => {
        try {
          return await DBManager.getSpeechData(cacheKey)
        } catch (e) {
          console.warn(e)
          return undefined
        }
      }

      const dataFromIndexedDB = await readCachedSpeech()

      if (dataFromIndexedDB) {
        boundaryRef.current = dataFromIndexedDB.boundaries
        setBoundaryData(dataFromIndexedDB.boundaries)
        processData(dataFromIndexedDB.value, true)
        return
      }
    }

    // Cache disabled, unavailable, missed or failing: synthesize the utterance
    ponyfill?.speechSynthesis.synthesizeAndGetArrayData(utt, processData)
  }

  /**
   * Play audio data through the audio element, caching it first when relevant.
   *
   * Empty data is handled like an utterance without text (start then end are
   * signalled without playing anything).
   *
   * Each call creates an object URL for the audio. The blob URL previously
   * assigned to the audio element is revoked once it has been replaced, so
   * that at most one object URL stays alive instead of one per utterance.
   *
   * @param data Audio data to play
   * @param alreadyExistsInDB When truthy, the data is not written to the cache
   */
  const processData = (data: ArrayBuffer, alreadyExistsInDB?: boolean) => {
    if (!data?.byteLength) {
      handleEmptyTextUtterance()
      return
    }

    // Store freshly synthesized audio with the boundaries received so far
    if (
      enableSpeechCaching !== false &&
      !alreadyExistsInDB &&
      DBReady &&
      boundaryRef?.current &&
      utterance
    ) {
      DBManager.addSpeechData({
        id: `${locale}.${voice?.name}.${utterance.text
          .replace(tagsRemoverRegex, '')
          .substring(0, 50)}`,
        value: data,
        boundaries: boundaryRef.current
      })
    }

    // Without an audio element nothing can be played: do not create an object URL that would never be released
    const audioElement = audioRef?.current
    if (!audioElement) {
      return
    }

    const previousSrc = audioElement.src
    const url = URL.createObjectURL(new Blob([data], { type: 'audio/mp3' }))

    setPlayingBlob(true)
    audioElement.src = url

    // The element no longer uses the previous source: release it if it was an object URL
    if (previousSrc.startsWith('blob:')) {
      URL.revokeObjectURL(previousSrc)
    }

    audioElement.play().catch((e) => console.warn(e))
  }

  /**
   * Forget everything about the utterance that was being played:
   * boundaries (ref and store) and the "playing blob" flag.
   */
  const resetData = (): void => {
    const noBoundaries: Array<Boundary> = []

    boundaryRef.current = noBoundaries
    // The store receives its own empty array, distinct from the one kept in the ref
    setBoundaryData([])
    setPlayingBlob(false)
  }

  /**
   * Notify the parent that playback started.
   * Ignored until the app is available or the loader has been closed.
   */
  const handleStart = (): void => {
    const canNotify = appAvailable || loaderClosed
    if (!canNotify) {
      return
    }

    onStart?.()
  }

  /**
   * Handle the end of a playback.
   * Before the app is available and while the loader is still open, the end
   * of the playback only closes the loader; afterwards the parent is notified
   * and the utterance data is reset.
   */
  const handleEnd = (): void => {
    if (!appAvailable && !loaderClosed) {
      setLoaderClosed(true)
      return
    }

    onEnd?.()
    resetData()
  }

  /**
   * Handle a synthesis error on the main utterance: log it, notify the parent
   * and reset the utterance data.
   * @param error Error event / value reported by the synthesizer (only logged)
   */
  const handleError = (error: unknown): void => {
    console.error('Error : ', error)
    onError?.()
    resetData()
  }

  /**
   * Simulate the playback of an utterance that has nothing to play:
   * signal the start immediately, then the end 50 ms later.
   * A pending "end" timer from a previous call is cancelled first.
   */
  const handleEmptyTextUtterance = (): void => {
    handleStart()

    const pendingTimer = emptyTextUtteranceRef?.current
    if (pendingTimer) {
      clearTimeout(pendingTimer)
    }

    emptyTextUtteranceRef.current = setTimeout(handleEnd, 50)
  }

  /**
   * Prime the audio outputs from the loader's validation callback:
   *  - start both audio elements (they are muted as long as the loader is displayed)
   *  - MANDATORY FOR SAFARI IN VOCAL MODE
   * Nothing happens without a ponyfill or before the main audio element is mounted.
   */
  const primeRetorikSpeech = (): void => {
    const mainAudio = audioRef.current
    if (!ponyfill || !mainAudio) {
      return
    }

    mainAudio.play().catch((e) => console.warn(e))

    const activationAudio = speechRecognitionAudioRef?.current
    if (activationAudio) {
      activationAudio.play().catch((e) => console.warn(e))
    }

    // Send animation start event to secure animation not playing on safari if permissions are not sufficient
    window.dispatchEvent(new Event('retorikSpiritEnginePlay'))
  }

  /**
   * Play the activation sound each time speech recognition enters the Listening state,
   * only when `enableActivationSound` is truthy in the speech recognition options
   * (a missing option means no sound)
   */
  useEffect(() => {
    if (
      !!speechRecognitionOptions?.enableActivationSound &&
      activeRecognitionState === RecognitionState.Listening
    ) {
      speechRecognitionAudioRef?.current &&
        speechRecognitionAudioRef.current.play().catch((e) => console.warn(e))
    }
  }, [activeRecognitionState, speechRecognitionOptions])

  // Render two audio elements (utterance audio and recognition activation sound), both forced muted
  // until the app is available or the loader is closed, plus the call-to-action loader during that period.
  // The first element initially points to the "onesecond" file, which is what primeRetorikSpeech plays.
  return (
    <React.Fragment>
      <audio
        ref={audioRef}
        src={audioFiles.onesecond}
        muted={appAvailable || loaderClosed ? muted : true}
        onPlay={handleStart}
        onEnded={handleEnd}
      />
      <audio
        ref={speechRecognitionAudioRef}
        src={audioFiles.speechrecognition}
        preload='auto'
        muted={appAvailable || loaderClosed ? muted : true}
      />
      {!(appAvailable || loaderClosed) && (
        <LoaderCallToAction handleValidation={primeRetorikSpeech} />
      )}
    </React.Fragment>
  )
}

// Default values applied to props that are not provided.
// NOTE(review): `utterance` defaults to undefined although the props interface types it as
// `SpeechSynthesisUtterance | null`, and `nextUtterance` has no entry here - confirm this is intended.
RetorikSpeech.defaultProps = {
  ponyfill: undefined,
  onEnd: undefined,
  onError: undefined,
  onStart: undefined,
  utterance: undefined,
  appAvailable: false
}

export default RetorikSpeech
