jambonz / jambonz-feature-server

Core telephony feature server for the jambones platform
MIT License
47 stars 38 forks source link

Barge in flushes incorrect say #767

Closed paulotelles closed 2 months ago

paulotelles commented 5 months ago

When activating the barge in, using this example:

[
  {
    "say": {
      "text": "<speak><voice name=\"en-US-Standard-C\">Answer with this phrase: \"number one\"</voice></speak>",
      "disableTtsCache": false
    }
  },
  {
    "gather":{
      "input":[
        "speech"
      ],
      "timeout":10,
      "actionHook":"voice",
      "recognizer":{
        "punctuation":true,
        "vendor":"google",
        "language":"en-US",
        "hints":[

        ],
        "altLanguages":[

        ]
      }
    }
  },
  {
    "say":{
      "text":"<speak><voice name=\"en-US-Standard-C\">A long text is going to be played. Say a word when it starts to barge in.</voice></speak>",
      "disableTtsCache":false
    }
  },
  {
    "config":{
      "synthesizer":{
        "vendor":"google",
        "language":"en-US",
        "voice":"en-US-Standard-C"
      },
      "recognizer":{
        "language":"en-US",
        "vendor":"google"
      },
      "bargeIn":{
        "enable":true,
        "actionHook":"voice",
        "dtmfBargein":false,
        "input":[
          "speech"
        ],
        "minBargeinWordCount":1,
        "sticky":false
      }
    }
  },
  {
    "say":{
      "text":"<speak><voice name=\"en-US-Standard-C\"><break time=\"1500ms\"/>\r\nIn realms of old, where darkness creeps,\r\nA tale unfolds, where danger leaps.\r\nA flying axe, swift as the wind,\r\nWith purpose true, no mercy thinned.\r\n\r\nBorn of an ancient smithy's might,\r\nForged in fires, its form alight.\r\nA weapon honed, with lethal grace,\r\nA harbinger of fate's embrace.\r\n\r\nWith steel wings spread, it takes to flight,\r\nUnseen by mortal eye's short sight.\r\nAxehead gleaming, sharp and keen,\r\nIts presence feared, yet seldom seen.</voice></speak>",
      "disableTtsCache":false
    }
  },
  {
    "config":{
      "bargeIn":{
        "enable":false
      },
      "notifyEvents":true,
      "recognizer":{
        "punctuation":true,
        "vendor":"google",
        "language":"en-US",
        "hints":[

        ],
        "altLanguages":[

        ]
      },
      "synthesizer":{
        "vendor":"google",
        "language":"en-US",
        "voice":"en-US-Standard-C"
      }
    }
  },
  {
    "say":{
      "text":"<speak><voice name=\"en-US-Standard-C\">interrupted fast car</voice></speak>",
      "disableTtsCache":false
    }
  },
  {
    "config":{
      "synthesizer":{
        "vendor":"google",
        "language":"en-US",
        "voice":"en-US-Standard-C"
      },
      "recognizer":{
        "language":"en-US",
        "vendor":"google"
      },
      "bargeIn":{
        "enable":true,
        "actionHook":"voice",
        "dtmfBargein":false,
        "input":[
          "speech"
        ],
        "minBargeinWordCount":1,
        "sticky":false
      }
    }
  },
  {
    "say":{
      "text":"<speak><voice name=\"en-US-Standard-C\">Hello this is a test, please continue to listen this audio until the end, if you wanna barge in please do it, I'll be waiting and I'll keep saying something for a long time, I hope everything works well. Otherwise we will have to investigate what is going on, please barge in.</voice></speak>",
      "disableTtsCache":false
    }
  },
  {
    "config":{
      "bargeIn":{
        "enable":false
      },
      "notifyEvents":true,
      "recognizer":{
        "punctuation":true,
        "vendor":"google",
        "language":"en-US",
        "hints":[

        ],
        "altLanguages":[

        ]
      },
      "synthesizer":{
        "vendor":"google",
        "language":"en-US",
        "voice":"en-US-Standard-C"
      }
    }
  },
  {
    "say":{
      "text":"<speak><voice name=\"en-US-Standard-C\">interrupted last place</voice></speak>",
      "disableTtsCache":false
    }
  },
  {
    "hangup":{
      "headers":{
        "X-Reason":"Bot ended the call"
      }
    }
  }
]

The expectation is:

  1. Listen to "Answer with this phrase: number one"
  2. Answer with "number one"
  3. Listen to A long text is going to be played. Say a word when it starts to barge in.
  4. Start to listen to the next prompt, but barge in at some point.
  5. After the big text where you barge in there is a config to disable barge in, so it should not be skipped and you should listen to "interrupted fast car" and you cannot barge in this.
  6. You shoul hear the prompt "Hello this is a test, please continue to listen this audio until the end, if you wanna barge in please do it, I'll be waiting and I'll keep saying something for a long time, I hope everything works well. Otherwise we will have to investigate what is going on, please barge in." and you should be able to barge in cause there is another config to enable barge in before it.
  7. After you barge in you should listen to "interrupted last place" cause there is a config to disable barge in before it, so it should not be flushed.

I made a test on jambonz.me with call_sid: 00780599-3421-49c7-bd7b-cb07e75e5b26

The outcome is that after the barge in in the first place, you just listen to the last part: interrupted last place

After some investigation I believe that the issue is somewhere here:

kill(onBackgroundGatherBargein = false) {
    if (this.isConfirmCallSession) this.logger.debug('CallSession:kill (ConfirmSession)');
    else this.logger.info('CallSession:kill');
    this._endVerbHookSpan();
    if (this.currentTask) {
      this.currentTask.kill(this);
      this.currentTask = null;
    }
    if (onBackgroundGatherBargein) {
      /* search for a config with bargein disabled */
      while (this.tasks.length) {
        const t = this.tasks[0];
        if (t.name === TaskName.Config && t.bargeIn?.enable === false) {
          /* found it, clear to that point and remove the disable
            because we likely already received a partial transcription
            and we don't want to kill the background gather before we
            get the full transcription.
          */
          delete t.bargeIn.enable;
          this.logger.info('CallSession:kill - found bargein disabled in the stack, clearing to that point');
          break;
        }
        const rem = this.tasks.shift();
        this.logger.debug(`CallSession:kill - clearing task ${rem.summary}`);
      }
    }

since we delete delete t.bargeIn.enable; when barge in is disable, the other verbs are being flush until we have a second disable barge in.

I also can provide an endpoint for debugging.

paulotelles commented 5 months ago

@xquanluu any news about this bug?

xquanluu commented 5 months ago

Hi @paulotelles I. cannot reproduce the issue on my jambonz system.

From your wss application I can see that jambonz clearly stop at correct point. You can see that I put this extra log and see after clearing tasks, jambonz keep the Say: Text two after first barge.

Code:

          delete t.bargeIn.enable;
          this.logger.info('CallSession:kill - found bargein disabled in the stack, clearing to that point');
          this.logger.info({tasks: listTaskNames(this.tasks)}, 'CallSession:kill - remaining tasks 22');
          break;
[14:32:49.399] INFO (1960315): CallSession:kill - found bargein disabled in the stack, clearing to that point
    callId: "4b7b65b5-a2a2-123d-30b2-0e9a7c02a279"
    callSid: "6c92c16d-5781-42ac-8286-608ce72e0578"
    accountSid: "6079fe5f-149d-4c1c-b446-2e93d2519e58"
    callingNumber: "xhoaluu"
    calledNumber: "app-62844a92-9d01-482c-94ad-537afaf7ada5"
    traceId: "178d3b15f96be85c87939cd2f377bb1b"
[14:32:49.399] INFO (1960315): CallSession:kill - remaining tasks 22
    callId: "4b7b65b5-a2a2-123d-30b2-0e9a7c02a279"
    callSid: "6c92c16d-5781-42ac-8286-608ce72e0578"
    accountSid: "6079fe5f-149d-4c1c-b446-2e93d2519e58"
    callingNumber: "xhoaluu"
    calledNumber: "app-62844a92-9d01-482c-94ad-537afaf7ada5"
    traceId: "178d3b15f96be85c87939cd2f377bb1b"
    tasks: "[config{set synthesizer{microsoft,en-US,en-US-AvaMultilingualNeural},set recognizer{deepgram,en-GB},event notification on},say{text=<speak version=\"1.0\" xmlns=\"http://www.w3.org/2001/10/synthesis\" xml:lang=\"en-US\"><voice name=\"en-US-AvaMultilingualNeural\">Text two after first barge...},hangup]"
xquanluu commented 5 months ago

For same background bargin, gather fire 2 events to bargin to cleanup verbs 2 times

Gather sent Vad on is_final=false


[07:25:57.157] ^[[34mDEBUG^[[39m (1960315): ^[[36mGather:_onTranscription raw transcript^[[39m
    callId: "cedd8879-a32f-123d-30b2-0e9a7c02a279"
    callSid: "2624e454-3cd9-40b6-9067-a08c57909b4a"
    accountSid: "71a1ec5a-c86f-44e4-8763-daba39b3e568"
    callingNumber: "+1"
    calledNumber: "+2"
    traceId: "05dfa30921a8b98254e0f8e136698086"
    evt: {
      "stability": 0.009999999776482582,
      "is_final": false,
      "alternatives": [
        {
          "confidence": 0,
          "transcript": "stop"
        }
      ],
      "language_code": "en-us",
      "channel_tag": 0,
      "result_end_time": 5740
    }
    bugname: "background_bargeIn_google_transcribe"
    finished: null
    vendor: "google"
[07:25:57.157] ^[[34mDEBUG^[[39m (1960315): ^[[36mkilling audio due to speech^[[39m
    callId: "cedd8879-a32f-123d-30b2-0e9a7c02a279"
    callSid: "2624e454-3cd9-40b6-9067-a08c57909b4a"
    accountSid: "71a1ec5a-c86f-44e4-8763-daba39b3e568"
    callingNumber: "+1"
    calledNumber: "+2"
    traceId: "05dfa30921a8b98254e0f8e136698086"
    transcript: "stop"
[07:25:57.157] ^[[34mDEBUG^[[39m (1960315): ^[[36mBackgroundTaskManager:_bargeInTaskCompleted on event from background bargeIn, emitting bargein-done event^[[39m
    callId: "cedd8879-a32f-123d-30b2-0e9a7c02a279"
    callSid: "2624e454-3cd9-40b6-9067-a08c57909b4a"
    accountSid: "71a1ec5a-c86f-44e4-8763-daba39b3e568"
    callingNumber: "+1"
    calledNumber: "+2"

Then when final transcription, gather fire resolve event to background bargin again:


[07:25:57.631] ^[[34mDEBUG^[[39m (1960315): ^[[36mGather:_onTranscription raw transcript^[[39m
    callId: "cedd8879-a32f-123d-30b2-0e9a7c02a279"
    callSid: "2624e454-3cd9-40b6-9067-a08c57909b4a"
    accountSid: "71a1ec5a-c86f-44e4-8763-daba39b3e568"
    callingNumber: "+1"
    calledNumber: "+2"
    traceId: "05dfa30921a8b98254e0f8e136698086"
    evt: {
      "stability": 0,
      "is_final": true,
      "alternatives": [
        {
          "confidence": 0.13401676714420319,
          "transcript": "stop"
        }
      ],
      "language_code": "en-us",
      "channel_tag": 0,
      "result_end_time": 6040
    }
    bugname: "background_bargeIn_google_transcribe"
    finished: null
    vendor: "google"

// During play "interrupted fast car"

[07:25:57.631] ^[[34mDEBUG^[[39m (1960315): ^[[36mTaskGather:resolve with reason speech^[[39m
    callId: "cedd8879-a32f-123d-30b2-0e9a7c02a279"
    callSid: "2624e454-3cd9-40b6-9067-a08c57909b4a"
    accountSid: "71a1ec5a-c86f-44e4-8763-daba39b3e568"
    callingNumber: "+1"
    calledNumber: "+2"
    traceId: "05dfa30921a8b98254e0f8e136698086"
[07:25:57.632] ^[[34mDEBUG^[[39m (1960315): ^[[36mBackgroundTaskManager:_bargeInTaskCompleted on event from background bargeIn, emitting bargein-done event^[[39m
    callId: "cedd8879-a32f-123d-30b2-0e9a7c02a279"
    callSid: "2624e454-3cd9-40b6-9067-a08c57909b4a"
    accountSid: "71a1ec5a-c86f-44e4-8763-daba39b3e568"
    callingNumber: "+1"
    calledNumber: "+2"
    traceId: "05dfa30921a8b98254e0f8e136698086"
    evt: {
      "language_code": "en-US",
      "channel_tag": 1,
      "is_final": true,
      "alternatives": [
        {
          "confidence": 0.13401676714420319,
          "transcript": "stop"
        }
      ],
      "vendor": {
        "name": "google",
        "evt": {
          "stability": 0,
          "is_final": true,
          "alternatives": [
            {
              "confidence": 0.13401676714420319,
              "transcript": "stop"
            }
          ],
          "language_code": "en-us",
          "channel_tag": 0,
          "result_end_time": 6040
        }
      }
    }
paulotelles commented 5 months ago

The fix proposed was successfull, here the call_sids:

call_sid: 4627bb06-4b74-4cbb-8294-e1f05a8dce71

call_sid: fc821df4-9989-479e-83d9-e29f2160b52c

The barge in was successful and after that I had a disable barge in and a say node, I could hear the say node after the disable bargein in all the cases.