SpeechTools.jm 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. /*
  2. * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
  3. * Copyright (C) 2005/2012, Anthony Minessale II <anthm@freeswitch.org>
  4. *
  5. * Version: MPL 1.1
  6. *
  7. * The contents of this file are subject to the Mozilla Public License Version
  8. * 1.1 (the "License"); you may not use this file except in compliance with
  9. * the License. You may obtain a copy of the License at
  10. * http://www.mozilla.org/MPL/
  11. *
  12. * Software distributed under the License is distributed on an "AS IS" basis,
  13. * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14. * for the specific language governing rights and limitations under the
  15. * License.
  16. *
  17. * The Original Code is FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
  18. *
  19. * The Initial Developer of the Original Code is
  20. * Anthony Minessale II <anthm@freeswitch.org>
  21. * Portions created by the Initial Developer are Copyright (C)
  22. * the Initial Developer. All Rights Reserved.
  23. *
  24. * Contributor(s):
  25. *
  26. * Anthony Minessale II <anthm@freeswitch.org>
  27. *
  28. *
  29. * SpeechTools.jm Speech Detection Interface
  30. *
  31. */
  32. /* Constructor for Grammar Class (Class to identify a grammar entity) */
  33. function Grammar(grammar_name, path, obj_path, min_score, confirm_score, halt) {
  34. this.grammar_name = grammar_name;
  35. this.path = path;
  36. this.min_score = min_score;
  37. this.confirm_score = confirm_score;
  38. this.halt = halt;
  39. this.obj_path = obj_path;
  40. if (!this.min_score) {
  41. this.min_score = 1;
  42. }
  43. if (!this.confirm_score) {
  44. this.confirm_score = 400;
  45. }
  46. }
  47. /* Constructor for SpeechDetect Class (Class to Detect Speech) */
  48. function SpeechDetect(session, mod, ip) {
  49. this.ip = ip;
  50. this.session = session;
  51. this.mod = mod;
  52. this.grammar_name = undefined;
  53. this.grammar_hash = new Array();
  54. this.grammar_name = false;
  55. this.audio_base = "";
  56. this.audio_ext = ".wav";
  57. this.tts_eng = false;
  58. this.tts_voice = false;
  59. this.AutoUnload = false;
  60. this.debug = false;
  61. /* Set the TTS info */
  62. this.setTTS = function (tts_eng, tts_voice) {
  63. this.tts_eng = tts_eng;
  64. this.tts_voice = tts_voice;
  65. }
  66. /* Set the audio base */
  67. this.setAudioBase = function (audio_base) {
  68. this.audio_base = audio_base;
  69. }
  70. /* Set the audio extension */
  71. this.setAudioExt= function (audio_ext) {
  72. this.audio_ext = audio_ext;
  73. }
  74. /* Add a grammar to be used */
  75. this.addGrammar = function(grammar_object) {
  76. this.grammar_hash[grammar_object.grammar_name] = grammar_object;
  77. }
  78. /* Play an audio file */
  79. this.streamFile = function(str) {
  80. var rv;
  81. if (!str) {
  82. console_log("error", "No file specified!\n");
  83. return;
  84. }
  85. files = str.split(",");
  86. for( x = 0; x < files.length; x++) {
  87. if (!files[x] || files[x] == "noop") {
  88. continue;
  89. }
  90. this.session.streamFile(this.audio_base + files[x] + this.audio_ext);
  91. }
  92. }
  93. /* Speak with TTS */
  94. this.speak = function(str) {
  95. return this.session.speak(this.tts_eng, this.tts_voice, str);
  96. }
  97. /* Set the current grammar */
  98. this.setGrammar = function (grammar_name) {
  99. var grammar_object = this.grammar_hash[grammar_name];
  100. if (!grammar_object) {
  101. console_log("error", "Missing Grammar!\n");
  102. return false;
  103. }
  104. if (this.grammar_name) {
  105. if (this.AutoUnload) {
  106. console_log("debug", "Unloading grammar " + this.grammar_name + "\n");
  107. this.session.execute("detect_speech", "nogrammar " + this.grammar_name);
  108. }
  109. if (grammar_object.path) {
  110. this.session.execute("detect_speech", "grammar " + grammar_name + " " + grammar_object.path);
  111. } else {
  112. this.session.execute("detect_speech", "grammar " + grammar_name);
  113. }
  114. } else {
  115. this.session.execute("detect_speech", this.mod + " " + grammar_name + " " + grammar_object.path + " " + this.ip);
  116. }
  117. this.grammar_name = grammar_name;
  118. }
  119. /* Pause speech detection */
  120. this.pause = function() {
  121. this.session.execute("detect_speech", "pause");
  122. }
  123. /* Resume speech detection */
  124. this.resume = function() {
  125. this.session.execute("detect_speech", "resume");
  126. }
  127. /* Stop speech detection */
  128. this.stop = function() {
  129. this.session.execute("detect_speech", "stop");
  130. }
  131. /* Callback function for streaming,TTS or bridged calls */
  132. this.onInput = function(s, type, inputEvent, _this) {
  133. try {
  134. if (type == "event") {
  135. var speech_type = inputEvent.getHeader("Speech-Type");
  136. var rv = new Array();
  137. if (!_this.grammar_name) {
  138. console_log("error", "No Grammar name!\n");
  139. _this.session.hangup();
  140. return false;
  141. }
  142. var grammar_object = _this.grammar_hash[_this.grammar_name];
  143. if (!grammar_object) {
  144. console_log("error", "Can't find grammar for " + _this.grammar_name + "\n");
  145. _this.session.hangup();
  146. return false;
  147. }
  148. if (speech_type == "begin-speaking") {
  149. if (grammar_object.halt) {
  150. return false;
  151. }
  152. } else {
  153. var body = inputEvent.getBody();
  154. var result;
  155. var xml;
  156. body = body.replace(/<\?.*?\?>/g, '');
  157. xml = new XML("<xml>" + body + "</xml>");
  158. result = xml.result;
  159. _this.lastDetect = body;
  160. if (_this.debug) {
  161. console_log("debug", "----XML:\n" + body + "\n");
  162. console_log("debug", "----Heard [" + result.interpretation.input + "]\n");
  163. console_log("debug", "----Hit score " + result.interpretation.@confidence + "/" +
  164. grammar_object.min_score + "/" + grammar_object.confirm_score + "\n");
  165. }
  166. if (result.interpretation.@confidence >= grammar_object.min_score) {
  167. if (result.interpretation.@confidence < grammar_object.confirm_score) {
  168. rv.push("_confirm_");
  169. }
  170. eval("xo = " + grammar_object.obj_path + ";");
  171. for (x = 0; x < xo.length(); x++) {
  172. rv.push(xo[x]);
  173. console_log("info", "----" +xo[x] + "\n");
  174. }
  175. } else {
  176. rv.push("_no_idea_");
  177. }
  178. }
  179. delete interp;
  180. return rv;
  181. }
  182. }
  183. catch(err) {
  184. console_log("crit", "----ERROR:\n" + err + "\n");
  185. }
  186. }
  187. }
  188. /* Constructor for SpeechObtainer Class (Class to collect data from a SpeechDetect Class) */
  189. function SpeechObtainer(asr, req, wait_time) {
  190. this.items = new Array();
  191. this.collected_items = new Array();
  192. this.index = 0;
  193. this.collected_index = 0;
  194. this.req = req;
  195. this.tts_eng = undefined;
  196. this.tts_voice = false;
  197. this.asr = asr;
  198. this.top_sound = false;
  199. this.add_sound = false;
  200. this.dup_sound = false;
  201. this.bad_sound = false;
  202. this.needConfirm = false;
  203. this.grammar_name = false;
  204. this.audio_base = asr.audio_base;
  205. this.audio_ext = asr.audio_ext;
  206. this.tts_eng = asr.tts_eng;
  207. this.tts_voice = asr.tts_voice;
  208. this.debug = asr.debug;
  209. if (!req) {
  210. req = 1;
  211. }
  212. if (!wait_time) {
  213. wait_time = 5000;
  214. }
  215. this.waitTime = wait_time + 0;
  216. /* Set the TTS info */
  217. this.setTTS = function (tts_eng, tts_voice) {
  218. this.tts_eng = tts_eng;
  219. this.tts_voice = tts_voice;
  220. }
  221. /* Set the audio base */
  222. this.setAudioBase = function (audio_base) {
  223. this.audio_base = audio_base;
  224. }
  225. /* Set the audio extension */
  226. this.setAudioExt= function (audio_ext) {
  227. this.audio_ext = audio_ext;
  228. }
  229. /* Set the grammar to use */
  230. this.setGrammar = function (grammar_name, path, obj_path, min_score, confirm_score, halt) {
  231. var grammar_object = new Grammar(grammar_name, path, obj_path, min_score, confirm_score, halt);
  232. this.asr.addGrammar(grammar_object);
  233. this.grammar_name = grammar_name;
  234. }
  235. /* Set the top audio file or tts for the collection */
  236. this.setTopSound = function (top_sound) {
  237. this.top_sound = top_sound;
  238. }
  239. /* Set the audio file or tts for misunderstood input */
  240. this.setBadSound = function (bad_sound) {
  241. this.bad_sound = bad_sound;
  242. }
  243. /* Set the audio file or tts for duplicate input */
  244. this.setDupSound = function (dup_sound) {
  245. this.dup_sound = dup_sound;
  246. }
  247. /* Set the audio file or tts for accepted input */
  248. this.setAddSound = function (add_sound) {
  249. this.add_sound = add_sound;
  250. }
  251. /* Add acceptable items (comma sep list) */
  252. this.addItem = function(item) {
  253. ia = item.split(",");
  254. var x;
  255. for (x = 0; x < ia.length; x++) {
  256. this.items[this.index++] = ia[x];
  257. }
  258. }
  259. this.addItemAlias = function(item,alias) {
  260. ia = item.split(",");
  261. var x;
  262. for (x = 0; x < ia.length; x++) {
  263. this.items[this.index++] = ia[x] + ":::" + alias;
  264. }
  265. }
  266. /* Add a regex */
  267. this.addRegEx = function(item) {
  268. this.items[this.index++] = item;
  269. }
  270. /* Reset the object and delete all collect items */
  271. this.reset = function() {
  272. this.collected_index = 0;
  273. delete this.collected_items;
  274. this.collected_items = new Array();
  275. }
  276. /* Stream a file, collecting input */
  277. this.streamFile = function(str) {
  278. var rv;
  279. if (!str) {
  280. console_log("error", "No file specified!\n");
  281. return;
  282. }
  283. files = str.split(",");
  284. for( x = 0; x < files.length; x++) {
  285. if (!files[x] || files[x] == "noop") {
  286. continue;
  287. }
  288. rv = this.asr.session.streamFile(this.audio_base + files[x] + this.audio_ext , this.asr.onInput, this.asr);
  289. if (rv) {
  290. break;
  291. }
  292. }
  293. return rv;
  294. }
  295. /* Speak some text, collecting input */
  296. this.speak = function(str) {
  297. return this.asr.session.speak(this.tts_eng, this.tts_voice, str, this.asr.onInput, this.asr);
  298. }
  299. /* Process collected input */
  300. this.react = function(say_str, play_str) {
  301. var rv;
  302. if (!rv) {
  303. rv = this.asr.session.collectInput(this.asr.onInput, this.asr, 500);
  304. }
  305. if (!rv) {
  306. this.asr.resume();
  307. if (this.tts_eng && this.tts_voice) {
  308. rv = this.speak(say_str);
  309. } else {
  310. rv = this.streamFile(play_str);
  311. }
  312. }
  313. if (!rv) {
  314. rv = this.asr.session.collectInput(this.asr.onInput, this.asr, 500);
  315. }
  316. if (rv && !rv[0]) {
  317. rv = false;
  318. }
  319. return rv;
  320. }
  321. /* Collect input */
  322. this.run = function() {
  323. var rv;
  324. var hit;
  325. var dup;
  326. if (this.collected_index) {
  327. this.reset();
  328. }
  329. if (!this.grammar_name) {
  330. console_log("error", "No Grammar name!\n");
  331. this.session.hangup();
  332. return false;
  333. }
  334. this.asr.setGrammar(this.grammar_name);
  335. while(this.asr.session.ready() && this.collected_index < this.req) {
  336. var x;
  337. this.needConfirm = false;
  338. if (!rv) {
  339. rv = this.react(this.top_sound, this.top_sound);
  340. }
  341. if (!rv) {
  342. this.asr.resume();
  343. rv = this.asr.session.collectInput(this.asr.onInput, this.asr, this.waitTime);
  344. }
  345. hit = false;
  346. if (rv) {
  347. var items = rv;
  348. rv = undefined;
  349. for (y = 0; y < items.length; y++) {
  350. if (items[y] == "_no_idea_") {
  351. if (this.debug) {
  352. console_log("debug", "----We don't understand this\n");
  353. }
  354. break;
  355. }
  356. if (items[y] == "_confirm_") {
  357. this.needConfirm = true;
  358. if (this.debug) {
  359. console_log("debug", "----We need to confirm this one\n");
  360. }
  361. continue;
  362. }
  363. for(x = 0 ; x < this.index; x++) {
  364. if (this.debug) {
  365. console_log("debug", "----Testing [" + y + "] [" + x + "] " + items[y] + " =~ [" + this.items[x] + "]\n");
  366. }
  367. str = this.items[x];
  368. ab = str.split(":::");
  369. var re = new RegExp(ab[0], "i");
  370. match = re.exec(items[y]);
  371. if (match) {
  372. for (i = 0; i < match.length; i++) {
  373. if (ab.length == 1) {
  374. rep = match[i];
  375. } else {
  376. rep = ab[1];
  377. }
  378. dup = false;
  379. for(z = 0; z < this.collected_items.length; z++) {
  380. if (this.collected_items[z] == rep) {
  381. dup = true;
  382. break;
  383. }
  384. }
  385. if (dup) {
  386. if (this.dup_sound) {
  387. rv = this.react(this.dup_sound + " " + rep, this.dup_sound + "," + rep);
  388. }
  389. } else {
  390. if (this.debug) {
  391. console_log("debug", "----Adding " + rep + "\n");
  392. }
  393. this.collected_items[this.collected_index++] = rep;
  394. hit = true;
  395. if (this.add_sound) {
  396. rv = this.react(this.add_sound + " " + rep, this.add_sound + "," + rep);
  397. }
  398. }
  399. }
  400. }
  401. }
  402. }
  403. }
  404. if (!rv) {
  405. rv = this.asr.session.collectInput(this.asr.onInput, this.asr, 1000);
  406. }
  407. if (!rv && !hit && !dup) {
  408. rv = this.react(this.bad_sound, this.bad_sound);
  409. }
  410. }
  411. return this.collected_items;
  412. }
  413. }