res/res_speech.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404

/*
 * Asterisk -- An open source telephony toolkit.
 *
 * Copyright (C) 2006, Digium, Inc.
 *
 * Joshua Colp <jcolp@digium.com>
 *
 * See http://www.asterisk.org for more information about
 * the Asterisk project. Please do not directly contact
 * any of the maintainers of this project for assistance;
 * the project provides a web site, mailing lists and IRC
 * channels for your use.
 *
 * This program is free software, distributed under the terms of
 * the GNU General Public License Version 2. See the LICENSE file
 * at the top of the source tree.
 */

/*! \file
 *
 * \brief Generic Speech Recognition API
 *
 * \author Joshua Colp <jcolp@digium.com>
 */

#include "asterisk.h"

ASTERISK_FILE_VERSION(__FILE__, "$Revision$");

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>

#include "asterisk/channel.h"
#include "asterisk/module.h"
#include "asterisk/lock.h"
#include "asterisk/linkedlists.h"
#include "asterisk/cli.h"
#include "asterisk/term.h"
#include "asterisk/options.h"
#include "asterisk/speech.h"


static AST_LIST_HEAD_STATIC(engines, ast_speech_engine);
static struct ast_speech_engine *default_engine = NULL;

/*! \brief Find a speech recognition engine of specified name, if NULL then use the default one */
static struct ast_speech_engine *find_engine(char *engine_name)
{
	struct ast_speech_engine *engine = NULL;

	/* If no name is specified -- use the default engine */
	if (engine_name == NULL || strlen(engine_name) == 0) {
		return default_engine;
	}

	AST_LIST_LOCK(&engines);
	AST_LIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
		if (!strcasecmp(engine->name, engine_name)) {
			break;
		}
	}
	AST_LIST_TRAVERSE_SAFE_END
	AST_LIST_UNLOCK(&engines);

	return engine;
}

/*! \brief Activate a loaded (either local or global) grammar */
int ast_speech_grammar_activate(struct ast_speech *speech, char *grammar_name)
{
	int res = 0;

	if (speech->engine->activate != NULL) {
		res = speech->engine->activate(speech, grammar_name);
	}

	return res;
}

/*! \brief Deactivate a loaded grammar on a speech structure */
int ast_speech_grammar_deactivate(struct ast_speech *speech, char *grammar_name)
{
	int res = 0;

        if (speech->engine->deactivate != NULL) {
                res = speech->engine->deactivate(speech, grammar_name);
        }

	return res;
}

/*! \brief Load a local grammar on a speech structure */
int ast_speech_grammar_load(struct ast_speech *speech, char *grammar_name, char *grammar)
{
	int res = 0;

	if (speech->engine->load != NULL) {
		res = speech->engine->load(speech, grammar_name, grammar);
	}

	return res;
}

/*! \brief Unload a local grammar from a speech structure */
int ast_speech_grammar_unload(struct ast_speech *speech, char *grammar_name)
{
        int res = 0;

        if (speech->engine->unload != NULL) {
                res = speech->engine->unload(speech, grammar_name);
        }

        return res;
}

/*! \brief Return the results of a recognition from the speech structure */
struct ast_speech_result *ast_speech_results_get(struct ast_speech *speech)
{
	struct ast_speech_result *result = NULL;

	if (speech->engine->get != NULL) {
		result = speech->engine->get(speech);
	}

	return result;
}

/*! \brief Free a list of results */
int ast_speech_results_free(struct ast_speech_result *result)
{
	struct ast_speech_result *current_result = result, *prev_result = NULL;
	int res = 0;

	while (current_result != NULL) {
		prev_result = current_result;
		/* Deallocate what we can */
		if (current_result->text != NULL) {
			free(current_result->text);
			current_result->text = NULL;
		}
		if (current_result->grammar != NULL) {
			free(current_result->grammar);
			current_result->grammar = NULL;
		}
		/* Move on and then free ourselves */
		current_result = current_result->next;
		free(prev_result);
		prev_result = NULL;
	}

	return res;
}

/*! \brief Start speech recognition on a speech structure */
void ast_speech_start(struct ast_speech *speech)
{

	/* Clear any flags that may affect things */
	ast_clear_flag(speech, AST_SPEECH_SPOKE);
	ast_clear_flag(speech, AST_SPEECH_QUIET);
	ast_clear_flag(speech, AST_SPEECH_HAVE_RESULTS);

	/* If results are on the structure, free them since we are starting again */
	if (speech->results != NULL) {
		ast_speech_results_free(speech->results);
		speech->results = NULL;
	}

	/* If the engine needs to start stuff up, do it */
	if (speech->engine->start != NULL) {
		speech->engine->start(speech);
	}

	return;
}

/*! \brief Write in signed linear audio to be recognized */
int ast_speech_write(struct ast_speech *speech, void *data, int len)
{
	int res = 0;

	/* Make sure the speech engine is ready to accept audio */
	if (speech->state != AST_SPEECH_STATE_READY) {
		return -1;
	}

	if (speech->engine->write != NULL) {
		speech->engine->write(speech, data, len);
	}

	return res;
}

/*! \brief Signal to the engine that DTMF was received */
int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
{
	int res = 0;

	if (speech->state != AST_SPEECH_STATE_READY)
		return -1;

	if (speech->engine->dtmf != NULL) {
		res = speech->engine->dtmf(speech, dtmf);
	}

	return res;
}

/*! \brief Change an engine specific attribute */
int ast_speech_change(struct ast_speech *speech, char *name, const char *value)
{
	int res = 0;

	if (speech->engine->change != NULL) {
		res = speech->engine->change(speech, name, value);
	}

	return res;
}

/*! \brief Create a new speech structure using the engine specified */
struct ast_speech *ast_speech_new(char *engine_name, int format)
{
	struct ast_speech_engine *engine = NULL;
	struct ast_speech *new_speech = NULL;

	/* Try to find the speech recognition engine that was requested */
	engine = find_engine(engine_name);
	if (engine == NULL) {
		/* Invalid engine or no engine available */
		return NULL;
	}

	/* Allocate our own speech structure, and try to allocate a structure from the engine too */
	new_speech = ast_calloc(1, sizeof(*new_speech));
	if (new_speech == NULL) {
		/* Ran out of memory while trying to allocate some for a speech structure */
		return NULL;
	}

	/* Initialize the lock */
	ast_mutex_init(&new_speech->lock);

	/* Make sure no results are present */
	new_speech->results = NULL;

	/* Copy over our engine pointer */
	new_speech->engine = engine;

	/* We are not ready to accept audio yet */
	ast_speech_change_state(new_speech, AST_SPEECH_STATE_NOT_READY);

	/* Pass ourselves to the engine so they can set us up some more and if they error out then do not create a structure */
	if (engine->create(new_speech)) {
		ast_mutex_destroy(&new_speech->lock);
		free(new_speech);
		new_speech = NULL;
	}

	return new_speech;
}

/*! \brief Destroy a speech structure */
int ast_speech_destroy(struct ast_speech *speech)
{
	int res = 0;

	/* Call our engine so we are destroyed properly */
	speech->engine->destroy(speech);

	/* Deinitialize the lock */
	ast_mutex_destroy(&speech->lock);

	/* If results exist on the speech structure, destroy them */
	if (speech->results != NULL) {
		ast_speech_results_free(speech->results);
		speech->results = NULL;
	}

	/* If a processing sound is set - free the memory used by it */
	if (speech->processing_sound != NULL) {
		free(speech->processing_sound);
		speech->processing_sound = NULL;
	}

	/* Aloha we are done */
	free(speech);
	speech = NULL;

	return res;
}

/*! \brief Change state of a speech structure */
int ast_speech_change_state(struct ast_speech *speech, int state)
{
	int res = 0;

	switch (state) {
	case AST_SPEECH_STATE_WAIT:
		/* The engine heard audio, so they spoke */
		ast_set_flag(speech, AST_SPEECH_SPOKE);
	default:
		speech->state = state;
		break;
	}

	return res;
}

/*! \brief Change the type of results we want */
int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type)
{
	int res = 0;

	speech->results_type = results_type;

	if (speech->engine->change_results_type)
		res = speech->engine->change_results_type(speech, results_type);

	return res;
}

/*! \brief Register a speech recognition engine */
int ast_speech_register(struct ast_speech_engine *engine)
{
	struct ast_speech_engine *existing_engine = NULL;
	int res = 0;

	existing_engine = find_engine(engine->name);
	if (existing_engine != NULL) {
		/* Engine already loaded */
		return -1;
	}

	if (option_verbose > 1)
		ast_verbose(VERBOSE_PREFIX_2 "Registered speech recognition engine '%s'\n", engine->name);

	/* Add to the engine linked list and make default if needed */
	AST_LIST_LOCK(&engines);
	AST_LIST_INSERT_HEAD(&engines, engine, list);
	if (default_engine == NULL) {
		default_engine = engine;
		if (option_verbose > 1)
			ast_verbose(VERBOSE_PREFIX_2 "Made '%s' the default speech recognition engine\n", engine->name);
	}
	AST_LIST_UNLOCK(&engines);

	return res;
}

/*! \brief Unregister a speech recognition engine */
int ast_speech_unregister(char *engine_name)
{
	struct ast_speech_engine *engine = NULL;
	int res = -1;

	if (engine_name == NULL) {
		return res;
	}

	AST_LIST_LOCK(&engines);
	AST_LIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
		if (!strcasecmp(engine->name, engine_name)) {
			/* We have our engine... removed it */
			AST_LIST_REMOVE_CURRENT(&engines, list);
			/* If this was the default engine, we need to pick a new one */
			if (default_engine == engine) {
				default_engine = AST_LIST_FIRST(&engines);
			}
			if (option_verbose > 1)
				ast_verbose(VERBOSE_PREFIX_2 "Unregistered speech recognition engine '%s'\n", engine_name);
			/* All went well */
			res = 0;
			break;
		}
	}
	AST_LIST_TRAVERSE_SAFE_END
	AST_LIST_UNLOCK(&engines);

	return res;
}

static int unload_module(void)
{
	/* We can not be unloaded */
	return -1;
}

static int load_module(void)
{
	int res = 0;

	/* Initialize our list of engines */
	AST_LIST_HEAD_INIT_NOLOCK(&engines);

	return res;
}

AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_GLOBAL_SYMBOLS, "Generic Speech Recognition API",
		.load = load_module,
		.unload = unload_module,
		);