diff options
Diffstat (limited to 'trunk/codecs/codec_speex.c')
-rw-r--r-- | trunk/codecs/codec_speex.c | 503 |
1 files changed, 503 insertions, 0 deletions
diff --git a/trunk/codecs/codec_speex.c b/trunk/codecs/codec_speex.c new file mode 100644 index 000000000..035f9958b --- /dev/null +++ b/trunk/codecs/codec_speex.c @@ -0,0 +1,503 @@ +/* + * Asterisk -- An open source telephony toolkit. + * + * Copyright (C) 1999 - 2005, Digium, Inc. + * + * Mark Spencer <markster@digium.com> + * + * + * See http://www.asterisk.org for more information about + * the Asterisk project. Please do not directly contact + * any of the maintainers of this project for assistance; + * the project provides a web site, mailing lists and IRC + * channels for your use. + * + * This program is free software, distributed under the terms of + * the GNU General Public License Version 2. See the LICENSE file + * at the top of the source tree. + */ + +/*! \file + * + * \brief Translate between signed linear and Speex (Open Codec) + * + * \note This work was motivated by Jeremy McNamara + * hacked to be configurable by anthm and bkw 9/28/2004 + * + * \ingroup codecs + * + * \extref The Speex library - http://www.speex.org + * + */ + +/*** MODULEINFO + <depend>speex</depend> + <use>speexdsp</use> + ***/ + +#include "asterisk.h" + +ASTERISK_FILE_VERSION(__FILE__, "$Revision$") + +#include <speex/speex.h> + +/* We require a post 1.1.8 version of Speex to enable preprocessing + and better type handling */ +#ifdef _SPEEX_TYPES_H +#include <speex/speex_preprocess.h> +#endif + +#include "asterisk/translate.h" +#include "asterisk/module.h" +#include "asterisk/config.h" +#include "asterisk/utils.h" + +/* Sample frame data */ +#include "slin_speex_ex.h" +#include "speex_slin_ex.h" + +/* codec variables */ +static int quality = 3; +static int complexity = 2; +static int enhancement = 0; +static int vad = 0; +static int vbr = 0; +static float vbr_quality = 4; +static int abr = 0; +static int dtx = 0; /* set to 1 to enable silence detection */ + +static int preproc = 0; +static int pp_vad = 0; +static int pp_agc = 0; +static float pp_agc_level = 8000; /* XXX what is this 8000 ? */ +static int pp_denoise = 0; +static int pp_dereverb = 0; +static float pp_dereverb_decay = 0.4; +static float pp_dereverb_level = 0.3; + +#define TYPE_SILENCE 0x2 +#define TYPE_HIGH 0x0 +#define TYPE_LOW 0x1 +#define TYPE_MASK 0x3 + +#define BUFFER_SAMPLES 8000 +#define SPEEX_SAMPLES 160 + +struct speex_coder_pvt { + void *speex; + SpeexBits bits; + int framesize; + int silent_state; +#ifdef _SPEEX_TYPES_H + SpeexPreprocessState *pp; + spx_int16_t buf[BUFFER_SAMPLES]; +#else + int16_t buf[BUFFER_SAMPLES]; /* input, waiting to be compressed */ +#endif +}; + + +static int lintospeex_new(struct ast_trans_pvt *pvt) +{ + struct speex_coder_pvt *tmp = pvt->pvt; + + if (!(tmp->speex = speex_encoder_init(&speex_nb_mode))) + return -1; + + speex_bits_init(&tmp->bits); + speex_bits_reset(&tmp->bits); + speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize); + speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity); +#ifdef _SPEEX_TYPES_H + if (preproc) { + tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */ + speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad); + speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc); + speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level); + speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise); + speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb); + speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay); + speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level); + } +#endif + if (!abr && !vbr) { + speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality); + if (vad) + speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad); + } + if (vbr) { + speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr); + speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality); + } + if (abr) + speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr); + if (dtx) + speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx); + tmp->silent_state = 0; + + return 0; +} + +static int speextolin_new(struct ast_trans_pvt *pvt) +{ + struct speex_coder_pvt *tmp = pvt->pvt; + + if (!(tmp->speex = speex_decoder_init(&speex_nb_mode))) + return -1; + + speex_bits_init(&tmp->bits); + speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize); + if (enhancement) + speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement); + + return 0; +} + +static struct ast_frame *lintospeex_sample(void) +{ + static struct ast_frame f; + f.frametype = AST_FRAME_VOICE; + f.subclass = AST_FORMAT_SLINEAR; + f.datalen = sizeof(slin_speex_ex); + /* Assume 8000 Hz */ + f.samples = sizeof(slin_speex_ex)/2; + f.mallocd = 0; + f.offset = 0; + f.src = __PRETTY_FUNCTION__; + f.data = slin_speex_ex; + return &f; +} + +static struct ast_frame *speextolin_sample(void) +{ + static struct ast_frame f; + f.frametype = AST_FRAME_VOICE; + f.subclass = AST_FORMAT_SPEEX; + f.datalen = sizeof(speex_slin_ex); + /* All frames are 20 ms long */ + f.samples = SPEEX_SAMPLES; + f.mallocd = 0; + f.offset = 0; + f.src = __PRETTY_FUNCTION__; + f.data = speex_slin_ex; + return &f; +} + +/*! \brief convert and store into outbuf */ +static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f) +{ + struct speex_coder_pvt *tmp = pvt->pvt; + + /* Assuming there's space left, decode into the current buffer at + the tail location. Read in as many frames as there are */ + int x; + int res; + int16_t *dst = (int16_t *)pvt->outbuf; + /* XXX fout is a temporary buffer, may have different types */ +#ifdef _SPEEX_TYPES_H + spx_int16_t fout[1024]; +#else + float fout[1024]; +#endif + + if (f->datalen == 0) { /* Native PLC interpolation */ + if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) { + ast_log(LOG_WARNING, "Out of buffer space\n"); + return -1; + } +#ifdef _SPEEX_TYPES_H + speex_decode_int(tmp->speex, NULL, dst + pvt->samples); +#else + speex_decode(tmp->speex, NULL, fout); + for (x=0;x<tmp->framesize;x++) { + dst[pvt->samples + x] = (int16_t)fout[x]; + } +#endif + pvt->samples += tmp->framesize; + pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */ + return 0; + } + + /* Read in bits */ + speex_bits_read_from(&tmp->bits, f->data, f->datalen); + for (;;) { +#ifdef _SPEEX_TYPES_H + res = speex_decode_int(tmp->speex, &tmp->bits, fout); +#else + res = speex_decode(tmp->speex, &tmp->bits, fout); +#endif + if (res < 0) + break; + if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) { + ast_log(LOG_WARNING, "Out of buffer space\n"); + return -1; + } + for (x = 0 ; x < tmp->framesize; x++) + dst[pvt->samples + x] = (int16_t)fout[x]; + pvt->samples += tmp->framesize; + pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */ + } + return 0; +} + +/*! \brief store input frame in work buffer */ +static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f) +{ + struct speex_coder_pvt *tmp = pvt->pvt; + + /* XXX We should look at how old the rest of our stream is, and if it + is too old, then we should overwrite it entirely, otherwise we can + get artifacts of earlier talk that do not belong */ + memcpy(tmp->buf + pvt->samples, f->data, f->datalen); + pvt->samples += f->samples; + return 0; +} + +/*! \brief convert work buffer and produce output frame */ +static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt) +{ + struct speex_coder_pvt *tmp = pvt->pvt; + int is_speech=1; + int datalen = 0; /* output bytes */ + int samples = 0; /* output samples */ + + /* We can't work on anything less than a frame in size */ + if (pvt->samples < tmp->framesize) + return NULL; + speex_bits_reset(&tmp->bits); + while (pvt->samples >= tmp->framesize) { +#ifdef _SPEEX_TYPES_H + /* Preprocess audio */ + if (preproc) + is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL); + /* Encode a frame of data */ + if (is_speech) { + /* If DTX enabled speex_encode returns 0 during silence */ + is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx; + } else { + /* 5 zeros interpreted by Speex as silence (submode 0) */ + speex_bits_pack(&tmp->bits, 0, 5); + } +#else + { + float fbuf[1024]; + int x; + /* Convert to floating point */ + for (x = 0; x < tmp->framesize; x++) + fbuf[x] = tmp->buf[samples + x]; + /* Encode a frame of data */ + is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx; + } +#endif + samples += tmp->framesize; + pvt->samples -= tmp->framesize; + } + + /* Move the data at the end of the buffer to the front */ + if (pvt->samples) + memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2); + + /* Use AST_FRAME_CNG to signify the start of any silence period */ + if (is_speech) { + tmp->silent_state = 0; + } else { + if (tmp->silent_state) { + return NULL; + } else { + tmp->silent_state = 1; + speex_bits_reset(&tmp->bits); + memset(&pvt->f, 0, sizeof(pvt->f)); + pvt->f.frametype = AST_FRAME_CNG; + pvt->f.samples = samples; + /* XXX what now ? format etc... */ + } + } + + /* Terminate bit stream */ + speex_bits_pack(&tmp->bits, 15, 5); + datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size); + return ast_trans_frameout(pvt, datalen, samples); +} + +static void speextolin_destroy(struct ast_trans_pvt *arg) +{ + struct speex_coder_pvt *pvt = arg->pvt; + + speex_decoder_destroy(pvt->speex); + speex_bits_destroy(&pvt->bits); +} + +static void lintospeex_destroy(struct ast_trans_pvt *arg) +{ + struct speex_coder_pvt *pvt = arg->pvt; +#ifdef _SPEEX_TYPES_H + if (preproc) + speex_preprocess_state_destroy(pvt->pp); +#endif + speex_encoder_destroy(pvt->speex); + speex_bits_destroy(&pvt->bits); +} + +static struct ast_translator speextolin = { + .name = "speextolin", + .srcfmt = AST_FORMAT_SPEEX, + .dstfmt = AST_FORMAT_SLINEAR, + .newpvt = speextolin_new, + .framein = speextolin_framein, + .destroy = speextolin_destroy, + .sample = speextolin_sample, + .desc_size = sizeof(struct speex_coder_pvt), + .buffer_samples = BUFFER_SAMPLES, + .buf_size = BUFFER_SAMPLES * 2, + .native_plc = 1, +}; + +static struct ast_translator lintospeex = { + .name = "lintospeex", + .srcfmt = AST_FORMAT_SLINEAR, + .dstfmt = AST_FORMAT_SPEEX, + .newpvt = lintospeex_new, + .framein = lintospeex_framein, + .frameout = lintospeex_frameout, + .destroy = lintospeex_destroy, + .sample = lintospeex_sample, + .desc_size = sizeof(struct speex_coder_pvt), + .buffer_samples = BUFFER_SAMPLES, + .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */ +}; + +static int parse_config(int reload) +{ + struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 }; + struct ast_config *cfg = ast_config_load("codecs.conf", config_flags); + struct ast_variable *var; + int res; + float res_f; + + if (cfg == NULL) + return 0; + if (cfg == CONFIG_STATUS_FILEUNCHANGED) + return 0; + + for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) { + if (!strcasecmp(var->name, "quality")) { + res = abs(atoi(var->value)); + if (res > -1 && res < 11) { + ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res); + quality = res; + } else + ast_log(LOG_ERROR,"Error Quality must be 0-10\n"); + } else if (!strcasecmp(var->name, "complexity")) { + res = abs(atoi(var->value)); + if (res > -1 && res < 11) { + ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res); + complexity = res; + } else + ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n"); + } else if (!strcasecmp(var->name, "vbr_quality")) { + if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) { + ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f); + vbr_quality = res_f; + } else + ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n"); + } else if (!strcasecmp(var->name, "abr_quality")) { + ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n"); + } else if (!strcasecmp(var->name, "enhancement")) { + enhancement = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off"); + } else if (!strcasecmp(var->name, "vbr")) { + vbr = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off"); + } else if (!strcasecmp(var->name, "abr")) { + res = abs(atoi(var->value)); + if (res >= 0) { + if (res > 0) + ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res); + else + ast_verb(3, "CODEC SPEEX: Disabling ABR\n"); + abr = res; + } else + ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n"); + } else if (!strcasecmp(var->name, "vad")) { + vad = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off"); + } else if (!strcasecmp(var->name, "dtx")) { + dtx = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off"); + } else if (!strcasecmp(var->name, "preprocess")) { + preproc = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off"); + } else if (!strcasecmp(var->name, "pp_vad")) { + pp_vad = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off"); + } else if (!strcasecmp(var->name, "pp_agc")) { + pp_agc = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off"); + } else if (!strcasecmp(var->name, "pp_agc_level")) { + if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) { + ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f); + pp_agc_level = res_f; + } else + ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n"); + } else if (!strcasecmp(var->name, "pp_denoise")) { + pp_denoise = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off"); + } else if (!strcasecmp(var->name, "pp_dereverb")) { + pp_dereverb = ast_true(var->value) ? 1 : 0; + ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off"); + } else if (!strcasecmp(var->name, "pp_dereverb_decay")) { + if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) { + ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f); + pp_dereverb_decay = res_f; + } else + ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n"); + } else if (!strcasecmp(var->name, "pp_dereverb_level")) { + if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) { + ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f); + pp_dereverb_level = res_f; + } else + ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n"); + } + } + ast_config_destroy(cfg); + return 0; +} + +static int reload(void) +{ + if (parse_config(1)) + return AST_MODULE_LOAD_DECLINE; + return AST_MODULE_LOAD_SUCCESS; +} + +static int unload_module(void) +{ + int res; + + res = ast_unregister_translator(&lintospeex); + res |= ast_unregister_translator(&speextolin); + + return res; +} + +static int load_module(void) +{ + int res; + + if (parse_config(0)) + return AST_MODULE_LOAD_DECLINE; + res=ast_register_translator(&speextolin); + if (!res) + res=ast_register_translator(&lintospeex); + else + ast_unregister_translator(&speextolin); + if (res) + return AST_MODULE_LOAD_FAILURE; + return AST_MODULE_LOAD_SUCCESS; +} + +AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder", + .load = load_module, + .unload = unload_module, + .reload = reload, + ); |