aboutsummaryrefslogtreecommitdiffstats
path: root/trunk/codecs/codec_speex.c
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/codecs/codec_speex.c')
-rw-r--r--trunk/codecs/codec_speex.c503
1 files changed, 503 insertions, 0 deletions
diff --git a/trunk/codecs/codec_speex.c b/trunk/codecs/codec_speex.c
new file mode 100644
index 000000000..035f9958b
--- /dev/null
+++ b/trunk/codecs/codec_speex.c
@@ -0,0 +1,503 @@
+/*
+ * Asterisk -- An open source telephony toolkit.
+ *
+ * Copyright (C) 1999 - 2005, Digium, Inc.
+ *
+ * Mark Spencer <markster@digium.com>
+ *
+ *
+ * See http://www.asterisk.org for more information about
+ * the Asterisk project. Please do not directly contact
+ * any of the maintainers of this project for assistance;
+ * the project provides a web site, mailing lists and IRC
+ * channels for your use.
+ *
+ * This program is free software, distributed under the terms of
+ * the GNU General Public License Version 2. See the LICENSE file
+ * at the top of the source tree.
+ */
+
+/*! \file
+ *
+ * \brief Translate between signed linear and Speex (Open Codec)
+ *
+ * \note This work was motivated by Jeremy McNamara
+ * hacked to be configurable by anthm and bkw 9/28/2004
+ *
+ * \ingroup codecs
+ *
+ * \extref The Speex library - http://www.speex.org
+ *
+ */
+
+/*** MODULEINFO
+ <depend>speex</depend>
+ <use>speexdsp</use>
+ ***/
+
+#include "asterisk.h"
+
+ASTERISK_FILE_VERSION(__FILE__, "$Revision$")
+
+#include <speex/speex.h>
+
+/* We require a post 1.1.8 version of Speex to enable preprocessing
+ and better type handling */
+#ifdef _SPEEX_TYPES_H
+#include <speex/speex_preprocess.h>
+#endif
+
+#include "asterisk/translate.h"
+#include "asterisk/module.h"
+#include "asterisk/config.h"
+#include "asterisk/utils.h"
+
+/* Sample frame data */
+#include "slin_speex_ex.h"
+#include "speex_slin_ex.h"
+
+/* codec variables */
+static int quality = 3;
+static int complexity = 2;
+static int enhancement = 0;
+static int vad = 0;
+static int vbr = 0;
+static float vbr_quality = 4;
+static int abr = 0;
+static int dtx = 0; /* set to 1 to enable silence detection */
+
+static int preproc = 0;
+static int pp_vad = 0;
+static int pp_agc = 0;
+static float pp_agc_level = 8000; /* XXX what is this 8000 ? */
+static int pp_denoise = 0;
+static int pp_dereverb = 0;
+static float pp_dereverb_decay = 0.4;
+static float pp_dereverb_level = 0.3;
+
+#define TYPE_SILENCE 0x2
+#define TYPE_HIGH 0x0
+#define TYPE_LOW 0x1
+#define TYPE_MASK 0x3
+
+#define BUFFER_SAMPLES 8000
+#define SPEEX_SAMPLES 160
+
+struct speex_coder_pvt {
+ void *speex;
+ SpeexBits bits;
+ int framesize;
+ int silent_state;
+#ifdef _SPEEX_TYPES_H
+ SpeexPreprocessState *pp;
+ spx_int16_t buf[BUFFER_SAMPLES];
+#else
+ int16_t buf[BUFFER_SAMPLES]; /* input, waiting to be compressed */
+#endif
+};
+
+
+static int lintospeex_new(struct ast_trans_pvt *pvt)
+{
+ struct speex_coder_pvt *tmp = pvt->pvt;
+
+ if (!(tmp->speex = speex_encoder_init(&speex_nb_mode)))
+ return -1;
+
+ speex_bits_init(&tmp->bits);
+ speex_bits_reset(&tmp->bits);
+ speex_encoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
+ speex_encoder_ctl(tmp->speex, SPEEX_SET_COMPLEXITY, &complexity);
+#ifdef _SPEEX_TYPES_H
+ if (preproc) {
+ tmp->pp = speex_preprocess_state_init(tmp->framesize, 8000); /* XXX what is this 8000 ? */
+ speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_VAD, &pp_vad);
+ speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC, &pp_agc);
+ speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_AGC_LEVEL, &pp_agc_level);
+ speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DENOISE, &pp_denoise);
+ speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB, &pp_dereverb);
+ speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_DECAY, &pp_dereverb_decay);
+ speex_preprocess_ctl(tmp->pp, SPEEX_PREPROCESS_SET_DEREVERB_LEVEL, &pp_dereverb_level);
+ }
+#endif
+ if (!abr && !vbr) {
+ speex_encoder_ctl(tmp->speex, SPEEX_SET_QUALITY, &quality);
+ if (vad)
+ speex_encoder_ctl(tmp->speex, SPEEX_SET_VAD, &vad);
+ }
+ if (vbr) {
+ speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR, &vbr);
+ speex_encoder_ctl(tmp->speex, SPEEX_SET_VBR_QUALITY, &vbr_quality);
+ }
+ if (abr)
+ speex_encoder_ctl(tmp->speex, SPEEX_SET_ABR, &abr);
+ if (dtx)
+ speex_encoder_ctl(tmp->speex, SPEEX_SET_DTX, &dtx);
+ tmp->silent_state = 0;
+
+ return 0;
+}
+
+static int speextolin_new(struct ast_trans_pvt *pvt)
+{
+ struct speex_coder_pvt *tmp = pvt->pvt;
+
+ if (!(tmp->speex = speex_decoder_init(&speex_nb_mode)))
+ return -1;
+
+ speex_bits_init(&tmp->bits);
+ speex_decoder_ctl(tmp->speex, SPEEX_GET_FRAME_SIZE, &tmp->framesize);
+ if (enhancement)
+ speex_decoder_ctl(tmp->speex, SPEEX_SET_ENH, &enhancement);
+
+ return 0;
+}
+
+static struct ast_frame *lintospeex_sample(void)
+{
+ static struct ast_frame f;
+ f.frametype = AST_FRAME_VOICE;
+ f.subclass = AST_FORMAT_SLINEAR;
+ f.datalen = sizeof(slin_speex_ex);
+ /* Assume 8000 Hz */
+ f.samples = sizeof(slin_speex_ex)/2;
+ f.mallocd = 0;
+ f.offset = 0;
+ f.src = __PRETTY_FUNCTION__;
+ f.data = slin_speex_ex;
+ return &f;
+}
+
+static struct ast_frame *speextolin_sample(void)
+{
+ static struct ast_frame f;
+ f.frametype = AST_FRAME_VOICE;
+ f.subclass = AST_FORMAT_SPEEX;
+ f.datalen = sizeof(speex_slin_ex);
+ /* All frames are 20 ms long */
+ f.samples = SPEEX_SAMPLES;
+ f.mallocd = 0;
+ f.offset = 0;
+ f.src = __PRETTY_FUNCTION__;
+ f.data = speex_slin_ex;
+ return &f;
+}
+
+/*! \brief convert and store into outbuf */
+static int speextolin_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
+{
+ struct speex_coder_pvt *tmp = pvt->pvt;
+
+ /* Assuming there's space left, decode into the current buffer at
+ the tail location. Read in as many frames as there are */
+ int x;
+ int res;
+ int16_t *dst = (int16_t *)pvt->outbuf;
+ /* XXX fout is a temporary buffer, may have different types */
+#ifdef _SPEEX_TYPES_H
+ spx_int16_t fout[1024];
+#else
+ float fout[1024];
+#endif
+
+ if (f->datalen == 0) { /* Native PLC interpolation */
+ if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
+ ast_log(LOG_WARNING, "Out of buffer space\n");
+ return -1;
+ }
+#ifdef _SPEEX_TYPES_H
+ speex_decode_int(tmp->speex, NULL, dst + pvt->samples);
+#else
+ speex_decode(tmp->speex, NULL, fout);
+ for (x=0;x<tmp->framesize;x++) {
+ dst[pvt->samples + x] = (int16_t)fout[x];
+ }
+#endif
+ pvt->samples += tmp->framesize;
+ pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
+ return 0;
+ }
+
+ /* Read in bits */
+ speex_bits_read_from(&tmp->bits, f->data, f->datalen);
+ for (;;) {
+#ifdef _SPEEX_TYPES_H
+ res = speex_decode_int(tmp->speex, &tmp->bits, fout);
+#else
+ res = speex_decode(tmp->speex, &tmp->bits, fout);
+#endif
+ if (res < 0)
+ break;
+ if (pvt->samples + tmp->framesize > BUFFER_SAMPLES) {
+ ast_log(LOG_WARNING, "Out of buffer space\n");
+ return -1;
+ }
+ for (x = 0 ; x < tmp->framesize; x++)
+ dst[pvt->samples + x] = (int16_t)fout[x];
+ pvt->samples += tmp->framesize;
+ pvt->datalen += 2 * tmp->framesize; /* 2 bytes/sample */
+ }
+ return 0;
+}
+
+/*! \brief store input frame in work buffer */
+static int lintospeex_framein(struct ast_trans_pvt *pvt, struct ast_frame *f)
+{
+ struct speex_coder_pvt *tmp = pvt->pvt;
+
+ /* XXX We should look at how old the rest of our stream is, and if it
+ is too old, then we should overwrite it entirely, otherwise we can
+ get artifacts of earlier talk that do not belong */
+ memcpy(tmp->buf + pvt->samples, f->data, f->datalen);
+ pvt->samples += f->samples;
+ return 0;
+}
+
+/*! \brief convert work buffer and produce output frame */
+static struct ast_frame *lintospeex_frameout(struct ast_trans_pvt *pvt)
+{
+ struct speex_coder_pvt *tmp = pvt->pvt;
+ int is_speech=1;
+ int datalen = 0; /* output bytes */
+ int samples = 0; /* output samples */
+
+ /* We can't work on anything less than a frame in size */
+ if (pvt->samples < tmp->framesize)
+ return NULL;
+ speex_bits_reset(&tmp->bits);
+ while (pvt->samples >= tmp->framesize) {
+#ifdef _SPEEX_TYPES_H
+ /* Preprocess audio */
+ if (preproc)
+ is_speech = speex_preprocess(tmp->pp, tmp->buf + samples, NULL);
+ /* Encode a frame of data */
+ if (is_speech) {
+ /* If DTX enabled speex_encode returns 0 during silence */
+ is_speech = speex_encode_int(tmp->speex, tmp->buf + samples, &tmp->bits) || !dtx;
+ } else {
+ /* 5 zeros interpreted by Speex as silence (submode 0) */
+ speex_bits_pack(&tmp->bits, 0, 5);
+ }
+#else
+ {
+ float fbuf[1024];
+ int x;
+ /* Convert to floating point */
+ for (x = 0; x < tmp->framesize; x++)
+ fbuf[x] = tmp->buf[samples + x];
+ /* Encode a frame of data */
+ is_speech = speex_encode(tmp->speex, fbuf, &tmp->bits) || !dtx;
+ }
+#endif
+ samples += tmp->framesize;
+ pvt->samples -= tmp->framesize;
+ }
+
+ /* Move the data at the end of the buffer to the front */
+ if (pvt->samples)
+ memmove(tmp->buf, tmp->buf + samples, pvt->samples * 2);
+
+ /* Use AST_FRAME_CNG to signify the start of any silence period */
+ if (is_speech) {
+ tmp->silent_state = 0;
+ } else {
+ if (tmp->silent_state) {
+ return NULL;
+ } else {
+ tmp->silent_state = 1;
+ speex_bits_reset(&tmp->bits);
+ memset(&pvt->f, 0, sizeof(pvt->f));
+ pvt->f.frametype = AST_FRAME_CNG;
+ pvt->f.samples = samples;
+ /* XXX what now ? format etc... */
+ }
+ }
+
+ /* Terminate bit stream */
+ speex_bits_pack(&tmp->bits, 15, 5);
+ datalen = speex_bits_write(&tmp->bits, pvt->outbuf, pvt->t->buf_size);
+ return ast_trans_frameout(pvt, datalen, samples);
+}
+
+static void speextolin_destroy(struct ast_trans_pvt *arg)
+{
+ struct speex_coder_pvt *pvt = arg->pvt;
+
+ speex_decoder_destroy(pvt->speex);
+ speex_bits_destroy(&pvt->bits);
+}
+
+static void lintospeex_destroy(struct ast_trans_pvt *arg)
+{
+ struct speex_coder_pvt *pvt = arg->pvt;
+#ifdef _SPEEX_TYPES_H
+ if (preproc)
+ speex_preprocess_state_destroy(pvt->pp);
+#endif
+ speex_encoder_destroy(pvt->speex);
+ speex_bits_destroy(&pvt->bits);
+}
+
+static struct ast_translator speextolin = {
+ .name = "speextolin",
+ .srcfmt = AST_FORMAT_SPEEX,
+ .dstfmt = AST_FORMAT_SLINEAR,
+ .newpvt = speextolin_new,
+ .framein = speextolin_framein,
+ .destroy = speextolin_destroy,
+ .sample = speextolin_sample,
+ .desc_size = sizeof(struct speex_coder_pvt),
+ .buffer_samples = BUFFER_SAMPLES,
+ .buf_size = BUFFER_SAMPLES * 2,
+ .native_plc = 1,
+};
+
+static struct ast_translator lintospeex = {
+ .name = "lintospeex",
+ .srcfmt = AST_FORMAT_SLINEAR,
+ .dstfmt = AST_FORMAT_SPEEX,
+ .newpvt = lintospeex_new,
+ .framein = lintospeex_framein,
+ .frameout = lintospeex_frameout,
+ .destroy = lintospeex_destroy,
+ .sample = lintospeex_sample,
+ .desc_size = sizeof(struct speex_coder_pvt),
+ .buffer_samples = BUFFER_SAMPLES,
+ .buf_size = BUFFER_SAMPLES * 2, /* XXX maybe a lot less ? */
+};
+
+static int parse_config(int reload)
+{
+ struct ast_flags config_flags = { reload ? CONFIG_FLAG_FILEUNCHANGED : 0 };
+ struct ast_config *cfg = ast_config_load("codecs.conf", config_flags);
+ struct ast_variable *var;
+ int res;
+ float res_f;
+
+ if (cfg == NULL)
+ return 0;
+ if (cfg == CONFIG_STATUS_FILEUNCHANGED)
+ return 0;
+
+ for (var = ast_variable_browse(cfg, "speex"); var; var = var->next) {
+ if (!strcasecmp(var->name, "quality")) {
+ res = abs(atoi(var->value));
+ if (res > -1 && res < 11) {
+ ast_verb(3, "CODEC SPEEX: Setting Quality to %d\n",res);
+ quality = res;
+ } else
+ ast_log(LOG_ERROR,"Error Quality must be 0-10\n");
+ } else if (!strcasecmp(var->name, "complexity")) {
+ res = abs(atoi(var->value));
+ if (res > -1 && res < 11) {
+ ast_verb(3, "CODEC SPEEX: Setting Complexity to %d\n",res);
+ complexity = res;
+ } else
+ ast_log(LOG_ERROR,"Error! Complexity must be 0-10\n");
+ } else if (!strcasecmp(var->name, "vbr_quality")) {
+ if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0 && res_f <= 10) {
+ ast_verb(3, "CODEC SPEEX: Setting VBR Quality to %f\n",res_f);
+ vbr_quality = res_f;
+ } else
+ ast_log(LOG_ERROR,"Error! VBR Quality must be 0-10\n");
+ } else if (!strcasecmp(var->name, "abr_quality")) {
+ ast_log(LOG_ERROR,"Error! ABR Quality setting obsolete, set ABR to desired bitrate\n");
+ } else if (!strcasecmp(var->name, "enhancement")) {
+ enhancement = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: Perceptual Enhancement Mode. [%s]\n",enhancement ? "on" : "off");
+ } else if (!strcasecmp(var->name, "vbr")) {
+ vbr = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: VBR Mode. [%s]\n",vbr ? "on" : "off");
+ } else if (!strcasecmp(var->name, "abr")) {
+ res = abs(atoi(var->value));
+ if (res >= 0) {
+ if (res > 0)
+ ast_verb(3, "CODEC SPEEX: Setting ABR target bitrate to %d\n",res);
+ else
+ ast_verb(3, "CODEC SPEEX: Disabling ABR\n");
+ abr = res;
+ } else
+ ast_log(LOG_ERROR,"Error! ABR target bitrate must be >= 0\n");
+ } else if (!strcasecmp(var->name, "vad")) {
+ vad = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: VAD Mode. [%s]\n",vad ? "on" : "off");
+ } else if (!strcasecmp(var->name, "dtx")) {
+ dtx = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: DTX Mode. [%s]\n",dtx ? "on" : "off");
+ } else if (!strcasecmp(var->name, "preprocess")) {
+ preproc = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: Preprocessing. [%s]\n",preproc ? "on" : "off");
+ } else if (!strcasecmp(var->name, "pp_vad")) {
+ pp_vad = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: Preprocessor VAD. [%s]\n",pp_vad ? "on" : "off");
+ } else if (!strcasecmp(var->name, "pp_agc")) {
+ pp_agc = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: Preprocessor AGC. [%s]\n",pp_agc ? "on" : "off");
+ } else if (!strcasecmp(var->name, "pp_agc_level")) {
+ if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
+ ast_verb(3, "CODEC SPEEX: Setting preprocessor AGC Level to %f\n",res_f);
+ pp_agc_level = res_f;
+ } else
+ ast_log(LOG_ERROR,"Error! Preprocessor AGC Level must be >= 0\n");
+ } else if (!strcasecmp(var->name, "pp_denoise")) {
+ pp_denoise = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: Preprocessor Denoise. [%s]\n",pp_denoise ? "on" : "off");
+ } else if (!strcasecmp(var->name, "pp_dereverb")) {
+ pp_dereverb = ast_true(var->value) ? 1 : 0;
+ ast_verb(3, "CODEC SPEEX: Preprocessor Dereverb. [%s]\n",pp_dereverb ? "on" : "off");
+ } else if (!strcasecmp(var->name, "pp_dereverb_decay")) {
+ if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
+ ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Decay to %f\n",res_f);
+ pp_dereverb_decay = res_f;
+ } else
+ ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Decay must be >= 0\n");
+ } else if (!strcasecmp(var->name, "pp_dereverb_level")) {
+ if (sscanf(var->value, "%f", &res_f) == 1 && res_f >= 0) {
+ ast_verb(3, "CODEC SPEEX: Setting preprocessor Dereverb Level to %f\n",res_f);
+ pp_dereverb_level = res_f;
+ } else
+ ast_log(LOG_ERROR,"Error! Preprocessor Dereverb Level must be >= 0\n");
+ }
+ }
+ ast_config_destroy(cfg);
+ return 0;
+}
+
+static int reload(void)
+{
+ if (parse_config(1))
+ return AST_MODULE_LOAD_DECLINE;
+ return AST_MODULE_LOAD_SUCCESS;
+}
+
+static int unload_module(void)
+{
+ int res;
+
+ res = ast_unregister_translator(&lintospeex);
+ res |= ast_unregister_translator(&speextolin);
+
+ return res;
+}
+
+static int load_module(void)
+{
+ int res;
+
+ if (parse_config(0))
+ return AST_MODULE_LOAD_DECLINE;
+ res=ast_register_translator(&speextolin);
+ if (!res)
+ res=ast_register_translator(&lintospeex);
+ else
+ ast_unregister_translator(&speextolin);
+ if (res)
+ return AST_MODULE_LOAD_FAILURE;
+ return AST_MODULE_LOAD_SUCCESS;
+}
+
+AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_DEFAULT, "Speex Coder/Decoder",
+ .load = load_module,
+ .unload = unload_module,
+ .reload = reload,
+ );