/* * Asterisk -- An open source telephony toolkit. * * Copyright 2007-2008, Marta Carbone, Sergio Fadda, Luigi Rizzo * * See http://www.asterisk.org for more information about * the Asterisk project. Please do not directly contact * any of the maintainers of this project for assistance; * the project provides a web site, mailing lists and IRC * channels for your use. * * This program is free software, distributed under the terms of * the GNU General Public License Version 2. See the LICENSE file * at the top of the source tree. */ /* * Experimental support for video sessions. We use SDL for rendering, ffmpeg * as the codec library for encoding and decoding, and Video4Linux and X11 * to generate the local video stream. * * If one of these pieces is not available, either at compile time or at * runtime, we do our best to run without it. Of course, no codec library * means we can only deal with raw data, no SDL means we cannot do rendering, * no V4L or X11 means we cannot generate data (but in principle we could * stream from or record to a file). * * We need a recent (2007.07.12 or newer) version of ffmpeg to avoid warnings. * Older versions might give 'deprecated' messages during compilation, * thus not compiling in AST_DEVMODE, or don't have swscale, in which case * you can try to compile #defining OLD_FFMPEG here. * * $Revision$ */ //#define DROP_PACKETS 5 /* if set, drop this % of video packets */ //#define OLD_FFMPEG 1 /* set for old ffmpeg with no swscale */ #include "asterisk.h" ASTERISK_FILE_VERSION(__FILE__, "$Revision$") #include #include "asterisk/cli.h" #include "asterisk/file.h" #include "asterisk/channel.h" #include "console_video.h" /* The code is structured as follows. When a new console channel is created, we call console_video_start() to initialize SDL, the source, and the encoder/ decoder for the formats in use (XXX the latter two should be done later, once the codec negotiation is complete). 
Also, a thread is created to handle the video source and generate frames. While communication is on, the local source is generated by the video thread, which wakes up periodically, generates frames and enqueues them in chan->readq. Incoming rtp frames are passed to console_write_video(), decoded and passed to SDL for display. For as unfortunate and confusing as it can be, we need to deal with a number of different video representations (size, codec/pixel format, codec parameters), as follows: loc_src is the data coming from the camera/X11/etc. The format is typically constrained by the video source. enc_in is the input required by the encoder. Typically constrained in size by the encoder type. enc_out is the bitstream transmitted over RTP. Typically negotiated while the call is established. loc_dpy is the format used to display the local video source. Depending on user preferences this can have the same size as loc_src_fmt, or enc_in_fmt, or thumbnail size (e.g. PiP output) dec_in is the incoming RTP bitstream. Negotiated during call establishment, it is not necessarily the same as enc_in_fmt dec_out the output of the decoder. The format is whatever the other side sends, and the buffer is allocated by avcodec_decode_... so we only copy the data here. rem_dpy the format used to display the remote stream src_dpy is the format used to display the local video source streams The number of these fbuf_t is determined at run time, with dynamic allocation We store the format info together with the buffer storing the data. As a future optimization, a format/buffer may reference another one if the formats are equivalent. This will save some unnecessary format conversion. In order to handle video you need to add to sip.conf (and presumably iax.conf too) the following: [general](+) videosupport=yes allow=h263 ; this or other video formats allow=h263p ; this or other video formats */ /* * Codecs are absolutely necessary or we cannot do anything. 
* SDL is optional (used for rendering only), so that we can still * stream video withouth displaying it. */ #if !defined(HAVE_VIDEO_CONSOLE) || !defined(HAVE_FFMPEG) /* stubs if required pieces are missing */ int console_write_video(struct ast_channel *chan, struct ast_frame *f) { return 0; /* writing video not supported */ } int console_video_cli(struct video_desc *env, const char *var, int fd) { return 1; /* nothing matched */ } int console_video_config(struct video_desc **penv, const char *var, const char *val) { return 1; /* no configuration */ } void console_video_start(struct video_desc *env, struct ast_channel *owner) { ast_log(LOG_NOTICE, "voice only, console video support not present\n"); } void console_video_uninit(struct video_desc *env) { } int get_gui_startup(struct video_desc* env) { return 0; /* no gui here */ } int console_video_formats = 0; #else /* defined(HAVE_FFMPEG) && defined(HAVE_SDL) */ /*! The list of video formats we support. */ int console_video_formats = AST_FORMAT_H263_PLUS | AST_FORMAT_H263 | AST_FORMAT_MP4_VIDEO | AST_FORMAT_H264 | AST_FORMAT_H261 ; /* function to scale and encode buffers */ static void my_scale(struct fbuf_t *in, AVPicture *p_in, struct fbuf_t *out, AVPicture *p_out); /* * this structure will be an entry in the table containing * every device specified in the file oss.conf, it contains various infomation * about the device */ struct video_device { char *name; /* name of the device */ /* allocated dynamically (see fill_table function) */ struct grab_desc *grabber; /* the grabber for the device type */ void *grabber_data; /* device's private data structure */ struct fbuf_t *dev_buf; /* buffer for incoming data */ struct timeval last_frame; /* when we read the last frame ? 
*/ int status_index; /* what is the status of the device (source) */ /* status index is set using the IS_ON, IS_PRIMARY and IS_SECONDARY costants */ /* status_index is the index of the status message in the src_msgs array in console_gui.c */ }; struct video_codec_desc; /* forward declaration */ /* * Descriptor of the local source, made of the following pieces: * + configuration info (geometry, device name, fps...). These are read * from the config file and copied here before calling video_out_init(); * + the frame buffer (buf) and source pixel format, allocated at init time; * + the encoding and RTP info, including timestamps to generate * frames at the correct rate; * + source-specific info, i.e. fd for /dev/video, dpy-image for x11, etc, * filled in by grabber_open, part of source_specific information are in * the device table (devices member), others are shared; * NOTE: loc_src.data == NULL means the rest of the struct is invalid, and * the video source is not available. */ struct video_out_desc { /* video device support. * videodevice and geometry are read from the config file. * At the right time we try to open it and allocate a buffer. * If we are successful, webcam_bufsize > 0 and we can read. */ /* all the following is config file info copied from the parent */ int fps; int bitrate; int qmin; int sendvideo; struct fbuf_t loc_src_geometry; /* local source geometry only (from config file) */ struct fbuf_t enc_out; /* encoder output buffer, allocated in video_out_init() */ struct video_codec_desc *enc; /* encoder */ void *enc_ctx; /* encoding context */ AVCodec *codec; AVFrame *enc_in_frame; /* enc_in mapped into avcodec format. */ /* The initial part of AVFrame is an AVPicture */ int mtu; /* Table of devices specified with "videodevice=" in oss.conf. * Static size as we have a limited number of entries. 
*/ struct video_device devices[MAX_VIDEO_SOURCES]; int device_num; /*number of devices in table*/ int device_primary; /*index of the actual primary device in the table*/ int device_secondary; /*index of the actual secondary device in the table*/ int picture_in_picture; /*Is the PiP mode activated? 0 = NO | 1 = YES*/ /* these are the coordinates of the picture inside the picture (visible if PiP mode is active) these coordinates are valid considering the containing buffer with cif geometry*/ int pip_x; int pip_y; }; /* * The overall descriptor, with room for config info, video source and * received data descriptors, SDL info, etc. * This should be globally visible to all modules (grabber, vcodecs, gui) * and contain all configurtion info. */ struct video_desc { char codec_name[64]; /* the codec we use */ int stayopen; /* set if gui starts manually */ pthread_t vthread; /* video thread */ ast_mutex_t dec_lock; /* sync decoder and video thread */ int shutdown; /* set to shutdown vthread */ struct ast_channel *owner; /* owner channel */ struct fbuf_t enc_in; /* encoder input buffer, allocated in video_out_init() */ char keypad_file[256]; /* image for the keypad */ char keypad_font[256]; /* font for the keypad */ char sdl_videodriver[256]; struct fbuf_t rem_dpy; /* display remote video, no buffer (it is in win[WIN_REMOTE].bmp) */ struct fbuf_t loc_dpy; /* display local source, no buffer (managed by SDL in bmp[1]) */ /* geometry of the thumbnails for all video sources. 
*/ struct fbuf_t src_dpy[MAX_VIDEO_SOURCES]; /* no buffer allocated here */ int frame_freeze; /* flag to freeze the incoming frame */ /* local information for grabbers, codecs, gui */ struct gui_info *gui; struct video_dec_desc *in; /* remote video descriptor */ struct video_out_desc out; /* local video descriptor */ }; static AVPicture *fill_pict(struct fbuf_t *b, AVPicture *p); void fbuf_free(struct fbuf_t *b) { struct fbuf_t x = *b; if (b->data && b->size) ast_free(b->data); memset(b, '\0', sizeof(*b)); /* restore some fields */ b->w = x.w; b->h = x.h; b->pix_fmt = x.pix_fmt; } /* return the status of env->stayopen to chan_oss, as the latter * does not have access to fields of struct video_desc */ int get_gui_startup(struct video_desc* env) { return env ? env->stayopen : 0; } #if 0 /* helper function to print the amount of memory used by the process. * Useful to track memory leaks, unfortunately this code is OS-specific * so we keep it commented out. */ static int used_mem(const char *msg) { char in[128]; pid_t pid = getpid(); sprintf(in, "ps -o vsz= -o rss= %d", pid); ast_log(LOG_WARNING, "used mem (vsize, rss) %s ", msg); system(in); return 0; } #endif #include "vcodecs.c" #include "console_gui.c" /*! \brief Try to open video sources, return 0 on success, 1 on error * opens all video sources found in the oss.conf configuration files. * Saves the grabber and the datas in the device table (in the devices field * of the descriptor referenced by v). * Initializes the device_primary and device_secondary * fields of v with the first devices that was * successfully opened. * * \param v = video out environment descriptor * * returns 0 on success, 1 on error */ static int grabber_open(struct video_out_desc *v) { struct grab_desc *g; void *g_data; int i, j; /* for each device in the device table... */ for (i = 0; i < v->device_num; i++) { /* device already open */ if (v->devices[i].grabber) continue; /* for each type of grabber supported... 
*/ for (j = 0; (g = console_grabbers[j]); j++) { /* the grabber is opened and the informations saved in the device table */ g_data = g->open(v->devices[i].name, &v->loc_src_geometry, v->fps); if (!g_data) continue; v->devices[i].grabber = g; v->devices[i].grabber_data = g_data; v->devices[i].status_index |= IS_ON; } } /* the first working device is selected as the primary one and the secondary one */ for (i = 0; i < v->device_num; i++) { if (!v->devices[i].grabber) continue; v->device_primary = i; v->device_secondary = i; return 0; /* source found */ } return 1; /* no source found */ } /*! \brief complete a buffer from the specified local video source. * Called by get_video_frames(), in turn called by the video thread. * * \param dev = video environment descriptor * \param fps = frame per seconds, for every device * * returns: * - NULL on falure * - reference to the device buffer on success */ static struct fbuf_t *grabber_read(struct video_device *dev, int fps) { struct timeval now = ast_tvnow(); if (dev->grabber == NULL) /* not initialized */ return NULL; /* the last_frame field in this row of the device table (dev) is always initialized, it is set during the parsing of the config file, and never unset, function fill_device_table(). */ /* check if it is time to read */ if (ast_tvdiff_ms(now, dev->last_frame) < 1000/fps) return NULL; /* too early */ dev->last_frame = now; /* XXX actually, should correct for drift */ return dev->grabber->read(dev->grabber_data); } /*! \brief handler run when dragging with the left button on * the local source window - the effect is to move the offset * of the captured area. */ static void grabber_move(struct video_device *dev, int dx, int dy) { if (dev->grabber && dev->grabber->move) dev->grabber->move(dev->grabber_data, dx, dy); } /* * Map the codec name to the library. If not recognised, use a default. * This is useful in the output path where we decide by name, presumably. 
*/ static struct video_codec_desc *map_config_video_format(char *name) { int i; for (i = 0; supported_codecs[i]; i++) if (!strcasecmp(name, supported_codecs[i]->name)) break; if (supported_codecs[i] == NULL) { ast_log(LOG_WARNING, "Cannot find codec for '%s'\n", name); i = 0; strcpy(name, supported_codecs[i]->name); } ast_log(LOG_WARNING, "Using codec '%s'\n", name); return supported_codecs[i]; } /*! \brief uninitialize the descriptor for local video stream */ static int video_out_uninit(struct video_desc *env) { struct video_out_desc *v = &env->out; int i; /* integer variable used as iterator */ /* XXX this should be a codec callback */ if (v->enc_ctx) { AVCodecContext *enc_ctx = (AVCodecContext *)v->enc_ctx; avcodec_close(enc_ctx); av_free(enc_ctx); v->enc_ctx = NULL; } if (v->enc_in_frame) { av_free(v->enc_in_frame); v->enc_in_frame = NULL; } v->codec = NULL; /* nothing to free, this is only a reference */ /* release the buffers */ fbuf_free(&env->enc_in); fbuf_free(&v->enc_out); /* close the grabbers */ for (i = 0; i < v->device_num; i++) { if (v->devices[i].grabber){ v->devices[i].grabber_data = v->devices[i].grabber->close(v->devices[i].grabber_data); v->devices[i].grabber = NULL; /* dev_buf is already freed by grabber->close() */ v->devices[i].dev_buf = NULL; } v->devices[i].status_index = 0; } v->picture_in_picture = 0; env->frame_freeze = 0; return -1; } /* * Initialize the encoder for the local source: * - enc_ctx, codec, enc_in_frame are used by ffmpeg for encoding; * - enc_out is used to store the encoded frame (to be sent) * - mtu is used to determine the max size of video fragment * NOTE: we enter here with the video source already open. 
*/ static int video_out_init(struct video_desc *env) { int codec; int size; struct fbuf_t *enc_in; struct video_out_desc *v = &env->out; v->enc_ctx = NULL; v->codec = NULL; v->enc_in_frame = NULL; v->enc_out.data = NULL; codec = map_video_format(v->enc->format, CM_WR); v->codec = avcodec_find_encoder(codec); if (!v->codec) { ast_log(LOG_WARNING, "Cannot find the encoder for format %d\n", codec); return -1; /* error, but nothing to undo yet */ } v->mtu = 1400; /* set it early so the encoder can use it */ /* allocate the input buffer for encoding. * ffmpeg only supports PIX_FMT_YUV420P for the encoding. */ enc_in = &env->enc_in; enc_in->pix_fmt = PIX_FMT_YUV420P; enc_in->size = (enc_in->w * enc_in->h * 3)/2; enc_in->data = ast_calloc(1, enc_in->size); if (!enc_in->data) { ast_log(LOG_WARNING, "Cannot allocate encoder input buffer\n"); return video_out_uninit(env); } /* construct an AVFrame that points into buf_in */ v->enc_in_frame = avcodec_alloc_frame(); if (!v->enc_in_frame) { ast_log(LOG_WARNING, "Unable to allocate the encoding video frame\n"); return video_out_uninit(env); } /* parameters for PIX_FMT_YUV420P */ size = enc_in->w * enc_in->h; v->enc_in_frame->data[0] = enc_in->data; v->enc_in_frame->data[1] = v->enc_in_frame->data[0] + size; v->enc_in_frame->data[2] = v->enc_in_frame->data[1] + size/4; v->enc_in_frame->linesize[0] = enc_in->w; v->enc_in_frame->linesize[1] = enc_in->w/2; v->enc_in_frame->linesize[2] = enc_in->w/2; /* now setup the parameters for the encoder. * XXX should be codec-specific */ { AVCodecContext *enc_ctx = avcodec_alloc_context(); v->enc_ctx = enc_ctx; enc_ctx->pix_fmt = enc_in->pix_fmt; enc_ctx->width = enc_in->w; enc_ctx->height = enc_in->h; /* XXX rtp_callback ? * rtp_mode so ffmpeg inserts as many start codes as possible. 
*/ enc_ctx->rtp_mode = 1; enc_ctx->rtp_payload_size = v->mtu / 2; // mtu/2 enc_ctx->bit_rate = v->bitrate; enc_ctx->bit_rate_tolerance = enc_ctx->bit_rate/2; enc_ctx->qmin = v->qmin; /* should be configured */ enc_ctx->time_base = (AVRational){1, v->fps}; enc_ctx->gop_size = v->fps*5; // emit I frame every 5 seconds v->enc->enc_init(v->enc_ctx); if (avcodec_open(enc_ctx, v->codec) < 0) { ast_log(LOG_WARNING, "Unable to initialize the encoder %d\n", codec); av_free(enc_ctx); v->enc_ctx = NULL; return video_out_uninit(env); } } /* * Allocate enough for the encoded bitstream. As we are compressing, * we hope that the output is never larger than the input size. */ v->enc_out.data = ast_calloc(1, enc_in->size); v->enc_out.size = enc_in->size; v->enc_out.used = 0; return 0; } /*! \brief possibly uninitialize the video console. * Called at the end of a call, should reset the 'owner' field, * then possibly terminate the video thread if the gui has * not been started manually. * In practice, signal the thread and give it a bit of time to * complete, giving up if it gets stuck. Because uninit * is called from hangup with the channel locked, and the thread * uses the chan lock, we need to unlock here. This is unsafe, * and we should really use refcounts for the channels. */ void console_video_uninit(struct video_desc *env) { int i, t = 100; /* initial wait is shorter, than make it longer */ if (env->stayopen == 0) { /* gui opened by a call, do the shutdown */ env->shutdown = 1; for (i=0; env->shutdown && i < 10; i++) { if (env->owner) ast_channel_unlock(env->owner); usleep(t); t = 1000000; if (env->owner) ast_channel_lock(env->owner); } env->vthread = NULL; } env->owner = NULL; /* this is unconditional */ } /*! fill an AVPicture from our fbuf info, as it is required by * the image conversion routines in ffmpeg. Note that the pointers * are recalculated if the fbuf has an offset (and so represents a picture in picture) * XXX This depends on the format. 
*/
static AVPicture *fill_pict(struct fbuf_t *b, AVPicture *p)
{
	/* start from the planar (component) layout, adjusted below
	 * for the packed formats we know about
	 */
	int quarter = b->w * b->h/4;	/* size of one chroma plane */
	int chroma_stride = b->w/2;	/* U/V linesize, 0 means packed format */
	int bytes_per_sample = 1;
	int luma_stride;		/* linesize of the first plane, bytes */

	memset(p, '\0', sizeof(*p));

	if (b->pix_fmt == PIX_FMT_RGB555 || b->pix_fmt == PIX_FMT_RGB565) {
		bytes_per_sample = 2;
		chroma_stride = 0;
	} else if (b->pix_fmt == PIX_FMT_RGBA32) {
		bytes_per_sample = 4;
		chroma_stride = 0;
	} else if (b->pix_fmt == PIX_FMT_YUYV422) {
		/* Packed YUV 4:2:2, 16bpp, Y0 Cb Y1 Cr:
		 * all data in first plane, probably
		 */
		bytes_per_sample = 2;
		chroma_stride = 0;
	}

	luma_stride = b->w * bytes_per_sample;

	p->data[0] = b->data;
	p->linesize[0] = luma_stride;
	/* the secondary planes are only meaningful for component images */
	if (chroma_stride) {
		p->data[1] = b->data + 4*quarter;
		p->data[2] = b->data + 5*quarter;
	} else {
		p->data[1] = b->data+luma_stride;
		p->data[2] = b->data+luma_stride;
	}
	p->linesize[1] = chroma_stride;
	p->linesize[2] = chroma_stride;

	/* shift every plane by the window offset stored in the fbuf;
	 * this is what places the picture in picture
	 */
	p->data[0] += luma_stride*b->win_y + b->win_x*bytes_per_sample;
	if (chroma_stride) {
		int chroma_off = chroma_stride*(b->win_y/2) + (b->win_x/2) * bytes_per_sample;

		p->data[1] += chroma_off;
		p->data[2] += chroma_off;
	}
	return p;
}

/*! convert/scale between an input and an output format.
 * Old version of ffmpeg only have img_convert, which does not rescale.
 * New versions use sws_scale which does both.
*/ static void my_scale(struct fbuf_t *in, AVPicture *p_in, struct fbuf_t *out, AVPicture *p_out) { AVPicture my_p_in, my_p_out; int eff_w=out->w, eff_h=out->h; if (p_in == NULL) p_in = fill_pict(in, &my_p_in); if (p_out == NULL) p_out = fill_pict(out, &my_p_out); /*if win_w is different from zero then we must change the size of the scaled buffer (the position is already encoded into the out parameter)*/ if (out->win_w) { /* picture in picture enabled */ eff_w=out->win_w; eff_h=out->win_h; } #ifdef OLD_FFMPEG /* XXX img_convert is deprecated, and does not do rescaling, PiP not supported */ img_convert(p_out, out->pix_fmt, p_in, in->pix_fmt, in->w, in->h); #else /* XXX replacement */ { struct SwsContext *convert_ctx; convert_ctx = sws_getContext(in->w, in->h, in->pix_fmt, eff_w, eff_h, out->pix_fmt, SWS_BICUBIC, NULL, NULL, NULL); if (convert_ctx == NULL) { ast_log(LOG_ERROR, "FFMPEG::convert_cmodel : swscale context initialization failed"); return; } if (0) ast_log(LOG_WARNING, "in %d %dx%d out %d %dx%d\n", in->pix_fmt, in->w, in->h, out->pix_fmt, eff_w, eff_h); sws_scale(convert_ctx, p_in->data, p_in->linesize, in->w, in->h, /* src slice */ p_out->data, p_out->linesize); sws_freeContext(convert_ctx); } #endif /* XXX replacement */ } struct video_desc *get_video_desc(struct ast_channel *c); /* * This function is called (by asterisk) for each video packet * coming from the network (the 'in' path) that needs to be processed. * We need to reconstruct the entire video frame before we can decode it. 
* After a video packet is received we have to: * - extract the bitstream with pre_process_data() * - append the bitstream to a buffer * - if the fragment is the last (RTP Marker) we decode it with decode_video() * - after the decoding is completed we display the decoded frame with show_frame() */ int console_write_video(struct ast_channel *chan, struct ast_frame *f); int console_write_video(struct ast_channel *chan, struct ast_frame *f) { struct video_desc *env = get_video_desc(chan); struct video_dec_desc *v = env->in; if (!env->gui) /* no gui, no rendering */ return 0; if (v == NULL) env->in = v = dec_init(f->subclass & ~1); if (v == NULL) { /* This is not fatal, but we won't have incoming video */ ast_log(LOG_WARNING, "Cannot initialize input decoder\n"); return 0; } if (v->dec_in_cur == NULL) /* no buffer for incoming frames, drop */ return 0; #if defined(DROP_PACKETS) && DROP_PACKETS > 0 /* Simulate lost packets */ if ((random() % 10000) <= 100*DROP_PACKETS) { ast_log(LOG_NOTICE, "Packet lost [%d]\n", f->seqno); return 0; } #endif if (v->discard) { /* * In discard mode, drop packets until we find one with * the RTP marker set (which is the end of frame). * Note that the RTP marker flag is sent as the LSB of the * subclass, which is a bitmask of formats. The low bit is * normally used for audio so there is no interference. */ if (f->subclass & 0x01) { v->dec_in_cur->used = 0; v->dec_in_cur->ebit = 0; v->next_seq = f->seqno + 1; /* wrap at 16 bit */ v->discard = 0; ast_log(LOG_WARNING, "out of discard mode, frame %d\n", f->seqno); } return 0; } /* * Only in-order fragments will be accepted. Remember seqno * has 16 bit so there is wraparound. Also, ideally we could * accept a bit of reordering, but at the moment we don't. 
*/ if (v->next_seq != f->seqno) { ast_log(LOG_WARNING, "discarding frame out of order, %d %d\n", v->next_seq, f->seqno); v->discard = 1; return 0; } v->next_seq++; if (f->data.ptr == NULL || f->datalen < 2) { ast_log(LOG_WARNING, "empty video frame, discard\n"); return 0; } if (v->d_callbacks->dec_decap(v->dec_in_cur, f->data.ptr, f->datalen)) { ast_log(LOG_WARNING, "error in dec_decap, enter discard\n"); v->discard = 1; } if (f->subclass & 0x01) { // RTP Marker /* prepare to decode: advance the buffer so the video thread knows. */ struct fbuf_t *tmp = v->dec_in_cur; /* store current pointer */ ast_mutex_lock(&env->dec_lock); if (++v->dec_in_cur == &v->dec_in[N_DEC_IN]) /* advance to next, circular */ v->dec_in_cur = &v->dec_in[0]; if (v->dec_in_dpy == NULL) { /* were not displaying anything, so set it */ v->dec_in_dpy = tmp; } else if (v->dec_in_dpy == v->dec_in_cur) { /* current slot is busy */ v->dec_in_cur = NULL; } ast_mutex_unlock(&env->dec_lock); } return 0; } /*! \brief refreshes the buffers of all the device by calling the * grabber_read on each device in the device table. * it encodes the primary source buffer, if the picture in picture mode is * enabled it encodes (in the buffer to split) the secondary source buffer too. * The encoded buffer is splitted to build the local and the remote view. * Return a list of ast_frame representing the video fragments. * The head pointer is returned by the function, the tail pointer * is returned as an argument. 
* * \param env = video environment descriptor * \param tail = tail ponter (pratically a return value) */ static struct ast_frame *get_video_frames(struct video_desc *env, struct ast_frame **tail) { struct video_out_desc *v = &env->out; struct ast_frame *dummy; struct fbuf_t *loc_src_primary = NULL, *p_read; int i; /* if no device was found in the config file */ if (!env->out.device_num) return NULL; /* every time this function is called we refresh the buffers of every device, updating the private device buffer in the device table */ for (i = 0; i < env->out.device_num; i++) { p_read = grabber_read(&env->out.devices[i], env->out.fps); /* it is used only if different from NULL, we mantain last good buffer otherwise */ if (p_read) env->out.devices[i].dev_buf = p_read; } /* select the primary device buffer as the one to encode */ loc_src_primary = env->out.devices[env->out.device_primary].dev_buf; /* loc_src_primary can be NULL if the device has been turned off during execution of it is read too early */ if (loc_src_primary) { /* Scale the video for the encoder, then use it for local rendering so we will see the same as the remote party */ my_scale(loc_src_primary, NULL, &env->enc_in, NULL); } if (env->out.picture_in_picture) { /* the picture in picture mode is enabled */ struct fbuf_t *loc_src_secondary; /* reads from the secondary source */ loc_src_secondary = env->out.devices[env->out.device_secondary].dev_buf; if (loc_src_secondary) { env->enc_in.win_x = env->out.pip_x; env->enc_in.win_y = env->out.pip_y; env->enc_in.win_w = env->enc_in.w/3; env->enc_in.win_h = env->enc_in.h/3; /* scales to the correct geometry and inserts in the enc_in buffer the picture in picture */ my_scale(loc_src_secondary, NULL, &env->enc_in, NULL); /* returns to normal parameters (not picture in picture) */ env->enc_in.win_x = 0; env->enc_in.win_y = 0; env->enc_in.win_w = 0; env->enc_in.win_h = 0; } else { /* loc_src_secondary can be NULL if the device has been turned off during execution 
of it is read too early */ env->out.picture_in_picture = 0; /* disable picture in picture */ } } show_frame(env, WIN_LOCAL); /* local rendering */ for (i = 0; i < env->out.device_num; i++) show_frame(env, i+WIN_SRC1); /* rendering of every source device in thumbnails */ if (tail == NULL) tail = &dummy; *tail = NULL; /* if no reason for encoding, do not encode */ if (!env->owner || !loc_src_primary || !v->sendvideo) return NULL; if (v->enc_out.data == NULL) { static volatile int a = 0; if (a++ < 2) ast_log(LOG_WARNING, "fail, no encoder output buffer\n"); return NULL; } v->enc->enc_run(v); return v->enc->enc_encap(&v->enc_out, v->mtu, tail); } /* * Helper thread to periodically poll the video sources and enqueue the * generated frames directed to the remote party to the channel's queue. * Using a separate thread also helps because the encoding can be * computationally expensive so we don't want to starve the main thread. */ static void *video_thread(void *arg) { struct video_desc *env = arg; int count = 0; char save_display[128] = ""; int i; /* integer variable used as iterator */ /* if sdl_videodriver is set, override the environment. 
Also, * if it contains 'console' override DISPLAY around the call to SDL_Init * so we use the console as opposed to the x11 version of aalib */ if (!ast_strlen_zero(env->sdl_videodriver)) { /* override */ const char *s = getenv("DISPLAY"); setenv("SDL_VIDEODRIVER", env->sdl_videodriver, 1); if (s && !strcasecmp(env->sdl_videodriver, "aalib-console")) { ast_copy_string(save_display, s, sizeof(save_display)); unsetenv("DISPLAY"); } } sdl_setup(env); if (!ast_strlen_zero(save_display)) setenv("DISPLAY", save_display, 1); ast_mutex_init(&env->dec_lock); /* used to sync decoder and renderer */ if (grabber_open(&env->out)) { ast_log(LOG_WARNING, "cannot open local video source\n"); } if (env->out.device_num) env->out.devices[env->out.device_primary].status_index |= IS_PRIMARY | IS_SECONDARY; /* even if no device is connected, we must call video_out_init, * as some of the data structures it initializes are * used in get_video_frames() */ video_out_init(env); /* Writes intial status of the sources. 
*/ if (env->gui) { for (i = 0; i < env->out.device_num; i++) { print_message(env->gui->thumb_bd_array[i].board, src_msgs[env->out.devices[i].status_index]); } } for (;;) { struct timeval t = { 0, 50000 }; /* XXX 20 times/sec */ struct ast_frame *p, *f; struct ast_channel *chan; int fd; char *caption = NULL, buf[160]; /* determine if video format changed */ if (count++ % 10 == 0) { if (env->out.sendvideo && env->out.devices) sprintf(buf, "%s %s %dx%d @@ %dfps %dkbps", env->out.devices[env->out.device_primary].name, env->codec_name, env->enc_in.w, env->enc_in.h, env->out.fps, env->out.bitrate/1000); else sprintf(buf, "hold"); caption = buf; } /* manage keypad events */ /* XXX here we should always check for events, * otherwise the drag will not work */ if (env->gui) eventhandler(env, caption); /* sleep for a while */ ast_select(0, NULL, NULL, NULL, &t); if (env->in) { struct video_dec_desc *v = env->in; /* * While there is something to display, call the decoder and free * the buffer, possibly enabling the receiver to store new data. */ while (v->dec_in_dpy) { struct fbuf_t *tmp = v->dec_in_dpy; /* store current pointer */ /* decode the frame, but show it only if not frozen */ if (v->d_callbacks->dec_run(v, tmp) && !env->frame_freeze) show_frame(env, WIN_REMOTE); tmp->used = 0; /* mark buffer as free */ tmp->ebit = 0; ast_mutex_lock(&env->dec_lock); if (++v->dec_in_dpy == &v->dec_in[N_DEC_IN]) /* advance to next, circular */ v->dec_in_dpy = &v->dec_in[0]; if (v->dec_in_cur == NULL) /* receiver was idle, enable it... 
*/ v->dec_in_cur = tmp; /* using the slot just freed */ else if (v->dec_in_dpy == v->dec_in_cur) /* this was the last slot */ v->dec_in_dpy = NULL; /* nothing more to display */ ast_mutex_unlock(&env->dec_lock); } } if (env->shutdown) break; f = get_video_frames(env, &p); /* read and display */ if (!f) continue; chan = env->owner; if (chan == NULL) { /* drop the chain of frames, nobody uses them */ while (f) { struct ast_frame *g = AST_LIST_NEXT(f, frame_list); ast_frfree(f); f = g; } continue; } fd = chan->alertpipe[1]; ast_channel_lock(chan); /* AST_LIST_INSERT_TAIL is only good for one frame, cannot use here */ if (chan->readq.first == NULL) { chan->readq.first = f; } else { chan->readq.last->frame_list.next = f; } chan->readq.last = p; /* * more or less same as ast_queue_frame, but extra * write on the alertpipe to signal frames. */ if (fd > -1) { int blah = 1, l = sizeof(blah); for (p = f; p; p = AST_LIST_NEXT(p, frame_list)) { if (write(fd, &blah, l) != l) ast_log(LOG_WARNING, "Unable to write to alert pipe on %s, frametype/subclass %d/%d: %s!\n", chan->name, f->frametype, f->subclass, strerror(errno)); } } ast_channel_unlock(chan); } /* thread terminating, here could call the uninit */ /* uninitialize the local and remote video environments */ env->in = dec_uninit(env->in); video_out_uninit(env); if (env->gui) env->gui = cleanup_sdl(env->gui, env->out.device_num); ast_mutex_destroy(&env->dec_lock); env->shutdown = 0; return NULL; } static void copy_geometry(struct fbuf_t *src, struct fbuf_t *dst) { if (dst->w == 0) dst->w = src->w; if (dst->h == 0) dst->h = src->h; } /*! initialize the video environment. * Apart from the formats (constant) used by sdl and the codec, * we use enc_in as the basic geometry. 
*/
static void init_env(struct video_desc *env)
{
	struct fbuf_t *camera = &env->out.loc_src_geometry;	/* local source */
	struct fbuf_t *encoder = &env->enc_in;			/* encoder input */
	struct fbuf_t *local_win = &env->loc_dpy;		/* local display */
	struct fbuf_t *remote_win = &env->rem_dpy;		/* remote display */
	struct fbuf_t *thumb;
	int n;

	/* pixel formats are fixed: camera default, encoder input, sdl */
	camera->pix_fmt = PIX_FMT_YUV420P;
	encoder->pix_fmt = PIX_FMT_YUV420P;
	remote_win->pix_fmt = PIX_FMT_YUV420P;
	local_win->pix_fmt = PIX_FMT_YUV420P;

	/* an incomplete encoder geometry falls back to 352x288 */
	if (!encoder->w || !encoder->h) {
		encoder->w = 352;
		encoder->h = 288;
	}

	/* unset geometries are inherited along the chain
	 * encoder input -> camera, encoder input -> remote display,
	 * remote display -> local display
	 */
	copy_geometry(encoder, camera);
	copy_geometry(encoder, remote_win);
	copy_geometry(remote_win, local_win);

	/* every configured source gets a fixed-size thumbnail */
	for (n = 0; n < env->out.device_num; n++) {
		thumb = &env->src_dpy[n];
		thumb->pix_fmt = PIX_FMT_YUV420P;
		thumb->w = SRC_WIN_W;
		thumb->h = SRC_WIN_H;
	}

	/* default position of the picture in picture inside the encoder
	 * input buffer; it can be changed later by dragging the picture
	 * in picture with left click
	 */
	env->out.pip_x = encoder->w - encoder->w/3;
	env->out.pip_y = encoder->h - encoder->h/3;
}

/*!
 * The first call to the video code, called by oss_new() or similar.
 * Here we initialize the various components we use, namely SDL for display,
 * ffmpeg for encoding/decoding, and a local video source.
 * We do our best to progress even if some of the components are not
 * available.
*/
void console_video_start(struct video_desc *env, struct ast_channel *owner)
{
	ast_log(LOG_WARNING, "env %p chan %p\n", env, owner);

	/* no descriptor: video was never configured, nothing to start */
	if (!env)
		return;

	/* the owner is recorded on every call; a NULL owner is accepted */
	env->owner = owner;

	/* a video thread is already recorded: the rest was done before */
	if (env->vthread)
		return;

	init_env(env);
	env->out.enc = map_config_video_format(env->codec_name);

	ast_log(LOG_WARNING, "start video out %s %dx%d\n",
		env->codec_name, env->enc_in.w, env->enc_in.h);

	/*
	 * Make the ffmpeg codecs available. Once would be enough, but
	 * repeating the registration is believed to be harmless.
	 */
	avcodec_init();
	avcodec_register_all();
	av_log_set_level(AV_LOG_ERROR);	/* keep ffmpeg quiet except for errors */

	/* fall back to fixed values when the configuration left these at 0 */
	if (!env->out.fps) {
		env->out.fps = 15;
		ast_log(LOG_WARNING, "fps unset, forcing to %d\n", env->out.fps);
	}
	if (!env->out.bitrate) {
		env->out.bitrate = 65000;
		ast_log(LOG_WARNING, "bitrate unset, forcing to %d\n", env->out.bitrate);
	}

	/* detached thread, so its resources are released when it terminates */
	ast_pthread_create_detached_background(&env->vthread, NULL, video_thread, env);
}

/*
 * Parse a geometry string, accepting also common names for the formats.
 * Trick: if we have a leading > or < and a numeric geometry,
 * return the larger or smaller one.
 * E.g.
<352x288 gives the smaller one, 320x240 */
/*
 * Details:
 *  - b receives the selected width and height; s is the string to parse.
 *  - Only the width of a relative ("<WxH" / ">WxH") request is compared
 *    with the table; the height is parsed but not used.
 *  - A relative request that falls off either end of the table is clamped
 *    to the nearest table entry.
 *  - Anything that is neither a known name, a valid relative request nor a
 *    plain "WxH" is logged and replaced by 352x288.
 *  - The return value is always 0.
 */
static int video_geom(struct fbuf_t *b, const char *s)
{
	struct geom_name {
		const char *s;
		int w;
		int h;
	};
	/* sorted from the widest to the narrowest, NULL-terminated */
	static const struct geom_name formats[] = {
		{"16cif",	1408, 1152 },
		{"xga",		1024, 768 },
		{"4cif",	704, 576 },
		{"vga",		640, 480 },
		{"cif",		352, 288 },
		{"qvga",	320, 240 },
		{"qcif",	176, 144 },
		{"sqcif",	128, 96 },
		{NULL,		0, 0 },
	};
	/* narrowest real entry, i.e. the one just before the terminator */
	const struct geom_name *narrowest =
		&formats[sizeof(formats) / sizeof(formats[0]) - 2];
	const struct geom_name *match = NULL;	/* NULL: no table entry selected */
	const struct geom_name *fp;		/* per-call iterator (not static) */
	int w = 0, h = 0;

	if (*s == '<' || *s == '>') {
		/*
		 * Relative request. At least the width must parse, otherwise
		 * the string is left to the generic "invalid" path below
		 * instead of silently selecting an entry for width 0.
		 */
		if (sscanf(s + 1, "%dx%d", &w, &h) >= 1) {
			for (fp = formats; fp->s; fp++) {
				if (*s == '>') {	/* look for a larger one */
					if (fp->w <= w) {
						/* back one step if possible */
						match = (fp > formats) ? fp - 1 : fp;
						break;
					}
				} else if (fp->w < w) {	/* look for a smaller one */
					match = fp;
					break;
				}
			}
			/*
			 * Ran off the narrow end of the table: for '<' nothing
			 * is narrower, for '>' every entry is wider, so in both
			 * cases the narrowest entry is the closest answer.
			 */
			if (match == NULL)
				match = narrowest;
		}
	} else {
		/* look for a format name, ignoring case */
		for (fp = formats; fp->s; fp++) {
			if (!strcasecmp(s, fp->s)) {
				match = fp;
				break;
			}
		}
	}

	if (match) {
		b->w = match->w;
		b->h = match->h;
	} else if (sscanf(s, "%dx%d", &b->w, &b->h) != 2) {
		ast_log(LOG_WARNING, "Invalid video_size %s, using 352x288\n", s);
		b->w = 352;
		b->h = 288;
	}
	return 0;
}

/*! \brief add an entry to the video_device table,
 * ignoring duplicate names.
 * The table is a static array of 9 elements.
* The last_frame field of each entry of the table is initialized to * the current time (we need a value inside this field, on stop of the * GUI the last_frame value is not changed, to avoid checking if it is 0 we * set the initial value on current time) XXX * * PARAMETERS: * \param devices_p = pointer to the table of devices * \param device_num_p = pointer to the number of devices * \param s = name of the new device to insert * * returns 0 on success, 1 on error */ static int device_table_fill(struct video_device *devices, int *device_num_p, const char *s) { int i; struct video_device *p; /* with the current implementation, we support a maximum of 9 devices.*/ if (*device_num_p >= 9) return 0; /* more devices will be ignored */ /* ignore duplicate names */ for (i = 0; i < *device_num_p; i++) { if (!strcmp(devices[i].name, s)) return 0; } /* inserts the new video device */ p = &devices[*device_num_p]; /* XXX the string is allocated but NEVER deallocated, the good time to do that is when the module is unloaded, now we skip the problem */ p->name = ast_strdup(s); /* copy the name */ /* other fields initially NULL */ p->grabber = NULL; p->grabber_data = NULL; p->dev_buf = NULL; p->last_frame = ast_tvnow(); p->status_index = 0; (*device_num_p)++; /* one device added */ return 0; } /* extend ast_cli with video commands. Called by console_video_config */ int console_video_cli(struct video_desc *env, const char *var, int fd) { if (env == NULL) return 1; /* unrecognised */ if (!strcasecmp(var, "videodevice")) { ast_cli(fd, "videodevice is [%s]\n", env->out.devices[env->out.device_primary].name); } else if (!strcasecmp(var, "videocodec")) { ast_cli(fd, "videocodec is [%s]\n", env->codec_name); } else if (!strcasecmp(var, "sendvideo")) { ast_cli(fd, "sendvideo is [%s]\n", env->out.sendvideo ? 
"on" : "off"); } else if (!strcasecmp(var, "video_size")) { int in_w = 0, in_h = 0; if (env->in) { in_w = env->in->dec_out.w; in_h = env->in->dec_out.h; } ast_cli(fd, "sizes: video %dx%d camera %dx%d local %dx%d remote %dx%d in %dx%d\n", env->enc_in.w, env->enc_in.h, env->out.loc_src_geometry.w, env->out.loc_src_geometry.h, env->loc_dpy.w, env->loc_dpy.h, env->rem_dpy.w, env->rem_dpy.h, in_w, in_h); } else if (!strcasecmp(var, "bitrate")) { ast_cli(fd, "bitrate is [%d]\n", env->out.bitrate); } else if (!strcasecmp(var, "qmin")) { ast_cli(fd, "qmin is [%d]\n", env->out.qmin); } else if (!strcasecmp(var, "fps")) { ast_cli(fd, "fps is [%d]\n", env->out.fps); } else if (!strcasecmp(var, "startgui")) { env->stayopen = 1; console_video_start(env, NULL); } else if (!strcasecmp(var, "stopgui") && env->stayopen != 0) { env->stayopen = 0; if (env->gui && env->owner) ast_cli_command(-1, "console hangup"); else /* not in a call */ console_video_uninit(env); } else { return 1; /* unrecognised */ } return 0; /* recognised */ } /*! parse config command for video support. 
*/
/*
 * penv points to the caller's video descriptor pointer; when *penv is NULL
 * a zeroed descriptor is allocated here and stored back through penv.
 * var/val are the configuration name and value to apply.
 *
 * returns 0 if var matched one of the entries below, 1 if it did not or
 * on error (penv == NULL, allocation failure).
 */
int console_video_config(struct video_desc **penv, const char *var, const char *val)
{
	struct video_desc *env;

	if (penv == NULL) {
		ast_log(LOG_WARNING, "bad argument penv=NULL\n");
		return 1;	/* error */
	}
	/* allocate the video descriptor first time we get here */
	env = *penv;
	if (env == NULL) {
		env = *penv = ast_calloc(1, sizeof(struct video_desc));
		if (env == NULL) {
			ast_log(LOG_WARNING, "fail to allocate video_desc\n");
			return 1;	/* error */
		}
		/* set default values - 0's are already there */
		env->out.device_primary = 0;
		env->out.device_secondary = 0;
		env->out.fps = 5;
		env->out.bitrate = 65000;
		env->out.sendvideo = 1;
		env->out.qmin = 3;
		env->out.device_num = 0;
	}
	/*
	 * NOTE(review): the CV_* macros are defined outside this file.
	 * Presumably each entry compares var with the quoted name and, on a
	 * match, stores val (CV_BOOL/CV_UINT/CV_STR) or runs the given call
	 * (CV_F) and then leaves the CV_START/CV_END section - confirm
	 * against their definition. The order of the statements around
	 * CV_END depends on that behaviour, so it must not be changed.
	 */
	CV_START(var, val);
	CV_F("videodevice", device_table_fill(env->out.devices, &env->out.device_num, val));
	CV_BOOL("sendvideo", env->out.sendvideo);
	CV_F("video_size", video_geom(&env->enc_in, val));
	CV_F("camera_size", video_geom(&env->out.loc_src_geometry, val));
	CV_F("local_size", video_geom(&env->loc_dpy, val));
	CV_F("remote_size", video_geom(&env->rem_dpy, val));
	CV_STR("keypad", env->keypad_file);
	CV_F("region", keypad_cfg_read(env->gui, val));
	CV_UINT("startgui", env->stayopen);	/* enable gui at startup */
	CV_STR("keypad_font", env->keypad_font);
	CV_STR("sdl_videodriver", env->sdl_videodriver);
	CV_UINT("fps", env->out.fps);
	CV_UINT("bitrate", env->out.bitrate);
	CV_UINT("qmin", env->out.qmin);
	CV_STR("videocodec", env->codec_name);
	return 1;	/* nothing found */

	CV_END;		/* the 'nothing found' case */
	return 0;	/* found something */
}

#endif	/* video support */