aboutsummaryrefslogtreecommitdiffstats
path: root/trunk/main/minimime/mimeparser.y
diff options
context:
space:
mode:
Diffstat (limited to 'trunk/main/minimime/mimeparser.y')
-rw-r--r--trunk/main/minimime/mimeparser.y748
1 files changed, 748 insertions, 0 deletions
diff --git a/trunk/main/minimime/mimeparser.y b/trunk/main/minimime/mimeparser.y
new file mode 100644
index 000000000..7c32a290d
--- /dev/null
+++ b/trunk/main/minimime/mimeparser.y
@@ -0,0 +1,748 @@
+%{
+/*
+ * Copyright (c) 2004 Jann Fischer. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/**
+ * These are the grammatic definitions in yacc syntax to parse MIME conform
+ * messages.
+ *
+ * TODO:
+ * - honour parse flags passed to us (partly done)
+ * - parse Content-Disposition header (partly done)
+ * - parse Content-Encoding header
+ */
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include "mimeparser.h"
+#include "mm.h"
+#include "mm_internal.h"
+
+int set_boundary(char *,struct parser_state *);
+int mimeparser_yywrap(void);
+void reset_environ(struct parser_state *pstate);
+int PARSER_initialize(struct parser_state *pstate, void *yyscanner);
+
+static char *PARSE_readmessagepart(size_t, size_t, size_t, size_t *,yyscan_t, struct parser_state *);
+FILE *mimeparser_yyget_in (yyscan_t yyscanner );
+
+%}
+
+%pure-parser
+%parse-param {struct parser_state *pstate}
+%parse-param {void *yyscanner}
+%lex-param {void *yyscanner}
+
+%union
+{
+ int number;
+ char *string;
+ struct s_position position;
+}
+
+%token ANY
+%token COLON
+%token DASH
+%token DQUOTE
+%token ENDOFHEADERS
+%token EOL
+%token EOM
+%token EQUAL
+%token MIMEVERSION_HEADER
+%token SEMICOLON
+
+%token <string> CONTENTDISPOSITION_HEADER
+%token <string> CONTENTENCODING_HEADER
+%token <string> CONTENTTYPE_HEADER
+%token <string> MAIL_HEADER
+%token <string> HEADERVALUE
+%token <string> BOUNDARY
+%token <string> ENDBOUNDARY
+%token <string> CONTENTTYPE_VALUE
+%token <string> TSPECIAL
+%token <string> WORD
+
+%token <position> BODY
+%token <position> PREAMBLE
+%token <position> POSTAMBLE
+
+%type <string> content_disposition
+%type <string> contenttype_parameter_value
+%type <string> mimetype
+%type <string> body
+
+%start message
+
+%%
+
+/* This is a parser for a MIME-conform message, which is in either single
+ * part or multi part format.
+ */
+message :
+ multipart_message
+ |
+ singlepart_message
+ ;
+
+multipart_message:
+ headers preamble
+ {
+ mm_context_attachpart(pstate->ctx, pstate->current_mimepart);
+ pstate->current_mimepart = mm_mimepart_new();
+ pstate->have_contenttype = 0;
+ }
+ mimeparts endboundary postamble
+ {
+ dprintf2(pstate,"This was a multipart message\n");
+ }
+ ;
+
+singlepart_message:
+ headers body
+ {
+ dprintf2(pstate,"This was a single part message\n");
+ mm_context_attachpart(pstate->ctx, pstate->current_mimepart);
+ }
+ ;
+
+headers :
+ header headers
+ |
+ end_headers
+ {
+ /* If we did not find a Content-Type header for the current
+ * MIME part (or envelope), we create one and attach it.
+ * According to the RFC, a type of "text/plain" and a
+ * charset of "us-ascii" can be assumed.
+ */
+ struct mm_content *ct;
+ struct mm_param *param;
+
+ if (!pstate->have_contenttype) {
+ ct = mm_content_new();
+ mm_content_settype(ct, "text/plain");
+
+ param = mm_param_new();
+ param->name = xstrdup("charset");
+ param->value = xstrdup("us-ascii");
+
+ mm_content_attachtypeparam(ct, param);
+ mm_mimepart_attachcontenttype(pstate->current_mimepart, ct);
+ }
+ pstate->have_contenttype = 0;
+ }
+ |
+ header
+ ;
+
+preamble:
+ PREAMBLE
+ {
+ char *preamble;
+ size_t offset;
+
+ if ($1.start != $1.end) {
+ preamble = PARSE_readmessagepart(0, $1.start, $1.end,
+ &offset,yyscanner,pstate);
+ if (preamble == NULL) {
+ return(-1);
+ }
+ pstate->ctx->preamble = preamble;
+ dprintf2(pstate,"PREAMBLE:\n%s\n", preamble);
+ }
+ }
+ |
+ ;
+
+postamble:
+ POSTAMBLE
+ {
+ }
+ |
+ ;
+
+mimeparts:
+ mimeparts mimepart
+ |
+ mimepart
+ ;
+
+mimepart:
+ boundary headers body
+ {
+
+ if (mm_context_attachpart(pstate->ctx, pstate->current_mimepart) == -1) {
+ mm_errno = MM_ERROR_ERRNO;
+ return(-1);
+ }
+
+ pstate->temppart = mm_mimepart_new();
+ pstate->current_mimepart = pstate->temppart;
+ pstate->mime_parts++;
+ }
+ ;
+
+header :
+ mail_header
+ |
+ contenttype_header
+ {
+ pstate->have_contenttype = 1;
+ if (mm_content_iscomposite(pstate->envelope->type)) {
+ pstate->ctx->messagetype = MM_MSGTYPE_MULTIPART;
+ } else {
+ pstate->ctx->messagetype = MM_MSGTYPE_FLAT;
+ }
+ }
+ |
+ contentdisposition_header
+ |
+ contentencoding_header
+ |
+ mimeversion_header
+ |
+ invalid_header
+ {
+ if (pstate->parsemode != MM_PARSE_LOOSE) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("invalid header encountered");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ } else {
+ /* TODO: attach MM_WARNING_INVHDR */
+ }
+ }
+ ;
+
+mail_header:
+ MAIL_HEADER COLON WORD EOL
+ {
+ struct mm_mimeheader *hdr;
+ hdr = mm_mimeheader_generate($1, $3);
+ mm_mimepart_attachheader(pstate->current_mimepart, hdr);
+ }
+ |
+ MAIL_HEADER COLON EOL
+ {
+ struct mm_mimeheader *hdr;
+
+ if (pstate->parsemode != MM_PARSE_LOOSE) {
+ mm_errno = MM_ERROR_MIME;
+ mm_error_setmsg("invalid header encountered");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ } else {
+ /* TODO: attach MM_WARNING_INVHDR */
+ }
+
+ hdr = mm_mimeheader_generate($1, xstrdup(""));
+ mm_mimepart_attachheader(pstate->current_mimepart, hdr);
+ }
+ ;
+
+contenttype_header:
+ CONTENTTYPE_HEADER COLON mimetype EOL
+ {
+ mm_content_settype(pstate->ctype, "%s", $3);
+ mm_mimepart_attachcontenttype(pstate->current_mimepart, pstate->ctype);
+ dprintf2(pstate,"Content-Type -> %s\n", $3);
+ pstate->ctype = mm_content_new();
+ }
+ |
+ CONTENTTYPE_HEADER COLON mimetype contenttype_parameters EOL
+ {
+ mm_content_settype(pstate->ctype, "%s", $3);
+ mm_mimepart_attachcontenttype(pstate->current_mimepart, pstate->ctype);
+ dprintf2(pstate,"Content-Type (P) -> %s\n", $3);
+ pstate->ctype = mm_content_new();
+ }
+ ;
+
+contentdisposition_header:
+ CONTENTDISPOSITION_HEADER COLON content_disposition EOL
+ {
+ dprintf2(pstate,"Content-Disposition -> %s\n", $3);
+ pstate->ctype->disposition_type = xstrdup($3);
+ }
+ |
+ CONTENTDISPOSITION_HEADER COLON content_disposition content_disposition_parameters EOL
+ {
+ dprintf2(pstate,"Content-Disposition (P) -> %s; params\n", $3);
+ pstate->ctype->disposition_type = xstrdup($3);
+ }
+ ;
+
+content_disposition:
+ WORD
+ {
+ /*
+ * According to RFC 2183, the content disposition value may
+ * only be "inline", "attachment" or an extension token. We
+ * catch invalid values here if we are not in loose parsing
+ * mode.
+ */
+ if (strcasecmp($1, "inline") && strcasecmp($1, "attachment")
+ && strncasecmp($1, "X-", 2)) {
+ if (pstate->parsemode != MM_PARSE_LOOSE) {
+ mm_errno = MM_ERROR_MIME;
+ mm_error_setmsg("invalid content-disposition");
+ return(-1);
+ }
+ } else {
+ /* TODO: attach MM_WARNING_INVHDR */
+ }
+ $$ = $1;
+ }
+ ;
+
+contentencoding_header:
+ CONTENTENCODING_HEADER COLON WORD EOL
+ {
+ dprintf2(pstate,"Content-Transfer-Encoding -> %s\n", $3);
+ }
+ ;
+
+mimeversion_header:
+ MIMEVERSION_HEADER COLON WORD EOL
+ {
+ dprintf2(pstate,"MIME-Version -> '%s'\n", $3);
+ }
+ ;
+
+invalid_header:
+ any EOL
+ ;
+
+any:
+ any ANY
+ |
+ ANY
+ ;
+
+mimetype:
+ WORD '/' WORD
+ {
+ char type[255];
+ snprintf(type, sizeof(type), "%s/%s", $1, $3);
+ $$ = type;
+ }
+ ;
+
+contenttype_parameters:
+ SEMICOLON contenttype_parameter contenttype_parameters
+ |
+ SEMICOLON contenttype_parameter
+ |
+ SEMICOLON
+ {
+ if (pstate->parsemode != MM_PARSE_LOOSE) {
+ mm_errno = MM_ERROR_MIME;
+ mm_error_setmsg("invalid Content-Type header");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ } else {
+ /* TODO: attach MM_WARNING_INVHDR */
+ }
+ }
+ ;
+
+content_disposition_parameters:
+ SEMICOLON content_disposition_parameter content_disposition_parameters
+ |
+ SEMICOLON content_disposition_parameter
+ |
+ SEMICOLON
+ {
+ if (pstate->parsemode != MM_PARSE_LOOSE) {
+ mm_errno = MM_ERROR_MIME;
+ mm_error_setmsg("invalid Content-Disposition header");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ } else {
+ /* TODO: attach MM_WARNING_INVHDR */
+ }
+ }
+ ;
+
+contenttype_parameter:
+ WORD EQUAL contenttype_parameter_value
+ {
+ struct mm_param *param;
+ param = mm_param_new();
+
+ dprintf2(pstate,"Param: '%s', Value: '%s'\n", $1, $3);
+
+ /* Catch an eventual boundary identifier */
+ if (!strcasecmp($1, "boundary")) {
+ if (pstate->lstate.boundary_string == NULL) {
+ set_boundary($3,pstate);
+ } else {
+ if (pstate->parsemode != MM_PARSE_LOOSE) {
+ mm_errno = MM_ERROR_MIME;
+ mm_error_setmsg("duplicate boundary "
+ "found");
+ return -1;
+ } else {
+ /* TODO: attach MM_WARNING_DUPPARAM */
+ }
+ }
+ }
+
+ param->name = xstrdup($1);
+ param->value = xstrdup($3);
+
+ mm_content_attachtypeparam(pstate->ctype, param);
+ }
+ ;
+
+content_disposition_parameter:
+ WORD EQUAL contenttype_parameter_value
+ {
+ struct mm_param *param;
+ param = mm_param_new();
+
+ param->name = xstrdup($1);
+ param->value = xstrdup($3);
+
+ mm_content_attachdispositionparam(pstate->ctype, param);
+
+ }
+ ;
+
+contenttype_parameter_value:
+ WORD
+ {
+ dprintf2(pstate,"contenttype_param_val: WORD=%s\n", $1);
+ $$ = $1;
+ }
+ |
+ TSPECIAL
+ {
+ dprintf2(pstate,"contenttype_param_val: TSPECIAL\n");
+ /* For broken MIME implementation */
+ if (pstate->parsemode != MM_PARSE_LOOSE) {
+ mm_errno = MM_ERROR_MIME;
+ mm_error_setmsg("tspecial without quotes");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ } else {
+ /* TODO: attach MM_WARNING_INVAL */
+ }
+ $$ = $1;
+ }
+ |
+ '"' TSPECIAL '"'
+ {
+ dprintf2(pstate,"contenttype_param_val: \"TSPECIAL\"\n" );
+ $$ = $2;
+ }
+ ;
+
+end_headers :
+ ENDOFHEADERS
+ {
+ dprintf2(pstate,"End of headers at line %d\n", pstate->lstate.lineno);
+ }
+ ;
+
+boundary :
+ BOUNDARY EOL
+ {
+ if (pstate->lstate.boundary_string == NULL) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("internal incosistency");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ }
+ if (strcmp(pstate->lstate.boundary_string, $1)) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("invalid boundary: '%s' (%d)", $1, strlen($1));
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ }
+ dprintf2(pstate,"New MIME part... (%s)\n", $1);
+ }
+ ;
+
+endboundary :
+ ENDBOUNDARY
+ {
+ if (pstate->lstate.endboundary_string == NULL) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("internal incosistency");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ }
+ if (strcmp(pstate->lstate.endboundary_string, $1)) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("invalid end boundary: %s", $1);
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(-1);
+ }
+ dprintf2(pstate,"End of MIME message\n");
+ }
+ ;
+
+body:
+ BODY
+ {
+ char *body;
+ size_t offset;
+
+ dprintf2(pstate,"BODY (%d/%d), SIZE %d\n", $1.start, $1.end, $1.end - $1.start);
+
+ body = PARSE_readmessagepart($1.opaque_start, $1.start, $1.end,
+ &offset,yyscanner,pstate);
+
+ if (body == NULL) {
+ return(-1);
+ }
+ pstate->current_mimepart->opaque_body = body;
+ pstate->current_mimepart->body = body + offset;
+ pstate->current_mimepart->opaque_length = $1.end - $1.start - 2 + offset;
+ pstate->current_mimepart->length = pstate->current_mimepart->opaque_length - offset;
+ }
+ ;
+
+%%
+
+/*
+ * This function gets the specified part from the currently parsed message.
+ */
+static char *
+PARSE_readmessagepart(size_t opaque_start, size_t real_start, size_t end,
+ size_t *offset, yyscan_t yyscanner, struct parser_state *pstate)
+{
+ size_t body_size;
+ size_t current;
+ size_t start;
+ char *body;
+
+ /* calculate start and offset markers for the opaque and
+ * header stripped body message.
+ */
+ if (opaque_start > 0) {
+ /* Multipart message */
+ if (real_start) {
+ if (real_start < opaque_start) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("internal incosistency (S:%d/O:%d)",
+ real_start,
+ opaque_start);
+ return(NULL);
+ }
+ start = opaque_start;
+ *offset = real_start - start;
+ /* Flat message */
+ } else {
+ start = opaque_start;
+ *offset = 0;
+ }
+ } else {
+ start = real_start;
+ *offset = 0;
+ }
+
+ /* The next three cases should NOT happen anytime */
+ if (end <= start) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("internal incosistency,2");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(NULL);
+ }
+ if (start < *offset) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("internal incosistency, S:%d,O:%d,L:%d", start, offset, pstate->lstate.lineno);
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(NULL);
+ }
+ if (start < 0 || end < 0) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("internal incosistency,4");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(NULL);
+ }
+
+ /* XXX: do we want to enforce a maximum body size? make it a
+ * parser option? */
+
+ /* Read in the body message */
+ body_size = end - start;
+
+ if (body_size < 1) {
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("size of body cannot be < 1");
+ mm_error_setlineno(pstate->lstate.lineno);
+ return(NULL);
+ }
+
+ body = (char *)malloc(body_size + 1);
+ if (body == NULL) {
+ mm_errno = MM_ERROR_ERRNO;
+ return(NULL);
+ }
+
+ /* Get the message body either from a stream or a memory
+ * buffer.
+ */
+ if (mimeparser_yyget_in(yyscanner) != NULL) {
+ FILE *x = mimeparser_yyget_in(yyscanner);
+ current = ftell(x);
+ fseek(x, start - 1, SEEK_SET);
+ fread(body, body_size - 1, 1, x);
+ fseek(x, current, SEEK_SET);
+ } else if (pstate->lstate.message_buffer != NULL) {
+ strlcpy(body, pstate->lstate.message_buffer + start - 1, body_size);
+ }
+
+ return(body);
+
+}
+
+int
+yyerror(struct parser_state *pstate, void *yyscanner, const char *str)
+{
+ mm_errno = MM_ERROR_PARSE;
+ mm_error_setmsg("%s", str);
+ mm_error_setlineno(pstate->lstate.lineno);
+ return -1;
+}
+
+int
+mimeparser_yywrap(void)
+{
+ return 1;
+}
+
+/**
+ * Sets the boundary value for the current message
+ */
+int
+set_boundary(char *str, struct parser_state *pstate)
+{
+ size_t blen;
+
+ blen = strlen(str);
+
+ pstate->lstate.boundary_string = (char *)malloc(blen + 3);
+ pstate->lstate.endboundary_string = (char *)malloc(blen + 5);
+
+ if (pstate->lstate.boundary_string == NULL || pstate->lstate.endboundary_string == NULL) {
+ if (pstate->lstate.boundary_string != NULL) {
+ free(pstate->lstate.boundary_string);
+ }
+ if (pstate->lstate.endboundary_string != NULL) {
+ free(pstate->lstate.endboundary_string);
+ }
+ return -1;
+ }
+
+ pstate->ctx->boundary = xstrdup(str);
+
+ snprintf(pstate->lstate.boundary_string, blen + 3, "--%s", str);
+ snprintf(pstate->lstate.endboundary_string, blen + 5, "--%s--", str);
+
+ return 0;
+}
+
+/**
+ * Debug printf()
+ */
+int
+dprintf2(struct parser_state *pstate, const char *fmt, ...)
+{
+ va_list ap;
+ char *msg;
+ if (pstate->debug == 0) return 1;
+
+ va_start(ap, fmt);
+ vasprintf(&msg, fmt, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s", msg);
+ free(msg);
+
+ return 0;
+
+}
+
+void reset_environ(struct parser_state *pstate)
+{
+ pstate->lstate.lineno = 0;
+ pstate->lstate.boundary_string = NULL;
+ pstate->lstate.endboundary_string = NULL;
+ pstate->lstate.message_buffer = NULL;
+ pstate->mime_parts = 0;
+ pstate->debug = 0;
+ pstate->envelope = NULL;
+ pstate->temppart = NULL;
+ pstate->ctype = NULL;
+ pstate->current_mimepart = NULL;
+
+ pstate->have_contenttype = 0;
+}
+/**
+ * Initializes the parser engine.
+ */
+int
+PARSER_initialize(struct parser_state *pstate, void *yyscanner)
+{
+ void reset_lexer_state(void *yyscanner, struct parser_state *);
+#if 0
+ if (pstate->ctx != NULL) {
+ xfree(pstate->ctx);
+ pstate->ctx = NULL;
+ }
+ if (pstate->envelope != NULL) {
+ xfree(pstate->envelope);
+ pstate->envelope = NULL;
+ }
+ if (pstate->ctype != NULL) {
+ xfree(pstate->ctype);
+ pstate->ctype = NULL;
+ }
+#endif
+ /* yydebug = 1; */
+ reset_environ(pstate);
+ reset_lexer_state(yyscanner,pstate);
+
+ pstate->envelope = mm_mimepart_new();
+ pstate->current_mimepart = pstate->envelope;
+ pstate->ctype = mm_content_new();
+
+ pstate->have_contenttype = 0;
+
+ return 1;
+}
+
+