diff options
Diffstat (limited to 'main/minimime/mimeparser.l')
-rw-r--r-- | main/minimime/mimeparser.l | 484 |
1 files changed, 0 insertions, 484 deletions
diff --git a/main/minimime/mimeparser.l b/main/minimime/mimeparser.l deleted file mode 100644 index 19d42cf3a..000000000 --- a/main/minimime/mimeparser.l +++ /dev/null @@ -1,484 +0,0 @@
%{
/*
 * Copyright (c) 2004 Jann Fischer. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/**
 * This is a lexer file for parsing MIME compatible messages. It is intended
 * to satisfy at least RFC 2045 (Format of Internet Message Bodies). It still
 * has quite a few problems:
 *
 * - The parsing could probably be done in a more elegant way
 * - I don't know what performance impact REJECT has on the parser
 *
 * Overview of the start conditions used below:
 *
 *   INITIAL/headers  expecting a header name at the start of a line
 *   header           header name seen, expecting ':'
 *   headervalue      tokenizing the value of the current header
 *   tspecialvalue    inside a double-quoted string of a header value
 *   body             consuming an opaque body, looking for boundaries
 *   preamble         text between the envelope headers and first boundary
 *   boundary         re-scanning a boundary line pushed back with yyless()
 *   endboundary      re-scanning an end-boundary line pushed back with yyless()
 *   postamble        everything after the end boundary
 *
 * Every action advances lstate->current_pos by yyleng so that the positions
 * reported in BODY/PREAMBLE tokens are offsets into the scanned input
 * (current_pos starts at 1, see reset_lexer_state()).
 */
#include <stdio.h>
#include <string.h>
#include <strings.h>	/* strcasecmp() */
#include <ctype.h>
#include <errno.h>

#include "mimeparser.h"
#include "mimeparser.tab.h"

#define NAMEOF(v) #v

/*
 * BC() is a debug wrapper for lex' BEGIN() macro: it switches the start
 * condition and records it in the lexer state.
 *
 * The expansion deliberately does NOT end in a semicolon, so that
 * "BC(x);" is exactly one statement and stays safe in an unbraced
 * if/else.
 */
#define BC(x) do { \
	struct lexer_state *bc_lstate_ = yyget_extra(yyscanner); \
	BEGIN(x); \
	bc_lstate_->condition = x; \
} while (0)

#define ZERO(x) memset(x, '\0', sizeof(x))

#define PREALLOC_BUFFER 100000
#undef YY_BUF_SIZE
#define YY_BUF_SIZE 65536

/*
 * Which kind of header is currently being tokenized. The value decides
 * whether a header value is returned as one unstructured WORD
 * (STATE_MAIL, STATE_CENC) or split into structured tokens.
 */
enum header_states
{
	STATE_MAIL = 0,
	STATE_CTYPE,
	STATE_CDISP,
	STATE_CENC,
	STATE_MIME
};

%}

%option reentrant
%option yylineno
%option bison-bridge

%s headers
%s header
%s headervalue
%s tspecialvalue
%s comment
%s body
%s postamble
%s preamble
%s boundary
%s endboundary
%s endoffile

STRING	[a-zA-Z0-9\-\.\_]
TSPECIAL	[a-zA-Z0-9)(<>@,;:/\-.=_\+'? ]
TSPECIAL_LITE	[a-zA-Z0-9)(<>@,-._+'?\[\]]

%%

<INITIAL,headers>^[a-zA-Z]+[a-zA-Z0-9\-\_]* {
	/* A header name at the beginning of a line. */
	struct lexer_state *lstate = yyget_extra(yyscanner);

	yylval_param->string = strdup(yytext);
	lstate->current_pos += yyleng;
	BC(header);

	/* Depending on what header we are processing, we enter a different
	 * state and return a different value.
	 */
	if (!strcasecmp(yytext, "Content-Type")) {
		lstate->header_state = STATE_CTYPE;
		return CONTENTTYPE_HEADER;
	} else if (!strcasecmp(yytext, "Content-Transfer-Encoding")) {
		lstate->header_state = STATE_CENC;
		return CONTENTENCODING_HEADER;
	} else if (!strcasecmp(yytext, "Content-Disposition")) {
		lstate->header_state = STATE_CDISP;
		return CONTENTDISPOSITION_HEADER;
	} else if (!strcasecmp(yytext, "MIME-Version")) {
		lstate->header_state = STATE_MAIL;
		return MIMEVERSION_HEADER;
	} else {
		lstate->header_state = STATE_MAIL;
		return MAIL_HEADER;
	}
}

<INITIAL,headers>. {
	/* Any other character where a header name was expected. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	/* dprintf2("Unknown header char: %c\n", *yytext); */
	lstate->current_pos += yyleng;
	return ANY;
}

<headers>^(\r\n|\n) {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->lineno++;

	lstate->current_pos += yyleng;

	/* This marks the end of headers. Depending on whether we are in the
	 * envelope currently we need to parse either a body or the preamble
	 * now.
	 */
	if (lstate->is_envelope == 0 || lstate->boundary_string == NULL) {
		BC(body);
		lstate->body_start = lstate->current_pos;
	} else {
		lstate->is_envelope = 0;
		lstate->preamble_start = lstate->current_pos;
		BC(preamble);
	}

	return ENDOFHEADERS;
}

<header>\: {
	/* Separator between header name and header value. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	BC(headervalue);
	lstate->current_pos += yyleng;
	return COLON;
}

<header>(\r\n|\n) {
	/* Line ended before a ':' was seen. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	BC(headers);
	/* dprintf2("Invalid header, returning EOL\n"); */
	lstate->current_pos += yyleng;
	return EOL;
}

<headervalue>(\n|\r\n)[\ \t]+ {
	/* Folded header line: newline followed by whitespace is skipped. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
}

<headervalue>.+|(.+(\n|\r\n)[\ \t]+.+)+ {
	/*
	 * Unstructured header value (possibly folded over several lines),
	 * returned as a single WORD. Only used for plain mail headers and
	 * Content-Transfer-Encoding; all other headers are REJECTed here so
	 * that the structured rules below tokenize them.
	 */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	char *value;

	if (lstate->header_state != STATE_MAIL && lstate->header_state != STATE_CENC) {
		REJECT;
	}
	lstate->current_pos += yyleng;

	/*
	 * Skip leading whitespace with a local cursor instead of advancing
	 * the scanner's own yytext pointer. The cast is required: passing a
	 * negative char (bytes >= 0x80 where char is signed) to isspace()
	 * is undefined behaviour.
	 */
	value = yytext;
	while (*value && isspace((unsigned char)*value)) {
		value++;
	}

	/* Do we actually have a header value? */
	if (*value == '\0') {
		yylval_param->string = strdup("");
	} else {
		yylval_param->string = strdup(value);
		/* NOTE(review): newlines inside the skipped leading whitespace
		 * are not counted here, same as before -- confirm whether
		 * lineno is expected to include them. */
		lstate->lineno += count_lines(value);
	}
	return WORD;
}

<headervalue,tspecialvalue>(\r\n|\n) {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	/* marks the end of one header line */
	lstate->lineno++;
	BC(headers);
	lstate->current_pos += yyleng;
	return EOL;
}

<headervalue>;|;(\r\n|\n)[\ \t]+ {
	/* Parameter separator, optionally followed by a folded line. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->lineno += count_lines(yytext);
	lstate->current_pos += yyleng;
	return SEMICOLON;
}

<headervalue>\= {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
	return EQUAL;
}

<headervalue>\" {
	/* Opening quote: switch to quoted-string tokenizing. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	BC(tspecialvalue);
	lstate->current_pos += yyleng;
	return *yytext;
}

<headervalue>{STRING}+|{TSPECIAL_LITE}+ {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	yylval_param->string = strdup(yytext);
	lstate->lineno += count_lines(yytext);
	lstate->current_pos += yyleng;
	return WORD;
}

<headervalue>[\ |\t]+ {
	/*
	 * Skipped separator characters inside a header value.
	 * NOTE(review): inside a character class '|' is a literal, so this
	 * also swallows '|' characters; presumably only blanks and tabs were
	 * intended. Left unchanged because it affects tokenization.
	 */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
}

<tspecialvalue>{TSPECIAL}+ {
	/* Contents of a double-quoted string. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->lineno += count_lines(yytext);
	yylval_param->string = strdup(yytext);
	lstate->current_pos += yyleng;
	return TSPECIAL;
}

<tspecialvalue>\" {
	/* Closing quote: back to normal header value tokenizing. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	BC(headervalue);
	lstate->current_pos += yyleng;
	return *yytext;
}

<body>^\-\-{TSPECIAL}+\-\- {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	/**
	 * Make sure we only catch matching boundaries, and not other lines
	 * that begin and end with two dashes. If we have caught a valid
	 * end boundary, which actually ends a body, we save the current
	 * position, put the token back on the input stream and let the
	 * endboundary condition parse the actual token.
	 */
	if (lstate->endboundary_string != NULL) {
		if (strcmp(lstate->endboundary_string, yytext)) {
			/* dprintf2("YYTEXT != end_boundary: '%s'\n", yytext); */
			REJECT;
		} else {
			lstate->current_pos += yyleng;
			/* dprintf2("YYTEXT == lstate->end_boundary: '%s'\n", yytext); */
			if (lstate->body_start) {
				yylval_param->position.opaque_start =
					lstate->body_opaque_start;
				yylval_param->position.start = lstate->body_start;
				/* the body ends where the boundary line begins */
				yylval_param->position.end = lstate->current_pos - yyleng;
				lstate->body_opaque_start = 0;
				lstate->body_start = 0;
				lstate->body_end = 0;
				/* push the whole match back for the endboundary rules */
				yyless(0);
				BC(endboundary);
				return BODY;
			}
		}
	}

	REJECT;
}

<body,preamble>^\-\-{TSPECIAL}+ {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	/**
	 * Make sure we only catch matching boundaries, and not other lines
	 * that begin with two dashes.
	 */
	if (lstate->boundary_string != NULL) {
		if (strcmp(lstate->boundary_string, yytext)) {
			/* dprintf2("YYTEXT != boundary: '%s'\n", yytext);*/
			REJECT;
		} else {
			/* dprintf2("YYTEXT == boundary: '%s'\n", yytext);*/
			if (lstate->body_start) {
				/* A body was open: report it, then re-scan the boundary. */
				yylval_param->position.opaque_start = lstate->body_opaque_start;
				yylval_param->position.start = lstate->body_start;
				yylval_param->position.end = lstate->current_pos;
				lstate->body_opaque_start = 0;
				lstate->body_start = 0;
				lstate->body_end = 0;
				yyless(0);
				BC(boundary);
				return BODY;
			} else if (lstate->preamble_start) {
				/* A preamble was open: report it, then re-scan the boundary. */
				yylval_param->position.start = lstate->preamble_start;
				yylval_param->position.end = lstate->current_pos;
				lstate->preamble_start = lstate->preamble_end = 0;
				yyless(0);
				BC(boundary);
				return PREAMBLE;
			} else {
				BC(boundary);
				yylval_param->string = strdup(yytext);
				lstate->current_pos += yyleng;
				return(BOUNDARY);
			}
		}
	}

	REJECT;
}

<body>(\r\n|\n) {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
	lstate->lineno++;
}

<body>\r {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
	/* dprintf2("stray CR in body...\n"); */
}

<body>[^\r\n]+ {
	/* Opaque body text: only the position is tracked. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
}

<body><<EOF>> {
	/*
	 * End of input while inside a body. Without a boundary string the
	 * open body is reported as BODY; with one, POSTAMBLE is returned.
	 * NOTE(review): the POSTAMBLE branch neither sets yylval nor clears
	 * body_start -- confirm the grammar does not rely on either.
	 */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	if (lstate->boundary_string == NULL && lstate->body_start) {
		yylval_param->position.opaque_start = 0;
		yylval_param->position.start = lstate->body_start;
		yylval_param->position.end = lstate->current_pos;
		lstate->body_start = 0;
		return BODY;
	} else if (lstate->body_start) {
		return POSTAMBLE;
	}
	yyterminate();
}

<preamble,postamble>(\r\n|\n) {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	/* dprintf2("Preamble CR/LF at line %d\n", lineno); */
	lstate->lineno++;
	lstate->current_pos += yyleng;
}

<boundary>[^\r\n]+ {
	/* The boundary line that was pushed back by the body/preamble rule. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	yylval_param->string = strdup(yytext);
	lstate->current_pos += yyleng;
	return BOUNDARY;
}

<endboundary>[^\r\n]+ {
	/* The end-boundary line that was pushed back by the body rule. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	yylval_param->string = strdup(yytext);
	lstate->current_pos += yyleng;
	return ENDBOUNDARY;
}

<boundary>(\r\n|\n) {
	/* End of a boundary line: the headers of the next part follow. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	BC(headers);
	lstate->lineno++;
	lstate->current_pos += yyleng;
	lstate->body_opaque_start = lstate->current_pos;
	return EOL;
}

<endboundary>(\r\n|\n) {
	/* End of the end-boundary line: everything after it is postamble. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	BC(postamble);
	lstate->lineno++;
	lstate->current_pos += yyleng;
}

<preamble>. {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
}


<postamble>. {
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
}

(\r\n|\n) {
	/* Fallback for newlines not handled by a more specific rule. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->lineno++;
	lstate->current_pos += yyleng;
	return EOL;
}

. {
	/* Fallback: hand any other character to the parser as itself. */
	struct lexer_state *lstate = yyget_extra(yyscanner);
	lstate->current_pos += yyleng;
	return((int)*yytext);
}


%%

/**
 * Attaches the lexer state embedded in pstate to the scanner and resets it
 * for a new message: start condition INITIAL, header state STATE_MAIL,
 * line counter 0, position 1, envelope flag set and all temporary
 * position markers cleared.
 *
 * Only the fields assigned below are touched; any other member of the
 * lexer state keeps its previous value.
 *
 * @param yyscanner the reentrant scanner instance
 * @param pstate    parser state whose lstate member becomes the scanner's
 *                  extra data
 */
void reset_lexer_state(void *yyscanner, struct parser_state *pstate)
{
	/* BEGIN() expands to code that refers to a variable named yyg */
	struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
	struct lexer_state *lstate = &(pstate->lstate);

	yyset_extra((void*)lstate, yyscanner);
	BEGIN(0);
	lstate->header_state = STATE_MAIL;
	lstate->lineno = 0;
	lstate->current_pos = 1;
	lstate->condition = 0;

	lstate->is_envelope = 1;

	lstate->message_len = 0;
	lstate->buffer_length = 0;

	/* temporary marker variables */
	lstate->body_opaque_start = 0;
	lstate->body_start = 0;
	lstate->body_end = 0;
	lstate->preamble_start = 0;
	lstate->preamble_end = 0;
	lstate->postamble_start = 0;
	lstate->postamble_end = 0;
}

/**
 * Makes the scanner read from a NUL-terminated in-memory string and
 * remembers the pointer in the lexer state (message_buffer).
 *
 * @param string  the message text to scan
 * @param scanner the reentrant scanner instance; its extra data must
 *                already point at a lexer state (see reset_lexer_state())
 */
void
PARSER_setbuffer(const char *string, yyscan_t scanner)
{
	struct lexer_state *lstate = yyget_extra(scanner);
	lstate->message_buffer = string;
	yy_scan_string(string, scanner);
}

/**
 * Makes the scanner read from an already opened stdio stream.
 *
 * @param fp      input stream
 * @param scanner the reentrant scanner instance
 */
void
PARSER_setfp(FILE *fp, yyscan_t scanner)
{
	/* looks like a bug in bison 2.2a -- the wrong code is generated for yyset_in !! */
	struct yyguts_t * yyg = (struct yyguts_t*) scanner;
	yyg->yyin_r = fp;

	if (0) {
		/* This is just to make a compiler warning go away */
		yyunput(0, NULL, scanner);
	}
}

/**
 * Counts how many lines a given string represents in the message (in case of
 * folded header values, for example, or a message body).
 *
 * @param txt NUL-terminated string to inspect; must not be NULL
 * @return the number of '\n' characters in txt
 */
int
count_lines(char *txt)
{
	char *o;
	int line;

	line = 0;

	for (o = txt; *o != '\0'; o++) {
		if (*o == '\n') {
			line++;
		}
	}

	return line;
}