aboutsummaryrefslogtreecommitdiffstats
path: root/main/minimime/mimeparser.l
diff options
context:
space:
mode:
Diffstat (limited to 'main/minimime/mimeparser.l')
-rw-r--r--main/minimime/mimeparser.l484
1 files changed, 0 insertions, 484 deletions
diff --git a/main/minimime/mimeparser.l b/main/minimime/mimeparser.l
deleted file mode 100644
index 19d42cf3a..000000000
--- a/main/minimime/mimeparser.l
+++ /dev/null
@@ -1,484 +0,0 @@
-%{
-/*
- * Copyright (c) 2004 Jann Fischer. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-/**
- * This is a lexer file for parsing MIME compatible messages. It is intended
- * to satisfy at least RFC 2045 (Format of Internet Message Bodies). It still
- * has quite a few problems:
- *
- * - The parsing could probably be done in a more elegant way
- * - I don't know what performance impact REJECT has on the parser
- */
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <errno.h>
-
-#include "mimeparser.h"
-#include "mimeparser.tab.h"
-
-/* NAMEOF() turns its argument into a string literal via the # operator. */
-#define NAMEOF(v) #v
-/*
- * BC() is a debug wrapper for lex' BEGIN() macro: it switches the scanner to
- * start condition x and mirrors that value in lexer_state.condition.
- *
- * It must be used inside code where yyscanner (and whatever BEGIN() itself
- * needs) is in scope.  The expansion deliberately carries no trailing
- * semicolon, so that "BC(x);" is exactly one statement and stays safe in an
- * unbraced if/else.  The temporary is called bc_lstate_ so that it cannot
- * shadow an lstate variable of the calling action.
- */
-#define BC(x) do { \
-    struct lexer_state *bc_lstate_ = yyget_extra(yyscanner); \
-    BEGIN(x); \
-    bc_lstate_->condition = (x); \
-} while (0)
-
-/*
- * ZERO() fills x with NUL bytes.  The size is taken with sizeof(x), so x has
- * to be an actual array; for a plain pointer only sizeof(pointer) bytes
- * would be cleared.
- */
-#define ZERO(x) memset(x, '\0', sizeof(x))
-
-/* NOTE(review): PREALLOC_BUFFER is not referenced in the visible part of this file. */
-#define PREALLOC_BUFFER 100000
-/* Drop any earlier YY_BUF_SIZE definition and use 65536 instead. */
-#undef YY_BUF_SIZE
-#define YY_BUF_SIZE 65536
-
-/*
- * Kind of header whose value is currently being scanned.  The header-name
- * rule stores one of these in lexer_state.header_state.
- */
-enum header_states
-{
- STATE_MAIL = 0, /* MIME-Version and every header without a dedicated state */
- STATE_CTYPE, /* Content-Type */
- STATE_CDISP, /* Content-Disposition */
- STATE_CENC, /* Content-Transfer-Encoding */
- STATE_MIME /* not assigned anywhere in the visible part of this file */
-};
-
-
-
-%}
-
-/*
- * Scanner options: reentrant scanner (state lives in yyscanner), line
- * counting via yylineno, and the bison-bridge calling convention (the
- * actions below write their token values through yylval_param).
- */
-%option reentrant
-%option yylineno
-%option bison-bridge
-
-/*
- * Start conditions.  They are declared with %s (inclusive), so rules that
- * carry no start condition are active in every one of them.
- */
-%s headers
-%s header
-%s headervalue
-%s tspecialvalue
-%s comment
-%s body
-%s postamble
-%s preamble
-%s boundary
-%s endboundary
-%s endoffile
-
-/*
- * Character classes used by the rules below.
- * STRING:        letters, digits, '-', '.' and '_'.
- * TSPECIAL:      STRING characters plus ) ( < > @ , ; : / = + ' ? and space.
- * TSPECIAL_LITE: like TSPECIAL but without ; : / = and space, and with
- *                [ and ] added.  Note that ",-." inside the class is a range
- *                from ',' to '.', which covers exactly ',', '-' and '.'.
- */
-STRING [a-zA-Z0-9\-\.\_]
-TSPECIAL [a-zA-Z0-9)(<>@,;:/\-.=_\+'? ]
-TSPECIAL_LITE [a-zA-Z0-9)(<>@,-._+'?\[\]]
-
-%%
-
-<INITIAL,headers>^[a-zA-Z]+[a-zA-Z0-9\-\_]* {
- /*
-  * Header field name at the start of a line: one or more letters followed
-  * by letters, digits, '-' or '_'.  The name is handed to the parser as a
-  * strdup() copy, the scanner moves to the header condition, and the
-  * returned token tells the parser which kind of header this is.
-  * NOTE(review): the strdup() result is not checked for NULL, and releasing
-  * the copy is presumably the parser's job -- confirm.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
-
- yylval_param->string=strdup(yytext);
- lstate->current_pos += yyleng;
- BC(header);
-
- /* Depending on what header we are processing, we enter a different
- * state and return a different value.
- */
- if (!strcasecmp(yytext, "Content-Type")) {
- lstate->header_state = STATE_CTYPE;
- return CONTENTTYPE_HEADER;
- } else if (!strcasecmp(yytext, "Content-Transfer-Encoding")) {
- lstate->header_state = STATE_CENC;
- return CONTENTENCODING_HEADER;
- } else if (!strcasecmp(yytext, "Content-Disposition")) {
- lstate->header_state = STATE_CDISP;
- return CONTENTDISPOSITION_HEADER;
- } else if (!strcasecmp(yytext, "MIME-Version")) {
- /* MIME-Version has its own token but shares STATE_MAIL with ordinary headers. */
- lstate->header_state = STATE_MAIL;
- return MIMEVERSION_HEADER;
- } else {
- lstate->header_state = STATE_MAIL;
- return MAIL_HEADER;
- }
-}
-
-<INITIAL,headers>. {
- /*
-  * Any single character (other than a newline) that did not start a valid
-  * header name: account for it and hand it to the parser as ANY.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- /* dprintf2("Unknown header char: %c\n", *yytext); */
- lstate->current_pos += yyleng;
- return ANY;
-}
-
-<headers>^(\r\n|\n) {
- /* An empty line while in the headers condition: the header block is over. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->lineno++;
-
- lstate->current_pos += yyleng;
-
- /* This marks the end of headers. Depending on whether we are in the
- * envelope currently we need to parse either a body or the preamble
- * now.
- */
- if (lstate->is_envelope == 0 || lstate->boundary_string == NULL) {
- /* Not in the envelope, or no boundary known: what follows is a body, starting right after the empty line. */
- BC(body);
- lstate->body_start = lstate->current_pos;
- } else {
- /* Envelope with a boundary: what follows is the preamble; the envelope flag is cleared here. */
- lstate->is_envelope = 0;
- lstate->preamble_start = lstate->current_pos;
- BC(preamble);
- }
-
- return ENDOFHEADERS;
-}
-
-<header>\: {
- /* The colon after a header name: switch to scanning the header's value. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- BC(headervalue);
- lstate->current_pos += yyleng;
- return COLON;
-}
-
-<header>(\r\n|\n) {
- /*
-  * Line break directly after a header name, i.e. a header without a colon:
-  * go back to the headers condition and report EOL.
-  * NOTE(review): unlike the other line-break rules, lineno is not
-  * incremented here -- confirm whether that is intended.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- BC(headers);
- /* dprintf2("Invalid header, returning EOL\n"); */
- lstate->current_pos += yyleng;
- return EOL;
-}
-
-<headervalue>(\n|\r\n)[\ \t]+ {
- /*
-  * Folded header line (line break followed by spaces or tabs): skipped
-  * without returning a token; only the position is advanced.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
-}
-
-<headervalue>.+|(.+(\n|\r\n)[\ \t]+.+)+ {
- /*
-  * Whole-value rule: matches the rest of the header line, optionally
-  * continued over folded lines (a line break followed by spaces or tabs).
-  * It is only used for STATE_MAIL and STATE_CENC headers; for every other
-  * header state the match is REJECTed so that the finer-grained headervalue
-  * rules tokenise the value instead.
-  * The token value is a strdup() copy of the text with leading whitespace
-  * removed, or of "" when nothing but whitespace was matched.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- if (lstate->header_state != STATE_MAIL && lstate->header_state != STATE_CENC) {
- REJECT;
- }
- lstate->current_pos += yyleng;
- /*
-  * The <ctype.h> functions are only defined for unsigned char values and
-  * EOF.  Header bytes >= 0x80 are negative where plain char is signed, so
-  * the byte is converted to unsigned char before the call.
-  */
- while (*yytext && isspace((unsigned char)*yytext)) yytext++;
- /* Do we actually have a header value? */
- if (*yytext == '\0') {
- yylval_param->string = strdup("");
- } else {
- yylval_param->string=strdup(yytext);
- /* Folded values span several physical lines; count them all. */
- lstate->lineno += count_lines(yytext);
- }
- return WORD;
-}
-
-<headervalue,tspecialvalue>(\r\n|\n) {
- /*
-  * Line break inside a header value or inside a quoted string: the header
-  * line is finished, so return to the headers condition and report EOL.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- /* marks the end of one header line */
- lstate->lineno++;
- BC(headers);
- lstate->current_pos += yyleng;
- return EOL;
-}
-
-<headervalue>;|;(\r\n|\n)[\ \t]+ {
- /*
-  * Parameter separator: a semicolon, optionally followed by a folded line
-  * break.  count_lines() adds the line break, if one was matched, to lineno.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->lineno += count_lines(yytext);
- lstate->current_pos += yyleng;
- return SEMICOLON;
-}
-
-<headervalue>\= {
- /* The '=' between a parameter name and its value. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
- return EQUAL;
-}
-
-<headervalue>\" {
- /*
-  * Opening double quote: switch to the tspecialvalue condition, which scans
-  * the quoted text.  The quote character itself is the token.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- BC(tspecialvalue);
- lstate->current_pos += yyleng;
- return *yytext;
-}
-
-<headervalue>{STRING}+|{TSPECIAL_LITE}+ {
- /*
-  * Unquoted word in a header value; returned as WORD with a strdup() copy
-  * of the text.  Neither character class, as defined in this file, contains
-  * a newline, so the count_lines() call below adds 0.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- yylval_param->string=strdup(yytext);
- lstate->lineno += count_lines(yytext);
- lstate->current_pos += yyleng;
- return WORD;
-}
-
-<headervalue>[\ |\t]+ {
- /*
-  * Skips runs of blanks inside a header value without returning a token.
-  * NOTE(review): inside a character class '|' is a literal, so this rule
-  * also swallows '|' characters; "[\ \t]+" was presumably intended --
-  * confirm before changing, since it alters which tokens the parser sees.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
-}
-
-<tspecialvalue>{TSPECIAL}+ {
- /*
-  * Text inside a quoted string; returned as TSPECIAL with a strdup() copy.
-  * The TSPECIAL class, as defined in this file, contains no newline, so
-  * the count_lines() call below adds 0.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->lineno += count_lines(yytext);
- yylval_param->string=strdup(yytext);
- lstate->current_pos += yyleng;
- return TSPECIAL;
-}
-
-<tspecialvalue>\" {
- /* Closing double quote: back to the headervalue condition; the quote itself is the token. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- BC(headervalue);
- lstate->current_pos += yyleng;
- return *yytext;
-}
-
-<body>^\-\-{TSPECIAL}+\-\- {
- struct lexer_state *lstate = yyget_extra(yyscanner);
- /**
- * Make sure we only catch matching boundaries, and not other lines
- * that begin and end with two dashes. If we have caught a valid
- * end boundary, which actually ends a body, we save the current
- * position, put the token back on the input stream and let the
- * endboundary condition parse the actual token.
- */
- if (lstate->endboundary_string != NULL) {
- if (strcmp(lstate->endboundary_string, yytext)) {
- /* dprintf2("YYTEXT != end_boundary: '%s'\n", yytext); */
- REJECT;
- } else {
- /*
-  * NOTE(review): current_pos is advanced here, yet on the BODY path the
-  * same text is pushed back with yyless(0) and the endboundary rule that
-  * rescans it adds its length again; on the path without body_start the
-  * text is REJECTed after the advance.  This looks like the end boundary
-  * being counted twice -- confirm.
-  */
- lstate->current_pos += yyleng;
- /* dprintf2("YYTEXT == lstate->end_boundary: '%s'\n", yytext); */
- if (lstate->body_start) {
- yylval_param->position.opaque_start =
- lstate->body_opaque_start;
- yylval_param->position.start = lstate->body_start;
- /* The body ends where the boundary line begins, hence minus yyleng. */
- yylval_param->position.end = lstate->current_pos - yyleng;
- lstate->body_opaque_start = 0;
- lstate->body_start = 0;
- lstate->body_end = 0;
- /* Push the whole match back so the endboundary condition scans it again. */
- yyless(0);
- BC(endboundary);
- return BODY;
- }
- }
- } else {
- }
-
- /* No end boundary configured, or no body open: let another rule handle the text. */
- REJECT;
-}
-
-<body,preamble>^\-\-{TSPECIAL}+ {
- struct lexer_state *lstate = yyget_extra(yyscanner);
- /**
- * Make sure we only catch matching boundaries, and not other lines
- * that begin with two dashes.
- */
- if (lstate->boundary_string != NULL) {
- if (strcmp(lstate->boundary_string, yytext)) {
- /* dprintf2("YYTEXT != boundary: '%s'\n", yytext);*/
- REJECT;
- } else {
- /* dprintf2("YYTEXT == boundary: '%s'\n", yytext);*/
- if (lstate->body_start) {
- /*
-  * A body is open: report its extent first.  current_pos has not been
-  * advanced past the boundary yet, so it marks the end of the body.
-  * The match is pushed back with yyless(0) and rescanned in the
-  * boundary condition.
-  */
- yylval_param->position.opaque_start = lstate->body_opaque_start;
- yylval_param->position.start = lstate->body_start;
- yylval_param->position.end = lstate->current_pos;
- lstate->body_opaque_start = 0;
- lstate->body_start = 0;
- lstate->body_end = 0;
- yyless(0);
- BC(boundary);
- return BODY;
- } else if (lstate->preamble_start) {
- /* A preamble is open: report its extent, then rescan the boundary as above. */
- yylval_param->position.start = lstate->preamble_start;
- yylval_param->position.end = lstate->current_pos;
- lstate->preamble_start = lstate->preamble_end = 0;
- yyless(0);
- BC(boundary);
- return PREAMBLE;
- } else {
- /* Nothing is open: consume the boundary here and return it directly. */
- BC(boundary);
- yylval_param->string = strdup(yytext);
- lstate->current_pos += yyleng;
- return(BOUNDARY);
- }
- }
- } else {
- }
-
- /* No boundary configured: let another rule handle the text. */
- REJECT;
-}
-
-<body>(\r\n|\n) {
- /* Line break inside a body: counted, no token returned. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
- lstate->lineno++;
-}
-
-<body>\r {
- /* A carriage return that is not part of a CRLF pair: only the position is advanced. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
- /* dprintf2("stray CR in body...\n"); */
-}
-
-<body>[^\r\n]+ {
- /* Body text up to the next CR or LF: only the position is advanced, no token returned. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
-}
-
-<body><<EOF>> {
- /*
-  * End of input while scanning a body.
-  * - No boundary configured and a body open: report the body, which runs
-  *   from body_start to the current position.
-  * - Boundary configured and a body open: return POSTAMBLE.
-  * - Otherwise: terminate the scanner.
-  * NOTE(review): the POSTAMBLE branch neither fills yylval nor clears
-  * body_start, so a further call at EOF would return POSTAMBLE again --
-  * confirm that the parser stops after this token.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- if (lstate->boundary_string == NULL && lstate->body_start) {
- yylval_param->position.opaque_start = 0;
- yylval_param->position.start = lstate->body_start;
- yylval_param->position.end = lstate->current_pos;
- lstate->body_start = 0;
- return BODY;
- } else if (lstate->body_start) {
- return POSTAMBLE;
- }
- yyterminate();
-}
-
-<preamble,postamble>(\r\n|\n) {
- /* Line break in the preamble or postamble: counted, no token returned. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- /* dprintf2("Preamble CR/LF at line %d\n", lineno); */
- lstate->lineno++;
- lstate->current_pos += yyleng;
-}
-
-<boundary>[^\r\n]+ {
- /*
-  * In the boundary condition: everything up to the end of the line is
-  * returned as BOUNDARY, with a strdup() copy of the text.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- yylval_param->string = strdup(yytext);
- lstate->current_pos += yyleng;
- return BOUNDARY;
-}
-
-<endboundary>[^\r\n]+ {
- /*
-  * In the endboundary condition: everything up to the end of the line is
-  * returned as ENDBOUNDARY, with a strdup() copy of the text.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- yylval_param->string = strdup(yytext);
- lstate->current_pos += yyleng;
- return ENDBOUNDARY;
-}
-
-<boundary>(\r\n|\n) {
- /*
-  * Line break after a boundary line: the headers of the next part follow.
-  * body_opaque_start records the position right after this line break,
-  * i.e. where that part begins, headers included.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- BC(headers);
- lstate->lineno++;
- lstate->current_pos += yyleng;
- lstate->body_opaque_start = lstate->current_pos;
- return EOL;
-}
-
-<endboundary>(\r\n|\n) {
- /* Line break after the end boundary: switch to the postamble; no token is returned. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- BC(postamble);
- lstate->lineno++;
- lstate->current_pos += yyleng;
-}
-
-<preamble>. {
- /* Any other preamble character: only the position is advanced, no token returned. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
-}
-
-
-<postamble>. {
- /* Any other postamble character: only the position is advanced, no token returned. */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
-}
-
-(\r\n|\n) {
- /*
-  * Fallback without a start condition: a line break that no more specific
-  * rule claimed is counted and returned as EOL.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->lineno++;
- lstate->current_pos += yyleng;
- return EOL;
-}
-
-. {
- /*
-  * Fallback without a start condition: any other single character is
-  * returned to the parser as its own character code.
-  */
- struct lexer_state *lstate = yyget_extra(yyscanner);
- lstate->current_pos += yyleng;
- return((int)*yytext);
-}
-
-
-%%
-
-/**
- * Attaches the lexer state embedded in pstate to the scanner as its extra
- * data and puts both the scanner and that state back to their start values.
- *
- * @param yyscanner reentrant scanner handle
- * @param pstate    parser state whose lstate member is (re)initialised
- */
-void reset_lexer_state(void *yyscanner, struct parser_state *pstate)
-{
-    /* flex' BEGIN() macro operates on a local named yyg. */
-    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
-    struct lexer_state *state = &pstate->lstate;
-
-    yyset_extra((void*)state, yyscanner);
-    BEGIN(0);
-
-    /* Scanner bookkeeping: start condition 0, an ordinary header, inside the envelope. */
-    state->condition = 0;
-    state->header_state = STATE_MAIL;
-    state->is_envelope = 1;
-
-    /* Line counting starts at 0, byte positions start at 1. */
-    state->lineno = 0;
-    state->current_pos = 1;
-
-    state->message_len = 0;
-    state->buffer_length = 0;
-
-    /* Temporary markers for the body, preamble and postamble extents. */
-    state->body_opaque_start = 0;
-    state->body_start = 0;
-    state->body_end = 0;
-    state->preamble_start = 0;
-    state->preamble_end = 0;
-    state->postamble_start = 0;
-    state->postamble_end = 0;
-}
-
-/**
- * Makes the scanner read from an in-memory, NUL-terminated message.
- *
- * The caller's pointer is remembered in the lexer state's message_buffer
- * member, and the string is handed to yy_scan_string() as scanner input.
- *
- * @param string  NUL-terminated message text
- * @param scanner reentrant scanner handle whose extra data is the lexer state
- */
-void
-PARSER_setbuffer(const char *string, yyscan_t scanner)
-{
-    struct lexer_state *state;
-
-    state = yyget_extra(scanner);
-    state->message_buffer = string;
-
-    yy_scan_string(string, scanner);
-}
-
-/**
- * Makes the scanner read its input from fp.
- *
- * The stream is stored directly in the scanner's yyin_r member instead of
- * going through yyset_in(); the comment below gives the original reason.
- *
- * @param fp      input stream to scan
- * @param scanner reentrant scanner handle
- */
-void
-PARSER_setfp(FILE *fp, yyscan_t scanner)
-{
- /* looks like a bug in bison 2.2a -- the wrong code is generated for yyset_in !! */
- /* NOTE(review): yyset_in() belongs to the generated scanner, so the tool meant above is presumably the lexer generator -- confirm. */
- struct yyguts_t * yyg = (struct yyguts_t*) scanner;
- yyg->yyin_r = fp;
-
- /* Never executed: the call only exists so that yyunput() is referenced somewhere. */
- if (0) {
- /* This is just to make a compiler warning go away */
- yyunput(0, NULL, scanner);
- }
-}
-
-/**
- * Tells how many line breaks a piece of message text contains, for example
- * a folded header value or a message body.
- *
- * @param txt NUL-terminated string to inspect; must not be NULL
- * @return the number of '\n' characters in txt
- */
-int
-count_lines(char *txt)
-{
-    int newlines = 0;
-    char *hit;
-
-    /* Hop from one newline to the next instead of testing every byte. */
-    for (hit = strchr(txt, '\n'); hit != NULL; hit = strchr(hit + 1, '\n'))
-        newlines++;
-
-    return newlines;
-}