aboutsummaryrefslogtreecommitdiffstats
path: root/main/minimime/mimeparser.l
diff options
context:
space:
mode:
authorrussell <russell@f38db490-d61c-443f-a65b-d21fe96a405b>2007-04-06 21:16:38 +0000
committerrussell <russell@f38db490-d61c-443f-a65b-d21fe96a405b>2007-04-06 21:16:38 +0000
commitfe453b5ef2007fde913e3ebd31e401f0cdb658eb (patch)
treed1a8725b9d1a7d8508205ad650f4a6ed1de11339 /main/minimime/mimeparser.l
parentfb1d21c4f24ce15bb1a102b6a650fea83e77ecd5 (diff)
Merged revisions 60603 via svnmerge from
https://origsvn.digium.com/svn/asterisk/branches/1.4 ........ r60603 | russell | 2007-04-06 15:58:43 -0500 (Fri, 06 Apr 2007) | 13 lines To be able to achieve the things that we would like to achieve with the Asterisk GUI project, we need a fully functional HTTP interface with access to the Asterisk manager interface. One of the things that was intended to be a part of this system, but was never actually implemented, was the ability for the GUI to be able to upload files to Asterisk. So, this commit adds this in the most minimally invasive way that we could come up with. A lot of work on minimime was done by Steve Murphy. He fixed a lot of bugs in the parser, and updated it to be thread-safe. The ability to check permissions of active manager sessions was added by Dwayne Hubbard. Then, hacking this all together and doing the modifications necessary to the HTTP interface was done by me. ........ git-svn-id: http://svn.digium.com/svn/asterisk/trunk@60604 f38db490-d61c-443f-a65b-d21fe96a405b
Diffstat (limited to 'main/minimime/mimeparser.l')
-rw-r--r--main/minimime/mimeparser.l483
1 files changed, 483 insertions, 0 deletions
diff --git a/main/minimime/mimeparser.l b/main/minimime/mimeparser.l
new file mode 100644
index 000000000..d45295f13
--- /dev/null
+++ b/main/minimime/mimeparser.l
@@ -0,0 +1,483 @@
+%{
+/*
+ * Copyright (c) 2004 Jann Fischer. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/**
+ * This is a lexer file for parsing MIME compatible messages. It is intended
+ * to satisfy at least RFC 2045 (Format of Internet Message Bodies). It still
+ * has quite a few problems:
+ *
+ * - The parsing could probably be done in a more elegant way
+ * - I don't know what performance impact REJECT has on the parser
+ */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+#include "mimeparser.h"
+#include "mimeparser.tab.h"
+
+#define NAMEOF(v) #v
+/* BC() wraps lex' BEGIN() and also records the new start condition in the lexer state; invoke as "BC(x);" */
+#define BC(x) do { \
+    struct lexer_state *bc_state = yyget_extra(yyscanner); \
+    BEGIN(x); \
+    bc_state->condition = (x); \
+} while(0)
+
+#define ZERO(x) memset(x, '\0', sizeof(x))
+
+#define PREALLOC_BUFFER 100000
+#undef YY_BUF_SIZE
+#define YY_BUF_SIZE 65536
+
+enum header_states /* which header is currently being lexed (kept in lexer_state.header_state) */
+{
+ STATE_MAIL = 0, /* MIME-Version and every header not listed below */
+ STATE_CTYPE, /* Content-Type */
+ STATE_CDISP, /* Content-Disposition */
+ STATE_CENC, /* Content-Transfer-Encoding */
+ STATE_MIME /* never assigned anywhere in this file */
+};
+
+
+
+%}
+
+%option reentrant
+%option yylineno
+%option bison-bridge
+
+%s headers
+%s header
+%s headervalue
+%s tspecialvalue
+%s comment
+%s body
+%s postamble
+%s preamble
+%s boundary
+%s endboundary
+%s endoffile
+
+STRING [a-zA-Z0-9\-\.\_]
+TSPECIAL [a-zA-Z0-9)(<>@,;:/\-.=_\+'? ]
+TSPECIAL_LITE [a-zA-Z0-9)(<>@,\-._+'?\[\]]
+
+%%
+
+<INITIAL,headers>^[a-zA-Z]+[a-zA-Z0-9\-\_]* {
+    /* A header field name at the start of a line. Its text becomes the
+     * token value and the scanner moves on to the "header" condition; the
+     * token type and header_state depend on which header was recognised.
+     */
+    struct lexer_state *lstate = yyget_extra(yyscanner);
+    yylval_param->string = strdup(yytext);
+    lstate->current_pos += yyleng;
+    BC(header);
+
+    if (strcasecmp(yytext, "Content-Type") == 0) {
+        lstate->header_state = STATE_CTYPE;
+        return CONTENTTYPE_HEADER;
+    }
+    if (strcasecmp(yytext, "Content-Transfer-Encoding") == 0) {
+        lstate->header_state = STATE_CENC;
+        return CONTENTENCODING_HEADER;
+    }
+    if (strcasecmp(yytext, "Content-Disposition") == 0) {
+        lstate->header_state = STATE_CDISP;
+        return CONTENTDISPOSITION_HEADER;
+    }
+    /* MIME-Version and all remaining headers share STATE_MAIL. */
+    lstate->header_state = STATE_MAIL;
+    return strcasecmp(yytext, "MIME-Version") == 0 ? MIMEVERSION_HEADER : MAIL_HEADER;
+}
+
+<INITIAL,headers>. {
+    /* Character that cannot start a header name: count it, report ANY. */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    return ANY;
+}
+
+<headers>^(\r\n|\n) {
+    /* An empty line terminates the header section. While the envelope is
+     * still being read (is_envelope set) and a boundary string is known,
+     * the preamble comes next; in every other case a body does. The start
+     * offset of whichever follows is the position just past this line.
+     */
+    struct lexer_state *lstate = yyget_extra(yyscanner);
+
+    lstate->lineno++;
+    lstate->current_pos += yyleng;
+
+    if (lstate->is_envelope != 0 && lstate->boundary_string != NULL) {
+        lstate->is_envelope = 0;
+        lstate->preamble_start = lstate->current_pos;
+        BC(preamble);
+    } else {
+        BC(body);
+        lstate->body_start = lstate->current_pos;
+    }
+    return ENDOFHEADERS;
+}
+
+<header>\: { /* colon after a header name: switch to lexing its value */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    BC(headervalue);
+    return COLON;
+}
+
+<header>(\r\n|\n) {
+    /* Line break where a colon was expected: give up on this header line. */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    BC(headers);
+    return EOL;
+}
+
+<headervalue>(\n|\r\n)[\ \t]+ { /* folded line: swallow the break and the indentation, no token */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+}
+
+<headervalue>.+|(.+(\n|\r\n)[\ \t]+.+)+ {
+    /* Whole value (possibly folded over several lines) of a header in
+     * STATE_MAIL or STATE_CENC, returned as one WORD without its leading
+     * whitespace. For any other header_state the match is rejected. */
+    struct lexer_state *lstate = yyget_extra(yyscanner);
+    char *value = yytext;
+    if (lstate->header_state != STATE_MAIL && lstate->header_state != STATE_CENC) {
+        REJECT;
+    }
+    lstate->current_pos += yyleng;
+    /* ctype functions need an unsigned char value; walk a copy, not yytext */
+    while (isspace((unsigned char)*value)) value++;
+    yylval_param->string = strdup(value); /* "" if nothing is left */
+    lstate->lineno += count_lines(value);
+    return WORD;
+}
+
+<headervalue,tspecialvalue>(\r\n|\n) {
+    /* A line break that is not a fold finishes the current header line. */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    ls->lineno++;
+    BC(headers);
+    return EOL;
+}
+
+<headervalue>;|;(\r\n|\n)[\ \t]+ { /* semicolon, optionally followed by a folded line break */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    ls->lineno += count_lines(yytext);
+    return SEMICOLON;
+}
+
+<headervalue>\= { /* a literal equals sign inside a header value */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    return EQUAL;
+}
+
+<headervalue>\" { /* opening quote: the quoted text is lexed in tspecialvalue */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    BC(tspecialvalue);
+    return *yytext;
+}
+
+<headervalue>{STRING}+|{TSPECIAL_LITE}+ { /* unquoted word; the token value is a copy of the text */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    ls->lineno += count_lines(yytext);
+    yylval_param->string = strdup(yytext);
+    return WORD;
+}
+
+<headervalue>[\ |\t]+ { /* skip blanks, no token. NOTE(review): the class also holds a literal | character, so that is skipped too -- probably meant [\ \t]; confirm */
+ struct lexer_state *lstate = yyget_extra(yyscanner);
+ lstate->current_pos += yyleng;
+}
+
+<tspecialvalue>{TSPECIAL}+ { /* text between double quotes, returned as one TSPECIAL token */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    ls->lineno += count_lines(yytext);
+    yylval_param->string = strdup(yytext);
+    return TSPECIAL;
+}
+
+<tspecialvalue>\" { /* closing quote: back to ordinary header value lexing */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    BC(headervalue);
+    return *yytext;
+}
+
+<body>^\-\-{TSPECIAL}+\-\- {
+ struct lexer_state *lstate = yyget_extra(yyscanner);
+ /**
+ * Make sure we only catch matching boundaries, and not other lines
+ * that begin and end with two dashes. If we have caught a valid
+ * end boundary, which actually ends a body, we save the current
+ * position, put the token back on the input stream and let the
+ * endboundary condition parse the actual token.
+ */
+ if (lstate->endboundary_string != NULL) {
+ if (strcmp(lstate->endboundary_string, yytext)) {
+ /* not the expected end boundary: let the other body rules take the text */
+ REJECT;
+ } else {
+ lstate->current_pos += yyleng;
+ /* NOTE(review): current_pos is advanced here although the token is pushed back below and counted again by the endboundary rule -- looks like double counting; confirm */
+ if (lstate->body_start) {
+ yylval_param->position.opaque_start =
+ lstate->body_opaque_start;
+ yylval_param->position.start = lstate->body_start;
+ yylval_param->position.end = lstate->current_pos - yyleng; /* position before this match */
+ lstate->body_opaque_start = 0;
+ lstate->body_start = 0;
+ lstate->body_end = 0;
+ yyless(0); /* hand the whole match back to the input */
+ BC(endboundary);
+ return BODY;
+ }
+ }
+ } else {
+ }
+ /* no end boundary string set, or no body pending: fall back to another rule */
+ REJECT;
+}
+
+<body,preamble>^\-\-{TSPECIAL}+ {
+ struct lexer_state *lstate = yyget_extra(yyscanner);
+ /**
+ * Make sure we only catch matching boundaries, and not other lines
+ * that begin with two dashes. A pending body or preamble is reported
+ * first; the boundary itself is then lexed in the boundary condition.
+ */
+ if (lstate->boundary_string != NULL) {
+ if (strcmp(lstate->boundary_string, yytext)) {
+ /* not the expected boundary: let the other rules take the text */
+ REJECT;
+ } else {
+ /* exact match with boundary_string */
+ if (lstate->body_start) {
+ yylval_param->position.opaque_start = lstate->body_opaque_start;
+ yylval_param->position.start = lstate->body_start;
+ yylval_param->position.end = lstate->current_pos; /* current_pos not yet advanced past the match */
+ lstate->body_opaque_start = 0;
+ lstate->body_start = 0;
+ lstate->body_end = 0;
+ yyless(0); /* hand the whole match back to the input */
+ BC(boundary);
+ return BODY;
+ } else if (lstate->preamble_start) {
+ yylval_param->position.start = lstate->preamble_start;
+ yylval_param->position.end = lstate->current_pos;
+ lstate->preamble_start = lstate->preamble_end = 0;
+ yyless(0); /* hand the whole match back to the input */
+ BC(boundary);
+ return PREAMBLE;
+ } else {
+ BC(boundary); /* nothing pending: consume the boundary right here */
+ yylval_param->string = strdup(yytext);
+ lstate->current_pos += yyleng;
+ return(BOUNDARY);
+ }
+ }
+ } else {
+ }
+ /* no boundary string set: fall back to another rule */
+ REJECT;
+}
+
+<body>(\r\n|\n) { /* line break inside a body: only bookkeeping, no token */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->lineno++;
+    ls->current_pos += yyleng;
+}
+
+<body>\r {
+    /* stray CR in body: counted towards the position, otherwise ignored */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+}
+
+<body>[^\r\n]+ { /* body text up to the end of the line: only advances the position */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+}
+
+<body><<EOF>> { /* input ended inside a body; body_start stays set on the POSTAMBLE path */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    if (ls->body_start && ls->boundary_string != NULL)
+        return POSTAMBLE;
+    if (ls->body_start) {
+        yylval_param->position.opaque_start = 0;
+        yylval_param->position.start = ls->body_start;
+        yylval_param->position.end = ls->current_pos;
+        ls->body_start = 0;
+        return BODY;
+    }
+    yyterminate();
+}
+
+<preamble,postamble>(\r\n|\n) {
+    /* line break in preamble or postamble: bookkeeping only, no token */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    ls->lineno++;
+}
+
+<boundary>[^\r\n]+ { /* the boundary line itself; the token value is a copy of its text */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    yylval_param->string = strdup(yytext);
+    return BOUNDARY;
+}
+
+<endboundary>[^\r\n]+ { /* the end boundary line itself; the token value is a copy of its text */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    yylval_param->string = strdup(yytext);
+    return ENDBOUNDARY;
+}
+
+<boundary>(\r\n|\n) { /* end of a boundary line: headers follow; body_opaque_start is the next position */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->lineno++;
+    ls->current_pos += yyleng;
+    ls->body_opaque_start = ls->current_pos;
+    BC(headers);
+    return EOL;
+}
+
+<endboundary>(\r\n|\n) { /* end of the end boundary line: the postamble follows, no token */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->lineno++;
+    ls->current_pos += yyleng;
+    BC(postamble);
+}
+
+<preamble>. { /* any other preamble character: only advances the position */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+}
+
+
+<postamble>. { /* any other postamble character: only advances the position */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+}
+
+(\r\n|\n) { /* fallback: a line break no earlier rule claimed */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    ls->lineno++;
+    return EOL;
+}
+
+. { /* fallback: any other character is returned as its own token */
+    struct lexer_state *ls = yyget_extra(yyscanner);
+    ls->current_pos += yyleng;
+    return (int)*yytext;
+}
+
+
+%%
+
+/* Binds pstate's embedded lexer state to the scanner as its "extra" data and
+ * returns scanner and state to their starting configuration. Fields not
+ * assigned below (e.g. boundary_string, message_buffer) are left as they are. */
+void reset_lexer_state(void *yyscanner, struct parser_state *pstate)
+{
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* not named below; presumably required by BEGIN() */
+    struct lexer_state *ls = &pstate->lstate;
+
+    yyset_extra((void*)ls, yyscanner);
+    BEGIN(0);
+    ls->condition = 0;
+    ls->header_state = STATE_MAIL;
+    ls->is_envelope = 1;
+    ls->lineno = 0;
+    ls->current_pos = 1;
+    ls->message_len = 0;
+    ls->buffer_length = 0;
+    /* temporary marker variables */
+    ls->body_opaque_start = 0;
+    ls->body_start = 0;
+    ls->body_end = 0;
+    ls->preamble_start = 0;
+    ls->preamble_end = 0;
+    ls->postamble_start = 0;
+    ls->postamble_end = 0;
+}
+
+/* Remembers string as the message buffer and hands it to the scanner as input. */
+void PARSER_setbuffer(char *string, yyscan_t scanner)
+{
+    struct lexer_state *state = yyget_extra(scanner);
+    state->message_buffer = string;
+    yy_scan_string(string, scanner);
+}
+
+/* Makes fp the scanner's input stream by storing it in the scanner directly. */
+void PARSER_setfp(FILE *fp, yyscan_t yyscanner)
+{
+    /* looks like a bug in bison 2.2a -- the wrong code is generated for yyset_in !! */
+    struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
+
+    yyg->yyin_r = fp;
+    if (0) {
+        /* This is just to make a compiler warning go away */
+        yyunput(0, NULL, yyscanner);
+    }
+}
+
+/**
+ * Returns the number of '\n' characters in txt, i.e. how many line breaks a
+ * piece of the message (a folded header value, for example) contains.
+ */
+int
+count_lines(char *txt)
+{
+    int breaks = 0;
+    char *nl = txt;
+
+    /* hop from one newline to the next until none is left */
+    while ((nl = strchr(nl, '\n')) != NULL) {
+        breaks++;
+        nl++;
+    }
+
+    return breaks;
+}