From 321a942995fed6133298c1ca6779cfca9eb0d065 Mon Sep 17 00:00:00 2001 From: Aleksey Veresov Date: Wed, 18 Mar 2020 00:24:15 +0300 Subject: [magi] Multipart parser rewritten. --- src/multipart.c | 166 +++++++++++++++++++++++--------------------------------- 1 file changed, 69 insertions(+), 97 deletions(-) (limited to 'src/multipart.c') diff --git a/src/multipart.c b/src/multipart.c index ef988b1..2284ced 100644 --- a/src/multipart.c +++ b/src/multipart.c @@ -32,18 +32,10 @@ static int is_str_token(char *str) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Automata for multipart/form-data */ -typedef enum st { - st_error = 0, - st_begin, - st_pname_pre, - st_pname, - st_pname_end, - st_pdata, - st_data, - st_end -} st; - -typedef struct automata { +typedef struct automata automata; +typedef void (*state)(automata *a, char c); +struct automata { + state s; magi_request *request; magi_file file; magi_param param; @@ -59,7 +51,7 @@ typedef struct automata { int is_CR_readed; int is_quoted; int readed; -} automata; +}; /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * @@ -232,7 +224,8 @@ static void apply_callback(automata *a) } } -static st data_add_act(automata *a, char c, char **dest, int *len, int *size) +static void state_pname_pre(automata *a, char c); +static void data_add_act(automata *a, char c, char **dest, int *len, int *size) { int pos = a->boundary_pos; for (a->boundary_pos = 0; a->boundary_pos < pos; ++a->boundary_pos) { @@ -249,24 +242,22 @@ static st data_add_act(automata *a, char c, char **dest, int *len, int *size) if (sepget(a) != c) { magi_str_add(dest, len, size, c); apply_callback(a); - return st_data; } else { a->boundary_pos++; if (a->boundary_pos == seplen(a)) { param_end(a); - return st_pname_pre; + a->s = state_pname_pre; } - return st_data; } } -static st data_add(automata *a, char c) +static void data_add(automata *a, char c) { if (a->file.filename) { int max = a->request->callback.addon_max + 1; - return data_add_act(a, c, &a->buf, &a->buf_size, &max); + data_add_act(a, c, &a->buf, &a->buf_size, &max); } else { - return data_add_act(a, c, &a->param.data, &a->len, &a->size); + data_add_act(a, c, &a->param.data, &a->len, &a->size); } } @@ -274,116 +265,113 @@ static st data_add(automata *a, char c) /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * State analysers */ -static st parse_begin(automata *a, char c) +static void state_begin(automata *a, char c) { if (sepget(a) != c) { /* 'c' is not wanted character from separator; */ a->boundary_pos = 0; /* so nullify progress in reading separator. */ } else { a->boundary_pos++; if (a->boundary_pos == seplen(a)) { - return st_pname_pre; /* Separator is completed, so move on. */ + a->s = state_pname_pre; /* Separator is completed, so move on. */ } } - return st_begin; } -static st parse_pname_pre(automata *a, char c) +static void state_data(automata *a, char c); +static void state_pname(automata *a, char c); +static void state_pname_pre(automata *a, char c) { if (a->is_CR_readed) { - if (c != '\n') { - return st_error; + if (c == '\n') { + a->is_CR_readed = 0; + a->boundary_pos = 0; + a->s = state_data; + } else { + a->s = 0; } - a->is_CR_readed = 0; - a->boundary_pos = 0; - return st_data; } else if (c == '\r') { a->is_CR_readed = 1; - return st_pname_pre; - } - if (!is_token(c)) { - return st_error; + } else if (is_token(c)) { + a->s = state_pname; + magi_str_add(&a->subparam.name, &a->len, &a->size, c); + } else { + a->s = 0; } - magi_str_add(&a->subparam.name, &a->len, &a->size, c); - return st_pname; } -static st parse_pname(automata *a, char c) +static void state_pdata(automata *a, char c); +static void state_pname_end(automata *a, char c); +static void state_pname(automata *a, char c) { if (c == ':') { a->len = 0; a->size = 1; - return st_pdata; + a->s = state_pdata; } else if (c == ' ' || c == '\t') { - return st_pname_end; - } - if (!is_token(c)) { - return st_error; + a->s = state_pname_end; + } else if (is_token(c)) { + magi_str_add(&a->subparam.name, &a->len, &a->size, c); + } else { + a->s = 0; } - magi_str_add(&a->subparam.name, &a->len, &a->size, c); - return st_pname; } -static st parse_pname_end(automata *a, char c) +static void state_pname_end(automata *a, char c) { if (c == ':') { a->len = 0; a->size = 1; - return st_pdata; - } else if (c == ' ' || c == '\t') { - return st_pname_end; + a->s = state_pdata; + } else if (c != ' ' && c != '\t') { + a->s = 0; } - return st_error; } -static st parse_pdata(automata *a, char c) +static void state_pdata(automata *a, char c) { if (a->is_CR_readed) { a->is_CR_readed = 0; if (c == '\n') { - if (subparam_end(a)) { - return st_pname_pre; - } - return st_error; + a->s = subparam_end(a) ? state_pname_pre : 0; + } else { + magi_str_add(&a->subparam.data, &a->len, &a->size, '\r'); + magi_str_add(&a->subparam.data, &a->len, &a->size, c); } - magi_str_add(&a->subparam.data, &a->len, &a->size, '\r'); - magi_str_add(&a->subparam.data, &a->len, &a->size, c); - return st_pdata; } else if (c == '\r') { a->is_CR_readed = 1; - return st_pdata; + } else { + magi_str_add(&a->subparam.data, &a->len, &a->size, c); } - magi_str_add(&a->subparam.data, &a->len, &a->size, c); - return st_pdata; } -static st parse_data(automata *a, char c) +static void state_end(automata *a, char c); +static void state_data(automata *a, char c) { if (a->is_end_suspected) { if (endget(a) != c) { - return data_add(a, c); - } - a->boundary_pos++; - if (a->boundary_pos == endlen(a)) { - param_end(a); - return st_end; + data_add(a, c); + } else { + a->boundary_pos++; + if (a->boundary_pos == endlen(a)) { + param_end(a); + a->s = state_end; + } } - return st_data; } else if (sepget(a) == c) { a->boundary_pos++; if (a->boundary_pos == seplen(a)) { param_end(a); - return st_pname_pre; + a->s = state_pname_pre; } - return st_data; } else if ((a->boundary_pos == seplen(a) - 2) && endget(a) == c) { a->is_end_suspected = 1; a->boundary_pos++; - return st_data; + } else { + data_add(a, c); } - return data_add(a, c); } -static st parse_end() { return st_end; } +static void state_end(automata *a, char c) { (void)a; (void)c; } /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * @@ -393,33 +381,19 @@ static void run_automata(automata *a, int (*next)(void *next_userdata), void *next_userdata) { - st state = st_begin; int c = next(next_userdata); int maxr = a->request->limits.params_body; - while (state && state != st_end && c != EOF && - (!maxr || a->readed != maxr)) { - switch (state) { - case st_begin: state = parse_begin(a, c); break; - case st_pname_pre: state = parse_pname_pre(a, c); break; - case st_pname: state = parse_pname(a, c); break; - case st_pname_end: state = parse_pname_end(a, c); break; - case st_pdata: state = parse_pdata(a, c); break; - case st_data: state = parse_data(a, c); break; - case st_end: state = parse_end(); break; - default: break; - } - c = next(next_userdata); + for (a->s = state_begin; + a->s && a->s != state_end && c != EOF && (!maxr || a->readed != maxr); + c = next(next_userdata)) { + a->s(a, c); ++a->readed; } if (maxr && a->readed == maxr) { a->request->error = magi_error_limit; - return; - } - if (state == st_data && is_semiend(a)) { - state = st_end; + } else if (a->s == state_data && is_semiend(a)) { param_end(a); - } - if (state != st_end) { + } else if (a->s != state_end) { a->request->error = magi_error_multipart; free(a->subparam.name); free(a->subparam.data); @@ -436,14 +410,12 @@ void magi_parse_multipart(magi_request *request, void *next_userdata) { automata a = { - 0, { 0, 0, 0 }, { 0, 0 }, { 0, 0 }, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0 + 0, 0, { 0, 0, 0 }, { 0, 0 }, { 0, 0 }, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0 }; a.request = request; a.boundary = boundary; a.boundary_len = strlen(boundary); a.buf = malloc(request->callback.addon_max + 1); - if (a.buf) { - run_automata(&a, next, next_userdata); - free(a.buf); - } + run_automata(&a, next, next_userdata); + free(a.buf); } -- cgit v1.2.3