From 437cd5d183be6c7af99f99022908c1e7fee3a99b Mon Sep 17 00:00:00 2001 From: Bron Gondwana Date: Mon, 23 Mar 2009 14:25:14 +1100 Subject: [PATCH] Pass a pre-utf-8-encoded body to sieve for tests --- imap/message.c | 134 ++++++++++++++++++++++++++++++++++---------------------- imap/message.h | 4 +- 2 files changed, 83 insertions(+), 55 deletions(-) diff --git a/imap/message.c b/imap/message.c index 9e9d208..811125d 100644 --- a/imap/message.c +++ b/imap/message.c @@ -140,6 +140,11 @@ struct body { */ struct ibuf cacheheaders; + /* + * decoded body. Filled in as needed. + */ + char *decoded_body; + /* Message GUID. Only filled in at top level */ struct message_guid guid; }; @@ -177,6 +182,7 @@ static int message_parse_headers P((struct msg *msg, struct boundary *boundaries)); static void message_parse_address P((char *hdr, struct address **addrp)); static void message_parse_encoding P((char *hdr, char **hdrp)); +static void message_parse_charset P((struct body *body, int *encoding, int *charset)); static void message_parse_string P((char *hdr, char **hdrp)); static void message_parse_header P((char *hdr, struct ibuf *ibuf)); static void message_parse_type P((char *hdr, struct body *body)); @@ -477,13 +483,19 @@ static void message_find_part(struct body *body, const char *section, fatal("body part exceeds size of message file", EC_OSFILE); } + if (!body->decoded_body) { + int encoding, charset; + message_parse_charset(body, &encoding, &charset); + body->decoded_body = charset_to_utf8( + msg_base + body->content_offset, body->content_size, + charset, encoding); /* returns a cstring */ + } + /* grow the array and add the new part */ *parts = xrealloc(*parts, (*n+2)*sizeof(struct bodypart *)); (*parts)[*n] = xmalloc(sizeof(struct bodypart)); strlcpy((*parts)[*n]->section, section, sizeof((*parts)[*n]->section)); - (*parts)[*n]->content = msg_base + body->content_offset; - (*parts)[*n]->encoding = body->encoding; - (*parts)[*n]->size = body->content_size; + (*parts)[*n]->decoded_body = body->decoded_body; (*parts)[++(*n)] = NULL; } else if (!strcmp(body->type, "MULTIPART")) { @@ -952,7 +964,68 @@ char **hdrp; if (Uislower(*p)) *p = toupper((int) *p); } } - + +/* + * parse a charset and encoding out of a body structure + */ +static void +message_parse_charset(struct body *body, int *e_ptr, int *c_ptr) +{ + int encoding = ENCODING_NONE; + int charset = 0; + struct param *param; + + if (body->encoding) { + switch (body->encoding[0]) { + case '7': + case '8': + if (!strcmp(body->encoding+1, "BIT")) + encoding = ENCODING_NONE; + else + encoding = ENCODING_UNKNOWN; + break; + + case 'B': + if (!strcmp(body->encoding, "BASE64")) + encoding = ENCODING_BASE64; + else if (!strcmp(body->encoding, "BINARY")) + encoding = ENCODING_NONE; + else + encoding = ENCODING_UNKNOWN; + break; + + case 'Q': + if (!strcmp(body->encoding, "QUOTED-PRINTABLE")) + encoding = ENCODING_QP; + else + encoding = ENCODING_UNKNOWN; + break; + + default: + encoding = ENCODING_UNKNOWN; + } + } + + if (!body->type || !strcmp(body->type, "TEXT")) { + for (param = body->params; param; param = param->next) { + if (!strcasecmp(param->attribute, "charset")) { + charset = charset_lookupname(param->value); + break; + } + } + } + else if (!strcmp(body->type, "MESSAGE")) { + if (!strcmp(body->subtype, "RFC822")) + charset = -1; + encoding = ENCODING_NONE; + } + else + charset = -1; + + if (e_ptr) *e_ptr = encoding; + if (c_ptr) *c_ptr = charset; +} + /* * Parse an uninterpreted header */ @@ -2547,56 +2620,10 @@ struct ibuf *ibuf; struct body *body; { int encoding, charset; - struct param *param; - - if (!body->encoding) encoding = ENCODING_NONE; - else { - switch (body->encoding[0]) { - case '7': - case '8': - if (!strcmp(body->encoding+1, "BIT")) encoding = ENCODING_NONE; - else encoding = ENCODING_UNKNOWN; - break; - case 'B': - if (!strcmp(body->encoding, "BASE64")) encoding = ENCODING_BASE64; - else if (!strcmp(body->encoding, "BINARY")) - encoding = ENCODING_NONE; - else encoding = ENCODING_UNKNOWN; - break; - - case 'Q': - if (!strcmp(body->encoding, "QUOTED-PRINTABLE")) - encoding = ENCODING_QP; - else encoding = ENCODING_UNKNOWN; - break; + message_parse_charset(body, &encoding, &charset); - default: - encoding = ENCODING_UNKNOWN; - } - } - - if (!body->type || !strcmp(body->type, "TEXT")) { - charset = 0; /* Default is us-ascii */ - for (param = body->params; param; param = param->next) { - if (!strcasecmp(param->attribute, "charset")) { - charset = charset_lookupname(param->value); - break; - } - } - message_write_bit32(ibuf, (charset<<16)|encoding); - } - else if (!strcmp(body->type, "MESSAGE")) { - if (!strcmp(body->subtype, "RFC822")) { - message_write_bit32(ibuf, (-1<<16)|ENCODING_NONE); - } - else { - message_write_bit32(ibuf, (0<<16)|ENCODING_NONE); - } - } - else { - message_write_bit32(ibuf, (-1<<16)|encoding); - } + message_write_bit32(ibuf, (charset<<16)|encoding); } /* @@ -2814,7 +2841,10 @@ struct body *body; } free(body->subpart); } + if (body->cacheheaders.start) { message_ibuf_free(&body->cacheheaders); } + + if (body->decoded_body) free(body->decoded_body); } diff --git a/imap/message.h b/imap/message.h index a08de12..ed14571 100644 --- a/imap/message.h +++ b/imap/message.h @@ -82,9 +82,7 @@ struct message_content { /* MUST keep this struct sync'd with sieve_bodypart in sieve_interface.h */ struct bodypart { char section[128]; - const char *content; - const char *encoding; - unsigned long size; + const char *decoded_body; }; extern int message_parse_binary_file P((FILE *infile, struct body **body)); -- 1.5.6.5