From 437cd5d183be6c7af99f99022908c1e7fee3a99b Mon Sep 17 00:00:00 2001
From: Bron Gondwana <brong@fastmail.fm>
Date: Mon, 23 Mar 2009 14:25:14 +1100
Subject: [PATCH] Pass a pre-utf-8-encoded body to sieve for tests

---
 imap/message.c |  134 ++++++++++++++++++++++++++++++++++----------------------
 imap/message.h |    4 +-
 2 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/imap/message.c b/imap/message.c
index 9e9d208..811125d 100644
--- a/imap/message.c
+++ b/imap/message.c
@@ -140,6 +140,11 @@ struct body {
      */
     struct ibuf cacheheaders;
 
+    /*
+     * Body content converted to UTF-8 by charset_to_utf8().  Filled in on first use.
+     */
+    char *decoded_body;
+
     /* Message GUID. Only filled in at top level */
     struct message_guid guid;
 };
@@ -177,6 +182,7 @@ static int message_parse_headers P((struct msg *msg,
 				    struct boundary *boundaries));
 static void message_parse_address P((char *hdr, struct address **addrp));
 static void message_parse_encoding P((char *hdr, char **hdrp));
+static void message_parse_charset P((struct body *body, int *encoding, int *charset));
 static void message_parse_string P((char *hdr, char **hdrp));
 static void message_parse_header P((char *hdr, struct ibuf *ibuf));
 static void message_parse_type P((char *hdr, struct body *body));
@@ -477,13 +483,19 @@ static void message_find_part(struct body *body, const char *section,
 	    fatal("body part exceeds size of message file", EC_OSFILE);
 	}
 
+	if (!body->decoded_body) {
+	    int encoding, charset;
+	    message_parse_charset(body, &encoding, &charset);
+	    body->decoded_body = charset_to_utf8(
+		msg_base + body->content_offset, body->content_size,
+		charset, encoding); /* returns a cstring */
+	}
+
 	/* grow the array and add the new part */
 	*parts = xrealloc(*parts, (*n+2)*sizeof(struct bodypart *));
 	(*parts)[*n] = xmalloc(sizeof(struct bodypart));
 	strlcpy((*parts)[*n]->section, section, sizeof((*parts)[*n]->section));
-	(*parts)[*n]->content = msg_base + body->content_offset;
-	(*parts)[*n]->encoding = body->encoding;
-	(*parts)[*n]->size = body->content_size;
+	(*parts)[*n]->decoded_body = body->decoded_body;
 	(*parts)[++(*n)] = NULL;
     }
     else if (!strcmp(body->type, "MULTIPART")) {
@@ -952,7 +964,68 @@ char **hdrp;
 	if (Uislower(*p)) *p = toupper((int) *p);
     }
 }
-	
+
+/* Work out the content transfer encoding and charset of 'body'.
+ * Results are stored through e_ptr and c_ptr; a NULL pointer skips that result.
+ * Non-TEXT parts get charset -1, except non-RFC822 MESSAGE parts, which get 0. */
+static void
+message_parse_charset(struct body *body, int *e_ptr, int *c_ptr)
+{
+    int encoding = ENCODING_NONE;	/* kept when body->encoding is NULL */
+    int charset = 0;		/* Default is us-ascii */
+    struct param *param;
+
+    if (body->encoding) {
+	switch (body->encoding[0]) {
+	case '7':
+	case '8':	/* accepts "7BIT" and "8BIT" */
+	    if (!strcmp(body->encoding+1, "BIT")) 
+		encoding = ENCODING_NONE;
+	    else 
+		encoding = ENCODING_UNKNOWN;
+	    break;
+
+	case 'B':
+	    if (!strcmp(body->encoding, "BASE64")) 
+		encoding = ENCODING_BASE64;
+	    else if (!strcmp(body->encoding, "BINARY"))
+		encoding = ENCODING_NONE;
+	    else 
+		encoding = ENCODING_UNKNOWN;
+	    break;
+
+	case 'Q':
+	    if (!strcmp(body->encoding, "QUOTED-PRINTABLE"))
+		encoding = ENCODING_QP;
+	    else 
+		encoding = ENCODING_UNKNOWN;
+	    break;
+
+	default:
+	    encoding = ENCODING_UNKNOWN;
+	}
+    }
+
+    if (!body->type || !strcmp(body->type, "TEXT")) {	/* a missing type is handled like TEXT */
+	for (param = body->params; param; param = param->next) {
+	    if (!strcasecmp(param->attribute, "charset")) {
+		charset = charset_lookupname(param->value);
+		break;	/* the first charset parameter wins */
+	    }
+	}
+    }
+    else if (!strcmp(body->type, "MESSAGE")) {
+	if (!strcmp(body->subtype, "RFC822"))
+	    charset = -1;
+	encoding = ENCODING_NONE;	/* overrides whatever was parsed from body->encoding above */
+    }
+    else
+	charset = -1;
+
+    if (e_ptr) *e_ptr = encoding;
+    if (c_ptr) *c_ptr = charset;
+}
+
+
 /*
  * Parse an uninterpreted header
  */
@@ -2547,56 +2620,10 @@ struct ibuf *ibuf;
 struct body *body;
 {
     int encoding, charset;
-    struct param *param;
-
-    if (!body->encoding) encoding = ENCODING_NONE;
-    else {
-	switch (body->encoding[0]) {
-	case '7':
-	case '8':
-	    if (!strcmp(body->encoding+1, "BIT")) encoding = ENCODING_NONE;
-	    else encoding = ENCODING_UNKNOWN;
-	    break;
 
-	case 'B':
-	    if (!strcmp(body->encoding, "BASE64")) encoding = ENCODING_BASE64;
-	    else if (!strcmp(body->encoding, "BINARY"))
-	      encoding = ENCODING_NONE;
-	    else encoding = ENCODING_UNKNOWN;
-	    break;
-
-	case 'Q':
-	    if (!strcmp(body->encoding, "QUOTED-PRINTABLE"))
-	      encoding = ENCODING_QP;
-	    else encoding = ENCODING_UNKNOWN;
-	    break;
+    message_parse_charset(body, &encoding, &charset);
 
-	default:
-	    encoding = ENCODING_UNKNOWN;
-	}
-    }
-	
-    if (!body->type || !strcmp(body->type, "TEXT")) {
-	charset = 0;		/* Default is us-ascii */
-	for (param = body->params; param; param = param->next) {
-	    if (!strcasecmp(param->attribute, "charset")) {
-		charset = charset_lookupname(param->value);
-		break;
-	    }
-	}
-	message_write_bit32(ibuf, (charset<<16)|encoding);
-    }
-    else if (!strcmp(body->type, "MESSAGE")) {
-	if (!strcmp(body->subtype, "RFC822")) {
-	    message_write_bit32(ibuf, (-1<<16)|ENCODING_NONE);
-	}
-	else {
-	    message_write_bit32(ibuf, (0<<16)|ENCODING_NONE);
-	}
-    }
-    else {
-	message_write_bit32(ibuf, (-1<<16)|encoding);
-    }
+    message_write_bit32(ibuf, (charset<<16)|encoding);
 }
 
 /*
@@ -2814,7 +2841,10 @@ struct body *body;
 	}
 	free(body->subpart);
     }
+
     if (body->cacheheaders.start) {
 	message_ibuf_free(&body->cacheheaders);
     }
+
+    if (body->decoded_body) free(body->decoded_body);
 }
diff --git a/imap/message.h b/imap/message.h
index a08de12..ed14571 100644
--- a/imap/message.h
+++ b/imap/message.h
@@ -82,9 +82,7 @@ struct message_content {
 /* MUST keep this struct sync'd with sieve_bodypart in sieve_interface.h */
 struct bodypart {
     char section[128];
-    const char *content;
-    const char *encoding;
-    unsigned long size;
+    const char *decoded_body;	/* shares body->decoded_body; NOTE(review): this patch does not touch sieve_bodypart in sieve_interface.h - confirm it is changed to match */
 };
 
 extern int message_parse_binary_file P((FILE *infile, struct body **body));
-- 
1.5.6.5