From 79425efea806599b611de4e3b25bdc36516e7522 Mon Sep 17 00:00:00 2001 From: Jonathan McDowell Date: Fri, 22 Sep 2023 19:19:00 +0100 Subject: [PATCH] Improve HTML escaping Our existing HTML escaping was very primitive, looking only for a single < and > around an email address. Extend this to cover multiple instances of <, >, &, ", ' which should cover all the important bits. Also move to using a caller-provided buffer rather than a static buffer. --- keyindex.c | 145 +++++++++++++++++++++++++++++++++++------------------ keyindex.h | 13 +++-- stats.c | 5 +- 3 files changed, 108 insertions(+), 55 deletions(-) diff --git a/keyindex.c b/keyindex.c index fbd32c7..3ce8735 100644 --- a/keyindex.c +++ b/keyindex.c @@ -74,51 +74,82 @@ char pkalgo2char(uint8_t algo) } /** - * txt2html - Takes a string and converts it to HTML. - * @string: The string to HTMLize. + * html_escape - Takes a string and converts it to HTML. + * @src: The string to HTMLize. + * @src_len: The length of the source string + * @dst: A buffer to put the escaped string into + * @dst_len: Length of the destination buffer (including a trailing NULL) * - * Takes a string and escapes any HTML entities. + * Takes a string and escapes any HTML entities (<, >, &, ", '). Returns + * dst. 
*/ -const char *txt2html(const char *string) +const char *html_escape(const char *src, size_t src_len, + char *dst, size_t dst_len) { - static char buf[1024]; - char *ptr = NULL; - char *nextptr = NULL; + size_t in_pos, out_pos; - if (strlen(string) > 1000) { - return string; - } - - memset(buf, 0, 1024); - - ptr = strchr(string, '<'); - if (ptr != NULL) { - nextptr = ptr + 1; - *ptr = 0; - strncpy(buf, string, 1023); - strncat(buf, "<", 1023 - strlen(buf)); - string = nextptr; - } + dst_len--; - ptr = strchr(string, '>'); - if (ptr != NULL) { - nextptr = ptr + 1; - *ptr = 0; - strncat(buf, string, 1023 - strlen(buf)); - strncat(buf, ">", 1023 - strlen(buf)); - string = nextptr; + for (in_pos = 0, out_pos = 0; + in_pos < src_len && out_pos < (dst_len - 1); + in_pos++, out_pos++) { + switch (src[in_pos]) { + case '<': + if ((out_pos + 4) >= dst_len) { + break; + } + dst[out_pos++] = '&'; + dst[out_pos++] = 'l'; + dst[out_pos++] = 't'; + dst[out_pos] = ';'; + break; + case '>': + if ((out_pos + 4) >= dst_len) { + break; + } + dst[out_pos++] = '&'; + dst[out_pos++] = 'g'; + dst[out_pos++] = 't'; + dst[out_pos] = ';'; + break; + case '"': + if ((out_pos + 6) >= dst_len) { + break; + } + dst[out_pos++] = '&'; + dst[out_pos++] = 'q'; + dst[out_pos++] = 'u'; + dst[out_pos++] = 'o'; + dst[out_pos++] = 't'; + dst[out_pos] = ';'; + break; + case '\'': + if ((out_pos + 5) >= dst_len) { + break; + } + dst[out_pos++] = '&'; + dst[out_pos++] = '#'; + dst[out_pos++] = '3'; + dst[out_pos++] = '9'; + dst[out_pos] = ';'; + break; + case '&': + if ((out_pos + 5) >= dst_len) { + break; + } + dst[out_pos++] = '&'; + dst[out_pos++] = 'a'; + dst[out_pos++] = 'm'; + dst[out_pos++] = 'p'; + dst[out_pos] = ';'; + break; + default: + dst[out_pos] = src[in_pos]; + } } + dst[out_pos] = 0; - /* - * TODO: We need to while() this really as each entity may appear more - * than once. We need to start with & and ; as we replace with those - * throughout. 
Fuck it for the moment though; it's Easter and < & > are - * the most common and tend to only appear once. - */ - - strncat(buf, string, 1023 - strlen(buf)); - - return buf; + return dst; } /* @@ -191,6 +222,7 @@ int list_sigs(struct onak_dbctx *dbctx, char *uid = NULL; uint64_t sigid = 0; char *sig = NULL; + char buf[1024]; while (sigs != NULL) { sigid = sig_keyid(sigs->packet); @@ -214,7 +246,7 @@ int list_sigs(struct onak_dbctx *dbctx, sigid, sigid, sigid, - txt2html(uid)); + html_escape(uid, strlen(uid), buf, sizeof(buf))); } else if (html && uid == NULL) { printf("%s 0x%016" PRIX64 " " "[User id not found]\n", @@ -250,8 +282,17 @@ int list_uids(struct onak_dbctx *dbctx, snprintf(buf, 1023, "%.*s", (int) uids->packet->length, uids->packet->data); - printf(" %s\n", - (html) ? txt2html(buf) : buf); + if (html) { + printf(" %s\n", + html_escape((char *) uids->packet->data, + uids->packet->length, + buf, + sizeof(buf))); + } else { + printf(" %.*s\n", + (int) uids->packet->length, + uids->packet->data); + } } else if (uids->packet->tag == OPENPGP_PACKET_UAT) { printf(" "); if (html) { @@ -457,18 +498,22 @@ int key_index(struct onak_dbctx *dbctx, curuid = keys->uids; if (curuid != NULL && curuid->packet->tag == OPENPGP_PACKET_UID) { - snprintf(buf, 1023, "%.*s", - (int) curuid->packet->length, - curuid->packet->data); if (html) { printf("", - keyid); + "search=0x%016" PRIX64 "\">" + "%s%s\n", + keyid, + html_escape((char *) curuid->packet->data, + curuid->packet->length, + buf, + sizeof(buf)), + (keys->revoked) ? " *** REVOKED ***" : ""); + } else { + printf("%.*s%s\n", + (int) curuid->packet->length, + curuid->packet->data, + (keys->revoked) ? " *** REVOKED ***" : ""); } - printf("%s%s%s\n", - (html) ? txt2html(buf) : buf, - (html) ? "" : "", - (keys->revoked) ? 
" *** REVOKED ***" : ""); if (skshash) { display_skshash(keys, html); } diff --git a/keyindex.h b/keyindex.h index c3e78ef..961121e 100644 --- a/keyindex.h +++ b/keyindex.h @@ -49,10 +49,15 @@ int key_index(struct onak_dbctx *dbctx, int mrkey_index(struct openpgp_publickey *keys); /** - * txt2html - Takes a string and converts it to HTML. - * @string: The string to HTMLize. + * html_escape - Takes a string and converts it to HTML. + * @src: The string to HTMLize. + * @src_len: The length of the source string + * @dst: A buffer to put the escaped string into + * @dst_len: Length of the destination buffer (including a trailing NULL) * - * Takes a string and escapes any HTML entities. + * Takes a string and escapes any HTML entities (<, >, &, ", '). Returns + * dst. */ -const char *txt2html(const char *string); +const char *html_escape(const char *src, size_t src_len, + char *dst, size_t dst_len); #endif diff --git a/stats.c b/stats.c index 7fa6894..59b42a5 100644 --- a/stats.c +++ b/stats.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "cleanup.h" #include "hash.h" @@ -139,6 +140,7 @@ void dofindpath(struct onak_dbctx *dbctx, int rec; int pathnum; char *uid; + char buf[1024]; /* * Make sure the keys we have and want are in the cache. @@ -212,7 +214,8 @@ void dofindpath(struct onak_dbctx *dbctx, curkey->keyid, curkey->keyid, curkey->keyid, - txt2html(uid), + html_escape(uid, strlen(uid), + buf, sizeof(buf)), (curkey->keyid == want) ? "" : " signs"); } else { -- 2.39.5