the.earth.li Git - onak.git/commitdiff
Improve HTML escaping
author Jonathan McDowell <noodles@earth.li>
Fri, 22 Sep 2023 18:19:00 +0000 (19:19 +0100)
committer Jonathan McDowell <noodles@earth.li>
Fri, 22 Sep 2023 18:30:16 +0000 (19:30 +0100)
Our existing HTML escaping was very primitive, looking only for a single
< and > around an email address. Extend this to cover multiple instances
of <, >, &, ", ' which should cover all the important bits. Also move to
using a caller-provided buffer rather than a static buffer.

keyindex.c
keyindex.h
stats.c

index fbd32c710959ebc0ce6c30fa8543e2d92bf9d3a1..3ce87359311a3f71d257875d5d6f69d3c6944954 100644 (file)
@@ -74,51 +74,82 @@ char pkalgo2char(uint8_t algo)
 }
 
 /**
- *     txt2html - Takes a string and converts it to HTML.
- *     @string: The string to HTMLize.
+ *     html_escape - Takes a string and converts it to HTML.
+ *     @src: The string to HTMLize.
+ *     @src_len: The length of the source string
+ *     @dst: A buffer to put the escaped string into
+ *     @dst_len: Length of the destination buffer (including a trailing NULL)
  *
- *     Takes a string and escapes any HTML entities.
+ *     Takes a string and escapes any HTML entities (<, >, &, ", '). Returns
+ *     dst.
  */
-const char *txt2html(const char *string)
+const char *html_escape(const char *src, size_t src_len,
+               char *dst, size_t dst_len)
 {
-       static char buf[1024];
-       char *ptr = NULL;
-       char *nextptr = NULL;
+       size_t in_pos, out_pos;
 
-       if (strlen(string) > 1000) {
-               return string;
-       }
-
-       memset(buf, 0, 1024);
-
-       ptr = strchr(string, '<');
-       if (ptr != NULL) {
-               nextptr = ptr + 1;
-               *ptr = 0;
-               strncpy(buf, string, 1023);
-               strncat(buf, "&lt;", 1023 - strlen(buf));
-               string = nextptr;
-       }
+       dst_len--;
 
-       ptr = strchr(string, '>');
-       if (ptr != NULL) {
-               nextptr = ptr + 1;
-               *ptr = 0;
-               strncat(buf, string, 1023 - strlen(buf));
-               strncat(buf, "&gt;", 1023 - strlen(buf));
-               string = nextptr;
+       for (in_pos = 0, out_pos = 0;
+                       in_pos < src_len && out_pos < (dst_len - 1);
+                       in_pos++, out_pos++) {
+               switch (src[in_pos]) {
+               case '<':
+                       if ((out_pos + 4) >= dst_len) {
+                               break;
+                       }
+                       dst[out_pos++] = '&';
+                       dst[out_pos++] = 'l';
+                       dst[out_pos++] = 't';
+                       dst[out_pos] = ';';
+                       break;
+               case '>':
+                       if ((out_pos + 4) >= dst_len) {
+                               break;
+                       }
+                       dst[out_pos++] = '&';
+                       dst[out_pos++] = 'g';
+                       dst[out_pos++] = 't';
+                       dst[out_pos] = ';';
+                       break;
+               case '"':
+                       if ((out_pos + 6) >= dst_len) {
+                               break;
+                       }
+                       dst[out_pos++] = '&';
+                       dst[out_pos++] = 'q';
+                       dst[out_pos++] = 'u';
+                       dst[out_pos++] = 'o';
+                       dst[out_pos++] = 't';
+                       dst[out_pos] = ';';
+                       break;
+               case '\'':
+                       if ((out_pos + 5) >= dst_len) {
+                               break;
+                       }
+                       dst[out_pos++] = '&';
+                       dst[out_pos++] = '#';
+                       dst[out_pos++] = '3';
+                       dst[out_pos++] = '9';
+                       dst[out_pos] = ';';
+                       break;
+               case '&':
+                       if ((out_pos + 5) >= dst_len) {
+                               break;
+                       }
+                       dst[out_pos++] = '&';
+                       dst[out_pos++] = 'a';
+                       dst[out_pos++] = 'm';
+                       dst[out_pos++] = 'p';
+                       dst[out_pos] = ';';
+                       break;
+               default:
+                       dst[out_pos] = src[in_pos];
+               }
        }
+       dst[out_pos] = 0;
 
-       /*
-        * TODO: We need to while() this really as each entity may appear more
-        * than once. We need to start with & and ; as we replace with those
-        * throughout. Fuck it for the moment though; it's Easter and < & > are
-        * the most common and tend to only appear once.
-        */
-
-       strncat(buf, string, 1023 - strlen(buf));
-
-       return buf;
+       return dst;
 }
 
 /*
@@ -191,6 +222,7 @@ int list_sigs(struct onak_dbctx *dbctx,
        char *uid = NULL;
        uint64_t sigid = 0;
        char *sig = NULL;
+       char buf[1024];
 
        while (sigs != NULL) {
                sigid = sig_keyid(sigs->packet);
@@ -214,7 +246,7 @@ int list_sigs(struct onak_dbctx *dbctx,
                                sigid,
                                sigid,
                                sigid,
-                               txt2html(uid));
+                               html_escape(uid, strlen(uid), buf, sizeof(buf)));
                } else if (html && uid == NULL) {
                        printf("%s         0x%016" PRIX64 "             "
                                "[User id not found]\n",
@@ -250,8 +282,17 @@ int list_uids(struct onak_dbctx *dbctx,
                        snprintf(buf, 1023, "%.*s",
                                (int) uids->packet->length,
                                uids->packet->data);
-                       printf("                                %s\n",
-                               (html) ? txt2html(buf) : buf);
+                       if (html) {
+                               printf("                                %s\n",
+                                       html_escape((char *) uids->packet->data,
+                                               uids->packet->length,
+                                               buf,
+                                               sizeof(buf)));
+                       } else {
+                               printf("                                %.*s\n",
+                                       (int) uids->packet->length,
+                                       uids->packet->data);
+                       }
                } else if (uids->packet->tag == OPENPGP_PACKET_UAT) {
                        printf("                                ");
                        if (html) {
@@ -457,18 +498,22 @@ int key_index(struct onak_dbctx *dbctx,
                curuid = keys->uids;
                if (curuid != NULL &&
                                curuid->packet->tag == OPENPGP_PACKET_UID) {
-                       snprintf(buf, 1023, "%.*s",
-                               (int) curuid->packet->length,
-                               curuid->packet->data);
                        if (html) {
                                printf("<a href=\"lookup?op=vindex&"
-                                       "search=0x%016" PRIX64 "\">",
-                                       keyid);
+                                       "search=0x%016" PRIX64 "\">"
+                                       "%s</a>%s\n",
+                                       keyid,
+                                       html_escape((char *) curuid->packet->data,
+                                               curuid->packet->length,
+                                               buf,
+                                               sizeof(buf)),
+                                       (keys->revoked) ? " *** REVOKED ***" : "");
+                       } else {
+                               printf("%.*s%s\n",
+                                       (int) curuid->packet->length,
+                                       curuid->packet->data,
+                                       (keys->revoked) ? " *** REVOKED ***" : "");
                        }
-                       printf("%s%s%s\n", 
-                               (html) ? txt2html(buf) : buf,
-                               (html) ? "</a>" : "",
-                               (keys->revoked) ? " *** REVOKED ***" : "");
                        if (skshash) {
                                display_skshash(keys, html);
                        }
index c3e78efa17e617cec47299bd6cec57ca6718ad79..961121e973d81d3b527745531eb4d3a22c796cc8 100644 (file)
@@ -49,10 +49,15 @@ int key_index(struct onak_dbctx *dbctx,
 int mrkey_index(struct openpgp_publickey *keys);
 
 /**
- *     txt2html - Takes a string and converts it to HTML.
- *     @string: The string to HTMLize.
+ *     html_escape - Takes a string and converts it to HTML.
+ *     @src: The string to HTMLize.
+ *     @src_len: The length of the source string
+ *     @dst: A buffer to put the escaped string into
+ *     @dst_len: Length of the destination buffer (including a trailing NULL)
  *
- *     Takes a string and escapes any HTML entities.
+ *     Takes a string and escapes any HTML entities (<, >, &, ", '). Returns
+ *     dst.
  */
-const char *txt2html(const char *string);
+const char *html_escape(const char *src, size_t src_len,
+               char *dst, size_t dst_len);
 #endif
diff --git a/stats.c b/stats.c
index 7fa6894d0e576eff7817fb47742ef3ce2d49516f..59b42a574d4315634684d60bb525795d9d8663d1 100644 (file)
--- a/stats.c
+++ b/stats.c
@@ -19,6 +19,7 @@
 #include <inttypes.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include "cleanup.h"
 #include "hash.h"
@@ -139,6 +140,7 @@ void dofindpath(struct onak_dbctx *dbctx,
        int rec;
        int pathnum;
        char *uid;
+       char buf[1024];
 
        /*
         * Make sure the keys we have and want are in the cache.
@@ -212,7 +214,8 @@ void dofindpath(struct onak_dbctx *dbctx,
                                                curkey->keyid,
                                                curkey->keyid,
                                                curkey->keyid,
-                                               txt2html(uid),
+                                               html_escape(uid, strlen(uid),
+                                                       buf, sizeof(buf)),
                                                (curkey->keyid == want) ?
                                                "" : " signs");
                                } else {