Commit 33eaeb61137b671d740d8747f21e7498b9b1bee9

Thomas de Grivel 2024-09-05T20:31:28

httpd: fix html and url escapes

diff --git a/http/http_request.c b/http/http_request.c
index 9a2aed3..ccded27 100644
--- a/http/http_request.c
+++ b/http/http_request.c
@@ -60,6 +60,7 @@ s_tag * http_request_buf_parse (s_tag *req, s_buf *buf)
     err_write_1("\n");
   }
   url_unescape(&url, &tmp_req.url);
+  str_clean(&url);
   if (! buf_read_until_1_into_str(buf, "\r\n", &tmp_req.protocol)) {
     err_puts("http_request_buf_parse: invalid protocol");
     goto restore;
diff --git a/http/url.c b/http/url.c
index 2caf41d..87beb1a 100644
--- a/http/url.c
+++ b/http/url.c
@@ -15,11 +15,14 @@
 
 s_str * url_escape (const s_str *src, s_str *dest)
 {
+  s_buf buf;
+  character c;
   s_str *escapes;
   s_tag escapes_tag;
   s_ident ident;
   s_str s;
   sw size;
+  u8 u;
   ident_init(&ident, sym_1("URL"), sym_1("escapes"));
   if (! ident_get(&ident, &escapes_tag)) {
     err_puts("url_escape: missing URL.escapes");
@@ -40,13 +43,19 @@ s_str * url_escape (const s_str *src, s_str *dest)
     return NULL;
   s = *src;
   while (str_read_character_utf8(&s, &c) > 0) {
+    /*if (c == ' ') { 
+      if (buf_write_u8(&buf, '+') < 0)
+        goto clean;
+    }
+    else*/
     if (str_character_position(escapes, c) >= 0) {
-      if (buf_write_u8(buf, '%') < 0)
+      if (buf_write_u8(&buf, '%') < 0)
         goto clean;
-      if (buf_write_u8_hex(buf, c) < 0)
+      u = c;
+      if (buf_u8_to_hex(&buf, &u) != 2)
         goto clean;
     }
-    else if (buf_write_character_utf8(buf, c) < 0)
+    else if (buf_write_character_utf8(&buf, c) < 0)
       goto clean;
   }
   tag_clean(&escapes_tag);
@@ -62,6 +71,7 @@ s_str * url_escape (const s_str *src, s_str *dest)
 
 sw url_escape_size (const s_str *src)
 {
+  character c;
   s_str *escapes;
   s_tag escapes_tag;
   s_ident ident;
@@ -72,16 +82,19 @@ sw url_escape_size (const s_str *src)
   if (! ident_get(&ident, &escapes_tag)) {
     err_puts("url_escape_size: missing URL.escapes");
     assert(! "url_escape_size: missing URL.escapes");
-    return NULL;
+    return -1;
   }
   if (escapes_tag.type != TAG_STR) {
     err_puts("url_escape_size: URL.escapes is not a Str");
     assert(! "url_escape_size: URL.escapes is not a Str");
-    return NULL;
+    return -1;
   }
   escapes = &escapes_tag.data.str;
   s = *src;
   while ((r = str_read_character_utf8(&s, &c)) > 0) {
+    /*if (c == ' ')
+      result += 1;
+      else*/
     if (str_character_position(escapes, c) >= 0)
       result += 3;
     else
@@ -93,5 +106,66 @@ sw url_escape_size (const s_str *src)
 
 s_str * url_unescape (const s_str *url, s_str *dest)
 { /* Decode url into dest: '+' becomes ' ' and "%XX" becomes one byte; returns NULL when sizing, allocation or a write fails. */
-  
+  character c;
+  u8 digit[2]; /* values of the two hex digits read after a '%' */
+  s_buf in;
+  s_buf out;
+  sw r;
+  sw size;
+  u8 u;
+  assert(url);
+  assert(dest);
+  if ((size = url_unescape_size(url)) < 0) /* decoded size is computed first so out can be allocated once */
+    return NULL;
+  if (! size) /* nothing to decode: produce an empty string without allocating a buffer */
+    return str_init_empty(dest);
+  if (! buf_init_alloc(&out, size))
+    return NULL;
+  buf_init_str_const(&in, url); /* read-only view over the source string */
+  while ((r = buf_read_character_utf8(&in, &c)) > 0) { /* NOTE(review): a negative r also ends the loop and falls through to ok - confirm this is intended */
+    if (c == '+') { /* '+' is decoded as a space */
+      if (buf_write_u8(&out, ' ') < 0)
+        goto clean;
+    }
+    else if (c == '%') {
+      if ((r = buf_parse_digit_hex(&in, &digit[0])) <= 0) /* malformed escape: stop here and keep what was decoded so far */
+        goto ok;
+      if ((r = buf_parse_digit_hex(&in, &digit[1])) <= 0) /* same early exit when the second digit is missing */
+        goto ok;
+      u = digit[0] * 16 + digit[1]; /* high nibble then low nibble */
+      if (buf_write_u8(&out, u) < 0)
+        goto clean;
+    }
+    else
+      if (buf_write_character_utf8(&out, c) < 0) /* any other character is copied unchanged */
+        goto clean;
+  }
+ ok:
+  buf_to_str(&out, dest); /* NOTE(review): return value is not checked; presumably dest takes over out's storage - confirm */
+  return dest;
+ clean:
+  buf_clean(&out); /* failure path: release the output buffer */
+  return NULL;
+}
+
+sw url_unescape_size (const s_str *url)
+{ /* Count the bytes that decoding url produces; url_unescape uses the result to size its output buffer. */
+  character c;
+  u8 digit[2]; /* parsed digit values are discarded, only parse success matters here */
+  s_buf in;
+  sw r;
+  sw result = 0;
+  assert(url);
+  buf_init_str_const(&in, url);
+  while ((r = buf_read_character_utf8(&in, &c)) > 0) {
+    if (c == '%') {
+      if ((r = buf_parse_digit_hex(&in, &digit[0])) <= 0 ||
+          (r = buf_parse_digit_hex(&in, &digit[1])) <= 0)
+        return result; /* malformed escape: counting stops here, matching url_unescape's early exit */
+      result++; /* a complete "%XX" contributes a single byte */
+    }
+    else
+      result += r; /* r is what buf_read_character_utf8 returned - presumably the character's byte length; confirm */
+  }
+  return result;
 }
diff --git a/http/url.h b/http/url.h
index 738351c..f1ae124 100644
--- a/http/url.h
+++ b/http/url.h
@@ -16,6 +16,8 @@
 #include <libkc3/types.h>
 
 s_str * url_escape (const s_str *src, s_str *dest);
+sw      url_escape_size (const s_str *src);
 s_str * url_unescape (const s_str *url, s_str *dest);
+sw      url_unescape_size (const s_str *url);
 
 #endif /* URL_H */
diff --git a/lib/kc3/0.1/httpd.kc3 b/lib/kc3/0.1/httpd.kc3
index 956d696..21c7344 100644
--- a/lib/kc3/0.1/httpd.kc3
+++ b/lib/kc3/0.1/httpd.kc3
@@ -3,6 +3,7 @@ defmodule HTTPd do
   require Event
   require Facts
   require File
+  require HTML
   require HTTP
   require HTTP.Request
   require HTTP.Response
@@ -11,6 +12,7 @@ defmodule HTTPd do
   require Socket
   require Socket.Buf
   require Str
+  require URL
 
   def root_dir = "./static"
 
@@ -110,7 +112,7 @@ defmodule HTTPd do
   <body>
     <h1>404 Not Found</h1>
     <p>
-      The requested URL #{request.url} was not found on this server.
+      The requested URL #{HTML.escape(inspect(request.url))} was not found on this server.
     </p>
   </body>
 </html>
diff --git a/lib/kc3/0.1/kc3.facts b/lib/kc3/0.1/kc3.facts
index 097b164..56a10da 100644
--- a/lib/kc3/0.1/kc3.facts
+++ b/lib/kc3/0.1/kc3.facts
@@ -291,3 +291,5 @@ add {KC3, :symbol, KC3.offsetof}
 replace {KC3.offsetof, :symbol_value, cfn Uw "kc3_offsetof" (Sym, Sym, Result)}
 add {KC3, :symbol, KC3.to_lisp}
 replace {KC3.to_lisp, :symbol_value, cfn Tag "to_lisp" (Tag, Result)}
+add {KC3, :symbol, KC3.inspect}
+replace {KC3.inspect, :symbol_value, cfn Str "inspect_tag" (Tag, Result)}
diff --git a/lib/kc3/0.1/url.kc3 b/lib/kc3/0.1/url.kc3
index 4183912..07e23b5 100644
--- a/lib/kc3/0.1/url.kc3
+++ b/lib/kc3/0.1/url.kc3
@@ -1,5 +1,9 @@
 defmodule URL do
 
-  def escapes = " <>'\""
+  dlopen(__DIR__ + "http.so") # loads the shared object that provides url_escape
+
+  def escape = cfn Str "url_escape" (Str, Result) # binds URL.escape to the C function url_escape
+
+  def escapes = "%#<>'\\\"" # characters url_escape percent-encodes; NOTE(review): space is no longer listed and '+' is not escaped although url_unescape decodes '+' as a space - confirm round-trip behaviour
 
 end