Commit f2821cbd8ed1cfbfd55de102cfa1249d4172e7dd

Martin Mitas 2017-07-14T17:10:45

md_analyze_permissive_email_autolink: Make it compatible with CMark-gfm.

diff --git a/md4c/md4c.c b/md4c/md4c.c
index ce7cd8d..5163b53 100644
--- a/md4c/md4c.c
+++ b/md4c/md4c.c
@@ -3381,45 +3381,28 @@ md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
     MD_MARK* closer;
     OFF beg = opener->beg;
     OFF end = opener->end;
-    int right_dot_count = 0;
+    int dot_count = 0;
 
     MD_ASSERT(CH(beg) == _T('@'));
 
-    /* Accept any alphanumeric sequences delimited with dot before the '@'.
-     * There must be a whitespace or start of line before it. */
-    while(1) {
-        while(beg > 0  &&  ISALNUM(beg-1))
-            beg--;
+    /* Scan for name before '@'. */
+    while(beg > 0  &&  (ISALNUM(beg-1) || ISANYOF(beg-1, _T(".-_+"))))
+        beg--;
 
-        if(beg > 1 && CH(beg-1) == _T('.') && ISALNUM(beg-2))
-            beg -= 2;
-        else if(beg == 0 || ISWHITESPACE(beg-1) || ISNEWLINE(beg-1))
-            break;
-        else
-            return;
+    /* Scan for domain after '@'. */
+    while(end < ctx->size  &&  (ISALNUM(end) || ISANYOF(end, _T(".-_")))) {
+        if(CH(end) == _T('.'))
+            dot_count++;
+        end++;
     }
-
-    /* Accept any alphanumeric sequences delimited with dot after the '@',
-     * limiting the sequences length by 64 characters. */
-    while(1) {
-        OFF label_start = end;
-        while(end + 1 < ctx->size  &&  ISALNUM(end))
-            end++;
-        if(end - label_start > 63)
-            return;
-
-        if(end + 1 < ctx->size && CH(end) == _T('.') && ISALNUM(end+1)) {
-            right_dot_count++;
-            end += 2;
-        } else if(right_dot_count > 0) {
-            /* Although "user@machine" is technically correct e-mail address,
-             * we request at least one dot, as in e.g. "user@machine.com" to
-             * prevent some false positives with this very loose format. */
-            break;
-        } else {
-            return;
-        }
+    if(CH(end-1) == _T('.')) {  /* Final '.' not part of it. */
+        dot_count--;
+        end--;
     }
+    else if(ISANYOF2(end-1, _T('-'), _T('_'))) /* These are forbidden at the end. */
+        return;
+    if(CH(end-1) == _T('@')  ||  dot_count == 0)
+        return;
 
     /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
      * length so all the contents becomes the link text. */
diff --git a/test/permissive-email-autolinks.txt b/test/permissive-email-autolinks.txt
index 7d7f572..12e8786 100644
--- a/test/permissive-email-autolinks.txt
+++ b/test/permissive-email-autolinks.txt
@@ -1,9 +1,10 @@
 
 # Permissive E-mail Autolinks
 
-With the flag `MD_FLAG_PERMISSIVEEMAILAUTOLINKS`, MD4C enables more permissive recognition
-of e-mail addresses and transforms them to autolinks, even if they do not exactly follow
-the syntax of autolink as specified in CommonMark specification.
+With the flag `MD_FLAG_PERMISSIVEEMAILAUTOLINKS`, MD4C enables more permissive
+recognition of e-mail addresses and transforms them to autolinks, even if they
+do not exactly follow the autolink syntax specified in the CommonMark
+specification.
 
 This is standard CommonMark e-mail autolink:
 
@@ -20,3 +21,30 @@ E-mail: john.doe@gmail.com
 .
 <p>E-mail: <a href="mailto:john.doe@gmail.com">john.doe@gmail.com</a></p>
 ````````````````````````````````
+
+`+` can occur before the `@`, but not after.
+
+```````````````````````````````` example
+hello@mail+xyz.example isn't valid, but hello+xyz@mail.example is.
+.
+<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>
+````````````````````````````````
+
+`.`, `-`, and `_` can occur on both sides of the `@`, but only `.` may occur at
+the end of the email address, in which case it will not be considered part of
+the address:
+
+```````````````````````````````` example
+a.b-c_d@a.b
+
+a.b-c_d@a.b.
+
+a.b-c_d@a.b-
+
+a.b-c_d@a.b_
+.
+<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>
+<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>
+<p>a.b-c_d@a.b-</p>
+<p>a.b-c_d@a.b_</p>
+````````````````````````````````