XMPP/JID validation

Copyright 2009 Patrick Georgi  Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact. <?php
/*
 * Sample JIDs fed to the validator by the test loop further down.
 * They cover a bare server, node@server, a doubled "@", a blank inside
 * the node, and resources that themselves contain "/" and "@".
 */
$jids = array(
    'a',
    'a@b',
    'a@@b',
    'a.@b',
    'a @b',
    'a@b/c',
    'a/b',
    'a@b/c/d',
    'a@b/c@d',
    'a@b/c@d/e'
);

/*
 * Single bytes that are rejected in both the node and the resource part.
 *
 * The table labels refer to stringprep, Appendix C. Keep the entries in
 * ascending byte order: code that scans this table may assume it is sorted.
 *
 * The entries are raw bytes, not code points. In UTF-8 input the bytes
 * 0x80-0xA0 also occur as continuation bytes of unrelated characters, so this
 * table is only exact for single-byte (e.g. ISO-8859-1) input.
 */
$forbidden_for_node_and_resource = array_map(
    'chr',
    array_merge(
        // C.2.1 - ASCII control characters
        range(0x00, 0x1f),
        // C.1.1 - ASCII space
        array(0x20),
        // C.2.1 - DEL
        array(0x7f),
        // C.2.2 - non-ASCII control characters
        range(0x80, 0x9f),
        // C.1.2 - non-ASCII space (no-break space)
        array(0xa0)
    )
);

/*
 * Extra single characters rejected in the node part only:
 * " & ' / : < > @  (listed in ascending byte order).
 */
$forbidden_for_node = str_split("\"&'/:<>@");

/**
 * Splits a JID of the form [node@]server[/resource] into its parts.
 *
 * Only the first "/" separates the resource, so a resource may itself
 * contain "/" and "@".
 *
 * @param string $jid JID to split.
 * @return array|false array($node, $server, $resource), where $node and
 *                     $resource are null when that part is absent; false when
 *                     the part before the resource holds more than one "@",
 *                     or when the server or any part that is present is empty.
 */
function jid_split($jid)
{
    $parts = explode("/", (string) $jid, 2);
    // Without a "/" there is no index 1; do not read it blindly.
    $resource = (count($parts) > 1) ? $parts[1] : null;

    $bare = explode("@", $parts[0]);
    if (count($bare) > 2) {
        return false;
    }
    if (count($bare) == 1) {
        $node = null;
        $server = $bare[0];
    } else {
        $node = $bare[0];
        $server = $bare[1];
    }

    // "", "a@", "@b" and "a@b/" are not JIDs: the server is mandatory and
    // a node or resource that is announced by its separator must not be empty.
    if ($server === "" || $node === "" || $resource === "") {
        return false;
    }

    return array($node, $server, $resource);
}

/**
 * Tells whether any byte of $text is listed in $forbidden.
 *
 * @param string|null $text      Text to scan; null or "" never matches.
 * @param array       $forbidden List of one-byte strings (any order).
 * @return bool true when at least one byte of $text is in $forbidden.
 */
function jid_has_forbidden_byte($text, $forbidden)
{
    if ($text === null || $text === "") {
        return false;
    }

    // Hash lookup instead of a sorted merge: no ordering requirement on the
    // table and no way to index past its end.
    $lookup = array_flip($forbidden);
    $length = strlen($text);
    for ($i = 0; $i < $length; $i++) {
        if (isset($lookup[$text[$i]])) {
            return true;
        }
    }

    return false;
}

/**
 * Validates a JID against the byte tables in the globals
 * $forbidden_for_node_and_resource and $forbidden_for_node.
 *
 * The node is checked against both tables, the resource against the shared
 * table only. The server part is only required to be non-empty; its content
 * is not inspected.
 *
 * The check is byte-oriented, so it is only exact for single-byte input; use
 * validatejid2() for UTF-8 text.
 *
 * @param string $jid JID to validate.
 * @return bool true when the JID is well-formed and free of forbidden bytes.
 */
function validatejid($jid)
{
    global $forbidden_for_node_and_resource, $forbidden_for_node;

    $parts = jid_split($jid);
    if ($parts === false) {
        return false;
    }
    $node = $parts[0];
    $resource = $parts[2];

    if (jid_has_forbidden_byte($node, $forbidden_for_node_and_resource)
        || jid_has_forbidden_byte($node, $forbidden_for_node)
        || jid_has_forbidden_byte($resource, $forbidden_for_node_and_resource)) {
        return false;
    }

    return true;
}

/* Print the verdict ("1" for valid, nothing for invalid) followed by the JID. */
foreach ($jids as $jid) {
    print validatejid($jid)." \n".$jid."\n";
}
?>
Same test using preg, and full unicode tables
<?php
/**
 * Validates a UTF-8 encoded JID with PCRE, using the full Unicode tables.
 *
 * The prohibited code points come from stringprep, Appendix C, which is used
 * in its entirety by Nodeprep, Chapter 5, "Prohibited Output". The node is
 * additionally checked for the extra characters Nodeprep forbids. The server
 * part is only required to be non-empty.
 *
 * NOTE(review): the resource is checked against the same tables as the node,
 * including C.1.1 (ASCII space). Resourceprep does not appear to list C.1.1,
 * so this may be stricter than required - kept as in the original; confirm.
 *
 * @param string $jid JID to validate.
 * @return bool true when the JID is well-formed, valid UTF-8 and free of
 *              prohibited code points.
 */
function validatejid2($jid)
{
    $parts = jid_split($jid);
    if ($parts === false) {
        return false;
    }
    $node = $parts[0];
    $resource = $parts[2];

    /* The fragments are single-quoted so that the \x{...} escapes reach PCRE
       untouched; together they form the inside of one character class. */

    /* C1.1 ASCII space characters */
    $chars = '\x{20}';
    /* C1.2 Non-ASCII space characters */
    $chars .= '\x{a0}\x{1680}\x{2000}-\x{200b}\x{202f}\x{205f}\x{3000}';
    /* C2.1 ASCII control characters */
    $chars .= '\x{00}-\x{1f}\x{7f}';
    /* C2.2 Non-ASCII control characters */
    $chars .= '\x{80}-\x{9f}\x{6dd}\x{70f}\x{180e}\x{200c}\x{200d}\x{2028}\x{2029}'
        . '\x{2060}-\x{2063}\x{206a}-\x{206f}\x{feff}\x{fff9}-\x{fffc}\x{1d173}-\x{1d17a}';
    /* C3 - Private Use */
    $chars .= '\x{e000}-\x{f8ff}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}';
    /* C4 - Non-character code points */
    $chars .= '\x{fdd0}-\x{fdef}\x{fffe}\x{ffff}\x{1fffe}\x{1ffff}\x{2fffe}\x{2ffff}'
        . '\x{3fffe}\x{3ffff}\x{4fffe}\x{4ffff}\x{5fffe}\x{5ffff}\x{6fffe}\x{6ffff}'
        . '\x{7fffe}\x{7ffff}\x{8fffe}\x{8ffff}\x{9fffe}\x{9ffff}\x{afffe}\x{affff}'
        . '\x{bfffe}\x{bffff}\x{cfffe}\x{cffff}\x{dfffe}\x{dffff}\x{efffe}\x{effff}'
        . '\x{ffffe}\x{fffff}\x{10fffe}\x{10ffff}';
    /* C5 - Surrogate codes (U+D800-U+DFFF) are deliberately NOT listed:
       PCRE refuses them inside \x{...} in UTF mode, which would make the whole
       pattern fail to compile. They cannot occur in well-formed UTF-8 anyway;
       a subject that contains them makes preg_match() fail, and a failure is
       treated as "invalid" below. */
    /* C6 - Inappropriate for plain text */
    $chars .= '\x{fff9}-\x{fffd}';
    /* C7 - Inappropriate for canonical representation */
    $chars .= '\x{2ff0}-\x{2ffb}';
    /* C8 - Change display properties or are deprecated */
    $chars .= '\x{340}\x{341}\x{200e}\x{200f}\x{202a}-\x{202e}\x{206a}-\x{206f}';
    /* C9 - Tagging characters */
    $chars .= '\x{e0001}\x{e0020}-\x{e007f}';

    /* Nodeprep forbids some more characters: " & ' / : < > @ */
    $nodeprepchars = $chars . '\x{22}\x{26}\x{27}\x{2f}\x{3a}\x{3c}\x{3e}\x{40}';

    /* preg_match() yields 1 on a match, 0 on no match and false on failure
       (for instance a subject that is not valid UTF-8). Only a clean 0 means
       the part is acceptable. */
    if ($node !== null && preg_match('/[' . $nodeprepchars . ']/u', $node) !== 0) {
        return false;
    }
    if ($resource !== null && preg_match('/[' . $chars . ']/u', $resource) !== 0) {
        return false;
    }

    return true;
}