XMPP/JID validation
From StatusNet
Copyright 2009 Patrick Georgi <patrick@georgi-clan.de> Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact.
<?php
// Sample JIDs; the loop at the bottom of this script runs each one through
// validatejid() and prints the outcome.
$jids=array(
    // Bare domain, then node@domain.
    "a", "a@b",
    // Node-part variations: doubled "@", trailing dot, embedded space.
    "a@@b", "a.@b", "a @b",
    // With a resource: node@domain/resource and domain/resource.
    "a@b/c", "a/b",
    // Resources that themselves contain "/" and/or "@".
    "a@b/c/d", "a@b/c@d", "a@b/c@d/e"
);
/*
 * Lookup tables of forbidden characters. Every entry is a one-byte string,
 * and both arrays must be sorted (ascending byte order).
 * The "C.x.y" labels name the stringprep tables the entries are taken from.
 */
$forbidden_for_node_and_resource=array_merge(
    array_map('chr', range(0x00, 0x1f)), // C.2.1
    array("\x20"),                       // C.1.1
    array("\x7f"),                       // C.2.1
    array_map('chr', range(0x80, 0x9f)), // C.2.2
    array("\xa0")                        // C.1.2
);
// Additional characters that are forbidden in the node part only:
// " & ' / : < > @
$forbidden_for_node=array_map(
    'chr',
    array(0x22, 0x26, 0x27, 0x2f, 0x3a, 0x3c, 0x3e, 0x40)
);
/**
 * Tell whether a UTF-8 string contains any code point listed in a table.
 *
 * Each table entry is a one-byte string naming a code point in the range
 * U+0000..U+00FF (the byte value is the code point). Entries of 0x80 and
 * above are looked up in their two-byte UTF-8 form, so that continuation
 * bytes of unrelated characters (e.g. the 0x80 in U+00C0 = C3 80) are not
 * mistaken for forbidden control characters.
 *
 * @param string $text      JID part to inspect, expected to be UTF-8
 * @param array  $forbidden list of one-byte strings
 * @return bool true if $text contains a listed code point or is not
 *              well-formed UTF-8, false otherwise
 */
function validatejid_contains_forbidden($text, $forbidden) {
    if ($text === "") {
        return false;
    }
    // An empty pattern with /u matches every well-formed UTF-8 subject;
    // preg_match() returns false instead when the subject is ill-formed.
    if (preg_match('//u', $text) !== 1) {
        return true;
    }
    foreach ($forbidden as $codepoint) {
        if ($codepoint === "") {
            continue;
        }
        $ord = ord($codepoint);
        $needle = ($ord < 0x80)
            ? $codepoint
            : (chr(0xc0 | ($ord >> 6)) . chr(0x80 | ($ord & 0x3f)));
        // UTF-8 is self-synchronising, so in a well-formed string a byte
        // match of a complete sequence is always a whole-character match.
        if (strpos($text, $needle) !== false) {
            return true;
        }
    }
    return false;
}

/**
 * Check a JID of the form [node@]domain[/resource] for forbidden characters.
 *
 * The JID is split at the first "/" (everything after it is the resource,
 * including any further "/" or "@") and the remainder at "@". The JID is
 * rejected when
 *  - the part before the resource contains more than one "@",
 *  - the node contains a character from $forbidden_for_node_and_resource
 *    or $forbidden_for_node, or
 *  - the resource contains a character from $forbidden_for_node_and_resource.
 * The domain part is not inspected, and empty parts are not rejected.
 *
 * Reads the global tables $forbidden_for_node_and_resource and
 * $forbidden_for_node.
 *
 * @param string $jid JID to check, expected to be UTF-8
 * @return bool true if no rule above is violated, false otherwise
 */
function validatejid($jid) {
    global $forbidden_for_node_and_resource, $forbidden_for_node;

    $parts = explode("/", $jid, 2);
    // A JID without "/" has no resource; treat it as empty instead of
    // reading an undefined array index.
    $resource = isset($parts[1]) ? $parts[1] : "";

    $address = explode("@", $parts[0]);
    if (count($address) > 2) {
        return false;
    }
    // With no "@" the whole address is the domain and the node is empty.
    $node = (count($address) == 2) ? $address[0] : "";

    if (validatejid_contains_forbidden($node, $forbidden_for_node_and_resource)) {
        return false;
    }
    if (validatejid_contains_forbidden($node, $forbidden_for_node)) {
        return false;
    }
    if (validatejid_contains_forbidden($resource, $forbidden_for_node_and_resource)) {
        return false;
    }
    return true;
}
// One output line per sample JID: the validatejid() result cast to a string
// ("1" for true, empty for false), a space, then the JID itself.
foreach ($jids as $jid) {
    echo validatejid($jid), " ", $jid, "\n";
}
?>
The same check implemented with PCRE (preg_match) and the full Unicode stringprep tables:
/**
 * Check a UTF-8 JID of the form [node@]domain[/resource] for prohibited
 * characters, using PCRE character classes built from the stringprep tables.
 *
 * The JID is split at the first "/" (the rest is the resource) and the
 * remainder at "@". The JID is rejected when
 *  - the part before the resource contains more than one "@",
 *  - the node contains a character from the tables below or one of the
 *    extra Nodeprep characters " & ' / : < > @,
 *  - the resource contains a character from the tables below, or
 *  - the node or resource is not well-formed UTF-8.
 * The domain part is not inspected, and empty parts are not rejected.
 *
 * NOTE(review): the same tables, including C.1.1 (ASCII space), are applied
 * to the resource; Resourceprep may not prohibit C.1.1 - confirm against the
 * spec before relying on spaces in resources being rejected.
 *
 * @param string $jid JID to check
 * @return bool true if no rule above is violated, false otherwise
 */
function validatejid2($jid) {
    /* The following definitions come from stringprep, Appendix C, which is
       used by Nodeprep's "Prohibited Output" section. They are kept in
       single quotes so the \x{...} escapes reach PCRE untouched. */
    /* C.1.1 ASCII space characters */
    $chars = '\x{20}';
    /* C.1.2 Non-ASCII space characters */
    $chars .= '\x{a0}\x{1680}\x{2000}-\x{200b}\x{202f}\x{205f}\x{3000}';
    /* C.2.1 ASCII control characters */
    $chars .= '\x{00}-\x{1f}\x{7f}';
    /* C.2.2 Non-ASCII control characters */
    $chars .= '\x{80}-\x{9f}\x{6dd}\x{70f}\x{180e}\x{200c}\x{200d}\x{2028}\x{2029}\x{2060}-\x{2063}\x{206a}-\x{206f}\x{feff}\x{fff9}-\x{fffc}\x{1d173}-\x{1d17a}';
    /* C.3 Private use */
    $chars .= '\x{e000}-\x{f8ff}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}';
    /* C.4 Non-character code points */
    $chars .= '\x{fdd0}-\x{fdef}\x{fffe}\x{ffff}\x{1fffe}\x{1ffff}\x{2fffe}\x{2ffff}\x{3fffe}\x{3ffff}\x{4fffe}\x{4ffff}\x{5fffe}\x{5ffff}\x{6fffe}\x{6ffff}\x{7fffe}\x{7ffff}\x{8fffe}\x{8ffff}\x{9fffe}\x{9ffff}\x{afffe}\x{affff}\x{bfffe}\x{bffff}\x{cfffe}\x{cffff}\x{dfffe}\x{dffff}\x{efffe}\x{effff}\x{ffffe}\x{fffff}\x{10fffe}\x{10ffff}';
    /* C.5 Surrogate codes (U+D800..U+DFFF) are deliberately not listed:
       PCRE refuses surrogate code points in a UTF-mode pattern, and they
       cannot occur in well-formed UTF-8 anyway. Input carrying them makes
       preg_match() fail, which is treated as invalid below. */
    /* C.6 Inappropriate for plain text */
    $chars .= '\x{fff9}-\x{fffd}';
    /* C.7 Inappropriate for canonical representation */
    $chars .= '\x{2ff0}-\x{2ffb}';
    /* C.8 Change display properties or are deprecated */
    $chars .= '\x{340}\x{341}\x{200e}\x{200f}\x{202a}-\x{202e}\x{206a}-\x{206f}';
    /* C.9 Tagging characters */
    $chars .= '\x{e0001}\x{e0020}-\x{e007f}';
    /* Nodeprep forbids some more characters: " & ' / : < > @ */
    $nodeprepchars = $chars . '\x{22}\x{26}\x{27}\x{2f}\x{3a}\x{3c}\x{3e}\x{40}';

    $parts = explode('/', $jid, 2);
    // A JID without "/" has no resource; treat it as empty instead of
    // reading an undefined array index.
    $resource = isset($parts[1]) ? $parts[1] : '';

    $address = explode('@', $parts[0]);
    if (count($address) > 2) {
        return false;
    }
    // With no "@" the whole address is the domain and the node is empty.
    $node = (count($address) == 2) ? $address[0] : '';

    // preg_match() returns 1 on a match, 0 on no match and false on failure
    // (e.g. ill-formed UTF-8 in the subject). Only 0 means the part is clean.
    if (preg_match('/[' . $nodeprepchars . ']/u', $node) !== 0) {
        return false;
    }
    if (preg_match('/[' . $chars . ']/u', $resource) !== 0) {
        return false;
    }
    return true;
}