This module provides some tests on unicode characters. The productions are taken from the XQuery working draft od december 2001. These functions can as well be used for a XML lexer. module XmlChar where > import Char ]]> isNmstart c = c==' ' || isLetter c ]]> isNmchar c > = isDigit c || c=='.' || c=='-' || c=='_' || isLetter c || isCombiningChar c || isExtender c ]]> isLetter c = isBaseChar c || isIdeographic c ]]> isBaseChar c > = 0x0041<=n && (n<=0x005A > || 0x0061<=n && (n<=0x007A > || 0x00C0<=n && (n<=0x00D6 > || 0x00D8<=n && (n<=0x00F6 > || 0x00F8<=n && (n<=0x00FF > || 0x0100<=n && (n<=0x0131 > || 0x0134<=n && (n<=0x013E > || 0x0141<=n && (n<=0x0148 > || 0x014A<=n && (n<=0x017E > || 0x0180<=n && (n<=0x01C3 > || 0x01CD<=n && (n<=0x01F0 > || 0x01F4<=n && (n<=0x01F5 > || 0x01FA<=n && (n<=0x0217 > || 0x0250<=n && (n<=0x02A8 > || 0x02BB<=n && (n<=0x02C1 > || 0x0386==n > || 0x0388<=n && (n<=0x038A > || 0x038C==n > || 0x038E<=n && (n<=0x03A1 > || 0x03A3<=n && (n<=0x03CE > || 0x03D0<=n && (n<=0x03D6 > || 0x03DA==n > || 0x03DC==n > || 0x03DE==n > || 0x03E0==n > || 0x03E2<=n && (n<=0x03F3 > || 0x0401<=n && (n<=0x040C > || 0x040E<=n && (n<=0x044F > || 0x0451<=n && (n<=0x045C > || 0x045E<=n && (n<=0x0481 > || 0x0490<=n && (n<=0x04C4 > || 0x04C7<=n && (n<=0x04C8 > || 0x04CB<=n && (n<=0x04CC > || 0x04D0<=n && (n<=0x04EB > || 0x04EE<=n && (n<=0x04F5 > || 0x04F8<=n && (n<=0x04F9 > || 0x0531<=n && (n<=0x0556 > || 0x0559==n > || 0x0561<=n && (n<=0x0586 > || 0x05D0<=n && (n<=0x05EA > || 0x05F0<=n && (n<=0x05F2 > || 0x0621<=n && (n<=0x063A > || 0x0641<=n && (n<=0x064A > || 0x0671<=n && (n<=0x06B7 > || 0x06BA<=n && (n<=0x06BE > || 0x06C0<=n && (n<=0x06CE > || 0x06D0<=n && (n<=0x06D3 > || 0x06D5==n > || 0x06E5<=n && (n<=0x06E6 > || 0x0905<=n && (n<=0x0939 > || 0x093D==n > || 0x0958<=n && (n<=0x0961 > || 0x0985<=n && (n<=0x098C > || 0x098F<=n && (n<=0x0990 > || 0x0993<=n && (n<=0x09A8 > || 0x09AA<=n && (n<=0x09B0 > || 0x09B2==n > || 0x09B6<=n && (n<=0x09B9 > || 0x09DC<=n && (n<=0x09DD > || 0x09DF<=n && (n<=0x09E1 > || 0x09F0<=n && (n<=0x09F1 > || 0x0A05<=n && (n<=0x0A0A > || 0x0A0F<=n && (n<=0x0A10 > || 0x0A13<=n && (n<=0x0A28 > || 0x0A2A<=n && (n<=0x0A30 > || 0x0A32<=n && (n<=0x0A33 > || 0x0A35<=n && (n<=0x0A36 > || 0x0A38<=n && (n<=0x0A39 > || 0x0A59<=n && (n<=0x0A5C > || 0x0A5E==n > || 0x0A72<=n && (n<=0x0A74 > || 0x0A85<=n && (n<=0x0A8B > || 0x0A8D==n > || 0x0A8F<=n && (n<=0x0A91 > || 0x0A93<=n && (n<=0x0AA8 > || 0x0AAA<=n && (n<=0x0AB0 > || 0x0AB2<=n && (n<=0x0AB3 > || 0x0AB5<=n && (n<=0x0AB9 > || 0x0ABD==n > || 0x0AE0==n > || 0x0B05<=n && (n<=0x0B0C > || 0x0B0F<=n && (n<=0x0B10 > || 0x0B13<=n && (n<=0x0B28 > || 0x0B2A<=n && (n<=0x0B30 > || 0x0B32<=n && (n<=0x0B33 > || 0x0B36<=n && (n<=0x0B39 > || 0x0B3D==n > || 0x0B5C<=n && (n<=0x0B5D > || 0x0B5F<=n && (n<=0x0B61 > || 0x0B85<=n && (n<=0x0B8A > || 0x0B8E<=n && (n<=0x0B90 > || 0x0B92<=n && (n<=0x0B95 > || 0x0B99<=n && (n<=0x0B9A > || 0x0B9C==n > || 0x0B9E<=n && (n<=0x0B9F > || 0x0BA3<=n && (n<=0x0BA4 > || 0x0BA8<=n && (n<=0x0BAA > || 0x0BAE<=n && (n<=0x0BB5 > || 0x0BB7<=n && (n<=0x0BB9 > || 0x0C05<=n && (n<=0x0C0C > || 0x0C0E<=n && (n<=0x0C10 > || 0x0C12<=n && (n<=0x0C28 > || 0x0C2A<=n && (n<=0x0C33 > || 0x0C35<=n && (n<=0x0C39 > || 0x0C60<=n && (n<=0x0C61 > || 0x0C85<=n && (n<=0x0C8C > || 0x0C8E<=n && (n<=0x0C90 > || 0x0C92<=n && (n<=0x0CA8 > || 0x0CAA<=n && (n<=0x0CB3 > || 0x0CB5<=n && (n<=0x0CB9 > || 0x0CDE==n > || 0x0CE0<=n && (n<=0x0CE1 > || 0x0D05<=n && (n<=0x0D0C > || 0x0D0E<=n && (n<=0x0D10 > || 0x0D12<=n && (n<=0x0D28 > || 0x0D2A<=n && (n<=0x0D39 > || 0x0D60<=n && (n<=0x0D61 > || 0x0E01<=n && (n<=0x0E2E > || 0x0E30==n > || 0x0E32<=n && (n<=0x0E33 > || 0x0E40<=n && (n<=0x0E45 > || 0x0E81<=n && (n<=0x0E82 > || 0x0E84==n > || 0x0E87<=n && (n<=0x0E88 > || 0x0E8A==n > || 0x0E8D==n > || 0x0E94<=n && (n<=0x0E97 > || 0x0E99<=n && (n<=0x0E9F > || 0x0EA1<=n && (n<=0x0EA3 > || 0x0EA5==n > || 0x0EA7==n > || 0x0EAA<=n && (n<=0x0EAB > || 0x0EAD<=n && (n<=0x0EAE > || 0x0EB0==n > || 0x0EB2<=n && (n<=0x0EB3 > || 0x0EBD==n > || 0x0EC0<=n && (n<=0x0EC4 > || 0x0F40<=n && (n<=0x0F47 > || 0x0F49<=n && (n<=0x0F69 > || 0x10A0<=n && (n<=0x10C5 > || 0x10D0<=n && (n<=0x10F6 > || 0x1100==n > || 0x1102<=n && (n<=0x1103 > || 0x1105<=n && (n<=0x1107 > || 0x1109==n > || 0x110B<=n && (n<=0x110C > || 0x110E<=n && (n<=0x1112 > || 0x113C==n > || 0x113E==n > || 0x1140==n > || 0x114C==n > || 0x114E==n > || 0x1150==n > || 0x1154<=n && (n<=0x1155 > || 0x1159==n > || 0x115F<=n && (n<=0x1161 > || 0x1163==n > || 0x1165==n > || 0x1167==n > || 0x1169==n > || 0x116D<=n && (n<=0x116E > || 0x1172<=n && (n<=0x1173 > || 0x1175==n > || 0x119E==n > || 0x11A8==n > || 0x11AB==n > || 0x11AE<=n && (n<=0x11AF > || 0x11B7<=n && (n<=0x11B8 > || 0x11BA==n > || 0x11BC<=n && (n<=0x11C2 > || 0x11EB==n > || 0x11F0==n > || 0x11F9==n > || 0x1E00<=n && (n<=0x1E9B > || 0x1EA0<=n && (n<=0x1EF9 > || 0x1F00<=n && (n<=0x1F15 > || 0x1F18<=n && (n<=0x1F1D > || 0x1F20<=n && (n<=0x1F45 > || 0x1F48<=n && (n<=0x1F4D > || 0x1F50<=n && (n<=0x1F57 > || 0x1F59==n > || 0x1F5B==n > || 0x1F5D==n > || 0x1F5F<=n && (n<=0x1F7D > || 0x1F80<=n && (n<=0x1FB4 > || 0x1FB6<=n && (n<=0x1FBC > || 0x1FBE==n > || 0x1FC2<=n && (n<=0x1FC4 > || 0x1FC6<=n && (n<=0x1FCC > || 0x1FD0<=n && (n<=0x1FD3 > || 0x1FD6<=n && (n<=0x1FDB > || 0x1FE0<=n && (n<=0x1FEC > || 0x1FF2<=n && (n<=0x1FF4 > || 0x1FF6<=n && (n<=0x1FFC > || 0x2126==n > || 0x212A<=n && (n<=0x212B > || 0x212E==n > || 0x2180<=n && (n<=0x2182 > || 0x3041<=n && (n<=0x3094 > || 0x30A1<=n && (n<=0x30FA > || 0x3105<=n && (n<=0x312C > || 0xAC00<=n && n<=0xD7A3))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) > where > n = ord c ]]> isIdeographic c > = > 0x4E00<=n && (n<=0x9FA5 > || 0x3007==n > || 0x3021<=n && n<=0x3029) > where > n = ord c ]]> isCombiningChar c > = 0x0300<=n && (n<=0x0345 > || 0x0360<=n && (n<=0x0361 > || 0x0483<=n && (n<=0x0486 > || 0x0591<=n && (n<=0x05A1 > || 0x05A3<=n && (n<=0x05B9 > || 0x05BB<=n && (n<=0x05BD > || 0x05BF==n > || 0x05C1<=n && (n<=0x05C2 > || 0x05C4==n > || 0x064B<=n && (n<=0x0652 > || 0x0670==n > || 0x06D6<=n && (n<=0x06DC > || 0x06DD<=n && (n<=0x06DF > || 0x06E0<=n && (n<=0x06E > || 0x06E7<=n && (n<=0x06E8 > || 0x06EA<=n && (n<=0x06ED > || 0x0901<=n && (n<=0x0903 > || 0x093C==n > || 0x093E<=n && (n<=0x094C > || 0x094D==n > || 0x0951<=n && (n<=0x0954 > || 0x0962<=n && (n<=0x0963 > || 0x0981<=n && (n<=0x0983 > || 0x09BC==n > || 0x09BE==n > || 0x09BF==n > || 0x09C0<=n && (n<=0x09C4 > || 0x09C7<=n && (n<=0x09C8 > || 0x09CB<=n && (n<=0x09CD > || 0x09D7==n > || 0x09E2<=n && (n<=0x09E3 > || 0x0A02==n > || 0x0A3C==n > || 0x0A3E==n > || 0x0A3F==n > || 0x0A40<=n && (n<=0x0A42 > || 0x0A47<=n && (n<=0x0A48 > || 0x0A4B<=n && (n<=0x0A4D > || 0x0A70<=n && (n<=0x0A71 > || 0x0A81<=n && (n<=0x0A83 > || 0x0ABC==n > || 0x0ABE<=n && (n<=0x0AC5 > || 0x0AC7<=n && (n<=0x0AC9 > || 0x0ACB<=n && (n<=0x0ACD > || 0x0B01<=n && (n<=0x0B03 > || 0x0B3C==n > || 0x0B3E<=n && (n<=0x0B43 > || 0x0B47<=n && (n<=0x0B48 > || 0x0B4B<=n && (n<=0x0B4D > || 0x0B56<=n && (n<=0x0B57 > || 0x0B82<=n && (n<=0x0B83 > || 0x0BBE<=n && (n<=0x0BC2 > || 0x0BC6<=n && (n<=0x0BC8 > || 0x0BCA<=n && (n<=0x0BCD > || 0x0BD7==n > || 0x0C01<=n && (n<=0x0C03 > || 0x0C3E<=n && (n<=0x0C44 > || 0x0C46<=n && (n<=0x0C48 > || 0x0C4A<=n && (n<=0x0C4D > || 0x0C55<=n && (n<=0x0C56 > || 0x0C82<=n && (n<=0x0C83 > || 0x0CBE<=n && (n<=0x0CC4 > || 0x0CC6<=n && (n<=0x0CC8 > || 0x0CCA<=n && (n<=0x0CCD > || 0x0CD5<=n && (n<=0x0CD6 > || 0x0D02<=n && (n<=0x0D03 > || 0x0D3E<=n && (n<=0x0D43 > || 0x0D46<=n && (n<=0x0D48 > || 0x0D4A<=n && (n<=0x0D4D > || 0x0D57==n > || 0x0E31==n > || 0x0E34<=n && (n<=0x0E3A > || 0x0E47<=n && (n<=0x0E4E > || 0x0EB1==n > || 0x0EB4<=n && (n<=0x0EB9 > || 0x0EBB<=n && (n<=0x0EBC > || 0x0EC8<=n && (n<=0x0ECD > || 0x0F18<=n && (n<=0x0F19 > || 0x0F35==n > || 0x0F37==n > || 0x0F39==n > || 0x0F3E==n > || 0x0F3F==n > || 0x0F71<=n && (n<=0x0F84 > || 0x0F86<=n && (n<=0x0F8B > || 0x0F90<=n && (n<=0x0F95 > || 0x0F97==n > || 0x0F99<=n && (n<=0x0FAD > || 0x0FB1<=n && (n<=0x0FB7 > || 0x0FB9==n > || 0x20D0<=n && (n<=0x20DC > || 0x20E1==n > || 0x302A<=n && (n<=0x302F > || 0x3099==n > || 0x309A==n)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))) > where > n = ord c ]]> isTDigit c > = 0x0030<=n && (n<=0x0039 > || 0x0660<=n && (n<=0x0669 > || 0x06F0<=n && (n<=0x06F9 > || 0x0966<=n && (n<=0x096F > || 0x09E6<=n && (n<=0x09EF > || 0x0A66<=n && (n<=0x0A6F > || 0x0AE6<=n && (n<=0x0AEF > || 0x0B66<=n && (n<=0x0B6F > || 0x0BE7<=n && (n<=0x0BEF > || 0x0C66<=n && (n<=0x0C6F > || 0x0CE6<=n && (n<=0x0CEF > || 0x0D66<=n && (n<=0x0D6F > || 0x0E50<=n && (n<=0x0E59 > || 0x0ED0<=n && (n<=0x0ED9 > || 0x0F20<=n && n<=0x0F29)))))))))))))) > where > n = ord c ]]> isExtender c > = > 0x00B7==n > || 0x02D0==n > || 0x02D1==n > || 0x0387==n > || 0x0640==n > || 0x0E46==n > || 0x0EC6==n > || 0x3005==n > || 0x3031<=n && (n<=0x3035 > || 0x309D<=n && (n<=0x309E > || 0x30FC<=n && (n<=0x30FE))) > where > n = ord c ]]>