/* * HTML Entity & Encoding normalization. * * Copyright (C) 2006 Török Edvin * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. * */ #include static struct element entities_htable_elements[] = { /* don't modify the order of elements here, an elements index is its hashcode, if you move it * lookup won't work anymore correctly. * You can safely change the value on the right.*/ {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"asymp", 8776}, {(const unsigned char*)"sup", 8835}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"crarr", 8629}, {(const unsigned char*)"copy", 169}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"ensp", 8194}, {NULL, 0}, {(const unsigned char*)"forall", 8704}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"theta", 952}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"nbsp", 160}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"plusmn", 177}, {(const unsigned char*)"mdash", 8212}, {(const unsigned char*)"Omicron", 927}, {(const unsigned char*)"Mu", 924}, {(const unsigned char*)"Nu", 925}, {(const unsigned char*)"ndash", 8211}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Chi", 935}, {(const unsigned char*)"Pi", 928}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"ETH", 208}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Xi", 926}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Phi", 934}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"bull", 8226}, {NULL, 0}, {(const unsigned char*)"omicron", 959}, {(const unsigned char*)"mu", 956}, {(const unsigned char*)"nu", 957}, {NULL, 0}, {(const unsigned char*)"or", 8744}, {(const unsigned char*)"circ", 710}, {(const unsigned char*)"ni", 8715}, {(const unsigned char*)"chi", 967}, {(const unsigned char*)"pi", 960}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"xi", 958}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"phi", 966}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"thetasym", 977}, {(const unsigned char*)"hearts", 9829}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"nsub", 8836}, {(const unsigned char*)"quot", 34}, {(const unsigned char*)"there4", 8756}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"part", 8706}, {(const unsigned char*)"sect", 167}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"euro", 8364}, {(const unsigned char*)"tilde", 732}, {(const unsigned char*)"Dagger", 8225}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"thinsp", 8201}, {(const unsigned char*)"loz", 9674}, {NULL, 0}, {(const unsigned char*)"sup3", 179}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"curren", 164}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"yen", 165}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"dagger", 8224}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"ordm", 186}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"lang", 9001}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"rang", 9002}, {(const unsigned char*)"Auml", 196}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Euml", 203}, {(const unsigned char*)"nbsp2", 160}, {NULL, 0}, {(const unsigned char*)"Eta", 919}, {(const unsigned char*)"Iuml", 207}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Sigma", 931}, {(const unsigned char*)"Agrave", 192}, {(const unsigned char*)"Ouml", 214}, {NULL, 0}, {(const unsigned char*)"Egrave", 200}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Uuml", 220}, {(const unsigned char*)"Igrave", 204}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"sup2", 178}, {(const unsigned char*)"nbsp3", 160}, {NULL, 0}, {(const unsigned char*)"Ograve", 210}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Yuml", 376}, {NULL, 0}, {(const unsigned char*)"auml", 228}, {(const unsigned char*)"Ugrave", 217}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Beta", 914}, {(const unsigned char*)"euml", 235}, {NULL, 0}, {(const unsigned char*)"eta", 951}, {(const unsigned char*)"iuml", 239}, {NULL, 0}, {(const unsigned char*)"darr", 8595}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"sigma", 963}, {(const unsigned char*)"harr", 8596}, {(const unsigned char*)"ouml", 246}, {(const unsigned char*)"cap", 8745}, {(const unsigned char*)"egrave", 232}, {(const unsigned char*)"larr", 8592}, {(const unsigned char*)"agrave", 224}, {(const unsigned char*)"uuml", 252}, {(const unsigned char*)"real", 8476}, {(const unsigned char*)"sum", 8721}, {NULL, 0}, {(const unsigned char*)"piv", 982}, {(const unsigned char*)"rarr", 8594}, {(const unsigned char*)"sim", 8764}, {(const unsigned char*)"uarr", 8593}, {(const unsigned char*)"Zeta", 918}, {(const unsigned char*)"zwj", 8205}, {(const unsigned char*)"radic", 8730}, {(const unsigned char*)"igrave", 236}, {(const unsigned char*)"Atilde", 195}, {(const unsigned char*)"ograve", 242}, {(const unsigned char*)"ugrave", 249}, {NULL, 0}, {(const unsigned char*)"beta", 946}, {(const unsigned char*)"lrm", 8206}, {NULL, 0}, {(const unsigned char*)"yuml", 255}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"prod", 8719}, {(const unsigned char*)"Ntilde", 209}, {(const unsigned char*)"Lambda", 923}, {NULL, 0}, {(const unsigned char*)"infin", 8734}, {(const unsigned char*)"sup1", 185}, {(const unsigned char*)"middot", 183}, {(const unsigned char*)"Otilde", 213}, {(const unsigned char*)"zwnj", 8204}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"zeta", 950}, {(const unsigned char*)"Acirc", 194}, {(const unsigned char*)"Kappa", 922}, {NULL, 0}, {(const unsigned char*)"Ecirc", 202}, {(const unsigned char*)"atilde", 227}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Icirc", 206}, {(const unsigned char*)"rlm", 8207}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"micro", 181}, {(const unsigned char*)"sube", 8838}, {(const unsigned char*)"Ocirc", 212}, {NULL, 0}, {(const unsigned char*)"ntilde", 241}, {(const unsigned char*)"otilde", 245}, {(const unsigned char*)"Rho", 929}, {(const unsigned char*)"lambda", 955}, {(const unsigned char*)"Ucirc", 219}, {NULL, 0}, {(const unsigned char*)"diams", 9830}, {(const unsigned char*)"Tau", 932}, {(const unsigned char*)"Aacute", 193}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Eacute", 201}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"acirc", 226}, {(const unsigned char*)"Iacute", 205}, {(const unsigned char*)"kappa", 954}, {(const unsigned char*)"pound", 163}, {(const unsigned char*)"ecirc", 234}, {(const unsigned char*)"frac14", 188}, {NULL, 0}, {(const unsigned char*)"Oacute", 211}, {(const unsigned char*)"icirc", 238}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"lceil", 8968}, {NULL, 0}, {(const unsigned char*)"Uacute", 218}, {(const unsigned char*)"ocirc", 244}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"rceil", 8969}, {(const unsigned char*)"Yacute", 221}, {(const unsigned char*)"rho", 961}, {(const unsigned char*)"uml", 168}, {(const unsigned char*)"ucirc", 251}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"tau", 964}, {(const unsigned char*)"aacute", 225}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"eacute", 233}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"iacute", 237}, {NULL, 0}, {(const unsigned char*)"spades", 9824}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"prop", 8733}, {(const unsigned char*)"oacute", 243}, {(const unsigned char*)"frac34", 190}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"sigmaf", 962}, {(const unsigned char*)"uacute", 250}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"yacute", 253}, {NULL, 0}, {(const unsigned char*)"THORN", 222}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"brvbar", 166}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"supe", 8839}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"sdot", 8901}, {(const unsigned char*)"Delta", 916}, {(const unsigned char*)"aelig", 230}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"szlig", 223}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"oelig", 339}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"int", 8747}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"delta", 948}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"lsaquo", 8249}, {NULL, 0}, {(const unsigned char*)"para", 182}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"rsaquo", 8250}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Omega", 937}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"oplus", 8853}, {(const unsigned char*)"isin", 8712}, {(const unsigned char*)"bdquo", 8222}, {(const unsigned char*)"nabla", 8711}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"cent", 162}, {(const unsigned char*)"lsquo", 8216}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"ldquo", 8220}, {(const unsigned char*)"laquo", 171}, {(const unsigned char*)"Alpha", 913}, {NULL, 0}, {(const unsigned char*)"rsquo", 8217}, {NULL, 0}, {(const unsigned char*)"rdquo", 8221}, {(const unsigned char*)"raquo", 187}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"ang", 8736}, {NULL, 0}, {(const unsigned char*)"Iota", 921}, {(const unsigned char*)"omega", 969}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"deg", 176}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Epsilon", 917}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"dArr", 8659}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"alpha", 945}, {(const unsigned char*)"hArr", 8660}, {(const unsigned char*)"reg", 174}, {(const unsigned char*)"clubs", 9827}, {(const unsigned char*)"alefsym", 8501}, {(const unsigned char*)"lArr", 8656}, {NULL, 0}, {(const unsigned char*)"shy", 173}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Upsilon", 933}, {(const unsigned char*)"rArr", 8658}, {(const unsigned char*)"lfloor", 8970}, {(const unsigned char*)"Scaron", 352}, {(const unsigned char*)"uArr", 8657}, {(const unsigned char*)"emsp", 8195}, {(const unsigned char*)"iota", 953}, {NULL, 0}, {(const unsigned char*)"rfloor", 8971}, {NULL, 0}, {(const unsigned char*)"times", 215}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"epsilon", 949}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"empty", 8709}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"thorn", 254}, {NULL, 0}, {(const unsigned char*)"minus", 8722}, {(const unsigned char*)"upsilon", 965}, {NULL, 0}, {(const unsigned char*)"scaron", 353}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Psi", 936}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"permil", 8240}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"gt", 62}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"trade", 8482}, {NULL, 0}, {(const unsigned char*)"ge", 8805}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"le", 8804}, {NULL, 0}, {(const unsigned char*)"ne", 8800}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"exist", 8707}, {(const unsigned char*)"ordf", 170}, {NULL, 0}, {(const unsigned char*)"hellip", 8230}, {(const unsigned char*)"iexcl", 161}, {(const unsigned char*)"Aring", 197}, {(const unsigned char*)"psi", 968}, {NULL, 0}, {(const unsigned char*)"frac12", 189}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"oline", 8254}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"apos ", 39}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"upsih", 978}, {(const unsigned char*)"frasl", 8260}, {NULL, 0}, {(const unsigned char*)"Ccedil", 199}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"perp", 8869}, {(const unsigned char*)"aring", 229}, {(const unsigned char*)"Prime", 8243}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"notin", 8713}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"AElig", 198}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"iquest", 191}, {(const unsigned char*)"ccedil", 231}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"divide", 247}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"OElig", 338}, {NULL, 0}, {(const unsigned char*)"image", 8465}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"prime", 8242}, {NULL, 0}, {(const unsigned char*)"Oslash", 216}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"weierp", 8472}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"fnof", 402}, {(const unsigned char*)"cedil", 184}, {NULL, 0}, {(const unsigned char*)"eth", 240}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"not", 172}, {(const unsigned char*)"sub", 8834}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"equiv", 8801}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"oslash", 248}, {(const unsigned char*)"cong", 8773}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"otimes", 8855}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"lowast", 8727}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Gamma", 915}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"sbquo", 8218}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"gamma", 947}, {(const unsigned char*)"and", 8743}, {(const unsigned char*)"cup", 8746}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"Theta", 920}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {NULL, 0}, {(const unsigned char*)"macr", 175}, {(const unsigned char*)"acute", 180}, }; const struct hashtable entities_htable = { entities_htable_elements, 769, 253, 615 };