htmlparser.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  /**
   * Tests for UE.htmlparser.
   * NOTE(review): module/test/equals are assumed to be globals supplied by the
   * test harness (they look like the legacy QUnit API) - confirm.
   *
   * Created with JetBrains PhpStorm.
   * User: luqiong
   * Date: 13-3-14
   * Time: 2:31 PM
   * To change this template use File | Settings | File Templates.
   */
  module('core.htmlparser');
  /**
   * Ordinary tags: the serialized tree is expected to match the input markup,
   * except that the valueless `readonly` attribute is expected as `readonly=""`.
   * The attribute cases are compared with ua.checkSameHtml, which is defined
   * outside this file (presumably an attribute-tolerant comparison - confirm).
   */
  test('普通标签处理', function () {
    var imgHtml = '<img src="file:///C:/DOCUME~1/DONGYA~1/LOCALS~1/Temp/msohtmlclip1/01/clip_image002.jpg" width="553" height="275" />';

    // A single element.
    var single = UE.htmlparser('<i>sdfsdfsdfsf</i>');
    equals(single.toHtml(), '<i>sdfsdfsdfsf</i>', '单个普通标签');

    // Nested elements.
    var nested = UE.htmlparser('<i>sdf<b>sdfsdsd</b>fsdfsf</i>');
    equals(nested.toHtml(), '<i>sdf<b>sdfsdsd</b>fsdfsf</i>', '多个普通标签');

    // Element with attributes, one of them valueless.
    var withAttrs = UE.htmlparser('<i dsf="sdf" sdf="wewe" readonly >sdf</i>');
    ua.checkSameHtml(withAttrs.toHtml(), '<i dsf="sdf" sdf="wewe" readonly="" >sdf</i>', '添加属性的标签');

    // Self-closing img pointing at a local file URL.
    var img = UE.htmlparser(imgHtml);
    ua.checkSameHtml(img.toHtml(), imgHtml, 'img标签');
  });
  /**
   * Special constructs: an HTML comment inside an element, a <script> tag,
   * <br> inside a table and a paragraph, and <li> items with no end tags.
   */
  test('特殊标签处理', function () {
    // Removes runs of spaces that directly precede '>'.
    function tidy(html) {
      return html.replace(/[ ]+>/g, '>');
    }

    // Comment node inside an element with attributes.
    var withComment = UE.htmlparser('<i dsf="sdf" sdf="wewe" readonly >sd<!--fasdf-->f</i>');
    ua.checkSameHtml(withComment.toHtml(), '<i dsf="sdf" sdf="wewe" readonly="" >sd<!--fasdf-->f</i>', '包含注释');

    // Script element is expected to serialize unchanged.
    var scriptHtml = '<script type="text/javascript" charset="utf-8" src="editor_api.js"></script>';
    var script = UE.htmlparser(scriptHtml);
    equals(tidy(script.toHtml()), scriptHtml, 'script标签');

    // <br> is expected to be serialized as <br/>.
    var table = UE.htmlparser('<table width="960"><tbody><tr><td width="939" valign="top"><br></td></tr></tbody></table><p><br></p>');
    equals(tidy(table.toHtml()), '<table width="960"><tbody><tr><td width="939" valign="top"><br/></td></tr></tbody></table><p><br/></p>', 'br标签');

    // Input ending in text: the <li> items are expected to be closed and
    // wrapped in a <ul>. This comparison is made without tidy().
    var items = UE.htmlparser('<li>sdfsdfsdf<li>sdfsdfsdfsdf');
    equals(items.toHtml(), '<ul><li>sdfsdfsdf</li><li>sdfsdfsdfsdf</li></ul>', '以文本结束的html');
  });
  29. //test( '补全不完整table', function() {//TODO 1.2.6
  30. // var root = UE.htmlparser('<p><td></td></p>');
  31. // equals(root.toHtml(),'<p><table><tbody><tr><td></td></tr></tbody></table></p>','td完整,补全table');
  32. // root = UE.htmlparser('<p><td>sdfsdfsdf</p>');
  33. // equals(root.toHtml(),'<p><table><tbody><tr><td>sdfsdfsdf</td></tr></tbody></table></p>','td不完整,补全table');
  34. // root = UE.htmlparser('<td></td>' + '\n\r' + '<td></td>');
  35. // equals(root.toHtml(),'<table><tbody><tr><td></td><td></td></tr></tbody></table>','包含\n,补全table');
  36. // root = UE.htmlparser('<table>');
  37. // equals( root.toHtml().toLowerCase(), '<table></table>', '<table>--不补孩子' );
  38. // /*补parent*/
  39. // root = UE.htmlparser('<td>');
  40. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td></td></tr></tbody></table>', '<td>--补父亲' );
  41. // /*补parent和child*/
  42. // root = UE.htmlparser('<tr>hello');
  43. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr>hello</tr></tbody></table>', '<tr>hello--补父亲不补孩子' );
  44. //
  45. // root = UE.htmlparser('<td>123');
  46. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td>123</td></tr></tbody></table>', '<td>123--文本放在table里' );
  47. //
  48. // root = UE.htmlparser('123<td>');
  49. // equals( root.toHtml().toLowerCase(), '123<table><tbody><tr><td></td></tr></tbody></table>', '123<td>' );
  50. //
  51. // root = UE.htmlparser('<tr><td>123');
  52. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td>123</td></tr></tbody></table>', '<tr><td>123' );
  53. //
  54. // root = UE.htmlparser('<td>123<tr>');
  55. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td>123</td></tr><tr></tr></tbody></table>', '<td>123<tr>' );
  56. //
  57. // /*补充为2个td*/
  58. //// root = UE.htmlparser('<tr>123<td>');
  59. //// equals( root.toHtml().toLowerCase(), '<table><tbody><tr></tr></tbody></table>123<table><tbody><tr><td></td></tr></tbody></table>', '<tr>123<td>--tr和td之间有文字' );//TODO 1.2.6
  60. //
  61. // root = UE.htmlparser('<td><td>123');
  62. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td></td><td>123</td></tr></tbody></table>', '<td><td>123' );
  63. //
  64. // root = UE.htmlparser('<td>123<td>');
  65. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td>123</td><td></td></tr></tbody></table>', '<td>123<td>' );
  66. //
  67. // /*补2个table*/
  68. //// root = UE.htmlparser('<td>123</td>132<tr>');
  69. //// equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td>123</td></tr></tbody></table>132<table><tbody><tr><td></td></tr></tbody></table>', '<td>123</td>132<tr>--补全2个table' );//TODO 1.2.6
  70. //
  71. // /*开标签、文本与闭标签混合*/
  72. //// root = UE.htmlparser('<tr>123</td>');
  73. //// equals( root.toHtml().toLowerCase(), '<table><tbody><tr></tr></tbody></table>123', '<tr>123</td>--tr和td之间有文字' );//TODO 1.2.6
  74. //
  75. //// root = UE.htmlparser('<tr></td>123');
  76. //// equals( root.toHtml().toLowerCase(), '<table><tbody><tr></tr></tbody></table>123', '<tr></td>123--td闭标签后面有文字' );//TODO 1.2.6
  77. //
  78. // root = UE.htmlparser('123</tr><td>');
  79. // equals( root.toHtml().toLowerCase(), '123<table><tbody><tr><td></td></tr></tbody></table>', '123</tr><td>' );
  80. //
  81. // root = UE.htmlparser('</tr><td>123');
  82. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td>123</td></tr></tbody></table>', '</tr><td>123' );
  83. //
  84. // root = UE.htmlparser('</tr>123<td>');
  85. // equals( root.toHtml().toLowerCase(), '123<table><tbody><tr><td></td></tr></tbody></table>', '</tr>123<td>' );
  86. // /*闭标签、文本与闭标签混合*/
  87. // root = UE.htmlparser('</td>123</tr>');
  88. // equals( root.toHtml().toLowerCase(), '123', '</td>123</tr>' );
  89. //
  90. // root = UE.htmlparser('</tr>123</td>');
  91. // equals( root.toHtml().toLowerCase(), '123', '</td>123</tr>' );
  92. //
  93. // root = UE.htmlparser('</tr>123<tr>');
  94. // equals( root.toHtml().toLowerCase(), '123<table><tbody><tr><td></td></tr></tbody></table>', '</td>123</tr>', '</tr>123<tr>' );
  95. //
  96. // /*补前面的标签*/
  97. // root = UE.htmlparser('</td>123');
  98. // equals( root.toHtml().toLowerCase(), '123', '</td>123' );
  99. //
  100. // root = UE.htmlparser('123</td>');
  101. // equals( root.toHtml().toLowerCase(), '123', '123</td>' );
  102. // /*补全tr前面的标签*/
  103. // root = UE.htmlparser('123</tr>');
  104. // equals( root.toHtml().toLowerCase(), '123', '123</tr>--删除tr前后的标签,前面有文本' );
  105. // /*补全table前面的标签*/
  106. // root = UE.htmlparser('123</table>');
  107. // equals( root.toHtml().toLowerCase(), '123', '123</table>--删除table前后的标签,前面有文本' );
  108. // /*复杂结构*/
  109. // root = UE.htmlparser('<table><tr><td>123<tr>456');
  110. // equals( root.toHtml().toLowerCase(), '<table><tr><td>123</td></tr><tr><td>456</td></tr></table>', '<table><tr><td>123<tr>456' );
  111. //
  112. // root = UE.htmlparser('<td><span>hello1</span>hello2</tbody>');
  113. // equals( root.toHtml().toLowerCase(), '<table><tbody><tr><td><span>hello1</span>hello2</td></tr></tbody></table>', '解析<td><span>hello1</span>hello2</tbody>' );
  114. //
  115. // root = UE.htmlparser('<table><td><span>hello1</span>hello2</tbody>');
  116. // equals( root.toHtml().toLowerCase(), '<table><tr><td><span>hello1</span>hello2<table><tbody><tr><td></td></tr></tbody></table></td></tr></table>', '解析<table><td><span>hello1</span>hello2</tbody>' );
  117. //
  118. // root = UE.htmlparser('<table><tr></td>123');
  119. // equals( root.toHtml().toLowerCase(), '<table><tr><td></td></tr></table>123', '<table><tr></td>123' );
  120. //});
  /**
   * Incomplete lists: missing </li>, </u> and </em> end tags, a missing list
   * parent or child, and a stray </li> are each checked against the markup
   * the serializer is expected to produce.
   */
  test('补全不完整li', function () {
    // Each entry is [input markup, expected serialization, assertion message].
    var cases = [
      // Unclosed <u>/<em> inside an unclosed <li>.
      ['<ol><li><em><u>sdf<li>sdfsdf</ol>', '<ol><li><em><u>sdf</u></em></li><li>sdfsdf</li></ol>', '补全u,em'],
      // Unclosed <li> before </ul> and before </ol>.
      ['<ol><li><em>sdf</em></li><ul><li>a</li><li>b</li><li>c</ul><li>jkl</ol>', '<ol><li><em>sdf</em></li><ul><li>a</li><li>b</li><li>c</li></ul><li>jkl</li></ol>', '补全li'],
      // Bare <li>: a <ul> parent is expected to be added.
      ['<li>123', '<ul><li>123</li></ul>', '<li>123--补全li的parent--ul,前面有文本'],
      // Text directly inside <ul>: an <li> child is expected to be added.
      ['<ul>123', '<ul><li>123</li></ul>', '<ul>123--补全ul的child--li,前面有文本'],
      // Stray </li> end tag: expected to be dropped.
      ['</li>123', '123', '</li>123--删掉标签']
    ];

    for (var i = 0; i < cases.length; i++) {
      var root = UE.htmlparser(cases[i][0]);
      // Runs of spaces directly before '>' are removed before comparing.
      equals(root.toHtml().replace(/[ ]+>/g, '>'), cases[i][1], cases[i][2]);
    }
  });
  /**
   * Attribute quoting: unquoted, single-quoted and double-quoted attribute
   * values are all expected to serialize with double quotes.
   */
  test('属性引号问题', function () {
    var expected = '<img width="200" height="200"/>';
    var inputs = [
      '<img width=200 height=200 />',
      "<img width='200' height='200' />",
      '<img width="200" height="200" />'
    ];

    for (var i = 0; i < inputs.length; i++) {
      var root = UE.htmlparser(inputs[i]);
      // Runs of spaces directly before '>' are removed before comparing.
      equals(root.toHtml().replace(/[ ]+>/g, '>'), expected);
    }
  });
  /**
   * Letter case: upper-case tag names in the input are expected to come out
   * lower-cased, with the same completion as their lower-case equivalents.
   */
  test('大小写', function () {
    // Removes runs of spaces that directly precede '>'.
    function tidy(html) {
      return html.replace(/[ ]+>/g, '>');
    }

    // Upper-case <TD> inside <p>: table/tbody/tr wrappers are expected.
    var cell = UE.htmlparser('<p><TD></TD></p>');
    equals(tidy(cell.toHtml()), '<p><table><tbody><tr><td></td></tr></tbody></table></p>');

    // Upper-case list tags with unclosed inline children.
    var list = UE.htmlparser('<OL><LI><em><u>sdf<LI>sdfsdf</OL>');
    equals(tidy(list.toHtml()), '<ol><li><em><u>sdf</u></em></li><li>sdfsdf</li></ol>', '补全u,em');

    // Upper-case <IMG> with unquoted attribute values.
    var img = UE.htmlparser('<IMG width=200 height=200 />');
    equals(tidy(img.toHtml()), '<img width="200" height="200"/>');
  });
  /**
   * Bare text: input with no tags at all is expected to serialize unchanged.
   */
  test('裸字', function () {
    var text = 'sdfasdfasdf';
    var root = UE.htmlparser(text);
    // Runs of spaces directly before '>' are removed before comparing.
    var html = root.toHtml().replace(/[ ]+>/g, '>');
    equals(html, text);
  });
  /**
   * End tag without a start tag: the stray </a> inside the first paragraph is
   * expected to be dropped from the serialized output.
   */
  test('只有结束标签的情况', function () {
    var root = UE.htmlparser('<p>hello1</a></p><p>hello2</p>');
    // Runs of spaces directly before '>' are removed before comparing.
    var html = root.toHtml().replace(/[ ]+>/g, '>');
    equals(html, '<p>hello1</p><p>hello2</p>');
  });
  /**
   * Whitespace handling: spaces between and inside inline elements are
   * expected to survive parsing and serialization unchanged.
   */
  test('开始标签与后面文本的空格过滤,其他不过滤inline节点之间的空格,过滤block节点之间的空格', function () {
    // Spaces between inline nodes must be kept, so each sample is also its own
    // expected output.
    var inlineSamples = [
      '<a href="www.baidu.com">baidu</a> <a> hello </a>',
      '<span> <span> hello </span></span> <span> he llo2<span> hello </span> </span>'
    ];

    for (var i = 0; i < inlineSamples.length; i++) {
      var root = UE.htmlparser(inlineSamples[i]);
      // Runs of spaces directly before '>' are removed before comparing.
      equals(root.toHtml().replace(/[ ]+>/g, '>'), inlineSamples[i]);
    }

    // Spaces between block nodes should not be kept. That case is too tangled
    // and is left out: it would split the ol apart and turn the later part
    // into a ul.
    // html = '<ol> <li> li_test </li> <li> li test2 </li> </ol> ';
    // node = serialize.parseHTML( html );
    // node = serialize.filter( node );
    // equal( serialize.toHTML( node ), '<ol><li>li_test&nbsp;</li><li>li&nbsp;test2&nbsp;</li></ol>&nbsp;' );
  });
  /**
   * Special characters need escaping: a '<' that does not start a complete tag
   * is expected to be serialized as the entity &lt;.
   */
  test('文本包含特殊字符,如尖括号', function () {
    // NOTE(review): the expected text has ' &nbsp;' where the input literal
    // shows a single space; confirm the input was not whitespace-collapsed
    // (it may originally have held two consecutive spaces).
    var root = UE.htmlparser('<span><td hello</span>');
    // Runs of spaces directly before '>' are removed before comparing.
    var html = root.toHtml().replace(/[ ]+>/g, '>');
    equals(html, '<span>&lt;td &nbsp;hello</span>', '字符转义');
  });
  /**
   * <br> handling: both '<br />' and '<br>' are expected to serialize as
   * '<br/>' once spaces before '>' are removed.
   */
  test('br', function () {
    // Each entry is [input markup, assertion message].
    var cases = [
      ['<br />', '对br不操作'],
      ['<br>', '补充br后面的斜杠']
    ];

    for (var i = 0; i < cases.length; i++) {
      var root = UE.htmlparser(cases[i][0]);
      equals(root.toHtml().replace(/[ ]+>/g, '>'), '<br/>', cases[i][1]);
    }
  });
  /**
   * Examines nesting relationships between tags: every element left open in
   * the input is expected to be closed in reverse order of opening, and <img>
   * is expected to be self-closed.
   */
  test('复杂标签嵌套', function () {
    var root = UE.htmlparser('<span>hello1<p><img>hello2<div>hello3<p>hello4');
    // Runs of spaces directly before '>' are removed before comparing.
    var html = root.toHtml().replace(/[ ]+>/g, '>');
    equals(html, '<span>hello1<p><img/>hello2<div>hello3<p>hello4</p></div></p></span>');
  });
  /**
   * Regression test for trace 1727: whitespace following a hyperlink.
   */
  test('trace 1727:过滤超链接后面的空格', function () {
    // NOTE(review): the expected text has ' &nbsp;' after </a> where the input
    // literal shows a single space; confirm the input was not
    // whitespace-collapsed (it may originally have held two spaces).
    var root = UE.htmlparser('<a href="www.baidu.com">baidu</a> ddd');
    // Runs of spaces directly before '>' are removed before comparing.
    var html = root.toHtml().replace(/[ ]+>/g, '>');
    equals(html, '<a href="www.baidu.com">baidu</a> &nbsp;ddd', '过滤超链接后面的空格');
  });
  191. //test( '转换img标签', function () {
  192. // var root = UE.htmlparser('<img src="file:///C:/DOCUME~1/DONGYA~1/LOCALS~1/Temp/msohtmlclip1/01/clip_image002.jpg" width="553" height="275" />');
  193. // var spa=ua.browser.ie==6?' orgSrc="'+te.obj[1].options.UEDITOR_HOME_URL+'themes/default/images/spacer.gif"':'';
  194. // equals(root.toHtml().replace(/[ ]+>/g,'>'), '<img src="'+te.obj[1].options.UEDITOR_HOME_URL+'themes/default/images/spacer.gif" width="553" height="275" word_img="file:///C:/DOCUME~1/DONGYA~1/LOCALS~1/Temp/msohtmlclip1/01/clip_image002.jpg" style="background:url('+te.obj[1].options.UEDITOR_HOME_URL+'lang/'+te.obj[1].options.lang+'/images/localimage.png) no-repeat center center;border:1px solid #ddd"'+spa+' />' , '转换img标签');
  195. //} );