1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
|
/**
* Based on XML_Utility functions submitted by troels_kn.
* credit also to adios, who helped with reg exps:
* http://www.sitepoint.com/forums/showthread.php?t=201052
*
* A replacement for HTMLArea.getHTML
*
* Features:
* - Generates XHTML code
* - Much faster than HTMLArea.getHTML
* - Eliminates the hacks to accommodate browser quirks
* - Returns correct code for Flash objects and scripts
* - Formats html in an indented, readable format in html mode
* - Preserves script and pre formatting
* - Preserves formatting in comments
* - Removes contenteditable from body tag in full-page mode
* - Supports only7BitPrintablesInURLs config option
* - Supports htmlRemoveTags config option
*/
/**
 * GetHtml plugin constructor.
 *
 * @param {Object} editor  The editor instance this plugin object is created for;
 *                         it is stored unchanged on the new object.
 */
function GetHtml(editor) {
  // Keep a handle on the editor passed in by the caller.
  this.editor = editor;
}

// Descriptor object attached to the constructor: plugin name, version,
// developer details and license identifier.
GetHtml._pluginInfo = {
  "name"          : "GetHtml",
  "version"       : "1.0",
  "developer"     : "Nelson Bright",
  "developer_url" : "http://www.brightworkweb.com/",
  "license"       : "htmlArea"
};
/**
 * Shared table of regular expressions used by cleanHTML(), indent() and
 * getHTML(). Consumers address the entries by numeric index, so the position
 * of every entry is significant: never insert or remove an element, only
 * append.
 *
 * The entries are plain regex literals. The previous
 * `new RegExp().compile(/.../)` form relied on the deprecated
 * RegExp.prototype.compile and created a throw-away RegExp per entry; a
 * literal yields an object with the same source and flags.
 *
 * Most entries carry the `g` flag and are therefore stateful (lastIndex) when
 * used with test()/exec(); String.prototype.replace/match reset that state.
 */
HTMLArea.RegExpCache = [
  /*00*/ /<\s*\/?([^\s\/>]+)[\s*\/>]/gi, // lowercase tags
  /*01*/ /(\S*\s*=\s*)?_moz[^=>]*(=\s*[^>]*)?/gi, // strip _moz attributes
  /*02*/ /\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g, // find attributes
  /*03*/ /\/>/g, // strip singlet terminators
  /*04*/ /<(br|hr|img|input|link|meta|param|embed|area)((\s*\S*="[^"]*")*)>/g, // terminate singlet tags
  /*05*/ /(checked|compact|declare|defer|disabled|ismap|multiple|no(href|resize|shade|wrap)|readonly|selected)([\s>])/gi, // expand singlet attributes
  /*06*/ /(="[^']*)'([^'"]*")/, // check quote nesting
  /*07*/ /&(?=[^<]*>)/g, // expand query ampersands (its use in cleanHTML is commented out)
  /*08*/ /<\s+/g, // strip tagstart whitespace
  /*09*/ /\s+(\/)?>/g, // trim whitespace
  /*10*/ /\s{2,}/g, // trim extra whitespace
  /*11*/ /\s+([^=\s]+)(="[^"]+")/g, // lowercase attribute names
  /*12*/ /(\S*\s*=\s*)?contenteditable[^=>]*(=\s*[^>\s\/]*)?/gi, // strip contenteditable
  /*13*/ /((href|src)=")([^\s]*)"/g, // find href and src for stripBaseHref()
  /*14*/ /<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title|area)[^>]*>/g, // tags that indent() looks at
  /*15*/ /<\/(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g, // blocklevel closing tag
  /*16*/ /<(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g, // blocklevel opening tag
  /*17*/ /<(br|hr|img|embed|param|pre|meta|link|title|area)[^>]*>/g, // singlet tag
  /*18*/ /(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g, // find content NOT inside pre and script tags
  /*19*/ /(<pre[^>]*>)(\s|[^\s])*?(<\/pre>)/g, // find content inside pre tags
  /*20*/ /(^|<!--(\s|\S)*?-->)((\s|\S)*?)(?=<!--(\s|\S)*?-->|$)/g, // find content NOT inside comments
  /*21*/ /\S*=""/g, // find empty attributes
  /*22*/ /<!--[\s\S]*?-->|<\?[\s\S]*?\?>|<[^>]*>/g // find all tags, including comments and php
];
/**
 * Cleans HTML into well-formed XHTML.
 *
 * Runs the markup through a fixed sequence of regular-expression passes taken
 * from HTMLArea.RegExpCache: lowercases tag and attribute names, strips _moz*
 * and contenteditable attributes, quotes attribute values, drops empty
 * attributes, expands boolean attributes (checked -> checked="checked"),
 * self-terminates singlet tags and collapses superfluous whitespace.
 *
 * Afterwards every href/src value is post-processed:
 *  - when HTMLArea.is_ie is set, through this.stripBaseURL();
 *  - when this.config.only7BitPrintablesInURLs is set, every run of
 *    characters outside the printable ASCII range '!'..'~' is escape()d.
 *
 * @param {String} sHtml  Markup to clean (getHTML() passes one tag at a time).
 * @return {String} The cleaned markup.
 */
HTMLArea.prototype.cleanHTML = function(sHtml) {
  var c = HTMLArea.RegExpCache;
  var editor = this;

  sHtml = sHtml.
    replace(c[0], function(str) { return str.toLowerCase(); }).//lowercase tags/attribute names
    replace(c[1], ' ').//strip _moz attributes
    replace(c[12], ' ').//strip contenteditable
    replace(c[2], '="$2$4$5"$3').//add attribute quotes
    replace(c[21], ' ').//strip empty attributes
    replace(c[11], function(str, p1, p2) { return ' ' + p1.toLowerCase() + p2; }).//lowercase attribute names
    replace(c[3], '>').//strip singlet terminators
    replace(c[9], '$1>').//trim whitespace
    replace(c[5], '$1="$1"$3').//expand singlet attributes
    replace(c[4], '<$1$2 />').//terminate singlet tags
    replace(c[6], '$1$2').//check quote nesting
    replace(c[8], '<').//strip tagstart whitespace
    replace(c[10], ' ');//trim extra whitespace

  // The URL passes use a replacer function so that each href/src value is
  // transformed on its own. Building a single replacement string from
  // RegExp.$3 stamped the FIRST url onto every match and let '$' sequences
  // inside a url be interpreted as replacement patterns.
  if (HTMLArea.is_ie) {
    sHtml = sHtml.replace(c[13], function(match, prefix, attrName, url) {
      return prefix + editor.stripBaseURL(url) + '"';
    });
  }
  if (this.config.only7BitPrintablesInURLs) {
    sHtml = sHtml.replace(c[13], function(match, prefix, attrName, url) {
      // NOTE(review): escape() is deprecated and emits %uXXXX for code points
      // above 0xFF; kept as-is so existing output does not change.
      var encoded = url.replace(/([^!-~]+)/g, function(chr) { return escape(chr); });
      return prefix + encoded + '"';
    });
  }
  return sHtml;
};
/**
 * Prettifies html by inserting linebreaks before tags, and indenting
 * blocklevel tags.
 *
 * Content inside <pre> and <script> elements and inside comments is left
 * alone (apart from the Gecko <br /> fix-up and the final trailing-space
 * strip, which run over the whole string). Everywhere else runs of whitespace
 * are collapsed to a single space before the line breaks are inserted.
 *
 * The current nesting state is kept in HTMLArea.__nindent / __sindent /
 * __sindentChar, which are reset on every call.
 *
 * @param {String} s             Markup to format.
 * @param {String} [sindentChar] String added per nesting level; defaults to
 *                               a single space.
 * @return {String} The formatted markup, without a leading newline and
 *                  without spaces at the end of lines.
 */
HTMLArea.indent = function(s, sindentChar) {
  HTMLArea.__nindent = 0;
  HTMLArea.__sindent = "";
  HTMLArea.__sindentChar = (typeof sindentChar == "undefined") ? " " : sindentChar;
  var c = HTMLArea.RegExpCache;

  if (HTMLArea.is_gecko) { //moz changes returns into <br> inside <pre> tags
    s = s.replace(c[19], function(pre) {
      return pre.replace(/<br \/>/g, "\n");
    });
  }

  s = s.replace(c[18], function(segment) { //skip pre and script tags
    // c[20] yields (leading comment or "", ..., markup up to the next comment)
    return segment.replace(c[20], function(match, comment, unused, markup) { //exclude comments
      // A local variable: the former bare `string = ...` created an implicit
      // global and throws in strict-mode / module code.
      var indented = markup.replace(/\s+/g, " ").replace(c[14], function(tag) {
        if (tag.match(c[16])) {
          // blocklevel opening tag - emit at current depth, then go deeper
          var line = "\n" + HTMLArea.__sindent + tag;
          HTMLArea.__sindent += HTMLArea.__sindentChar;
          ++HTMLArea.__nindent;
          return line;
        }
        if (tag.match(c[15])) {
          // blocklevel closing tag - go one level up. Depth is clamped at
          // zero so an unmatched closing tag cannot skew the indentation of
          // everything that follows.
          if (HTMLArea.__nindent > 0) {
            --HTMLArea.__nindent;
          }
          HTMLArea.__sindent = "";
          for (var i = HTMLArea.__nindent; i > 0; --i) {
            HTMLArea.__sindent += HTMLArea.__sindentChar;
          }
          return "\n" + HTMLArea.__sindent + tag;
        }
        if (tag.match(c[17])) {
          // singlet tag - own line at current depth
          return "\n" + HTMLArea.__sindent + tag;
        }
        // Matched by c[14] but by none of the classes above
        // (e.g. a closing </pre>): leave untouched.
        return tag;
      });
      return comment + indented;
    });
  });

  // Drop the newline inserted before a leading tag, THEN strip spaces at the
  // end of lines. Returning early here used to skip the strip step.
  if (s.charAt(0) == "\n") {
    s = s.substring(1, s.length);
  }
  return s.replace(/ *\n/g, '\n');
};
/**
 * Serializes a DOM node to XHTML by taking its innerHTML and passing every
 * tag through editor.cleanHTML(). Comments (<!-- -->, <!...>) and processing
 * instructions / php blocks (<? ?>) are copied through untouched.
 *
 * @param {Node}    root        Element or document fragment to serialize.
 * @param {Boolean} outputRoot  When true the root element's own start tag
 *                              (with its specified attributes) and end tag
 *                              are emitted around the content.
 * @param {Object}  editor      Editor instance; provides cleanHTML(), config
 *                              (htmlRemoveTags) and _doc.
 * @return {String} The serialized markup.
 */
HTMLArea.getHTML = function(root, outputRoot, editor) {
  var html = "";
  var c = HTMLArea.RegExpCache;

  if (root.nodeType == 11) { //document fragment
    // A fragment has no innerHTML, so its children are cloned into a scratch
    // div. The div stays detached: inserting it into the fragment (as was
    // done before) left an extra <div> with duplicated content in the
    // caller's fragment.
    var holder = (root.ownerDocument || document).createElement("div");
    for (var child = root.firstChild; child; child = child.nextSibling) {
      holder.appendChild(child.cloneNode(true));
    }
    html += holder.innerHTML.replace(c[22], function(tag) {
      if (/^<[!\?]/.test(tag)) {
        return tag; //skip comments and php tags
      }
      return editor.cleanHTML(tag);
    });
    return html;
  }

  var rootTag = (root.nodeType == 1) ? root.tagName.toLowerCase() : '';

  if (outputRoot) { //only happens with <html> tag in fullpage mode
    html += "<" + rootTag;
    var attrs = root.attributes; // strangely, this doesn't work in moz
    for (var i = 0; i < attrs.length; ++i) {
      var attr = attrs.item(i);
      if (!attr.specified) {
        continue;
      }
      // NOTE(review): the value is written as-is; a value containing '"'
      // would produce broken markup.
      html += " " + attr.nodeName.toLowerCase() + '="' + attr.nodeValue + '"';
    }
    html += ">";
  }

  var innerHtml = (rootTag == "html") ? editor._doc.documentElement.innerHTML : root.innerHTML;

  //pass tags to cleanHTML() one at a time
  //includes support for htmlRemoveTags config option
  html += innerHtml.replace(c[22], function(tag) {
    if (/^<[!\?]/.test(tag)) {
      return tag; //skip comments and php tags
    }
    var removeTags = editor.config.htmlRemoveTags;
    // The leading '<' is dropped before testing so the pattern sees the tag
    // name first (closing tags keep their "</" because '/' is excluded).
    if (removeTags && removeTags.test(tag.replace(/<([^\s>\/]+)/, '$1'))) {
      return '';
    }
    return editor.cleanHTML(tag);
  });

  //IE drops all </li> tags in a list except the last one
  if (HTMLArea.is_ie) {
    html = html.replace(/<li( [^>]*)?>/g, '</li><li$1>').
      replace(/(<(ul|ol)[^>]*>)[\s\n]*<\/li>/g, '$1').
      replace(/<\/li>([\s\n]*<\/li>)+/g, '<\/li>');
  }
  if (HTMLArea.is_gecko) {
    html = html.replace(/(.*)<br \/>\n$/, '$1'). //strip trailing <br> added by moz
      replace(/^\n(.*)/, '$1'); //strip leading newline added by moz
  }
  if (outputRoot) {
    html += "</" + rootTag + ">";
  }
  // HTMLArea.indent() is intentionally not applied here, see bug #6106.
  return html;
};
// Override (hack) outwardHtml() to handle onclick suppression.
//
// Before delegating to the original outwardHtml() (kept as _origOutwardHtml),
// the designMode guard that prefixes window.open() onclick handlers is removed
// again, so the handler reads onclick="window.open(...". Presumably the guard
// is added on the way into the editor - confirm against the inward filter.
HTMLArea.prototype._origOutwardHtml = HTMLArea.prototype.outwardHtml;
HTMLArea.prototype.outwardHtml = function(html) {
  var guardedOpen = "onclick=\"try{if(document.designMode && document.designMode == 'on') return false;}catch(e){} window.open(";
  var plainOpen = "onclick=\"window.open(";
  // split/join replaces EVERY occurrence; String.replace with a string
  // pattern only replaced the first guarded handler in the document.
  html = html.split(guardedOpen).join(plainOpen);
  return this._origOutwardHtml(html);
};
|