oberddan / syntaxhighlighter

Automatically exported from code.google.com/p/syntaxhighlighter
GNU General Public License v3.0
0 stars 0 forks source link

js not interpreted correctly when nested in html #96

Open GoogleCodeExporter opened 8 years ago

GoogleCodeExporter commented 8 years ago
What steps will reproduce the problem?
when trying to syntax highlight the following:

<pre class="html" name="code">&lt;meta
content=&#39;5;url=http://YOUR_NEW_BLOG_DOMAIN_HERE/&#39;
http-equiv=&#39;refresh&#39;/&gt;</pre>
    This will handle the front page redirection of the blog. </li>
    <li>To let the users know that your blog has moved, and that they will be
redirected, add a message to the page: 
    <pre class="html" name="code">&lt;div style=&#39;position: absolute; top:
30px; left: 30px; border: solid 2px #333; color: #000; background-color:
yellow; padding: 5px; width: 400px; z-index: 5; font-family: Verdana,
Geneva, Arial, Helvetica, sans-serif; font-size: large;opacity: .4;filter:
alpha(opacity=40);&#39;&gt;
&lt;p&gt;&lt;strong&gt;My blog has moved!&lt;/strong&gt;&lt;/p&gt;
&lt;p&gt;You should be automatically redirected in 5 seconds. If not,
visit&lt;br/&gt; &lt;a href=&#39;http://YOUR_NEW_BLOG_DOMAIN_HERE/&#39;&gt;
&lt;strong&gt;YOUR_NEW_BLOG_DOMAIN_HERE&lt;/strong&gt;&lt;/a&gt;
&lt;br/&gt; and update your bookmarks.&lt;/p&gt;
&lt;/div&gt;</pre>
    Place this right after the opening <code>&lt;body&gt;</code> tag. 
    <br />
    <a rel="lightbox"
href="/blog/image.axd?picture=WindowsLiveWriter/MovingfromBloggertoBlogEngine.NE
T_1442/redirect_2.jpg"
target="_blank"><img style="border-width: 0px"
src="/blog/image.axd?picture=WindowsLiveWriter/MovingfromBloggertoBlogEngine.NET
_1442/redirect_thumb.jpg"
border="0" alt="redirect" width="244" height="115" /></a> </li>
    <li>Because your new blog will contain all the posts from the old Blogger
blog you need to avoid being penalised for duplicate content, add this to
the &quot;head&quot; section as in point one... 
    <pre class="html" name="code">&lt;META NAME=&quot;ROBOTS&quot;
CONTENT=&quot;NOINDEX, FOLLOW&quot;/&gt;
&lt;META NAME=&quot;GOOGLEBOT&quot; CONTENT=&quot;NOINDEX,
FOLLOW&quot;/&gt;</pre>
    <code>NOINDEX FOLLOW</code> tells the crawler not to index the Blogger
page but to <em><code>FOLLOW</code></em> the redirect. <code>NOINDEX</code>
will also instruct the crawler to drop any already indexed version of page.
</li>
    <li>To make the individual posts forward to their new counterparts a few
things need to be done: 
    <ol>
        <li>On the Blogger control panel in &ldquo;Settings -&gt;
Formatting&rdquo;, change the Timestamps on posts to the
<code>MM/DD/YYYY</code> format. </li>
        <li>Look for this code in the Blogger Template </li></ol>
        <pre class="html" name="code">&lt;b:section class=&#39;main&#39;
id=&#39;main&#39; showaddelement=&#39;no&#39;&gt;</pre>

        <span style="text-indent:10px;">and enter the following widget code under
it: </span>
<pre class="html" name="code">&lt;b:widget id=&#39;Redirector&#39;
locked=&#39;true&#39; title=&#39;Blog Posts&#39; type=&#39;Blog&#39; &gt;
&lt;b:includable id=&#39;main&#39; &gt;
&lt;b:if cond=&#39;data:blog.pageType == &quot;item&quot;&#39; &gt;
&lt;b:loop values=&#39;data:posts&#39; var=&#39;post&#39; &gt;
&lt;div id=&#39;redirectorTitle&#39; style=&#39;visibility:hidden&#39; &gt;
&lt;data:post.title/ &gt; &lt;/div &gt;
&lt;script type=&#39;text/javascript&#39; &gt;
var new_domain = &#39;http://www.lehmkuhl.za.net/blog/post&#39;
function utf8_uri_encode( str ) {
   var high_code = new RegExp(/[\u0080-\uffff]+/);;
   new_str = str;;
   while( m = high_code.exec( new_str ) ) {
      new_str = new_str.replace(m,encodeURIComponent(m));;
   }
   return new_str;;
}
var title = document.getElementById(&#39;redirectorTitle&#39;).innerHTML;;
// [INCOMPLETE] Keep percent signs that aren&#39;t part of an octet?
title = title.replace(/&lt;[^&gt;]*?&gt;/g,&#39;&#39;);; // remove tags
title = title.replace(/&amp;.+?;/g,&#39;&#39;);; // remove entities
title = utf8_uri_encode(title);; // handle UTF-8 characters
title = title.toLowerCase();;
title = title.replace(/[^%a-z0-9 _-]/g,&#39;&#39;);; // remove punctuation
title = title.replace(/\s+/g,&#39;-&#39;);; // turn spaces into hyphens
title = title.replace(/-+/g, &#39;-&#39;);; // collapse runs of hyphens
title = title.replace(/^-+/g,&#39;&#39;);; // remove prefixed hyphens
title = title.replace(/-+$/g,&#39;&#39;);; // remove suffixed hyphens
var timestamp = &#39; &lt;data:post.timestamp/ &gt;&#39;;
timestamp = timestamp.split(&#39;/&#39;);
timestamp = timestamp[2]+&#39;/&#39;+timestamp[0]+&#39;/&#39;+timestamp[1];
var new_page = new_domain + &#39;/&#39; + timestamp + &#39;/&#39; + title +
&#39;.aspx&#39;;;
document.location.href = new_page;
&lt;/script &gt;
&lt;/b:loop &gt;
&lt;/b:if &gt;
&lt;/b:includable &gt;
&lt;/b:widget &gt;</pre>

Parts of the JS are output incorrectly:
the section...
title = title.replace(/&lt;[^&gt;]*?&gt;/g,&#39;&#39;);; // remove tags
title = title.replace(/&amp;.+?;/g,&#39;&#39;);; // remove entities
title = utf8_uri_encode(title);; // handle UTF-8 characters
title = title.toLowerCase();;
title = title.replace(/[^%a-z0-9 _-]/g,&#39;&#39;);; // remove punctuation
title = title.replace(/\s+/g,&#39;-&#39;);; // turn spaces into hyphens
title = title.replace(/-+/g, &#39;-&#39;);; // collapse runs of hyphens
title = title.replace(/^-+/g,&#39;&#39;);; // remove prefixed hyphens
title = title.replace(/-+$/g,&#39;&#39;);; // remove suffixed hyphens

is outputted as...
# titletitle = title.replace(/<[^>]*?>/g,'');; // remove tags  
# titletitle = title.replace(/&.+?;/g,'');; // remove entities  
# title = utf8_uri_encode(title);; // handle UTF-8 characters  
# titletitle = title.toLowerCase();;  
# titletitle = title.replace(/[^%a-z0-9 _-]/g,'');; // remove punctuation  
# titletitle = title.replace(/\s+/g,'-');; // turn spaces into hyphens  
# titletitle = title.replace(/-+/g, '-');; // collapse runs of hyphens  
# titletitle = title.replace(/^-+/g,'');; // remove prefixed hyphens  
# titletitle = title.replace(/-+$/g,'');; // remove suffixed hyphens  

the generated source is...
<span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.replace(/</span><sp
an
class="tag">&lt;</span><span>[^</span><span
class="tag">&gt;</span><span>]*</span><span
class="tag">?&gt;</span><span>/g,'');;&nbsp;//&nbsp;remove&nbsp;tags&nbsp;&nbsp;
</span></span></li><li
class="alt"><span><span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.replace(/&amp;.+?;/
g,'');;&nbsp;//&nbsp;remove&nbsp;entities&nbsp;&nbsp;</span></span></li><li
class=""><span><span
class="attribute">title</span><span>&nbsp;=&nbsp;</span><span
class="attribute-value">utf8_uri_encode</span><span>(title);;&nbsp;//&nbsp;handl
e&nbsp;UTF-8&nbsp;characters&nbsp;&nbsp;</span></span></li><li
class="alt"><span><span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.toLowerCase();;&nbs
p;&nbsp;</span></span></li><li
class=""><span><span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.replace(/[^%a-z0-9&
nbsp;_-]/g,'');;&nbsp;//&nbsp;remove&nbsp;punctuation&nbsp;&nbsp;</span></span><
/li><li
class="alt"><span><span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.replace(/\s+/g,'-')
;;&nbsp;//&nbsp;turn&nbsp;spaces&nbsp;into&nbsp;hyphens&nbsp;&nbsp;</span></span
></li><li
class=""><span><span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.replace(/-+/g,&nbsp
;'-');;&nbsp;//&nbsp;collapse&nbsp;runs&nbsp;of&nbsp;hyphens&nbsp;&nbsp;</span><
/span></li><li
class="alt"><span><span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.replace(/^-+/g,'');
;&nbsp;//&nbsp;remove&nbsp;prefixed&nbsp;hyphens&nbsp;&nbsp;</span></span></li><
li
class=""><span><span class="attribute">title</span><span
class="attribute-value">title</span><span>&nbsp;=&nbsp;title.replace(/-+$/g,'');
;&nbsp;//&nbsp;remove&nbsp;suffixed&nbsp;hyphens&nbsp;&nbsp;</span>

What is the expected output? What do you see instead?
the js should be output as text if not as js.

What version of the product are you using? On what operating system?
using version 1.5.1 on windows xp with firefox 3 and ie7

Original issue reported on code.google.com by ria...@gmail.com on 26 Jul 2008 at 11:32

GoogleCodeExporter commented 8 years ago
I'd like to be able to syntax highlight mixed html and javascript, too.  

Original comment by JoeCoval...@gmail.com on 7 Dec 2008 at 1:56

GoogleCodeExporter commented 8 years ago
I hacked together a way to get JavaScript script blocks in HTML to be formatted
correctly.  I did this by making shBrushXml use shBrushJScript.  I would submit
a diff, but I have other hacks in my copy, so I'll just describe the changes
instead.

First, a few tweaks to jsCore.js.  Add this function:

// Cull matches that are inside other matches.
// This process gets rid of highlighted strings inside comments, keywords
// inside strings, etc.
// Part of the Highlight() implementation, exposed for use by complex brush
// implementations.
//
// Works on this.matches:
//   1. drops falsy (previously culled) entries,
//   2. sorts the remainder with dp.sh.Highlighter.SortCallback,
//   3. replaces every entry for which this.IsInside(entry) is truthy with null.
// Also sets this.culledOnce = true. Returns nothing.
// Intended to be callable multiple times on the same data.
dp.sh.Highlighter.prototype.CullNested = function()
{
    // Rewrite the matches array, squishing out gaps left by earlier culls.
    // This is done on every call (not only when culledOnce is already set) so
    // that entries a brush nulled out before the first cull never reach the
    // sort callback.
    // NOTE(review): SortCallback is not visible here; presumably it
    // dereferences its arguments and would fail on null -- confirm.
    var compacted = [];
    for(var i = 0; i < this.matches.length; i++){
        if(this.matches[i])
            compacted[compacted.length] = this.matches[i];
    }
    this.matches = compacted;

    // Sort the matches before testing for nesting.
    this.matches = this.matches.sort(dp.sh.Highlighter.SortCallback);

    // Null out (rather than remove) nested matches so indexes stay stable
    // while iterating; the gaps are squished on the next call.
    for(var j = 0; j < this.matches.length; j++){
        if(this.IsInside(this.matches[j]))
            this.matches[j] = null;
    }

    // Kept for backward compatibility with code that checks this flag.
    this.culledOnce = true;
};

Then, in jsCore.js, modify dp.sh.Highlighter.prototype.Highlight to make use of
this new function:
        . . .
        // if no matches found, add entire code as plain text
        if(this.matches.length == 0)
        {
                this.AddBit(this.code, null);
                this.SwitchToList();
                this.div.appendChild(this.bar);
                this.div.appendChild(this.ol);
                return;
        }

        this.CullNested();

        // Finally, go through the final list of matches and pull the all
        // together adding everything in between that isn't a match.
        for(var i = 0; i < this.matches.length; i++)
        . . .

Modify shBrushXml.js.  First, modify the constructor to have the JavaScript
brush CSS class as well:

dp.sh.Brushes.Xml = function()
{
    this.CssClass = 'dp-xml dp-c';
        . . .

Then, modify the function ProcessRegexList in shBrushXml.js to end like so:
        //script blocks <script>(.*)</script>
        //we'll mark these with cssClass 'javascript' and then 
        //reprocess with javascript brush below
        regex = new
RegExp('(?:\<|<)script(?:\>|>)((?:.|\\n)*)(?:\<|<)/script(?:\>|>)', 'gm');
    while((match = regex.exec(this.code)) != null)
    {
        if(match[1] == null)
        {
            continue;
        }
        push(this.matches, new dp.sh.Match(match[1], 
                                                  match.index+match[0].indexOf(match[1]), 

'javascript'));
    }

    //Do this once so that javascript blocks cull nested XML
    //matches before we find the javascript matches in the javascript block
        this.CullNested()

       //Replace the javascript block match with matches found by javascript brush
       //within the block
       for(var i = 0; i < this.matches.length; i ++){
               var match = this.matches[i];
               if(match && match.css == 'javascript'){
                   this.matches[i] = null;
                   var brush = new dp.sh.Brushes.JScript();
                   brush.matches=[]
                   brush.code = match.value;
                   brush.ProcessRegexList();
                   for(var j = 0; j < brush.matches.length; j++){
                        var brushMatch = brush.matches[j];
                        brushMatch.index += match.index;
                        this.matches[this.matches.length] = brushMatch;
                   }
               }
       }

Original comment by JoeCoval...@gmail.com on 8 Dec 2008 at 4:46

GoogleCodeExporter commented 8 years ago
That appears a little jumbled; in case it helps, here are my files.  Like I
said, there are other hacks in here, too.

Original comment by JoeCoval...@gmail.com on 8 Dec 2008 at 4:48

Attachments: