<?php
#
# Markdown Extra  -  A text-to-HTML conversion tool for web writers
#
# PHP Markdown & Extra
# Copyright (c) 2004-2012 Michel Fortin
# <http://michelf.com/projects/php-markdown/>
#
# Original Markdown
# Copyright (c) 2004-2006 John Gruber
# <http://daringfireball.net/projects/markdown/>
#


define( 'MARKDOWN_VERSION',  "1.0.1o" ); # Sun 8 Jan 2012
define( 'MARKDOWNEXTRA_VERSION',  "1.2.5" ); # Sun 8 Jan 2012


#
# Global default settings:
#
# Every setting below is defined only when it does not exist yet, so a host
# application can override any of them by defining the constant before
# including this file. The explicit defined() test replaces the former
# "@define(...)" form, which relied on silencing the redefinition error.
#

# Change to ">" for HTML output
defined('MARKDOWN_EMPTY_ELEMENT_SUFFIX')
    or define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");

# Define the width of a tab for code blocks.
defined('MARKDOWN_TAB_WIDTH')
    or define( 'MARKDOWN_TAB_WIDTH',     4 );

# Optional title attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_TITLE')
    or define( 'MARKDOWN_FN_LINK_TITLE',         "" );
defined('MARKDOWN_FN_BACKLINK_TITLE')
    or define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );

# Optional class attribute for footnote links and backlinks.
defined('MARKDOWN_FN_LINK_CLASS')
    or define( 'MARKDOWN_FN_LINK_CLASS',         "" );
defined('MARKDOWN_FN_BACKLINK_CLASS')
    or define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );


#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
defined('MARKDOWN_WP_POSTS')
    or define( 'MARKDOWN_WP_POSTS',      true );
defined('MARKDOWN_WP_COMMENTS')
    or define( 'MARKDOWN_WP_COMMENTS',   true );



### Standard Function Interface ###

# Name of the parser class instantiated by Markdown().
defined('MARKDOWN_PARSER_CLASS')
    or define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );

function Markdown($text) {
#
# Initialize the parser and return the result of its transform method.
#
# $text is the Markdown source; the return value is whatever the parser's
# transform() method produces for it. The parser object is created on the
# first call and reused by every later call.
#
    # Setup static parser variable.
    static $parser;
    if (!isset($parser)) {
        $parser_class = MARKDOWN_PARSER_CLASS;
        $parser = new $parser_class;
    }

    # Transform text using parser.
    return $parser->transform($text);
}


### WordPress Plugin Interface ###

/*
Plugin Name: Markdown Extra
Plugin URI: http://michelf.com/projects/php-markdown/
Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format.
Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>
Version: 1.2.5
Author: Michel Fortin
Author URI: http://michelf.com/
*/

if (isset($wp_version)) {
    # More details about how it works here:
    # <http://michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

    # Post content and excerpts
    # - Remove WordPress paragraph generator.
    # - Run Markdown on excerpt, then remove all tags.
    # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
    if (MARKDOWN_WP_POSTS) {
        remove_filter('the_content',     'wpautop');
        remove_filter('the_content_rss', 'wpautop');
        remove_filter('the_excerpt',     'wpautop');
        add_filter('the_content',     'mdwp_MarkdownPost', 6);
        add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
        add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
        add_filter('get_the_excerpt', 'trim', 7);
        add_filter('the_excerpt',     'mdwp_add_p');
        add_filter('the_excerpt_rss', 'mdwp_strip_p');

        remove_filter('content_save_pre',  'balanceTags', 50);
        remove_filter('excerpt_save_pre',  'balanceTags', 50);
        add_filter('the_content',     'balanceTags', 50);
        add_filter('get_the_excerpt', 'balanceTags', 9);
    }

    # Filter callback that converts post text with a shared parser instance.
    # Footnote ids get no prefix on single posts, pages and feeds; anywhere
    # else they are prefixed with the current post ID followed by a dot.
    function mdwp_MarkdownPost($text) {
        static $parser;
        if (!$parser) {
            $class_name = MARKDOWN_PARSER_CLASS;
            $parser = new $class_name;
        }

        $standalone = is_single() || is_page() || is_feed();
        $parser->fn_id_prefix = $standalone ? "" : get_the_ID() . ".";

        return $parser->transform($text);
    }

    # Comments
    # - Remove WordPress paragraph generator.
    # - Remove WordPress auto-link generator.
    # - Scramble important tags before passing them to the kses filter.
    # - Run Markdown on excerpt then remove paragraph tags.
124 if (MARKDOWN_WP_COMMENTS) { 125 remove_filter('comment_text', 'wpautop', 30); 126 remove_filter('comment_text', 'make_clickable'); 127 add_filter('pre_comment_content', 'Markdown', 6); 128 add_filter('pre_comment_content', 'mdwp_hide_tags', 8); 129 add_filter('pre_comment_content', 'mdwp_show_tags', 12); 130 add_filter('get_comment_text', 'Markdown', 6); 131 add_filter('get_comment_excerpt', 'Markdown', 6); 132 add_filter('get_comment_excerpt', 'mdwp_strip_p', 7); 133 134 global $mdwp_hidden_tags, $mdwp_placeholders; 135 $mdwp_hidden_tags = explode(' ', 136 '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>'); 137 $mdwp_placeholders = explode(' ', str_rot13( 138 'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '. 139 'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli')); 140 } 141 142 function mdwp_add_p($text) { 143 if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) { 144 $text = '<p>'.$text.'</p>'; 145 $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text); 146 } 147 return $text; 148 } 149 150 function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); } 151 152 function mdwp_hide_tags($text) { 153 global $mdwp_hidden_tags, $mdwp_placeholders; 154 return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text); 155 } 156 function mdwp_show_tags($text) { 157 global $mdwp_hidden_tags, $mdwp_placeholders; 158 return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text); 159 } 160} 161 162 163### bBlog Plugin Info ### 164 165function identify_modifier_markdown() { 166 return array( 167 'name' => 'markdown', 168 'type' => 'modifier', 169 'nicename' => 'PHP Markdown Extra', 170 'description' => 'A text-to-HTML conversion tool for web writers', 171 'authors' => 'Michel Fortin and John Gruber', 172 'licence' => 'GPL', 173 'version' => MARKDOWNEXTRA_VERSION, 174 'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text 
format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://michelf.com/projects/php-markdown/">More...</a>', 175 ); 176} 177 178 179### Smarty Modifier Interface ### 180 181function smarty_modifier_markdown($text) { 182 return Markdown($text); 183} 184 185 186### Textile Compatibility Mode ### 187 188# Rename this file to "classTextile.php" and it can replace Textile everywhere. 189 190if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) { 191 # Try to include PHP SmartyPants. Should be in the same directory. 192 @include_once 'smartypants.php'; 193 # Fake Textile class. It calls Markdown instead. 194 class Textile { 195 function TextileThis($text, $lite='', $encode='') { 196 if ($lite == '' && $encode == '') $text = Markdown($text); 197 if (function_exists('SmartyPants')) $text = SmartyPants($text); 198 return $text; 199 } 200 # Fake restricted version: restrictions are not supported for now. 201 function TextileRestricted($text, $lite='', $noimage='') { 202 return $this->TextileThis($text, $lite); 203 } 204 # Workaround to ensure compatibility with TextPattern 4.0.3. 205 function blockLite($text) { return $text; } 206 } 207} 208 209 210 211# 212# Markdown Parser Class 213# 214 215class Markdown_Parser { 216 217 # Regex to match balanced [brackets]. 218 # Needed to insert a maximum bracked depth while converting to PHP. 219 var $nested_brackets_depth = 6; 220 var $nested_brackets_re; 221 222 var $nested_url_parenthesis_depth = 4; 223 var $nested_url_parenthesis_re; 224 225 # Table of hash values for escaped characters: 226 var $escape_chars = '\`*_{}[]()>#+-.!'; 227 var $escape_chars_re; 228 229 # Change to ">" for HTML output. 230 var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX; 231 var $tab_width = MARKDOWN_TAB_WIDTH; 232 233 # Change to `true` to disallow markup or entities. 
234 var $no_markup = false; 235 var $no_entities = false; 236 237 # Predefined urls and titles for reference links and images. 238 var $predef_urls = array(); 239 var $predef_titles = array(); 240 241 242 function __construct() { 243 # 244 # Constructor function. Initialize appropriate member variables. 245 # 246 $this->_initDetab(); 247 $this->prepareItalicsAndBold(); 248 249 $this->nested_brackets_re = 250 str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth). 251 str_repeat('\])*', $this->nested_brackets_depth); 252 253 $this->nested_url_parenthesis_re = 254 str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth). 255 str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth); 256 257 $this->escape_chars_re = '['.preg_quote($this->escape_chars).']'; 258 259 # Sort document, block, and span gamut in ascendent priority order. 260 asort($this->document_gamut); 261 asort($this->block_gamut); 262 asort($this->span_gamut); 263 } 264 265 266 # Internal hashes used during transformation. 267 var $urls = array(); 268 var $titles = array(); 269 var $html_hashes = array(); 270 271 # Status flag to avoid invalid nesting. 272 var $in_anchor = false; 273 274 275 function setup() { 276 # 277 # Called before the transformation process starts to setup parser 278 # states. 279 # 280 # Clear global hashes. 281 $this->urls = $this->predef_urls; 282 $this->titles = $this->predef_titles; 283 $this->html_hashes = array(); 284 285 $in_anchor = false; 286 } 287 288 function teardown() { 289 # 290 # Called after the transformation process to clear any variable 291 # which may be taking up memory unnecessarly. 292 # 293 $this->urls = array(); 294 $this->titles = array(); 295 $this->html_hashes = array(); 296 } 297 298 299 function transform($text) { 300 # 301 # Main function. Performs some preprocessing on the input text 302 # and pass it through the document gamut. 303 # 304 $this->setup(); 305 306 # Remove UTF-8 BOM and marker character in input, if present. 
307 $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text); 308 309 # Standardize line endings: 310 # DOS to Unix and Mac to Unix 311 $text = preg_replace('{\r\n?}', "\n", $text); 312 313 # Make sure $text ends with a couple of newlines: 314 $text .= "\n\n"; 315 316 # Convert all tabs to spaces. 317 $text = $this->detab($text); 318 319 # Turn block-level HTML blocks into hash entries 320 $text = $this->hashHTMLBlocks($text); 321 322 # Strip any lines consisting only of spaces and tabs. 323 # This makes subsequent regexen easier to write, because we can 324 # match consecutive blank lines with /\n+/ instead of something 325 # contorted like /[ ]*\n+/ . 326 $text = preg_replace('/^[ ]+$/m', '', $text); 327 328 # Run document gamut methods. 329 foreach ($this->document_gamut as $method => $priority) { 330 $text = $this->$method($text); 331 } 332 333 $this->teardown(); 334 335 return $text . "\n"; 336 } 337 338 var $document_gamut = array( 339 # Strip link definitions, store in hashes. 340 "stripLinkDefinitions" => 20, 341 342 "runBasicBlockGamut" => 30, 343 ); 344 345 346 function stripLinkDefinitions($text) { 347 # 348 # Strips link definitions from text, stores the URLs and titles in 349 # hash references. 350 # 351 $less_than_tab = $this->tab_width - 1; 352 353 # Link defs are in the form: ^[id]: url "optional title" 354 $text = preg_replace_callback('{ 355 ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 356 [ ]* 357 \n? # maybe *one* newline 358 [ ]* 359 (?: 360 <(.+?)> # url = $2 361 | 362 (\S+?) # url = $3 363 ) 364 [ ]* 365 \n? # maybe one newline 366 [ ]* 367 (?: 368 (?<=\s) # lookbehind for whitespace 369 ["(] 370 (.*?) # title = $4 371 [")] 372 [ ]* 373 )? # title is optional 374 (?:\n+|\Z) 375 }xm', 376 array(&$this, '_stripLinkDefinitions_callback'), 377 $text); 378 return $text; 379 } 380 function _stripLinkDefinitions_callback($matches) { 381 $link_id = strtolower($matches[1]); 382 $url = $matches[2] == '' ? 
$matches[3] : $matches[2]; 383 $this->urls[$link_id] = $url; 384 $this->titles[$link_id] =& $matches[4]; 385 return ''; # String that will replace the block 386 } 387 388 389 function hashHTMLBlocks($text) { 390 if ($this->no_markup) return $text; 391 392 $less_than_tab = $this->tab_width - 1; 393 394 # Hashify HTML blocks: 395 # We only want to do this for block-level HTML tags, such as headers, 396 # lists, and tables. That's because we still want to wrap <p>s around 397 # "paragraphs" that are wrapped in non-block-level tags, such as anchors, 398 # phrase emphasis, and spans. The list of tags we're looking for is 399 # hard-coded: 400 # 401 # * List "a" is made of tags which can be both inline or block-level. 402 # These will be treated block-level when the start tag is alone on 403 # its line, otherwise they're not matched here and will be taken as 404 # inline later. 405 # * List "b" is made of tags which are always block-level; 406 # 407 $block_tags_a_re = 'ins|del'; 408 $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'. 409 'script|noscript|form|fieldset|iframe|math'; 410 411 # Regular expression for the content of a block tag. 412 $nested_tags_level = 4; 413 $attr = ' 414 (?> # optional tag attributes 415 \s # starts with whitespace 416 (?> 417 [^>"/]+ # text outside quotes 418 | 419 /+(?!>) # slash not followed by ">" 420 | 421 "[^"]*" # text inside double quotes (tolerate ">") 422 | 423 \'[^\']*\' # text inside single quotes (tolerate ">") 424 )* 425 )? 426 '; 427 $content = 428 str_repeat(' 429 (?> 430 [^<]+ # content without tag 431 | 432 <\2 # nested opening tag 433 '.$attr.' # attributes 434 (?> 435 /> 436 | 437 >', $nested_tags_level). # end of opening tag 438 '.*?'. 
# last level nested tag content 439 str_repeat(' 440 </\2\s*> # closing nested tag 441 ) 442 | 443 <(?!/\2\s*> # other tags with a different name 444 ) 445 )*', 446 $nested_tags_level); 447 $content2 = str_replace('\2', '\3', $content); 448 449 # First, look for nested blocks, e.g.: 450 # <div> 451 # <div> 452 # tags for inner block must be indented. 453 # </div> 454 # </div> 455 # 456 # The outermost tags must start at the left margin for this to match, and 457 # the inner nested divs must be indented. 458 # We need to do this before the next, more liberal match, because the next 459 # match will start at the first `<div>` and stop at the first `</div>`. 460 $text = preg_replace_callback('{(?> 461 (?> 462 (?<=\n\n) # Starting after a blank line 463 | # or 464 \A\n? # the beginning of the doc 465 ) 466 ( # save in $1 467 468 # Match from `\n<tag>` to `</tag>\n`, handling nested tags 469 # in between. 470 471 [ ]{0,'.$less_than_tab.'} 472 <('.$block_tags_b_re.')# start tag = $2 473 '.$attr.'> # attributes followed by > and \n 474 '.$content.' # content, support nesting 475 </\2> # the matching end tag 476 [ ]* # trailing spaces/tabs 477 (?=\n+|\Z) # followed by a newline or end of document 478 479 | # Special version for tags of group a. 480 481 [ ]{0,'.$less_than_tab.'} 482 <('.$block_tags_a_re.')# start tag = $3 483 '.$attr.'>[ ]*\n # attributes followed by > 484 '.$content2.' # content, support nesting 485 </\3> # the matching end tag 486 [ ]* # trailing spaces/tabs 487 (?=\n+|\Z) # followed by a newline or end of document 488 489 | # Special case just for <hr />. It was easier to make a special 490 # case than to make the other regex more complicated. 491 492 [ ]{0,'.$less_than_tab.'} 493 <(hr) # start tag = $2 494 '.$attr.' # attributes 495 /?> # the matching end tag 496 [ ]* 497 (?=\n{2,}|\Z) # followed by a blank line or end of document 498 499 | # Special case for standalone HTML comments: 500 501 [ ]{0,'.$less_than_tab.'} 502 (?s: 503 <!-- .*? 
--> 504 ) 505 [ ]* 506 (?=\n{2,}|\Z) # followed by a blank line or end of document 507 508 | # PHP and ASP-style processor instructions (<? and <%) 509 510 [ ]{0,'.$less_than_tab.'} 511 (?s: 512 <([?%]) # $2 513 .*? 514 \2> 515 ) 516 [ ]* 517 (?=\n{2,}|\Z) # followed by a blank line or end of document 518 519 ) 520 )}Sxmi', 521 array(&$this, '_hashHTMLBlocks_callback'), 522 $text); 523 524 return $text; 525 } 526 function _hashHTMLBlocks_callback($matches) { 527 $text = $matches[1]; 528 $key = $this->hashBlock($text); 529 return "\n\n$key\n\n"; 530 } 531 532 533 function hashPart($text, $boundary = 'X') { 534 # 535 # Called whenever a tag must be hashed when a function insert an atomic 536 # element in the text stream. Passing $text to through this function gives 537 # a unique text-token which will be reverted back when calling unhash. 538 # 539 # The $boundary argument specify what character should be used to surround 540 # the token. By convension, "B" is used for block elements that needs not 541 # to be wrapped into paragraph tags at the end, ":" is used for elements 542 # that are word separators and "X" is used in the general case. 543 # 544 # Swap back any tag hash found in $text so we do not have to `unhash` 545 # multiple times at the end. 546 $text = $this->unhash($text); 547 548 # Then hash the block. 549 static $i = 0; 550 $key = "$boundary\x1A" . ++$i . $boundary; 551 $this->html_hashes[$key] = $text; 552 return $key; # String that will replace the tag. 553 } 554 555 556 function hashBlock($text) { 557 # 558 # Shortcut function for hashPart with block-level boundaries. 559 # 560 return $this->hashPart($text, 'B'); 561 } 562 563 564 var $block_gamut = array( 565 # 566 # These are all the transformations that form block-level 567 # tags like paragraphs, headers, and list items. 
568 # 569 "doHeaders" => 10, 570 "doHorizontalRules" => 20, 571 572 "doLists" => 40, 573 "doCodeBlocks" => 50, 574 "doBlockQuotes" => 60, 575 ); 576 577 function runBlockGamut($text) { 578 # 579 # Run block gamut tranformations. 580 # 581 # We need to escape raw HTML in Markdown source before doing anything 582 # else. This need to be done for each block, and not only at the 583 # begining in the Markdown function since hashed blocks can be part of 584 # list items and could have been indented. Indented blocks would have 585 # been seen as a code block in a previous pass of hashHTMLBlocks. 586 $text = $this->hashHTMLBlocks($text); 587 588 return $this->runBasicBlockGamut($text); 589 } 590 591 function runBasicBlockGamut($text) { 592 # 593 # Run block gamut tranformations, without hashing HTML blocks. This is 594 # useful when HTML blocks are known to be already hashed, like in the first 595 # whole-document pass. 596 # 597 foreach ($this->block_gamut as $method => $priority) { 598 $text = $this->$method($text); 599 } 600 601 # Finally form paragraph and restore hashed blocks. 602 $text = $this->formParagraphs($text); 603 604 return $text; 605 } 606 607 608 function doHorizontalRules($text) { 609 # Do Horizontal Rules: 610 return preg_replace( 611 '{ 612 ^[ ]{0,3} # Leading space 613 ([-*_]) # $1: First marker 614 (?> # Repeated marker group 615 [ ]{0,2} # Zero, one, or two spaces. 616 \1 # Marker character 617 ){2,} # Group repeated at least twice 618 [ ]* # Tailing spaces 619 $ # End of line. 620 }mx', 621 "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 622 $text); 623 } 624 625 626 var $span_gamut = array( 627 # 628 # These are all the transformations that occur *within* block-level 629 # tags like paragraphs, headers, and list items. 630 # 631 # Process character escapes, code spans, and inline HTML 632 # in one shot. 633 "parseSpan" => -30, 634 635 # Process anchor and image tags. 
Images must come first, 636 # because ![foo][f] looks like an anchor. 637 "doImages" => 10, 638 "doAnchors" => 20, 639 640 # Make links out of things like `<http://example.com/>` 641 # Must come after doAnchors, because you can use < and > 642 # delimiters in inline links like [this](<url>). 643 "doAutoLinks" => 30, 644 "encodeAmpsAndAngles" => 40, 645 646 "doItalicsAndBold" => 50, 647 "doHardBreaks" => 60, 648 ); 649 650 function runSpanGamut($text) { 651 # 652 # Run span gamut tranformations. 653 # 654 foreach ($this->span_gamut as $method => $priority) { 655 $text = $this->$method($text); 656 } 657 658 return $text; 659 } 660 661 662 function doHardBreaks($text) { 663 # Do hard breaks: 664 return preg_replace_callback('/ {2,}\n/', 665 array(&$this, '_doHardBreaks_callback'), $text); 666 } 667 function _doHardBreaks_callback($matches) { 668 return $this->hashPart("<br$this->empty_element_suffix\n"); 669 } 670 671 672 function doAnchors($text) { 673 # 674 # Turn Markdown link shortcuts into XHTML <a> tags. 675 # 676 if ($this->in_anchor) return $text; 677 $this->in_anchor = true; 678 679 # 680 # First, handle reference-style links: [link text] [id] 681 # 682 $text = preg_replace_callback('{ 683 ( # wrap whole match in $1 684 \[ 685 ('.$this->nested_brackets_re.') # link text = $2 686 \] 687 688 [ ]? # one optional space 689 (?:\n[ ]*)? # one optional newline followed by spaces 690 691 \[ 692 (.*?) # id = $3 693 \] 694 ) 695 }xs', 696 array(&$this, '_doAnchors_reference_callback'), $text); 697 698 # 699 # Next, inline-style links: [link text](url "optional title") 700 # 701 $text = preg_replace_callback('{ 702 ( # wrap whole match in $1 703 \[ 704 ('.$this->nested_brackets_re.') # link text = $2 705 \] 706 \( # literal paren 707 [ \n]* 708 (?: 709 <(.+?)> # href = $3 710 | 711 ('.$this->nested_url_parenthesis_re.') # href = $4 712 ) 713 [ \n]* 714 ( # $5 715 ([\'"]) # quote char = $6 716 (.*?) 
# Title = $7 717 \6 # matching quote 718 [ \n]* # ignore any spaces/tabs between closing quote and ) 719 )? # title is optional 720 \) 721 ) 722 }xs', 723 array(&$this, '_doAnchors_inline_callback'), $text); 724 725 # 726 # Last, handle reference-style shortcuts: [link text] 727 # These must come last in case you've also got [link text][1] 728 # or [link text](/foo) 729 # 730 $text = preg_replace_callback('{ 731 ( # wrap whole match in $1 732 \[ 733 ([^\[\]]+) # link text = $2; can\'t contain [ or ] 734 \] 735 ) 736 }xs', 737 array(&$this, '_doAnchors_reference_callback'), $text); 738 739 $this->in_anchor = false; 740 return $text; 741 } 742 function _doAnchors_reference_callback($matches) { 743 $whole_match = $matches[1]; 744 $link_text = $matches[2]; 745 $link_id =& $matches[3]; 746 747 if ($link_id == "") { 748 # for shortcut links like [this][] or [this]. 749 $link_id = $link_text; 750 } 751 752 # lower-case and turn embedded newlines into spaces 753 $link_id = strtolower($link_id); 754 $link_id = preg_replace('{[ ]?\n}', ' ', $link_id); 755 756 if (isset($this->urls[$link_id])) { 757 $url = $this->urls[$link_id]; 758 $url = $this->encodeAttribute($url); 759 760 $result = "<a href=\"$url\""; 761 if ( isset( $this->titles[$link_id] ) ) { 762 $title = $this->titles[$link_id]; 763 $title = $this->encodeAttribute($title); 764 $result .= " title=\"$title\""; 765 } 766 767 $link_text = $this->runSpanGamut($link_text); 768 $result .= ">$link_text</a>"; 769 $result = $this->hashPart($result); 770 } 771 else { 772 $result = $whole_match; 773 } 774 return $result; 775 } 776 function _doAnchors_inline_callback($matches) { 777 $whole_match = $matches[1]; 778 $link_text = $this->runSpanGamut($matches[2]); 779 $url = $matches[3] == '' ? 
$matches[4] : $matches[3]; 780 $title =& $matches[7]; 781 782 $url = $this->encodeAttribute($url); 783 784 $result = "<a href=\"$url\""; 785 if (isset($title)) { 786 $title = $this->encodeAttribute($title); 787 $result .= " title=\"$title\""; 788 } 789 790 $link_text = $this->runSpanGamut($link_text); 791 $result .= ">$link_text</a>"; 792 793 return $this->hashPart($result); 794 } 795 796 797 function doImages($text) { 798 # 799 # Turn Markdown image shortcuts into <img> tags. 800 # 801 # 802 # First, handle reference-style labeled images: ![alt text][id] 803 # 804 $text = preg_replace_callback('{ 805 ( # wrap whole match in $1 806 !\[ 807 ('.$this->nested_brackets_re.') # alt text = $2 808 \] 809 810 [ ]? # one optional space 811 (?:\n[ ]*)? # one optional newline followed by spaces 812 813 \[ 814 (.*?) # id = $3 815 \] 816 817 ) 818 }xs', 819 array(&$this, '_doImages_reference_callback'), $text); 820 821 # 822 # Next, handle inline images: ![alt text](url "optional title") 823 # Don't forget: encode * and _ 824 # 825 $text = preg_replace_callback('{ 826 ( # wrap whole match in $1 827 !\[ 828 ('.$this->nested_brackets_re.') # alt text = $2 829 \] 830 \s? # One optional whitespace character 831 \( # literal paren 832 [ \n]* 833 (?: 834 <(\S*)> # src url = $3 835 | 836 ('.$this->nested_url_parenthesis_re.') # src url = $4 837 ) 838 [ \n]* 839 ( # $5 840 ([\'"]) # quote char = $6 841 (.*?) # title = $7 842 \6 # matching quote 843 [ \n]* 844 )? # title is optional 845 \) 846 ) 847 }xs', 848 array(&$this, '_doImages_inline_callback'), $text); 849 850 return $text; 851 } 852 function _doImages_reference_callback($matches) { 853 $whole_match = $matches[1]; 854 $alt_text = $matches[2]; 855 $link_id = strtolower($matches[3]); 856 857 if ($link_id == "") { 858 $link_id = strtolower($alt_text); # for shortcut links like ![this][]. 
859 } 860 861 $alt_text = $this->encodeAttribute($alt_text); 862 if (isset($this->urls[$link_id])) { 863 $url = $this->encodeAttribute($this->urls[$link_id]); 864 $result = "<img src=\"$url\" alt=\"$alt_text\""; 865 if (isset($this->titles[$link_id])) { 866 $title = $this->titles[$link_id]; 867 $title = $this->encodeAttribute($title); 868 $result .= " title=\"$title\""; 869 } 870 $result .= $this->empty_element_suffix; 871 $result = $this->hashPart($result); 872 } 873 else { 874 # If there's no such link ID, leave intact: 875 $result = $whole_match; 876 } 877 878 return $result; 879 } 880 function _doImages_inline_callback($matches) { 881 $whole_match = $matches[1]; 882 $alt_text = $matches[2]; 883 $url = $matches[3] == '' ? $matches[4] : $matches[3]; 884 $title =& $matches[7]; 885 886 $alt_text = $this->encodeAttribute($alt_text); 887 $url = $this->encodeAttribute($url); 888 $result = "<img src=\"$url\" alt=\"$alt_text\""; 889 if (isset($title)) { 890 $title = $this->encodeAttribute($title); 891 $result .= " title=\"$title\""; # $title already quoted 892 } 893 $result .= $this->empty_element_suffix; 894 895 return $this->hashPart($result); 896 } 897 898 899 function doHeaders($text) { 900 # Setext-style headers: 901 # Header 1 902 # ======== 903 # 904 # Header 2 905 # -------- 906 # 907 $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx', 908 array(&$this, '_doHeaders_callback_setext'), $text); 909 910 # atx-style headers: 911 # # Header 1 912 # ## Header 2 913 # ## Header 2 with closing hashes ## 914 # ... 915 # ###### Header 6 916 # 917 $text = preg_replace_callback('{ 918 ^(\#{1,6}) # $1 = string of #\'s 919 [ ]* 920 (.+?) # $2 = Header text 921 [ ]* 922 \#* # optional closing #\'s (not counted) 923 \n+ 924 }xm', 925 array(&$this, '_doHeaders_callback_atx'), $text); 926 927 return $text; 928 } 929 function _doHeaders_callback_setext($matches) { 930 # Terrible hack to check we haven't found an empty list item. 
931 if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1])) 932 return $matches[0]; 933 934 $level = $matches[2][0] == '=' ? 1 : 2; 935 $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>"; 936 return "\n" . $this->hashBlock($block) . "\n\n"; 937 } 938 function _doHeaders_callback_atx($matches) { 939 $level = strlen($matches[1]); 940 $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>"; 941 return "\n" . $this->hashBlock($block) . "\n\n"; 942 } 943 944 945 function doLists($text) { 946 # 947 # Form HTML ordered (numbered) and unordered (bulleted) lists. 948 # 949 $less_than_tab = $this->tab_width - 1; 950 951 # Re-usable patterns to match list item bullets and number markers: 952 $marker_ul_re = '[*+-]'; 953 $marker_ol_re = '\d+[\.]'; 954 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 955 956 $markers_relist = array( 957 $marker_ul_re => $marker_ol_re, 958 $marker_ol_re => $marker_ul_re, 959 ); 960 961 foreach ($markers_relist as $marker_re => $other_marker_re) { 962 # Re-usable pattern to match any entirel ul or ol list: 963 $whole_list_re = ' 964 ( # $1 = whole list 965 ( # $2 966 ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces 967 ('.$marker_re.') # $4 = first list item marker 968 [ ]+ 969 ) 970 (?s:.+?) 971 ( # $5 972 \z 973 | 974 \n{2,} 975 (?=\S) 976 (?! # Negative lookahead for another list item marker 977 [ ]* 978 '.$marker_re.'[ ]+ 979 ) 980 | 981 (?= # Lookahead for another kind of list 982 \n 983 \3 # Must have the same indentation 984 '.$other_marker_re.'[ ]+ 985 ) 986 ) 987 ) 988 '; // mx 989 990 # We use a different prefix before nested lists than top-level lists. 991 # See extended comment in _ProcessListItems(). 992 993 if ($this->list_level) { 994 $text = preg_replace_callback('{ 995 ^ 996 '.$whole_list_re.' 997 }mx', 998 array(&$this, '_doLists_callback'), $text); 999 } 1000 else { 1001 $text = preg_replace_callback('{ 1002 (?:(?<=\n)\n|\A\n?) # Must eat the newline 1003 '.$whole_list_re.' 
1004 }mx', 1005 array(&$this, '_doLists_callback'), $text); 1006 } 1007 } 1008 1009 return $text; 1010 } 1011 function _doLists_callback($matches) { 1012 # Re-usable patterns to match list item bullets and number markers: 1013 $marker_ul_re = '[*+-]'; 1014 $marker_ol_re = '\d+[\.]'; 1015 $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)"; 1016 1017 $list = $matches[1]; 1018 $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol"; 1019 1020 $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re ); 1021 1022 $list .= "\n"; 1023 $result = $this->processListItems($list, $marker_any_re); 1024 1025 $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>"); 1026 return "\n". $result ."\n\n"; 1027 } 1028 1029 var $list_level = 0; 1030 1031 function processListItems($list_str, $marker_any_re) { 1032 # 1033 # Process the contents of a single ordered or unordered list, splitting it 1034 # into individual list items. 1035 # 1036 # The $this->list_level global keeps track of when we're inside a list. 1037 # Each time we enter a list, we increment it; when we leave a list, 1038 # we decrement. If it's zero, we're not in a list anymore. 1039 # 1040 # We do this because when we're not inside a list, we want to treat 1041 # something like this: 1042 # 1043 # I recommend upgrading to version 1044 # 8. Oops, now this line is treated 1045 # as a sub-list. 1046 # 1047 # As a single paragraph, despite the fact that the second line starts 1048 # with a digit-period-space sequence. 1049 # 1050 # Whereas when we're inside a list (or sub-list), that line will be 1051 # treated as the start of a sub-list. What a kludge, huh? This is 1052 # an aspect of Markdown's syntax that's hard to parse perfectly 1053 # without resorting to mind-reading. Perhaps the solution is to 1054 # change the syntax rules such that sub-lists must start with a 1055 # starting cardinal number; e.g. "1." or "a.". 
1056 1057 $this->list_level++; 1058 1059 # trim trailing blank lines: 1060 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 1061 1062 $list_str = preg_replace_callback('{ 1063 (\n)? # leading line = $1 1064 (^[ ]*) # leading whitespace = $2 1065 ('.$marker_any_re.' # list marker and space = $3 1066 (?:[ ]+|(?=\n)) # space only required if item is not empty 1067 ) 1068 ((?s:.*?)) # list item text = $4 1069 (?:(\n+(?=\n))|\n) # tailing blank line = $5 1070 (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n)))) 1071 }xm', 1072 array(&$this, '_processListItems_callback'), $list_str); 1073 1074 $this->list_level--; 1075 return $list_str; 1076 } 1077 function _processListItems_callback($matches) { 1078 $item = $matches[4]; 1079 $leading_line =& $matches[1]; 1080 $leading_space =& $matches[2]; 1081 $marker_space = $matches[3]; 1082 $tailing_blank_line =& $matches[5]; 1083 1084 if ($leading_line || $tailing_blank_line || 1085 preg_match('/\n{2,}/', $item)) 1086 { 1087 # Replace marker with the appropriate whitespace indentation 1088 $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item; 1089 $item = $this->runBlockGamut($this->outdent($item)."\n"); 1090 } 1091 else { 1092 # Recursion for sub-lists: 1093 $item = $this->doLists($this->outdent($item)); 1094 $item = preg_replace('/\n+$/', '', $item); 1095 $item = $this->runSpanGamut($item); 1096 } 1097 1098 return "<li>" . $item . "</li>\n"; 1099 } 1100 1101 1102 function doCodeBlocks($text) { 1103 # 1104 # Process Markdown `<pre><code>` blocks. 1105 # 1106 $text = preg_replace_callback('{ 1107 (?:\n\n|\A\n?) 
function _doCodeBlocks_callback($matches) {
#
# preg_replace_callback handler for doCodeBlocks: converts one indented
# code block ($matches[1]) into a hashed <pre><code> element.
#
    # Strip one level of indentation, then make the code HTML-safe.
    $code = htmlspecialchars($this->outdent($matches[1]), ENT_NOQUOTES);

    # Drop the newlines surrounding the block.
    $code = trim($code, "\n");

    $html = "<pre><code>$code\n</code></pre>";
    return "\n\n".$this->hashBlock($html)."\n\n";
}
function doItalicsAndBold($text) {
#
# Convert emphasis markers in $text into <em> and <strong> elements.
#
# The text is consumed one emphasis token at a time. Two parallel stacks
# are kept, innermost entry first: the markers currently open and the
# text collected since each of them. Closing a marker pops both stacks,
# runs the span gamut on the popped text, wraps it in the matching tag
# and appends the hashed result to the enclosing entry.
#
# Returns the text of the outermost stack entry.
#
    $open_markers = array('');
    $open_texts   = array('');
    $em = '';
    $strong = '';
    $in_triple = false;

    while (true) {
        #
        # Get the regular expression prepared by prepareItalicsAndBold
        # for searching emphasis tokens in the current context.
        #
        $token_re = $this->em_strong_prepared_relist["$em$strong"];

        #
        # Each loop iteration searches for the next emphasis token.
        #
        $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
        $open_texts[0] .= $parts[0];
        $token = isset($parts[1]) ? $parts[1] : null;
        $text  = isset($parts[2]) ? $parts[2] : null;

        if (empty($token)) {
            # Reached end of text span: empty the stacks without emitting
            # any more emphasis, putting unclosed markers back as text.
            while ($open_markers[0]) {
                $open_texts[1] .= array_shift($open_markers);
                $open_texts[0] .= array_shift($open_texts);
            }
            break;
        }

        $token_len = strlen($token);

        if ($in_triple) {
            # Reached closing marker while inside a three-char emphasis.
            $in_triple = false;
            if ($token_len == 3) {
                # Three-char closing marker, close em and strong.
                array_shift($open_markers);
                $inner = $this->runSpanGamut(array_shift($open_texts));
                $open_texts[0] .= $this->hashPart("<strong><em>$inner</em></strong>");
                $em = '';
                $strong = '';
            } else {
                # Other closing marker: close one of em or strong and
                # change the open marker to stand for the other one.
                $open_markers[0] = str_repeat($token[0], 3-$token_len);
                $inner = $this->runSpanGamut($open_texts[0]);
                if ($token_len == 2) {
                    $open_texts[0] = $this->hashPart("<strong>$inner</strong>");
                    $strong = '';
                } else {
                    $open_texts[0] = $this->hashPart("<em>$inner</em>");
                    $em = '';
                }
            }
            continue;
        }

        if ($token_len == 3) {
            if ($em) {
                # Reached closing marker for both em and strong: close
                # the two innermost markers, whichever order they are in.
                for ($i = 0; $i < 2; ++$i) {
                    $closed = array_shift($open_markers);
                    $inner = $this->runSpanGamut(array_shift($open_texts));
                    if (strlen($closed) == 2) {
                        $open_texts[0] .= $this->hashPart("<strong>$inner</strong>");
                        $strong = '';
                    } else {
                        $open_texts[0] .= $this->hashPart("<em>$inner</em>");
                        $em = '';
                    }
                }
            } else {
                # Reached opening three-char emphasis marker. Push on the
                # stacks; will be handled by the special condition above.
                $em = $token[0];
                $strong = "$em$em";
                array_unshift($open_markers, $token);
                array_unshift($open_texts, '');
                $in_triple = true;
            }
            continue;
        }

        if ($token_len == 2) {
            if ($strong) {
                # Unwind any dangling emphasis marker:
                if (strlen($open_markers[0]) == 1) {
                    $open_texts[1] .= array_shift($open_markers);
                    $open_texts[0] .= array_shift($open_texts);
                }
                # Closing strong marker:
                array_shift($open_markers);
                $inner = $this->runSpanGamut(array_shift($open_texts));
                $open_texts[0] .= $this->hashPart("<strong>$inner</strong>");
                $strong = '';
            } else {
                # Opening strong marker:
                array_unshift($open_markers, $token);
                array_unshift($open_texts, '');
                $strong = $token;
            }
            continue;
        }

        # Here $token_len == 1
        if (!$em) {
            # Opening emphasis marker:
            array_unshift($open_markers, $token);
            array_unshift($open_texts, '');
            $em = $token;
        } else if (strlen($open_markers[0]) == 1) {
            # Closing emphasis marker:
            array_shift($open_markers);
            $inner = $this->runSpanGamut(array_shift($open_texts));
            $open_texts[0] .= $this->hashPart("<em>$inner</em>");
            $em = '';
        } else {
            # Innermost open marker is not an emphasis: keep token as text.
            $open_texts[0] .= $token;
        }
    }
    return $open_texts[0];
}
function _doBlockQuotes_callback($matches) {
#
# preg_replace_callback handler for doBlockQuotes: converts one matched
# run of quoted lines ($matches[1]) into a hashed <blockquote> block.
#
    # trim one level of quoting - trim whitespace-only lines
    $inner = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $matches[1]);
    $inner = $this->runBlockGamut($inner);      # recurse

    # Indent the quoted content...
    $inner = preg_replace('/^/m', " ", $inner);
    # ...except inside <pre> elements, where the leading space causes
    # problems and so is taken back out:
    $inner = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
        array(&$this, '_doBlockQuotes_callback2'), $inner);

    $block = $this->hashBlock("<blockquote>\n$inner\n</blockquote>");
    return "\n". $block ."\n\n";
}
function _doBlockQuotes_callback2($matches) {
#
# Helper for _doBlockQuotes_callback: removes the indentation added to
# each line of a <pre> element ($matches[1]).
#
    return preg_replace('/^ /m', '', $matches[1]);
}
function encodeAttribute($text) {
#
# Encode text for a double-quoted HTML attribute. This function
# is *not* suitable for attributes enclosed in single quotes.
#
# Ampersands and `<` are encoded by encodeAmpsAndAngles; double quotes
# are then replaced by their entity so the value cannot end the
# attribute early.
#
    $text = $this->encodeAmpsAndAngles($text);
    $text = str_replace('"', '&quot;', $text);
    return $text;
}


function encodeAmpsAndAngles($text) {
#
# Smart processing for ampersands and angle brackets that need to
# be encoded. Valid character entities are left alone unless the
# no-entities mode is set.
#
# Returns $text with bare `&` replaced by `&amp;` and every `<`
# replaced by `&lt;`.
#
    if ($this->no_entities) {
        # No-entities mode: every ampersand is encoded.
        $text = str_replace('&', '&amp;', $text);
    } else {
        # Ampersand-encoding based entirely on Nat Irons's Amputator
        # MT plugin: <http://bumppo.net/projects/amputator/>
        # Only ampersands that do not start an entity are encoded.
        $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
                             '&amp;', $text);
    }
    # Encode remaining <'s
    $text = str_replace('<', '&lt;', $text);

    return $text;
}
function parseSpan($str) {
#
# Walk through $str token by token. Tokens are backslash-escaped
# characters, code span markers and, unless the no_markup property is
# set, embedded HTML (comments, processing instructions and tags).
# Text between tokens is copied as is; each token is replaced by
# whatever handleSpanToken returns for it.
#
    $span_re = '{
            (
                \\\\'.$this->escape_chars_re.'
            |
                (?<![`\\\\])
                `+                      # code span marker
        '.( $this->no_markup ? '' : '
            |
                <!--    .*?     -->     # comment
            |
                <\?.*?\?> | <%.*?%>     # processing instruction
            |
                <[/!$]?[-a-zA-Z0-9:_]+  # regular tags
                (?>
                    \s
                    (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                )?
                >
        ').'
            )
            }xs';

    $result = '';

    do {
        #
        # Each pass looks for the next token: the text in front of it
        # lands in $parts[0], the token in $parts[1] and everything after
        # it in $parts[2].
        #
        $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

        # Text preceding the token goes out unchanged.
        $result .= $parts[0];

        # No token means the end of the string has been reached.
        $found_token = isset($parts[1]);
        if ($found_token) {
            # handleSpanToken takes the remaining text by reference and
            # may consume part of it, so read it back only afterwards.
            $result .= $this->handleSpanToken($parts[1], $parts[2]);
            $str = $parts[2];
        }
    } while ($found_token);

    return $result;
}
function _initDetab() {
#
# Check for the availability of the function in the `utf8_strlen` property
# (initially `mb_strlen`). If the function is not available, install a
# fallback that loosely counts the number of UTF-8 characters with a
# regular expression.
#
# The fallback is a closure rather than a create_function() result,
# because create_function() was removed in PHP 8.0. Callers invoke the
# property as $strlen($text, 'UTF-8'); the extra argument is ignored by
# the closure.
#
    # is_callable() accepts both a function name and an installed closure,
    # so calling this method a second time is harmless.
    if (is_callable($this->utf8_strlen)) return;

    $this->utf8_strlen = function ($text) {
        # One match per single byte below 0xC0, or per lead byte with its
        # continuation bytes.
        return preg_match_all(
            '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
            $text, $m);
    };
}
function setup() {
#
# Reset the Extra-specific state after running the parent setup:
# footnote bookkeeping starts over, and the abbreviation table and its
# matching expression are rebuilt from the predefined abbreviations.
#
    parent::setup();

    $this->footnotes = array();
    $this->footnotes_ordered = array();
    $this->footnote_counter = 1;

    # Build an alternation of the quoted abbreviation words, together
    # with the table of their trimmed descriptions.
    $word_re = '';
    $descriptions = array();
    foreach ($this->predef_abbr as $word => $description) {
        $word_re .= ($word_re ? '|' : '') . preg_quote($word);
        $descriptions[$word] = trim($description);
    }

    $this->abbr_desciptions = $descriptions;
    $this->abbr_word_re = $word_re;
}
function hashHTMLBlocks($text) {
#
# Hashify HTML blocks and "clean tags".
#
# Only block-level HTML tags are hashed, because text wrapped in
# non-block-level tags must still get <p> tags around it.
#
# The work is done by _hashHTMLBlocks_inMarkdown, which hands block tags
# over to _hashHTMLBlocks_inHTML; that one calls back into
# _hashHTMLBlocks_inMarkdown for tags carrying a markdown attribute, so
# the two functions recurse into each other.
#
    # The parser returns ( processed text , remaining text ); only the
    # processed text is of interest here.
    $result = $this->_hashHTMLBlocks_inMarkdown($text);

    return $result[0];
}
| # Double quotes (can contain `>`) 1863 \'.*?\' | # Single quotes (can contain `>`) 1864 .+? # Anything but quotes and `>`. 1865 )*? 1866 )? 1867 > # End of tag. 1868 | 1869 <!-- .*? --> # HTML Comment 1870 | 1871 <\?.*?\?> | <%.*?%> # Processing instruction 1872 | 1873 <!\[CDATA\[.*?\]\]> # CData Block 1874 | 1875 # Code span marker 1876 `+ 1877 '. ( !$span ? ' # If not in span. 1878 | 1879 # Indented code block 1880 (?: ^[ ]*\n | ^ | \n[ ]*\n ) 1881 [ ]{'.($indent+4).'}[^\n]* \n 1882 (?> 1883 (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 1884 )* 1885 | 1886 # Fenced code block marker 1887 (?> ^ | \n ) 1888 [ ]{0,'.($indent).'}~~~+[ ]*\n 1889 ' : '' ). ' # End (if not is span). 1890 ) 1891 }xs'; 1892 1893 1894 $depth = 0; # Current depth inside the tag tree. 1895 $parsed = ""; # Parsed text that will be returned. 1896 1897 # 1898 # Loop through every tag until we find the closing tag of the parent 1899 # or loop until reaching the end of text if no parent tag specified. 1900 # 1901 do { 1902 # 1903 # Split the text using the first $tag_match pattern found. 1904 # Text before pattern will be first in the array, text after 1905 # pattern will be at the end, and between will be any catches made 1906 # by the pattern. 1907 # 1908 $parts = preg_split($block_tag_re, $text, 2, 1909 PREG_SPLIT_DELIM_CAPTURE); 1910 1911 # If in Markdown span mode, add a empty-string span-level hash 1912 # after each newline to prevent triggering any block element. 1913 if ($span) { 1914 $void = $this->hashPart("", ':'); 1915 $newline = "$void\n"; 1916 $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 1917 } 1918 1919 $parsed .= $parts[0]; # Text before current tag. 1920 1921 # If end of $text has been reached. Stop loop. 1922 if (count($parts) < 3) { 1923 $text = ""; 1924 break; 1925 } 1926 1927 $tag = $parts[1]; # Tag to handle. 1928 $text = $parts[2]; # Remaining text after current tag. 1929 $tag_re = preg_quote($tag); # For use in a regular expression. 
1930 1931 # 1932 # Check for: Code span marker 1933 # 1934 if ($tag[0] == "`") { 1935 # Find corresponding end marker. 1936 $tag_re = preg_quote($tag); 1937 if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}', 1938 $text, $matches)) 1939 { 1940 # End marker found: pass text unchanged until marker. 1941 $parsed .= $tag . $matches[0]; 1942 $text = substr($text, strlen($matches[0])); 1943 } 1944 else { 1945 # Unmatched marker: just skip it. 1946 $parsed .= $tag; 1947 } 1948 } 1949 # 1950 # Check for: Fenced code block marker. 1951 # 1952 else if (preg_match('{^\n?[ ]{0,'.($indent+3).'}~}', $tag)) { 1953 # Fenced code block marker: find matching end marker. 1954 $tag_re = preg_quote(trim($tag)); 1955 if (preg_match('{^(?>.*\n)+?[ ]{0,'.($indent).'}'.$tag_re.'[ ]*\n}', $text, 1956 $matches)) 1957 { 1958 # End marker found: pass text unchanged until marker. 1959 $parsed .= $tag . $matches[0]; 1960 $text = substr($text, strlen($matches[0])); 1961 } 1962 else { 1963 # No end marker: just skip it. 1964 $parsed .= $tag; 1965 } 1966 } 1967 # 1968 # Check for: Indented code block. 1969 # 1970 else if ($tag[0] == "\n" || $tag[0] == " ") { 1971 # Indented code block: pass it unchanged, will be handled 1972 # later. 1973 $parsed .= $tag; 1974 } 1975 # 1976 # Check for: Opening Block level tag or 1977 # Opening Context Block tag (like ins and del) 1978 # used as a block tag (tag is alone on it's line). 1979 # 1980 else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || 1981 ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 1982 preg_match($newline_before_re, $parsed) && 1983 preg_match($newline_after_re, $text) ) 1984 ) 1985 { 1986 # Need to parse tag and following text using the HTML parser. 1987 list($block_text, $text) = 1988 $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 1989 1990 # Make sure it stays outside of any paragraph by adding newlines. 
function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
#
# Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
#
# *   Calls $hash_method to convert any blocks.
# *   Stops when the first opening tag closes.
# *   $md_attr indicates if the use of the `markdown="1"` attribute is
#     allowed (it is not inside clean tags). The attribute is honoured
#     only when its value is exactly `1`, `block` or `span`.
#
# Returns an array of that form: ( processed text , remaining text )
#
    if ($text === '') return array('', '');

    # Regex to match `markdown` attribute inside of a tag.
    $markdown_attr_re = '
        {
            \s*         # Eat whitespace before the `markdown` attribute
            markdown
            \s*=\s*
            (?>
                (["\'])     # $1: quote delimiter
                (.*?)       # $2: attribute value
                \1          # matching delimiter
            |
                ([^\s>]*)   # $3: unquoted attribute value
            )
            ()              # $4: make $3 always defined (avoid warnings)
        }xs';

    # Regex to match any tag.
    $tag_re = '{
            (                   # $2: Capture hole tag.
                </?                 # Any opening or closing tag.
                    [\w:$]+         # Tag name.
                    (?:
                        (?=[\s"\'/a-zA-Z0-9])   # Allowed characters after tag name.
                        (?>
                            ".*?"       |   # Double quotes (can contain `>`)
                            \'.*?\'     |   # Single quotes (can contain `>`)
                            .+?             # Anything but quotes and `>`.
                        )*?
                    )?
                >                   # End of tag.
            |
                <!--    .*?     --> # HTML Comment
            |
                <\?.*?\?> | <%.*?%> # Processing instruction
            |
                <!\[CDATA\[.*?\]\]> # CData Block
            )
        }xs';

    $original_text = $text;     # Save original text in case of failure.

    $depth      = 0;    # Current depth inside the tag tree.
    $block_text = "";   # Temporary text holder for current text.
    $parsed     = "";   # Parsed text that will be returned.

    #
    # Get the name of the starting tag.
    # (This pattern makes $base_tag_name_re safe without quoting.)
    # The match fails for text starting with a comment or a processing
    # instruction; the name is then left empty instead of undefined.
    #
    $base_tag_name_re = '';
    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
        $base_tag_name_re = $matches[1];

    #
    # Loop through every tag until we find the corresponding closing tag.
    #
    do {
        #
        # Split the text using the first $tag_re pattern found.
        # Text before pattern will be first in the array, text after
        # pattern will be at the end, and between will be any catches made
        # by the pattern.
        #
        $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

        if (count($parts) < 3) {
            #
            # End of $text reached with unbalanced tag(s).
            # In that case, we return original text unchanged and pass the
            # first character as filtered to prevent an infinite loop in the
            # parent function.
            #
            return array($original_text[0], substr($original_text, 1));
        }

        $block_text .= $parts[0];   # Text before current tag.
        $tag         = $parts[1];   # Tag to handle.
        $text        = $parts[2];   # Remaining text after current tag.

        #
        # Check for: Auto-close tag (like <hr/>)
        #            Comments and Processing Instructions.
        #
        if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
            $tag[1] == '!' || $tag[1] == '?')
        {
            # Just add the tag to the block as if it was text.
            $block_text .= $tag;
        }
        else {
            #
            # Increase/decrease nested tag count. Only do so if
            # the tag's name match base tag's.
            #
            if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
                if ($tag[1] == '/')                     $depth--;
                else if ($tag[strlen($tag)-2] != '/')   $depth++;
            }

            #
            # Check for `markdown="1"` attribute and handle it.
            # The alternatives are grouped so that both anchors apply to
            # each of them; ungrouped, the pattern also accepted values
            # such as `10` or `blocked`.
            #
            if ($md_attr &&
                preg_match($markdown_attr_re, $tag, $attr_m) &&
                preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
            {
                # Remove `markdown` attribute from opening tag.
                $tag = preg_replace($markdown_attr_re, '', $tag);

                # Check if text inside this tag must be parsed in span mode.
                $this->mode = $attr_m[2] . $attr_m[3];
                $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
                    preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);

                # Calculate indent before tag.
                if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                    $strlen = $this->utf8_strlen;
                    $indent = $strlen($matches[1], 'UTF-8');
                } else {
                    $indent = 0;
                }

                # End preceding block with this tag.
                $block_text .= $tag;
                $parsed .= $this->$hash_method($block_text);

                # Get enclosing tag name for the ParseMarkdown function.
                # (This pattern makes $tag_name_re safe without quoting.)
                preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                $tag_name_re = $matches[1];

                # Parse the content using the HTML-in-Markdown parser.
                list ($block_text, $text)
                    = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                                                    $tag_name_re, $span_mode);

                # Outdent markdown text.
                if ($indent > 0) {
                    $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                                                $block_text);
                }

                # Append tag content to parsed text.
                if (!$span_mode)    $parsed .= "\n\n$block_text\n\n";
                else                $parsed .= "$block_text";

                # Start over a new block.
                $block_text = "";
            }
            else $block_text .= $tag;
        }

    } while ($depth > 0);

    #
    # Hash last block text that wasn't processed inside the loop.
    #
    $parsed .= $this->$hash_method($block_text);

    return array($parsed, $text);
}
# $2 = Header text 2253 [ ]* 2254 \#* # optional closing #\'s (not counted) 2255 (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute 2256 [ ]* 2257 \n+ 2258 }xm', 2259 array(&$this, '_doHeaders_callback_atx'), $text); 2260 2261 return $text; 2262 } 2263 function _doHeaders_attr($attr) { 2264 if (empty($attr)) return ""; 2265 return " id=\"$attr\""; 2266 } 2267 function _doHeaders_callback_setext($matches) { 2268 if ($matches[3] == '-' && preg_match('{^- }', $matches[1])) 2269 return $matches[0]; 2270 $level = $matches[3][0] == '=' ? 1 : 2; 2271 $attr = $this->_doHeaders_attr($id =& $matches[2]); 2272 $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>"; 2273 return "\n" . $this->hashBlock($block) . "\n\n"; 2274 } 2275 function _doHeaders_callback_atx($matches) { 2276 $level = strlen($matches[1]); 2277 $attr = $this->_doHeaders_attr($id =& $matches[3]); 2278 $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>"; 2279 return "\n" . $this->hashBlock($block) . "\n\n"; 2280 } 2281 2282 2283 function doTables($text) { 2284 # 2285 # Form HTML tables. 2286 # 2287 $less_than_tab = $this->tab_width - 1; 2288 # 2289 # Find tables with leading pipe. 2290 # 2291 # | Header 1 | Header 2 2292 # | -------- | -------- 2293 # | Cell 1 | Cell 2 2294 # | Cell 3 | Cell 4 2295 # 2296 $text = preg_replace_callback(' 2297 { 2298 ^ # Start of a line 2299 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2300 [|] # Optional leading pipe (present) 2301 (.+) \n # $1: Header row (at least one pipe) 2302 2303 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2304 [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline 2305 2306 ( # $3: Cells 2307 (?> 2308 [ ]* # Allowed whitespace. 2309 [|] .* \n # Row content. 2310 )* 2311 ) 2312 (?=\n|\Z) # Stop at final double newline. 2313 }xm', 2314 array(&$this, '_doTable_leadingPipe_callback'), $text); 2315 2316 # 2317 # Find tables without leading pipe. 
2318 # 2319 # Header 1 | Header 2 2320 # -------- | -------- 2321 # Cell 1 | Cell 2 2322 # Cell 3 | Cell 4 2323 # 2324 $text = preg_replace_callback(' 2325 { 2326 ^ # Start of a line 2327 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2328 (\S.*[|].*) \n # $1: Header row (at least one pipe) 2329 2330 [ ]{0,'.$less_than_tab.'} # Allowed whitespace. 2331 ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline 2332 2333 ( # $3: Cells 2334 (?> 2335 .* [|] .* \n # Row content 2336 )* 2337 ) 2338 (?=\n|\Z) # Stop at final double newline. 2339 }xm', 2340 array(&$this, '_DoTable_callback'), $text); 2341 2342 return $text; 2343 } 2344 function _doTable_leadingPipe_callback($matches) { 2345 $head = $matches[1]; 2346 $underline = $matches[2]; 2347 $content = $matches[3]; 2348 2349 # Remove leading pipe for each row. 2350 $content = preg_replace('/^ *[|]/m', '', $content); 2351 2352 return $this->_doTable_callback(array($matches[0], $head, $underline, $content)); 2353 } 2354 function _doTable_callback($matches) { 2355 $head = $matches[1]; 2356 $underline = $matches[2]; 2357 $content = $matches[3]; 2358 2359 # Remove any tailing pipes for each line. 2360 $head = preg_replace('/[|] *$/m', '', $head); 2361 $underline = preg_replace('/[|] *$/m', '', $underline); 2362 $content = preg_replace('/[|] *$/m', '', $content); 2363 2364 # Reading alignement from header underline. 2365 $separators = preg_split('/ *[|] */', $underline); 2366 foreach ($separators as $n => $s) { 2367 if (preg_match('/^ *-+: *$/', $s)) $attr[$n] = ' align="right"'; 2368 else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"'; 2369 else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"'; 2370 else $attr[$n] = ''; 2371 } 2372 2373 # Parsing span elements, including code spans, character escapes, 2374 # and inline HTML tags, so that pipes inside those gets ignored. 
2375 $head = $this->parseSpan($head); 2376 $headers = preg_split('/ *[|] */', $head); 2377 $col_count = count($headers); 2378 2379 # Write column headers. 2380 $text = "<table>\n"; 2381 $text .= "<thead>\n"; 2382 $text .= "<tr>\n"; 2383 foreach ($headers as $n => $header) 2384 $text .= " <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n"; 2385 $text .= "</tr>\n"; 2386 $text .= "</thead>\n"; 2387 2388 # Split content by row. 2389 $rows = explode("\n", trim($content, "\n")); 2390 2391 $text .= "<tbody>\n"; 2392 foreach ($rows as $row) { 2393 # Parsing span elements, including code spans, character escapes, 2394 # and inline HTML tags, so that pipes inside those gets ignored. 2395 $row = $this->parseSpan($row); 2396 2397 # Split row by cell. 2398 $row_cells = preg_split('/ *[|] */', $row, $col_count); 2399 $row_cells = array_pad($row_cells, $col_count, ''); 2400 2401 $text .= "<tr>\n"; 2402 foreach ($row_cells as $n => $cell) 2403 $text .= " <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n"; 2404 $text .= "</tr>\n"; 2405 } 2406 $text .= "</tbody>\n"; 2407 $text .= "</table>"; 2408 2409 return $this->hashBlock($text) . "\n"; 2410 } 2411 2412 2413 function doDefLists($text) { 2414 # 2415 # Form HTML definition lists. 2416 # 2417 $less_than_tab = $this->tab_width - 1; 2418 2419 # Re-usable pattern to match any entire dl list: 2420 $whole_list_re = '(?> 2421 ( # $1 = whole list 2422 ( # $2 2423 [ ]{0,'.$less_than_tab.'} 2424 ((?>.*\S.*\n)+) # $3 = defined term 2425 \n? 2426 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2427 ) 2428 (?s:.+?) 2429 ( # $4 2430 \z 2431 | 2432 \n{2,} 2433 (?=\S) 2434 (?! # Negative lookahead for another term 2435 [ ]{0,'.$less_than_tab.'} 2436 (?: \S.*\n )+? # defined term 2437 \n? 2438 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2439 ) 2440 (?! 
# Negative lookahead for another definition 2441 [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition 2442 ) 2443 ) 2444 ) 2445 )'; // mx 2446 2447 $text = preg_replace_callback('{ 2448 (?>\A\n?|(?<=\n\n)) 2449 '.$whole_list_re.' 2450 }mx', 2451 array(&$this, '_doDefLists_callback'), $text); 2452 2453 return $text; 2454 } 2455 function _doDefLists_callback($matches) { 2456 # Re-usable patterns to match list item bullets and number markers: 2457 $list = $matches[1]; 2458 2459 # Turn double returns into triple returns, so that we can make a 2460 # paragraph for the last item in a list, if necessary: 2461 $result = trim($this->processDefListItems($list)); 2462 $result = "<dl>\n" . $result . "\n</dl>"; 2463 return $this->hashBlock($result) . "\n\n"; 2464 } 2465 2466 2467 function processDefListItems($list_str) { 2468 # 2469 # Process the contents of a single definition list, splitting it 2470 # into individual term and definition list items. 2471 # 2472 $less_than_tab = $this->tab_width - 1; 2473 2474 # trim trailing blank lines: 2475 $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str); 2476 2477 # Process definition terms. 2478 $list_str = preg_replace_callback('{ 2479 (?>\A\n?|\n\n+) # leading line 2480 ( # definition terms = $1 2481 [ ]{0,'.$less_than_tab.'} # leading whitespace 2482 (?![:][ ]|[ ]) # negative lookahead for a definition 2483 # mark (colon) or more whitespace. 2484 (?> \S.* \n)+? # actual term (not whitespace). 2485 ) 2486 (?=\n?[ ]{0,3}:[ ]) # lookahead for following line feed 2487 # with a definition mark. 2488 }xm', 2489 array(&$this, '_processDefListItems_callback_dt'), $list_str); 2490 2491 # Process actual definitions. 2492 $list_str = preg_replace_callback('{ 2493 \n(\n+)? 
# leading line = $1 2494 ( # marker space = $2 2495 [ ]{0,'.$less_than_tab.'} # whitespace before colon 2496 [:][ ]+ # definition mark (colon) 2497 ) 2498 ((?s:.+?)) # definition text = $3 2499 (?= \n+ # stop at next definition mark, 2500 (?: # next term or end of text 2501 [ ]{0,'.$less_than_tab.'} [:][ ] | 2502 <dt> | \z 2503 ) 2504 ) 2505 }xm', 2506 array(&$this, '_processDefListItems_callback_dd'), $list_str); 2507 2508 return $list_str; 2509 } 2510 function _processDefListItems_callback_dt($matches) { 2511 $terms = explode("\n", trim($matches[1])); 2512 $text = ''; 2513 foreach ($terms as $term) { 2514 $term = $this->runSpanGamut(trim($term)); 2515 $text .= "\n<dt>" . $term . "</dt>"; 2516 } 2517 return $text . "\n"; 2518 } 2519 function _processDefListItems_callback_dd($matches) { 2520 $leading_line = $matches[1]; 2521 $marker_space = $matches[2]; 2522 $def = $matches[3]; 2523 2524 if ($leading_line || preg_match('/\n{2,}/', $def)) { 2525 # Replace marker with the appropriate whitespace indentation 2526 $def = str_repeat(' ', strlen($marker_space)) . $def; 2527 $def = $this->runBlockGamut($this->outdent($def . "\n\n")); 2528 $def = "\n". $def ."\n"; 2529 } 2530 else { 2531 $def = rtrim($def); 2532 $def = $this->runSpanGamut($this->outdent($def)); 2533 } 2534 2535 return "\n<dd>" . $def . "</dd>\n"; 2536 } 2537 2538 2539 function doFencedCodeBlocks($text) { 2540 # 2541 # Adding the fenced code block syntax to regular Markdown: 2542 # 2543 # ~~~ 2544 # Code block 2545 # ~~~ 2546 # 2547 $less_than_tab = $this->tab_width; 2548 2549 $text = preg_replace_callback('{ 2550 (?:\n|\A) 2551 # 1: Opening marker 2552 ( 2553 ~{3,} # Marker: three tilde or more. 2554 ) 2555 [ ]* \n # Whitespace and newline following marker. 2556 2557 # 2: Content 2558 ( 2559 (?> 2560 (?!\1 [ ]* \n) # Not a closing marker. 2561 .*\n+ 2562 )+ 2563 ) 2564 2565 # Closing marker. 
2566 \1 [ ]* \n 2567 }xm', 2568 array(&$this, '_doFencedCodeBlocks_callback'), $text); 2569 2570 return $text; 2571 } 2572 function _doFencedCodeBlocks_callback($matches) { 2573 $codeblock = $matches[2]; 2574 $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES); 2575 $codeblock = preg_replace_callback('/^\n+/', 2576 array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock); 2577 $codeblock = "<pre><code>$codeblock</code></pre>"; 2578 return "\n\n".$this->hashBlock($codeblock)."\n\n"; 2579 } 2580 function _doFencedCodeBlocks_newlines($matches) { 2581 return str_repeat("<br$this->empty_element_suffix", 2582 strlen($matches[0])); 2583 } 2584 2585 2586 # 2587 # Redefining emphasis markers so that emphasis by underscore does not 2588 # work in the middle of a word. 2589 # 2590 var $em_relist = array( 2591 '' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)', 2592 '*' => '(?<=\S|^)(?<!\*)\*(?!\*)', 2593 '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])', 2594 ); 2595 var $strong_relist = array( 2596 '' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)', 2597 '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)', 2598 '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])', 2599 ); 2600 var $em_strong_relist = array( 2601 '' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)', 2602 '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)', 2603 '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])', 2604 ); 2605 2606 2607 function formParagraphs($text) { 2608 # 2609 # Params: 2610 # $text - string to process with html <p> tags 2611 # 2612 # Strip leading and trailing lines: 2613 $text = preg_replace('/\A\n+|\n+\z/', '', $text); 2614 2615 $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY); 2616 2617 # 2618 # Wrap <p> tags and unhashify HTML blocks 2619 # 2620 foreach ($grafs as $key => $value) { 2621 $value = trim($this->runSpanGamut($value)); 2622 2623 # Check if this should be enclosed in a paragraph. 
2624 # Clean tag hashes & block tag hashes are left alone. 2625 $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value); 2626 2627 if ($is_p) { 2628 $value = "<p>$value</p>"; 2629 } 2630 $grafs[$key] = $value; 2631 } 2632 2633 # Join grafs in one text, then unhash HTML tags. 2634 $text = implode("\n\n", $grafs); 2635 2636 # Finish by removing any tag hashes still present in $text. 2637 $text = $this->unhash($text); 2638 2639 return $text; 2640 } 2641 2642 2643 ### Footnotes 2644 2645 function stripFootnotes($text) { 2646 # 2647 # Strips link definitions from text, stores the URLs and titles in 2648 # hash references. 2649 # 2650 $less_than_tab = $this->tab_width - 1; 2651 2652 # Link defs are in the form: [^id]: url "optional title" 2653 $text = preg_replace_callback('{ 2654 ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?: # note_id = $1 2655 [ ]* 2656 \n? # maybe *one* newline 2657 ( # text = $2 (no blank lines allowed) 2658 (?: 2659 .+ # actual text 2660 | 2661 \n # newlines but 2662 (?!\[\^.+?\]:\s)# negative lookahead for footnote marker. 2663 (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 2664 # by non-indented content 2665 )* 2666 ) 2667 }xm', 2668 array(&$this, '_stripFootnotes_callback'), 2669 $text); 2670 return $text; 2671 } 2672 function _stripFootnotes_callback($matches) { 2673 $note_id = $this->fn_id_prefix . $matches[1]; 2674 $this->footnotes[$note_id] = $this->outdent($matches[2]); 2675 return ''; # String that will replace the block 2676 } 2677 2678 2679 function doFootnotes($text) { 2680 # 2681 # Replace footnote references in $text [^id] with a special text-token 2682 # which will be replaced by the actual footnote marker in appendFootnotes. 2683 # 2684 if (!$this->in_anchor) { 2685 $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text); 2686 } 2687 return $text; 2688 } 2689 2690 2691 function appendFootnotes($text) { 2692 # 2693 # Append footnote list to text. 
2694 # 2695 $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 2696 array(&$this, '_appendFootnotes_callback'), $text); 2697 2698 if (!empty($this->footnotes_ordered)) { 2699 $text .= "\n\n"; 2700 $text .= "<div class=\"footnotes\">\n"; 2701 $text .= "<hr". $this->empty_element_suffix ."\n"; 2702 $text .= "<ol>\n\n"; 2703 2704 $attr = " rev=\"footnote\""; 2705 if ($this->fn_backlink_class != "") { 2706 $class = $this->fn_backlink_class; 2707 $class = $this->encodeAttribute($class); 2708 $attr .= " class=\"$class\""; 2709 } 2710 if ($this->fn_backlink_title != "") { 2711 $title = $this->fn_backlink_title; 2712 $title = $this->encodeAttribute($title); 2713 $attr .= " title=\"$title\""; 2714 } 2715 $num = 0; 2716 2717 while (!empty($this->footnotes_ordered)) { 2718 $footnote = reset($this->footnotes_ordered); 2719 $note_id = key($this->footnotes_ordered); 2720 unset($this->footnotes_ordered[$note_id]); 2721 2722 $footnote .= "\n"; # Need to append newline before parsing. 2723 $footnote = $this->runBlockGamut("$footnote\n"); 2724 $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 2725 array(&$this, '_appendFootnotes_callback'), $footnote); 2726 2727 $attr = str_replace("%%", ++$num, $attr); 2728 $note_id = $this->encodeAttribute($note_id); 2729 2730 # Add backlink to last paragraph; create new paragraph if needed. 2731 $backlink = "<a href=\"#fnref:$note_id\"$attr>↩</a>"; 2732 if (preg_match('{</p>$}', $footnote)) { 2733 $footnote = substr($footnote, 0, -4) . " $backlink</p>"; 2734 } else { 2735 $footnote .= "\n\n<p>$backlink</p>"; 2736 } 2737 2738 $text .= "<li id=\"fn:$note_id\">\n"; 2739 $text .= $footnote . "\n"; 2740 $text .= "</li>\n\n"; 2741 } 2742 2743 $text .= "</ol>\n"; 2744 $text .= "</div>"; 2745 } 2746 return $text; 2747 } 2748 function _appendFootnotes_callback($matches) { 2749 $node_id = $this->fn_id_prefix . 
$matches[1]; 2750 2751 # Create footnote marker only if it has a corresponding footnote *and* 2752 # the footnote hasn't been used by another marker. 2753 if (isset($this->footnotes[$node_id])) { 2754 # Transfert footnote content to the ordered list. 2755 $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id]; 2756 unset($this->footnotes[$node_id]); 2757 2758 $num = $this->footnote_counter++; 2759 $attr = " rel=\"footnote\""; 2760 if ($this->fn_link_class != "") { 2761 $class = $this->fn_link_class; 2762 $class = $this->encodeAttribute($class); 2763 $attr .= " class=\"$class\""; 2764 } 2765 if ($this->fn_link_title != "") { 2766 $title = $this->fn_link_title; 2767 $title = $this->encodeAttribute($title); 2768 $attr .= " title=\"$title\""; 2769 } 2770 2771 $attr = str_replace("%%", $num, $attr); 2772 $node_id = $this->encodeAttribute($node_id); 2773 2774 return 2775 "<sup id=\"fnref:$node_id\">". 2776 "<a href=\"#fn:$node_id\"$attr>$num</a>". 2777 "</sup>"; 2778 } 2779 2780 return "[^".$matches[1]."]"; 2781 } 2782 2783 2784 ### Abbreviations ### 2785 2786 function stripAbbreviations($text) { 2787 # 2788 # Strips abbreviations from text, stores titles in hash references. 
2789 # 2790 $less_than_tab = $this->tab_width - 1; 2791 2792 # Link defs are in the form: [id]*: url "optional title" 2793 $text = preg_replace_callback('{ 2794 ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?: # abbr_id = $1 2795 (.*) # text = $2 (no blank lines allowed) 2796 }xm', 2797 array(&$this, '_stripAbbreviations_callback'), 2798 $text); 2799 return $text; 2800 } 2801 function _stripAbbreviations_callback($matches) { 2802 $abbr_word = $matches[1]; 2803 $abbr_desc = $matches[2]; 2804 if ($this->abbr_word_re) 2805 $this->abbr_word_re .= '|'; 2806 $this->abbr_word_re .= preg_quote($abbr_word); 2807 $this->abbr_desciptions[$abbr_word] = trim($abbr_desc); 2808 return ''; # String that will replace the block 2809 } 2810 2811 2812 function doAbbreviations($text) { 2813 # 2814 # Find defined abbreviations in text and wrap them in <abbr> elements. 2815 # 2816 if ($this->abbr_word_re) { 2817 // cannot use the /x modifier because abbr_word_re may 2818 // contain significant spaces: 2819 $text = preg_replace_callback('{'. 2820 '(?<![\w\x1A])'. 2821 '(?:'.$this->abbr_word_re.')'. 2822 '(?![\w\x1A])'. 2823 '}', 2824 array(&$this, '_doAbbreviations_callback'), $text); 2825 } 2826 return $text; 2827 } 2828 function _doAbbreviations_callback($matches) { 2829 $abbr = $matches[0]; 2830 if (isset($this->abbr_desciptions[$abbr])) { 2831 $desc = $this->abbr_desciptions[$abbr]; 2832 if (empty($desc)) { 2833 return $this->hashPart("<abbr>$abbr</abbr>"); 2834 } else { 2835 $desc = $this->encodeAttribute($desc); 2836 return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>"); 2837 } 2838 } else { 2839 return $matches[0]; 2840 } 2841 } 2842 2843} 2844 2845 2846/* 2847 2848PHP Markdown Extra 2849================== 2850 2851Description 2852----------- 2853 2854This is a PHP port of the original Markdown formatter written in Perl 2855by John Gruber. 
This special "Extra" version of PHP Markdown features
further enhancements to the syntax for making additional constructs
such as tables and definition lists.

Markdown is a text-to-HTML filter; it translates an easy-to-read /
easy-to-write structured text format into HTML. Markdown's text format
is most similar to that of plain text email, and supports features such
as headers, *emphasis*, code blocks, blockquotes, and links.

Markdown's syntax is designed not as a generic markup language, but
specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block-level
HTML tags (like <div> and <table>) as well.

For more information about Markdown's syntax, see:

<http://daringfireball.net/projects/markdown/>


Bugs
----

To file bug reports please send email to:

<michel.fortin@michelf.com>

Please include with your report: (1) the example input; (2) the output you
expected; (3) the output Markdown actually produced.


Version History
---------------

See the readme file for detailed release notes for this version.


Copyright and License
---------------------

PHP Markdown & Extra
Copyright (c) 2004-2009 Michel Fortin
<http://michelf.com/>
All rights reserved.

Based on Markdown
Copyright (c) 2003-2006 John Gruber
<http://daringfireball.net/>
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright notice,
  this list of conditions and the following disclaimer.
2910 2911* Redistributions in binary form must reproduce the above copyright 2912 notice, this list of conditions and the following disclaimer in the 2913 documentation and/or other materials provided with the distribution. 2914 2915* Neither the name "Markdown" nor the names of its contributors may 2916 be used to endorse or promote products derived from this software 2917 without specific prior written permission. 2918 2919This software is provided by the copyright holders and contributors "as 2920is" and any express or implied warranties, including, but not limited 2921to, the implied warranties of merchantability and fitness for a 2922particular purpose are disclaimed. In no event shall the copyright owner 2923or contributors be liable for any direct, indirect, incidental, special, 2924exemplary, or consequential damages (including, but not limited to, 2925procurement of substitute goods or services; loss of use, data, or 2926profits; or business interruption) however caused and on any theory of 2927liability, whether in contract, strict liability, or tort (including 2928negligence or otherwise) arising in any way out of the use of this 2929software, even if advised of the possibility of such damage. 2930 2931*/ 2932?> 2933