# Markdown Extra - A text-to-HTML conversion tool for web writers␊ |
#␊ |
# PHP Markdown & Extra␊ |
# Copyright (c) 2004-2008 Michel Fortin ␊ |
# <http://www.michelf.com/projects/php-markdown/>␊ |
# Copyright (c) 2004-2009 Michel Fortin ␊ |
# <http://michelf.com/projects/php-markdown/>␊ |
#␊ |
# Original Markdown␊ |
# Copyright (c) 2004-2006 John Gruber ␊ |
|
#␊ |
␊ |
␊ |
define( 'MARKDOWN_VERSION', "1.0.1l" ); # Sun 11 May 2008␊ |
define( 'MARKDOWNEXTRA_VERSION', "1.2.1" ); # Tue 27 May 2008␊ |
define( 'MARKDOWN_VERSION', "1.0.1n" ); # Sat 10 Oct 2009␊ |
define( 'MARKDOWNEXTRA_VERSION', "1.2.4" ); # Sat 10 Oct 2009␊ |
␊ |
␊ |
#␊ |
|
@define( 'MARKDOWN_FN_BACKLINK_CLASS', "" );␊ |
␊ |
␊ |
#␊ |
# WordPress settings:␊ |
#␊ |
␊ |
# Change to false to remove Markdown from posts and/or comments.␊ |
@define( 'MARKDOWN_WP_POSTS', true );␊ |
@define( 'MARKDOWN_WP_COMMENTS', true );␊ |
␊ |
␊ |
␊ |
### Standard Function Interface ###␊ |
␊ |
|
}␊ |
␊ |
function Pluf_Text_MarkDown_parse($text) {␊ |
#␊ |
# Initialize the parser and return the result of its transform method.␊ |
#␊ |
␉# Setup static parser variable.␊ |
␉static $parser;␊ |
␉if (!isset($parser)) {␊ |
␉␉$parser_class = MARKDOWN_PARSER_CLASS;␊ |
␉␉$parser = new $parser_class;␊ |
␉}␊ |
␊ |
␉# Transform text using parser.␊ |
␉return $parser->transform($text);␊ |
return Markdown($text);␊ |
}␊ |
␊ |
␊ |
␊ |
#␊ |
# Markdown Parser Class␊ |
#␊ |
|
␉# Regex to match balanced [brackets].␊ |
␉# Needed to insert a maximum bracked depth while converting to PHP.␊ |
␉var $nested_brackets_depth = 6;␊ |
␉var $nested_brackets;␊ |
␉var $nested_brackets_re;␊ |
␉␊ |
␉var $nested_url_parenthesis_depth = 4;␊ |
␉var $nested_url_parenthesis;␊ |
␉var $nested_url_parenthesis_re;␊ |
␊ |
␉# Table of hash values for escaped characters:␊ |
␉var $escape_chars = '\`*_{}[]()>#+-.!';␊ |
␉␊ |
␉# Regular expression to catch extra attributes.␊ |
␉var $attr_regex;␊ |
␉var $escape_chars_re;␊ |
␊ |
␉# Change to ">" for HTML output.␊ |
␉var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;␊ |
|
␉# Constructor function. Initialize appropriate member variables.␊ |
␉#␊ |
␉␉$this->_initDetab();␊ |
␉␉$this->prepareItalicsAndBold();␊ |
␉␊ |
␉␉$this->nested_brackets = ␊ |
␉␉$this->nested_brackets_re = ␊ |
␉␉␉str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).␊ |
␉␉␉str_repeat('\])*', $this->nested_brackets_depth);␊ |
␉␊ |
␉␉$this->nested_url_parenthesis = ␊ |
␉␉$this->nested_url_parenthesis_re = ␊ |
␉␉␉str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).␊ |
␉␉␉str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);␊ |
␉␉␊ |
␉␉$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';␊ |
␉␉␊ |
␉␉# Sort document, block, and span gamut in ascendent priority order.␊ |
␉␉asort($this->document_gamut);␊ |
␉␉asort($this->block_gamut);␊ |
|
␉#␊ |
␉␉$this->setup();␊ |
␉␊ |
␉␉# Remove UTF-8 BOM, if present.␊ |
␉␉$text = preg_replace('{^\xEF\xBB\xBF}', '', $text);␊ |
␉␉# Remove UTF-8 BOM and marker character in input, if present.␊ |
␉␉$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);␊ |
␊ |
␉␉# Standardize line endings:␊ |
␉␉# DOS to Unix and Mac to Unix␊ |
|
␉␉␉␉␉␉␉ [ ]*␊ |
␉␉␉␉␉␉␉ \n?␉␉␉␉# maybe *one* newline␊ |
␉␉␉␉␉␉␉ [ ]*␊ |
␉␉␉␉␉␉␉<?(\S+?)>?␉␉␉# url = $2␊ |
␉␉␉␉␉␉␉(?:␊ |
␉␉␉␉␉␉␉ <(.+?)>␉␉␉# url = $2␊ |
␉␉␉␉␉␉␉|␊ |
␉␉␉␉␉␉␉ (\S+?)␉␉␉# url = $3␊ |
␉␉␉␉␉␉␉)␊ |
␉␉␉␉␉␉␉ [ ]*␊ |
␉␉␉␉␉␉␉ \n?␉␉␉␉# maybe one newline␊ |
␉␉␉␉␉␉␉ [ ]*␊ |
␉␉␉␉␉␉␉(?:␊ |
␉␉␉␉␉␉␉␉(?<=\s)␉␉␉# lookbehind for whitespace␊ |
␉␉␉␉␉␉␉␉["(]␊ |
␉␉␉␉␉␉␉␉(.*?)␉␉␉# title = $3␊ |
␉␉␉␉␉␉␉␉(.*?)␉␉␉# title = $4␊ |
␉␉␉␉␉␉␉␉[")]␊ |
␉␉␉␉␉␉␉␉[ ]*␊ |
␉␉␉␉␉␉␉)?␉# title is optional␊ |
|
␉}␊ |
␉function _stripLinkDefinitions_callback($matches) {␊ |
␉␉$link_id = strtolower($matches[1]);␊ |
␉␉$this->urls[$link_id] = $this->encodeAttribute($matches[2]);␊ |
␉␉if (isset($matches[3]))␊ |
␉␉␉$this->titles[$link_id] = $this->encodeAttribute($matches[3]);␊ |
␉␉$url = $matches[2] == '' ? $matches[3] : $matches[2];␊ |
␉␉$this->urls[$link_id] = $url;␊ |
␉␉$this->titles[$link_id] =& $matches[4];␊ |
␉␉return ''; # String that will replace the block␊ |
␉}␊ |
␊ |
|
␉␉# inline later.␊ |
␉␉# * List "b" is made of tags which are always block-level;␊ |
␉␉#␊ |
␉␉$block_tags_a = 'ins|del';␊ |
␉␉$block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.␊ |
␉␉␉␉␉␉'script|noscript|form|fieldset|iframe|math';␊ |
␉␉$block_tags_a_re = 'ins|del';␊ |
␉␉$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.␊ |
␉␉␉␉␉␉ 'script|noscript|form|fieldset|iframe|math';␊ |
␊ |
␉␉# Regular expression for the content of a block tag.␊ |
␉␉$nested_tags_level = 4;␊ |
|
␉␉␉ # in between.␊ |
␉␉␉␉␉␊ |
␉␉␉␉␉␉[ ]{0,'.$less_than_tab.'}␊ |
␉␉␉␉␉␉<('.$block_tags_b.')# start tag = $2␊ |
␉␉␉␉␉␉<('.$block_tags_b_re.')# start tag = $2␊ |
␉␉␉␉␉␉'.$attr.'>␉␉␉# attributes followed by > and \n␊ |
␉␉␉␉␉␉'.$content.'␉␉# content, support nesting␊ |
␉␉␉␉␉␉</\2>␉␉␉␉# the matching end tag␊ |
|
␉␉␉| # Special version for tags of group a.␊ |
␊ |
␉␉␉␉␉␉[ ]{0,'.$less_than_tab.'}␊ |
␉␉␉␉␉␉<('.$block_tags_a.')# start tag = $3␊ |
␉␉␉␉␉␉<('.$block_tags_a_re.')# start tag = $3␊ |
␉␉␉␉␉␉'.$attr.'>[ ]*\n␉# attributes followed by >␊ |
␉␉␉␉␉␉'.$content2.'␉␉# content, support nesting␊ |
␉␉␉␉␉␉</\3>␉␉␉␉# the matching end tag␊ |
|
␉␉$text = preg_replace_callback('{␊ |
␉␉␉(␉␉␉␉␉# wrap whole match in $1␊ |
␉␉␉ \[␊ |
␉␉␉␉('.$this->nested_brackets.')␉# link text = $2␊ |
␉␉␉␉('.$this->nested_brackets_re.')␉# link text = $2␊ |
␉␉␉ \]␊ |
␊ |
␉␉␉ [ ]?␉␉␉␉# one optional space␊ |
|
␉␉$text = preg_replace_callback('{␊ |
␉␉␉(␉␉␉␉# wrap whole match in $1␊ |
␉␉␉ \[␊ |
␉␉␉␉('.$this->nested_brackets.')␉# link text = $2␊ |
␉␉␉␉('.$this->nested_brackets_re.')␉# link text = $2␊ |
␉␉␉ \]␊ |
␉␉␉ \(␉␉␉# literal paren␊ |
␉␉␉␉[ ]*␊ |
␉␉␉␉[ \n]*␊ |
␉␉␉␉(?:␊ |
␉␉␉␉␉<(\S*)>␉# href = $3␊ |
␉␉␉␉␉<(.+?)>␉# href = $3␊ |
␉␉␉␉|␊ |
␉␉␉␉␉('.$this->nested_url_parenthesis.')␉# href = $4␊ |
␉␉␉␉␉('.$this->nested_url_parenthesis_re.')␉# href = $4␊ |
␉␉␉␉)␊ |
␉␉␉␉[ ]*␊ |
␉␉␉␉[ \n]*␊ |
␉␉␉␉(␉␉␉# $5␊ |
␉␉␉␉ ([\'"])␉# quote char = $6␊ |
␉␉␉␉ (.*?)␉␉# Title = $7␊ |
␉␉␉␉ \6␉␉# matching quote␊ |
␉␉␉␉ [ ]*␉# ignore any spaces/tabs between closing quote and )␊ |
␉␉␉␉ [ \n]*␉# ignore any spaces/tabs between closing quote and )␊ |
␉␉␉␉)?␉␉␉# title is optional␊ |
␉␉␉ \)␊ |
␉␉␉)␊ |
␉␉␉}xs',␊ |
␉␉␉array(&$this, '_DoAnchors_inline_callback'), $text);␊ |
␉␉␉array(&$this, '_doAnchors_inline_callback'), $text);␊ |
␊ |
␉␉#␊ |
␉␉# Last, handle reference-style shortcuts: [link text]␊ |
␉␉# These must come last in case you've also got [link test][1]␊ |
␉␉# or [link test](/foo)␊ |
␉␉# These must come last in case you've also got [link text][1]␊ |
␉␉# or [link text](/foo)␊ |
␉␉#␊ |
//␉␉$text = preg_replace_callback('{␊ |
//␉␉␉(␉␉␉␉␉# wrap whole match in $1␊ |
//␉␉␉ \[␊ |
//␉␉␉␉([^\[\]]+)␉␉# link text = $2; can\'t contain [ or ]␊ |
//␉␉␉ \]␊ |
//␉␉␉)␊ |
//␉␉␉}xs',␊ |
//␉␉␉array(&$this, '_doAnchors_reference_callback'), $text);␊ |
␉␉$text = preg_replace_callback('{␊ |
␉␉␉(␉␉␉␉␉# wrap whole match in $1␊ |
␉␉␉ \[␊ |
␉␉␉␉([^\[\]]+)␉␉# link text = $2; can\'t contain [ or ]␊ |
␉␉␉ \]␊ |
␉␉␉)␊ |
␉␉␉}xs',␊ |
␉␉␉array(&$this, '_doAnchors_reference_callback'), $text);␊ |
␊ |
␉␉$this->in_anchor = false;␊ |
␉␉return $text;␊ |
|
␉␉$text = preg_replace_callback('{␊ |
␉␉␉(␉␉␉␉# wrap whole match in $1␊ |
␉␉␉ !\[␊ |
␉␉␉␉('.$this->nested_brackets.')␉␉# alt text = $2␊ |
␉␉␉␉('.$this->nested_brackets_re.')␉␉# alt text = $2␊ |
␉␉␉ \]␊ |
␊ |
␉␉␉ [ ]?␉␉␉␉# one optional space␊ |
|
␉␉$text = preg_replace_callback('{␊ |
␉␉␉(␉␉␉␉# wrap whole match in $1␊ |
␉␉␉ !\[␊ |
␉␉␉␉('.$this->nested_brackets.')␉␉# alt text = $2␊ |
␉␉␉␉('.$this->nested_brackets_re.')␉␉# alt text = $2␊ |
␉␉␉ \]␊ |
␉␉␉ \s?␉␉␉# One optional whitespace character␊ |
␉␉␉ \(␉␉␉# literal paren␊ |
␉␉␉␉[ ]*␊ |
␉␉␉␉[ \n]*␊ |
␉␉␉␉(?:␊ |
␉␉␉␉␉<(\S*)>␉# src url = $3␊ |
␉␉␉␉|␊ |
␉␉␉␉␉('.$this->nested_url_parenthesis.')␉# src url = $4␊ |
␉␉␉␉␉('.$this->nested_url_parenthesis_re.')␉# src url = $4␊ |
␉␉␉␉)␊ |
␉␉␉␉[ ]*␊ |
␉␉␉␉[ \n]*␊ |
␉␉␉␉(␉␉␉# $5␊ |
␉␉␉␉ ([\'"])␉# quote char = $6␊ |
␉␉␉␉ (.*?)␉␉# title = $7␊ |
␉␉␉␉ \6␉␉# matching quote␊ |
␉␉␉␉ [ ]*␊ |
␉␉␉␉ [ \n]*␊ |
␉␉␉␉)?␉␉␉# title is optional␊ |
␉␉␉ \)␊ |
␉␉␉)␊ |
|
␊ |
␉␉$alt_text = $this->encodeAttribute($alt_text);␊ |
␉␉if (isset($this->urls[$link_id])) {␊ |
␉␉␉$url = $this->urls[$link_id];␊ |
␉␉␉$url = $this->encodeAttribute($this->urls[$link_id]);␊ |
␉␉␉$result = "<img src=\"$url\" alt=\"$alt_text\"";␊ |
␉␉␉if (isset($this->titles[$link_id])) {␊ |
␉␉␉␉$title = $this->titles[$link_id];␊ |
␉␉␉␉$title = $this->encodeAttribute($title);␊ |
␉␉␉␉$result .= " title=\"$title\"";␊ |
␉␉␉}␊ |
␉␉␉$result .= $this->empty_element_suffix;␊ |
|
␉␉return $text;␊ |
␉}␊ |
␉function _doHeaders_callback_setext($matches) {␊ |
␉␉# Terrible hack to check we haven't found an empty list item.␊ |
␉␉if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))␊ |
␉␉␉return $matches[0];␊ |
␉␉␊ |
␉␉$level = $matches[2]{0} == '=' ? 1 : 2;␊ |
␉␉$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";␊ |
␉␉return "\n" . $this->hashBlock($block) . "\n\n";␊ |
|
␉␉$less_than_tab = $this->tab_width - 1;␊ |
␊ |
␉␉# Re-usable patterns to match list item bullets and number markers:␊ |
␉␉$marker_ul = '[*+-]';␊ |
␉␉$marker_ol = '\d+[.]';␊ |
␉␉$marker_any = "(?:$marker_ul|$marker_ol)";␊ |
␉␉$marker_ul_re = '[*+-]';␊ |
␉␉$marker_ol_re = '\d+[.]';␊ |
␉␉$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";␊ |
␊ |
␉␉$markers = array($marker_ul, $marker_ol);␊ |
␉␉$markers_relist = array(␊ |
␉␉␉$marker_ul_re => $marker_ol_re,␊ |
␉␉␉$marker_ol_re => $marker_ul_re,␊ |
␉␉␉);␊ |
␊ |
␉␉foreach ($markers as $marker) {␊ |
␉␉foreach ($markers_relist as $marker_re => $other_marker_re) {␊ |
␉␉␉# Re-usable pattern to match any entirel ul or ol list:␊ |
␉␉␉$whole_list = '␊ |
␉␉␉$whole_list_re = '␊ |
␉␉␉␉(␉␉␉␉␉␉␉␉# $1 = whole list␊ |
␉␉␉␉ (␉␉␉␉␉␉␉␉# $2␊ |
␉␉␉␉␉[ ]{0,'.$less_than_tab.'}␊ |
␉␉␉␉␉('.$marker.')␉␉␉␉# $3 = first list item marker␊ |
␉␉␉␉␉([ ]{0,'.$less_than_tab.'})␉# $3 = number of spaces␊ |
␉␉␉␉␉('.$marker_re.')␉␉␉# $4 = first list item marker␊ |
␉␉␉␉␉[ ]+␊ |
␉␉␉␉ )␊ |
␉␉␉␉ (?s:.+?)␊ |
␉␉␉␉ (␉␉␉␉␉␉␉␉# $4␊ |
␉␉␉␉ (␉␉␉␉␉␉␉␉# $5␊ |
␉␉␉␉␉ \z␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉ \n{2,}␊ |
␉␉␉␉␉ (?=\S)␊ |
␉␉␉␉␉ (?!␉␉␉␉␉␉# Negative lookahead for another list item marker␊ |
␉␉␉␉␉␉[ ]*␊ |
␉␉␉␉␉␉'.$marker.'[ ]+␊ |
␉␉␉␉␉␉'.$marker_re.'[ ]+␊ |
␉␉␉␉␉ )␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉ (?=␉␉␉␉␉␉# Lookahead for another kind of list␊ |
␉␉␉␉␉ \n␊ |
␉␉␉␉␉␉\3␉␉␉␉␉␉# Must have the same indentation␊ |
␉␉␉␉␉␉'.$other_marker_re.'[ ]+␊ |
␉␉␉␉␉ )␊ |
␉␉␉␉ )␊ |
␉␉␉␉)␊ |
|
␉␉␉if ($this->list_level) {␊ |
␉␉␉␉$text = preg_replace_callback('{␊ |
␉␉␉␉␉␉^␊ |
␉␉␉␉␉␉'.$whole_list.'␊ |
␉␉␉␉␉␉'.$whole_list_re.'␊ |
␉␉␉␉␉}mx',␊ |
␉␉␉␉␉array(&$this, '_doLists_callback'), $text);␊ |
␉␉␉}␊ |
␉␉␉else {␊ |
␉␉␉␉$text = preg_replace_callback('{␊ |
␉␉␉␉␉␉(?:(?<=\n)\n|\A\n?) # Must eat the newline␊ |
␉␉␉␉␉␉'.$whole_list.'␊ |
␉␉␉␉␉␉'.$whole_list_re.'␊ |
␉␉␉␉␉}mx',␊ |
␉␉␉␉␉array(&$this, '_doLists_callback'), $text);␊ |
␉␉␉}␊ |
|
␉}␊ |
␉function _doLists_callback($matches) {␊ |
␉␉# Re-usable patterns to match list item bullets and number markers:␊ |
␉␉$marker_ul = '[*+-]';␊ |
␉␉$marker_ol = '\d+[.]';␊ |
␉␉$marker_any = "(?:$marker_ul|$marker_ol)";␊ |
␉␉$marker_ul_re = '[*+-]';␊ |
␉␉$marker_ol_re = '\d+[.]';␊ |
␉␉$marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";␊ |
␉␉␊ |
␉␉$list = $matches[1];␊ |
␉␉$list_type = preg_match("/$marker_ul/", $matches[3]) ? "ul" : "ol";␊ |
␉␉$list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";␊ |
␉␉␊ |
␉␉$marker_any = ( $list_type == "ul" ? $marker_ul : $marker_ol );␊ |
␉␉$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );␊ |
␉␉␊ |
␉␉$list .= "\n";␊ |
␉␉$result = $this->processListItems($list, $marker_any);␊ |
␉␉$result = $this->processListItems($list, $marker_any_re);␊ |
␉␉␊ |
␉␉$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");␊ |
␉␉return "\n". $result ."\n\n";␊ |
|
␊ |
␉var $list_level = 0;␊ |
␊ |
␉function processListItems($list_str, $marker_any) {␊ |
␉function processListItems($list_str, $marker_any_re) {␊ |
␉#␊ |
␉#␉Process the contents of a single ordered or unordered list, splitting it␊ |
␉#␉into individual list items.␊ |
|
␊ |
␉␉$list_str = preg_replace_callback('{␊ |
␉␉␉(\n)?␉␉␉␉␉␉␉# leading line = $1␊ |
␉␉␉(^[ ]*)␉␉␉␉␉␉# leading whitespace = $2␊ |
␉␉␉('.$marker_any.' [ ]+)␉␉# list marker and space = $3␊ |
␉␉␉((?s:.+?))␉␉␉␉␉␉# list item text = $4␊ |
␉␉␉(^[ ]*)␉␉␉␉␉␉␉# leading whitespace = $2␊ |
␉␉␉('.$marker_any_re.'␉␉␉␉# list marker and space = $3␊ |
␉␉␉␉(?:[ ]+|(?=\n))␉# space only required if item is not empty␊ |
␉␉␉)␊ |
␉␉␉((?s:.*?))␉␉␉␉␉␉# list item text = $4␊ |
␉␉␉(?:(\n+(?=\n))|\n)␉␉␉␉# tailing blank line = $5␊ |
␉␉␉(?= \n* (\z | \2 ('.$marker_any.') [ ]+))␊ |
␉␉␉(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))␊ |
␉␉␉}xm',␊ |
␉␉␉array(&$this, '_processListItems_callback'), $list_str);␊ |
␊ |
|
␉}␊ |
␊ |
␊ |
␉function doItalicsAndBold($text) {␊ |
␉␉# <strong> must go first:␊ |
␉␉$text = preg_replace_callback('{␊ |
␉␉␉␉(␉␉␉␉␉␉# $1: Marker␊ |
␉␉␉␉␉(?<!\*\*) \* |␉␉# (not preceded by two chars of␊ |
␉␉␉␉␉(?<!__) _␉␉␉# the same marker)␊ |
␉␉␉␉)␊ |
␉␉␉␉\1␊ |
␉␉␉␉(?=\S) ␉␉␉␉␉# Not followed by whitespace ␊ |
␉␉␉␉(?!\1\1)␉␉␉␉# or two others marker chars.␊ |
␉␉␉␉(␉␉␉␉␉␉# $2: Content␊ |
␉␉␉␉␉(?>␊ |
␉␉␉␉␉␉[^*_]+?␉␉␉# Anthing not em markers.␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉␉␉␉␉␉# Balence any regular emphasis inside.␊ |
␉␉␉␉␉␉\1 (?=\S) .+? (?<=\S) \1␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉␉.␉␉␉␉# Allow unbalenced * and _.␊ |
␉␉␉␉␉)+?␊ |
␉␉␉␉)␊ |
␉␉␉␉(?<=\S) \1\1␉␉␉# End mark not preceded by whitespace.␊ |
␉␉␉}sx',␊ |
␉␉␉array(&$this, '_doItalicAndBold_strong_callback'), $text);␊ |
␉␉# Then <em>:␊ |
␉␉$text = preg_replace_callback(␊ |
␉␉␉'{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s(?=\1).) \1 }sx',␊ |
␉␉␉array(&$this, '_doItalicAndBold_em_callback'), $text);␊ |
␊ |
␉␉return $text;␊ |
␉}␊ |
␉function _doItalicAndBold_em_callback($matches) {␊ |
␉␉$text = $matches[2];␊ |
␉␉$text = $this->runSpanGamut($text);␊ |
␉␉return $this->hashPart("<em>$text</em>");␊ |
␉var $em_relist = array(␊ |
␉␉'' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![.,:;]\s)',␊ |
␉␉'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',␊ |
␉␉'_' => '(?<=\S|^)(?<!_)_(?!_)',␊ |
␉␉);␊ |
␉var $strong_relist = array(␊ |
␉␉'' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![.,:;]\s)',␊ |
␉␉'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',␊ |
␉␉'__' => '(?<=\S|^)(?<!_)__(?!_)',␊ |
␉␉);␊ |
␉var $em_strong_relist = array(␊ |
␉␉'' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![.,:;]\s)',␊ |
␉␉'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',␊ |
␉␉'___' => '(?<=\S|^)(?<!_)___(?!_)',␊ |
␉␉);␊ |
␉var $em_strong_prepared_relist;␊ |
␉␊ |
␉function prepareItalicsAndBold() {␊ |
␉#␊ |
␉# Prepare regular expressions for searching emphasis tokens in any␊ |
␉# context.␊ |
␉#␊ |
␉␉foreach ($this->em_relist as $em => $em_re) {␊ |
␉␉␉foreach ($this->strong_relist as $strong => $strong_re) {␊ |
␉␉␉␉# Construct list of allowed token expressions.␊ |
␉␉␉␉$token_relist = array();␊ |
␉␉␉␉if (isset($this->em_strong_relist["$em$strong"])) {␊ |
␉␉␉␉␉$token_relist[] = $this->em_strong_relist["$em$strong"];␊ |
␉␉␉␉}␊ |
␉␉␉␉$token_relist[] = $em_re;␊ |
␉␉␉␉$token_relist[] = $strong_re;␊ |
␉␉␉␉␊ |
␉␉␉␉# Construct master expression from list.␊ |
␉␉␉␉$token_re = '{('. implode('|', $token_relist) .')}';␊ |
␉␉␉␉$this->em_strong_prepared_relist["$em$strong"] = $token_re;␊ |
␉␉␉}␊ |
␉␉}␊ |
␉}␊ |
␉function _doItalicAndBold_strong_callback($matches) {␊ |
␉␉$text = $matches[2];␊ |
␉␉$text = $this->runSpanGamut($text);␊ |
␉␉return $this->hashPart("<strong>$text</strong>");␊ |
␉␊ |
␉function doItalicsAndBold($text) {␊ |
␉␉$token_stack = array('');␊ |
␉␉$text_stack = array('');␊ |
␉␉$em = '';␊ |
␉␉$strong = '';␊ |
␉␉$tree_char_em = false;␊ |
␉␉␊ |
␉␉while (1) {␊ |
␉␉␉#␊ |
␉␉␉# Get prepared regular expression for seraching emphasis tokens␊ |
␉␉␉# in current context.␊ |
␉␉␉#␊ |
␉␉␉$token_re = $this->em_strong_prepared_relist["$em$strong"];␊ |
␉␉␉␊ |
␉␉␉#␊ |
␉␉␉# Each loop iteration search for the next emphasis token. ␊ |
␉␉␉# Each token is then passed to handleSpanToken.␊ |
␉␉␉#␊ |
␉␉␉$parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);␊ |
␉␉␉$text_stack[0] .= $parts[0];␊ |
␉␉␉$token =& $parts[1];␊ |
␉␉␉$text =& $parts[2];␊ |
␉␉␉␊ |
␉␉␉if (empty($token)) {␊ |
␉␉␉␉# Reached end of text span: empty stack without emitting.␊ |
␉␉␉␉# any more emphasis.␊ |
␉␉␉␉while ($token_stack[0]) {␊ |
␉␉␉␉␉$text_stack[1] .= array_shift($token_stack);␊ |
␉␉␉␉␉$text_stack[0] .= array_shift($text_stack);␊ |
␉␉␉␉}␊ |
␉␉␉␉break;␊ |
␉␉␉}␊ |
␉␉␉␊ |
␉␉␉$token_len = strlen($token);␊ |
␉␉␉if ($tree_char_em) {␊ |
␉␉␉␉# Reached closing marker while inside a three-char emphasis.␊ |
␉␉␉␉if ($token_len == 3) {␊ |
␉␉␉␉␉# Three-char closing marker, close em and strong.␊ |
␉␉␉␉␉array_shift($token_stack);␊ |
␉␉␉␉␉$span = array_shift($text_stack);␊ |
␉␉␉␉␉$span = $this->runSpanGamut($span);␊ |
␉␉␉␉␉$span = "<strong><em>$span</em></strong>";␊ |
␉␉␉␉␉$text_stack[0] .= $this->hashPart($span);␊ |
␉␉␉␉␉$em = '';␊ |
␉␉␉␉␉$strong = '';␊ |
␉␉␉␉} else {␊ |
␉␉␉␉␉# Other closing marker: close one em or strong and␊ |
␉␉␉␉␉# change current token state to match the other␊ |
␉␉␉␉␉$token_stack[0] = str_repeat($token{0}, 3-$token_len);␊ |
␉␉␉␉␉$tag = $token_len == 2 ? "strong" : "em";␊ |
␉␉␉␉␉$span = $text_stack[0];␊ |
␉␉␉␉␉$span = $this->runSpanGamut($span);␊ |
␉␉␉␉␉$span = "<$tag>$span</$tag>";␊ |
␉␉␉␉␉$text_stack[0] = $this->hashPart($span);␊ |
␉␉␉␉␉$$tag = ''; # $$tag stands for $em or $strong␊ |
␉␉␉␉}␊ |
␉␉␉␉$tree_char_em = false;␊ |
␉␉␉} else if ($token_len == 3) {␊ |
␉␉␉␉if ($em) {␊ |
␉␉␉␉␉# Reached closing marker for both em and strong.␊ |
␉␉␉␉␉# Closing strong marker:␊ |
␉␉␉␉␉for ($i = 0; $i < 2; ++$i) {␊ |
␉␉␉␉␉␉$shifted_token = array_shift($token_stack);␊ |
␉␉␉␉␉␉$tag = strlen($shifted_token) == 2 ? "strong" : "em";␊ |
␉␉␉␉␉␉$span = array_shift($text_stack);␊ |
␉␉␉␉␉␉$span = $this->runSpanGamut($span);␊ |
␉␉␉␉␉␉$span = "<$tag>$span</$tag>";␊ |
␉␉␉␉␉␉$text_stack[0] .= $this->hashPart($span);␊ |
␉␉␉␉␉␉$$tag = ''; # $$tag stands for $em or $strong␊ |
␉␉␉␉␉}␊ |
␉␉␉␉} else {␊ |
␉␉␉␉␉# Reached opening three-char emphasis marker. Push on token ␊ |
␉␉␉␉␉# stack; will be handled by the special condition above.␊ |
␉␉␉␉␉$em = $token{0};␊ |
␉␉␉␉␉$strong = "$em$em";␊ |
␉␉␉␉␉array_unshift($token_stack, $token);␊ |
␉␉␉␉␉array_unshift($text_stack, '');␊ |
␉␉␉␉␉$tree_char_em = true;␊ |
␉␉␉␉}␊ |
␉␉␉} else if ($token_len == 2) {␊ |
␉␉␉␉if ($strong) {␊ |
␉␉␉␉␉# Unwind any dangling emphasis marker:␊ |
␉␉␉␉␉if (strlen($token_stack[0]) == 1) {␊ |
␉␉␉␉␉␉$text_stack[1] .= array_shift($token_stack);␊ |
␉␉␉␉␉␉$text_stack[0] .= array_shift($text_stack);␊ |
␉␉␉␉␉}␊ |
␉␉␉␉␉# Closing strong marker:␊ |
␉␉␉␉␉array_shift($token_stack);␊ |
␉␉␉␉␉$span = array_shift($text_stack);␊ |
␉␉␉␉␉$span = $this->runSpanGamut($span);␊ |
␉␉␉␉␉$span = "<strong>$span</strong>";␊ |
␉␉␉␉␉$text_stack[0] .= $this->hashPart($span);␊ |
␉␉␉␉␉$strong = '';␊ |
␉␉␉␉} else {␊ |
␉␉␉␉␉array_unshift($token_stack, $token);␊ |
␉␉␉␉␉array_unshift($text_stack, '');␊ |
␉␉␉␉␉$strong = $token;␊ |
␉␉␉␉}␊ |
␉␉␉} else {␊ |
␉␉␉␉# Here $token_len == 1␊ |
␉␉␉␉if ($em) {␊ |
␉␉␉␉␉if (strlen($token_stack[0]) == 1) {␊ |
␉␉␉␉␉␉# Closing emphasis marker:␊ |
␉␉␉␉␉␉array_shift($token_stack);␊ |
␉␉␉␉␉␉$span = array_shift($text_stack);␊ |
␉␉␉␉␉␉$span = $this->runSpanGamut($span);␊ |
␉␉␉␉␉␉$span = "<em>$span</em>";␊ |
␉␉␉␉␉␉$text_stack[0] .= $this->hashPart($span);␊ |
␉␉␉␉␉␉$em = '';␊ |
␉␉␉␉␉} else {␊ |
␉␉␉␉␉␉$text_stack[0] .= $token;␊ |
␉␉␉␉␉}␊ |
␉␉␉␉} else {␊ |
␉␉␉␉␉array_unshift($token_stack, $token);␊ |
␉␉␉␉␉array_unshift($text_stack, '');␊ |
␉␉␉␉␉$em = $token;␊ |
␉␉␉␉}␊ |
␉␉␉}␊ |
␉␉}␊ |
␉␉return $text_stack[0];␊ |
␉}␊ |
␊ |
␊ |
|
␉␉# These leading spaces cause problem with <pre> content, ␊ |
␉␉# so we need to fix that:␊ |
␉␉$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', ␊ |
␉␉␉array(&$this, '_DoBlockQuotes_callback2'), $bq);␊ |
␉␉␉array(&$this, '_doBlockQuotes_callback2'), $bq);␊ |
␊ |
␉␉return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";␊ |
␉}␊ |
|
␊ |
␉function encodeAttribute($text) {␊ |
␉#␊ |
␉# Encode text for a double-quoted HTML attribute.␊ |
␉# Encode text for a double-quoted HTML attribute. This function␊ |
␉# is *not* suitable for attributes enclosed in single quotes.␊ |
␉#␊ |
␉␉$text = $this->encodeAmpsAndAngles($text);␊ |
␉␉$text = str_replace('"', '"', $text);␊ |
|
␉␊ |
␉␊ |
␉function encodeAmpsAndAngles($text) {␊ |
␉# Smart processing for ampersands and angle brackets that need to be encoded.␊ |
␉#␊ |
␉# Smart processing for ampersands and angle brackets that need to ␊ |
␉# be encoded. Valid character entities are left alone unless the␊ |
␉# no-entities mode is set.␊ |
␉#␊ |
␉␉if ($this->no_entities) {␊ |
␉␉␉$text = str_replace('&', '&', $text);␊ |
␉␉␉$text = str_replace('<', '<', $text);␊ |
␉␉␉return $text;␊ |
␉␉} else {␊ |
␉␉␉# Ampersand-encoding based entirely on Nat Irons's Amputator␊ |
␉␉␉# MT plugin: <http://bumppo.net/projects/amputator/>␊ |
␉␉␉$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', ␊ |
␉␉␉␉␉␉␉␉'&', $text);;␊ |
␉␉}␊ |
␊ |
␉␉# Ampersand-encoding based entirely on Nat Irons's Amputator MT plugin:␊ |
␉␉# http://bumppo.net/projects/amputator/␊ |
␉␉$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', ␊ |
␉␉␉␉␉␉␉ '&', $text);;␊ |
␊ |
␉␉# Encode naked <'s␊ |
␉␉$text = preg_replace('{<(?![a-z/?\$!%])}i', '<', $text);␊ |
␉␉# Encode remaining <'s␊ |
␉␉$text = str_replace('<', '<', $text);␊ |
␊ |
␉␉return $text;␊ |
␉}␊ |
|
␉␉␉<␊ |
␉␉␉(?:mailto:)?␊ |
␉␉␉(␊ |
␉␉␉␉[-.\w\x80-\xFF]+␊ |
␉␉␉␉(?:␊ |
␉␉␉␉␉[-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+␊ |
␉␉␉␉|␊ |
␉␉␉␉␉".*?"␊ |
␉␉␉␉)␊ |
␉␉␉␉\@␊ |
␉␉␉␉[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+␊ |
␉␉␉␉(?:␊ |
␉␉␉␉␉[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+␊ |
␉␉␉␉|␊ |
␉␉␉␉␉\[[\d.a-fA-F:]+\]␉# IPv4 & IPv6␊ |
␉␉␉␉)␊ |
␉␉␉)␊ |
␉␉␉>␊ |
␉␉␉}xi',␊ |
|
␉#␊ |
␉␉$output = '';␊ |
␉␉␊ |
␉␉$regex = '{␊ |
␉␉$span_re = '{␊ |
␉␉␉␉(␊ |
␉␉␉␉␉\\\\['.preg_quote($this->escape_chars).']␊ |
␉␉␉␉␉\\\\'.$this->escape_chars_re.'␊ |
␉␉␉␉|␊ |
␉␉␉␉␉(?<![`\\\\])␊ |
␉␉␉␉␉`+␉␉␉␉␉␉# code span marker␊ |
|
␉␉␉␉|␊ |
␉␉␉␉␉<\?.*?\?> | <%.*?%>␉␉# processing instruction␊ |
␉␉␉␉|␊ |
␉␉␉␉␉<[/!$]?[-a-zA-Z0-9:]+␉# regular tags␊ |
␉␉␉␉␉<[/!$]?[-a-zA-Z0-9:_]+␉# regular tags␊ |
␉␉␉␉␉(?>␊ |
␉␉␉␉␉␉\s␊ |
␉␉␉␉␉␉(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*␊ |
|
␉␉␉# openning code span marker, or the next escaped character. ␊ |
␉␉␉# Each token is then passed to handleSpanToken.␊ |
␉␉␉#␊ |
␉␉␉$parts = preg_split($regex, $str, 2, PREG_SPLIT_DELIM_CAPTURE);␊ |
␉␉␉$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);␊ |
␉␉␉␊ |
␉␉␉# Create token from text preceding tag.␊ |
␉␉␉if ($parts[0] != "") {␊ |
|
␉␉␉␉return $this->hashPart("&#". ord($token{1}). ";");␊ |
␉␉␉case "`":␊ |
␉␉␉␉# Search for end marker in remaining text.␊ |
␉␉␉␉if (preg_match('/^(.*?[^`])'.$token.'(?!`)(.*)$/sm', ␊ |
␉␉␉␉if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', ␊ |
␉␉␉␉␉$str, $matches))␊ |
␉␉␉␉{␊ |
␉␉␉␉␉$str = $matches[2];␊ |
|
␉␉# Insert extra document, block, and span transformations. ␊ |
␉␉# Parent constructor will do the sorting.␊ |
␉␉$this->document_gamut += array(␊ |
␉␉␉"doFencedCodeBlocks" => 5,␊ |
␉␉␉"stripFootnotes" => 15,␊ |
␉␉␉"stripAbbreviations" => 25,␊ |
␉␉␉"appendFootnotes" => 50,␊ |
|
␉}␊ |
␉␊ |
␉␊ |
␉# Extra hashes used during extra transformations.␊ |
␉# Extra variables used during extra transformations.␊ |
␉var $footnotes = array();␊ |
␉var $footnotes_ordered = array();␊ |
␉var $abbr_desciptions = array();␊ |
␉var $abbr_matches = array();␊ |
␉var $abbr_word_re = '';␊ |
␉␊ |
␉# Give the current footnote number.␊ |
␉var $footnote_counter = 1;␊ |
|
␉␉$this->footnotes = array();␊ |
␉␉$this->footnotes_ordered = array();␊ |
␉␉$this->abbr_desciptions = array();␊ |
␉␉$this->abbr_matches = array();␊ |
␉␉$this->abbr_word_re = '';␊ |
␉␉$this->footnote_counter = 1;␊ |
␉␉␊ |
␉␉foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {␊ |
␉␉␉$this->abbr_matches[] = preg_quote($abbr_word);␊ |
␉␉␉if ($this->abbr_word_re)␊ |
␉␉␉␉$this->abbr_word_re .= '|';␊ |
␉␉␉$this->abbr_word_re .= preg_quote($abbr_word);␊ |
␉␉␉$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);␊ |
␉␉}␊ |
␉}␊ |
|
␉␉$this->footnotes = array();␊ |
␉␉$this->footnotes_ordered = array();␊ |
␉␉$this->abbr_desciptions = array();␊ |
␉␉$this->abbr_matches = array();␊ |
␉␉$this->abbr_word_re = '';␊ |
␉␉␊ |
␉␉parent::teardown();␊ |
␉}␊ |
|
␉### HTML Block Parser ###␊ |
␉␊ |
␉# Tags that are always treated as block tags:␊ |
␉var $block_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';␊ |
␉var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';␊ |
␉␊ |
␉# Tags treated as block tags only if the opening tag is alone on it's line:␊ |
␉var $context_block_tags = 'script|noscript|math|ins|del';␊ |
␉var $context_block_tags_re = 'script|noscript|math|ins|del';␊ |
␉␊ |
␉# Tags where markdown="1" default to span mode:␊ |
␉var $contain_span_tags = 'p|h[1-6]|li|dd|dt|td|th|legend|address';␊ |
␉var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';␊ |
␉␊ |
␉# Tags which must not have their contents modified, no matter where ␊ |
␉# they appear:␊ |
␉var $clean_tags = 'script|math';␊ |
␉var $clean_tags_re = 'script|math';␊ |
␉␊ |
␉# Tags that do not need to be closed.␊ |
␉var $auto_close_tags = 'hr|img';␊ |
␉var $auto_close_tags_re = 'hr|img';␊ |
␉␊ |
␊ |
␉function hashHTMLBlocks($text) {␊ |
|
␉␉return $text;␊ |
␉}␊ |
␉function _hashHTMLBlocks_inMarkdown($text, $indent = 0, ␊ |
␉␉␉␉␉␉␉␉␉␉$enclosing_tag = '', $span = false)␊ |
␉␉␉␉␉␉␉␉␉␉$enclosing_tag_re = '', $span = false)␊ |
␉{␊ |
␉#␊ |
␉# Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.␊ |
|
␉# If you don't like this, just don't indent the tag on which␊ |
␉# you apply the markdown="1" attribute.␊ |
␉#␊ |
␉# * If $enclosing_tag is not empty, stops at the first unmatched closing ␊ |
␉# * If $enclosing_tag_re is not empty, stops at the first unmatched closing ␊ |
␉# tag with that name. Nested tags supported.␊ |
␉#␊ |
␉# * If $span is true, text inside must treated as span. So any double ␊ |
|
␉␉if ($text === '') return array('', '');␊ |
␊ |
␉␉# Regex to check for the presense of newlines around a block tag.␊ |
␉␉$newline_match_before = '/(?:^\n?|\n\n)*$/';␊ |
␉␉$newline_match_after = ␊ |
␉␉$newline_before_re = '/(?:^\n?|\n\n)*$/';␊ |
␉␉$newline_after_re = ␊ |
␉␉␉'{␊ |
␉␉␉␉^␉␉␉␉␉␉# Start of text following the tag.␊ |
␉␉␉␉(?>[ ]*<!--.*?-->)?␉␉# Optional comment.␊ |
|
␉␉␉}xs';␊ |
␉␉␊ |
␉␉# Regex to match any tag.␊ |
␉␉$block_tag_match =␊ |
␉␉$block_tag_re =␊ |
␉␉␉'{␊ |
␉␉␉␉(␉␉␉␉␉# $2: Capture hole tag.␊ |
␉␉␉␉␉</?␉␉␉␉␉# Any opening or closing tag.␊ |
␉␉␉␉␉␉(?>␉␉␉␉# Tag name.␊ |
␉␉␉␉␉␉␉'.$this->block_tags.'␉␉␉|␊ |
␉␉␉␉␉␉␉'.$this->context_block_tags.'␉|␊ |
␉␉␉␉␉␉␉'.$this->clean_tags.' ␉|␊ |
␉␉␉␉␉␉␉(?!\s)'.$enclosing_tag.'␊ |
␉␉␉␉␉␉␉'.$this->block_tags_re.'␉␉␉|␊ |
␉␉␉␉␉␉␉'.$this->context_block_tags_re.'␉|␊ |
␉␉␉␉␉␉␉'.$this->clean_tags_re.' ␉|␊ |
␉␉␉␉␉␉␉(?!\s)'.$enclosing_tag_re.'␊ |
␉␉␉␉␉␉)␊ |
␉␉␉␉␉␉(?:␊ |
␉␉␉␉␉␉␉(?=[\s"\'/])␉␉# Allowed characters after tag name.␊ |
␉␉␉␉␉␉␉(?=[\s"\'/a-zA-Z0-9])␉# Allowed characters after tag name.␊ |
␉␉␉␉␉␉␉(?>␊ |
␉␉␉␉␉␉␉␉".*?"␉␉|␉# Double quotes (can contain `>`)␊ |
␉␉␉␉␉␉␉␉\'.*?\' ␉|␉# Single quotes (can contain `>`)␊ |
|
␉␉␉␉'. ( !$span ? ' # If not in span.␊ |
␉␉␉␉|␊ |
␉␉␉␉␉# Indented code block␊ |
␉␉␉␉␉(?> ^[ ]*\n? | \n[ ]*\n )␊ |
␉␉␉␉␉(?: ^[ ]*\n | ^ | \n[ ]*\n )␊ |
␉␉␉␉␉[ ]{'.($indent+4).'}[^\n]* \n␊ |
␉␉␉␉␉(?>␊ |
␉␉␉␉␉␉(?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n␊ |
|
␉␉␉# pattern will be at the end, and between will be any catches made ␊ |
␉␉␉# by the pattern.␊ |
␉␉␉#␊ |
␉␉␉$parts = preg_split($block_tag_match, $text, 2, ␊ |
␉␉␉$parts = preg_split($block_tag_re, $text, 2, ␊ |
␉␉␉␉␉␉␉␉PREG_SPLIT_DELIM_CAPTURE);␊ |
␉␉␉␊ |
␉␉␉# If in Markdown span mode, add a empty-string span-level hash ␊ |
|
␉␉␉␊ |
␉␉␉$tag = $parts[1]; # Tag to handle.␊ |
␉␉␉$text = $parts[2]; # Remaining text after current tag.␊ |
␉␉␉$tag_re = preg_quote($tag); # For use in a regular expression.␊ |
␉␉␉␊ |
␉␉␉#␊ |
␉␉␉# Check for: Code span marker␊ |
␉␉␉#␊ |
␉␉␉if ($tag{0} == "`") {␊ |
␉␉␉␉# Find corresponding end marker.␊ |
␉␉␉␉if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag.'(?!`)}', ␊ |
␉␉␉␉$tag_re = preg_quote($tag);␊ |
␉␉␉␉if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',␊ |
␉␉␉␉␉$text, $matches))␊ |
␉␉␉␉{␊ |
␉␉␉␉␉# End marker found: pass text unchanged until marker.␊ |
|
␉␉␉␉}␊ |
␉␉␉}␊ |
␉␉␉#␊ |
␉␉␉# Check for: Indented code block or fenced code block marker.␊ |
␉␉␉# Check for: Indented code block.␊ |
␉␉␉#␊ |
␉␉␉else if ($tag{0} == "\n" || $tag{0} == "~") {␊ |
␉␉␉␉if ($tag{1} == "\n" || $tag{1} == " ") {␊ |
␉␉␉␉␉# Indented code block: pass it unchanged, will be handled ␊ |
␉␉␉␉␉# later.␊ |
␉␉␉␉␉$parsed .= $tag;␊ |
␉␉␉else if ($tag{0} == "\n" || $tag{0} == " ") {␊ |
␉␉␉␉# Indented code block: pass it unchanged, will be handled ␊ |
␉␉␉␉# later.␊ |
␉␉␉␉$parsed .= $tag;␊ |
␉␉␉}␊ |
␉␉␉#␊ |
␉␉␉# Check for: Fenced code block marker.␊ |
␉␉␉#␊ |
␉␉␉else if ($tag{0} == "~") {␊ |
␉␉␉␉# Fenced code block marker: find matching end marker.␊ |
␉␉␉␉$tag_re = preg_quote(trim($tag));␊ |
␉␉␉␉if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text, ␊ |
␉␉␉␉␉$matches)) ␊ |
␉␉␉␉{␊ |
␉␉␉␉␉# End marker found: pass text unchanged until marker.␊ |
␉␉␉␉␉$parsed .= $tag . $matches[0];␊ |
␉␉␉␉␉$text = substr($text, strlen($matches[0]));␊ |
␉␉␉␉}␊ |
␉␉␉␉else {␊ |
␉␉␉␉␉# Fenced code block marker: find matching end marker.␊ |
␉␉␉␉␉if (preg_match('{^(?>.*\n)+?'.trim($tag).' *\n}', $text, ␊ |
␉␉␉␉␉␉$matches)) ␊ |
␉␉␉␉␉{␊ |
␉␉␉␉␉␉# End marker found: pass text unchanged until marker.␊ |
␉␉␉␉␉␉$parsed .= $tag . $matches[0];␊ |
␉␉␉␉␉␉$text = substr($text, strlen($matches[0]));␊ |
␉␉␉␉␉}␊ |
␉␉␉␉␉else {␊ |
␉␉␉␉␉␉# No end marker: just skip it.␊ |
␉␉␉␉␉␉$parsed .= $tag;␊ |
␉␉␉␉␉}␊ |
␉␉␉␉␉# No end marker: just skip it.␊ |
␉␉␉␉␉$parsed .= $tag;␊ |
␉␉␉␉}␊ |
␉␉␉}␊ |
␉␉␉#␊ |
␉␉␉# Check for: Opening Block level tag or␊ |
␉␉␉# Opening Content Block tag (like ins and del) ␊ |
␉␉␉# Opening Context Block tag (like ins and del) ␊ |
␉␉␉# used as a block tag (tag is alone on it's line).␊ |
␉␉␉#␊ |
␉␉␉else if (preg_match("{^<(?:$this->block_tags)\b}", $tag) ||␊ |
␉␉␉␉(␉preg_match("{^<(?:$this->context_block_tags)\b}", $tag) &&␊ |
␉␉␉␉␉preg_match($newline_match_before, $parsed) &&␊ |
␉␉␉␉␉preg_match($newline_match_after, $text)␉)␊ |
␉␉␉else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||␊ |
␉␉␉␉(␉preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&␊ |
␉␉␉␉␉preg_match($newline_before_re, $parsed) &&␊ |
␉␉␉␉␉preg_match($newline_after_re, $text)␉)␊ |
␉␉␉␉)␊ |
␉␉␉{␊ |
␉␉␉␉# Need to parse tag and following text using the HTML parser.␊ |
|
␉␉␉# Check for: Clean tag (like script, math)␊ |
␉␉␉# HTML Comments, processing instructions.␊ |
␉␉␉#␊ |
␉␉␉else if (preg_match("{^<(?:$this->clean_tags)\b}", $tag) ||␊ |
␉␉␉else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||␊ |
␉␉␉␉$tag{1} == '!' || $tag{1} == '?')␊ |
␉␉␉{␊ |
␉␉␉␉# Need to parse tag and following text using the HTML parser.␊ |
|
␉␉␉#␊ |
␉␉␉# Check for: Tag with same name as enclosing tag.␊ |
␉␉␉#␊ |
␉␉␉else if ($enclosing_tag !== '' &&␊ |
␉␉␉else if ($enclosing_tag_re !== '' &&␊ |
␉␉␉␉# Same name as enclosing tag.␊ |
␉␉␉␉preg_match("{^</?(?:$enclosing_tag)\b}", $tag))␊ |
␉␉␉␉preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))␊ |
␉␉␉{␊ |
␉␉␉␉#␊ |
␉␉␉␉# Increase/decrease nested tag count.␊ |
|
␉␉if ($text === '') return array('', '');␊ |
␉␉␊ |
␉␉# Regex to match `markdown` attribute inside of a tag.␊ |
␉␉$markdown_attr_match = '␊ |
␉␉$markdown_attr_re = '␊ |
␉␉␉{␊ |
␉␉␉␉\s*␉␉␉# Eat whitespace before the `markdown` attribute␊ |
␉␉␉␉markdown␊ |
|
␉␉␉}xs';␊ |
␉␉␊ |
␉␉# Regex to match any tag.␊ |
␉␉$tag_match = '{␊ |
␉␉$tag_re = '{␊ |
␉␉␉␉(␉␉␉␉␉# $2: Capture hole tag.␊ |
␉␉␉␉␉</?␉␉␉␉␉# Any opening or closing tag.␊ |
␉␉␉␉␉␉[\w:$]+␉␉␉# Tag name.␊ |
␉␉␉␉␉␉(?:␊ |
␉␉␉␉␉␉␉(?=[\s"\'/])␉␉# Allowed characters after tag name.␊ |
␉␉␉␉␉␉␉(?=[\s"\'/a-zA-Z0-9])␉# Allowed characters after tag name.␊ |
␉␉␉␉␉␉␉(?>␊ |
␉␉␉␉␉␉␉␉".*?"␉␉|␉# Double quotes (can contain `>`)␊ |
␉␉␉␉␉␉␉␉\'.*?\' ␉|␉# Single quotes (can contain `>`)␊ |
|
␊ |
␉␉#␊ |
␉␉# Get the name of the starting tag.␊ |
␉␉# (This pattern makes $base_tag_name_re safe without quoting.)␊ |
␉␉#␊ |
␉␉if (preg_match("/^<([\w:$]*)\b/", $text, $matches))␊ |
␉␉␉$base_tag_name = $matches[1];␊ |
␉␉if (preg_match('/^<([\w:$]*)\b/', $text, $matches))␊ |
␉␉␉$base_tag_name_re = $matches[1];␊ |
␊ |
␉␉#␊ |
␉␉# Loop through every tag until we find the corresponding closing tag.␊ |
|
␉␉␉# pattern will be at the end, and between will be any catches made ␊ |
␉␉␉# by the pattern.␊ |
␉␉␉#␊ |
␉␉␉$parts = preg_split($tag_match, $text, 2, PREG_SPLIT_DELIM_CAPTURE);␊ |
␉␉␉$parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);␊ |
␉␉␉␊ |
␉␉␉if (count($parts) < 3) {␊ |
␉␉␉␉#␊ |
|
␉␉␉# Check for: Auto-close tag (like <hr/>)␊ |
␉␉␉#␉␉␉ Comments and Processing Instructions.␊ |
␉␉␉#␊ |
␉␉␉if (preg_match("{^</?(?:$this->auto_close_tags)\b}", $tag) ||␊ |
␉␉␉if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||␊ |
␉␉␉␉$tag{1} == '!' || $tag{1} == '?')␊ |
␉␉␉{␊ |
␉␉␉␉# Just add the tag to the block as if it was text.␊ |
|
␉␉␉␉# Increase/decrease nested tag count. Only do so if␊ |
␉␉␉␉# the tag's name match base tag's.␊ |
␉␉␉␉#␊ |
␉␉␉␉if (preg_match("{^</?$base_tag_name\b}", $tag)) {␊ |
␉␉␉␉if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {␊ |
␉␉␉␉␉if ($tag{1} == '/')␉␉␉␉␉␉$depth--;␊ |
␉␉␉␉␉else if ($tag{strlen($tag)-2} != '/')␉$depth++;␊ |
␉␉␉␉}␊ |
|
␉␉␉␉# Check for `markdown="1"` attribute and handle it.␊ |
␉␉␉␉#␊ |
␉␉␉␉if ($md_attr && ␊ |
␉␉␉␉␉preg_match($markdown_attr_match, $tag, $attr_m) &&␊ |
␉␉␉␉␉preg_match($markdown_attr_re, $tag, $attr_m) &&␊ |
␉␉␉␉␉preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))␊ |
␉␉␉␉{␊ |
␉␉␉␉␉# Remove `markdown` attribute from opening tag.␊ |
␉␉␉␉␉$tag = preg_replace($markdown_attr_match, '', $tag);␊ |
␉␉␉␉␉$tag = preg_replace($markdown_attr_re, '', $tag);␊ |
␉␉␉␉␉␊ |
␉␉␉␉␉# Check if text inside this tag must be parsed in span mode.␊ |
␉␉␉␉␉$this->mode = $attr_m[2] . $attr_m[3];␊ |
␉␉␉␉␉$span_mode = $this->mode == 'span' || $this->mode != 'block' &&␊ |
␉␉␉␉␉␉preg_match("{^<(?:$this->contain_span_tags)\b}", $tag);␊ |
␉␉␉␉␉␉preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);␊ |
␉␉␉␉␉␊ |
␉␉␉␉␉# Calculate indent before tag.␊ |
␉␉␉␉␉if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {␊ |
|
␉␉␉␉␉$parsed .= $this->$hash_method($block_text);␊ |
␉␉␉␉␉␊ |
␉␉␉␉␉# Get enclosing tag name for the ParseMarkdown function.␊ |
␉␉␉␉␉# (This pattern makes $tag_name_re safe without quoting.)␊ |
␉␉␉␉␉preg_match('/^<([\w:$]*)\b/', $tag, $matches);␊ |
␉␉␉␉␉$tag_name = $matches[1];␊ |
␉␉␉␉␉$tag_name_re = $matches[1];␊ |
␉␉␉␉␉␊ |
␉␉␉␉␉# Parse the content using the HTML-in-Markdown parser.␊ |
␉␉␉␉␉list ($block_text, $text)␊ |
␉␉␉␉␉␉= $this->_hashHTMLBlocks_inMarkdown($text, $indent, ␊ |
␉␉␉␉␉␉␉␉␉␉␉␉␉␉$tag_name, $span_mode);␊ |
␉␉␉␉␉␉␉$tag_name_re, $span_mode);␊ |
␉␉␉␉␉␊ |
␉␉␉␉␉# Outdent markdown text.␊ |
␉␉␉␉␉if ($indent > 0) {␊ |
|
␉␉return " id=\"$attr\"";␊ |
␉}␊ |
␉function _doHeaders_callback_setext($matches) {␊ |
␉␉if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))␊ |
␉␉␉return $matches[0];␊ |
␉␉$level = $matches[3]{0} == '=' ? 1 : 2;␊ |
␉␉$attr = $this->_doHeaders_attr($id =& $matches[2]);␊ |
␉␉$block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";␊ |
|
␉␉$less_than_tab = $this->tab_width - 1;␊ |
␊ |
␉␉# Re-usable pattern to match any entire dl list:␊ |
␉␉$whole_list = '(?>␊ |
␉␉$whole_list_re = '(?>␊ |
␉␉␉(␉␉␉␉␉␉␉␉# $1 = whole list␊ |
␉␉␉ (␉␉␉␉␉␉␉␉# $2␊ |
␉␉␉␉[ ]{0,'.$less_than_tab.'}␊ |
|
␊ |
␉␉$text = preg_replace_callback('{␊ |
␉␉␉␉(?>\A\n?|(?<=\n\n))␊ |
␉␉␉␉'.$whole_list.'␊ |
␉␉␉␉'.$whole_list_re.'␊ |
␉␉␉}mx',␊ |
␉␉␉array(&$this, '_doDefLists_callback'), $text);␊ |
␊ |
|
␉}␊ |
␊ |
␊ |
␉function doItalicsAndBold($text) {␊ |
␉#␊ |
␉# Redefined to change emphasis by underscore behaviour so that it does not ␊ |
␉# Redefining emphasis markers so that emphasis by underscore does not␊ |
␉# work in the middle of a word.␊ |
␉#␊ |
␉␉# <strong> must go first:␊ |
␉␉$text = preg_replace_callback(array(␊ |
␉␉␉'{␊ |
␉␉␉␉(␉␉␉␉␉␉# $1: Marker␊ |
␉␉␉␉␉(?<![a-zA-Z0-9])␉# Not preceded by alphanum␊ |
␉␉␉␉␉(?<!__)␉␉␉␉#␉or by two marker chars.␊ |
␉␉␉␉␉__␊ |
␉␉␉␉)␊ |
␉␉␉␉(?=\S) ␉␉␉␉␉# Not followed by whitespace ␊ |
␉␉␉␉(?!__)␉␉␉␉␉# or two others marker chars.␊ |
␉␉␉␉(␉␉␉␉␉␉# $2: Content␊ |
␉␉␉␉␉(?>␊ |
␉␉␉␉␉␉[^_]+?␉␉␉# Anthing not em markers.␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉␉␉␉␉␉# Balence any regular _ emphasis inside.␊ |
␉␉␉␉␉␉(?<![a-zA-Z0-9]) _ (?=\S) (.+?) ␊ |
␉␉␉␉␉␉(?<=\S) _ (?![a-zA-Z0-9])␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉␉_+␉␉␉␉# Allow unbalenced as last resort.␊ |
␉␉␉␉␉)+?␊ |
␉␉␉␉)␊ |
␉␉␉␉(?<=\S) __␉␉␉␉# End mark not preceded by whitespace.␊ |
␉␉␉␉(?![a-zA-Z0-9])␉␉␉# Not followed by alphanum␊ |
␉␉␉␉(?!__)␉␉␉␉␉# or two others marker chars.␊ |
␉␉␉}sx',␊ |
␉␉␉'{␊ |
␉␉␉␉( (?<!\*\*) \*\* )␉␉# $1: Marker (not preceded by two *)␊ |
␉␉␉␉(?=\S) ␉␉␉␉␉# Not followed by whitespace ␊ |
␉␉␉␉(?!\1)␉␉␉␉␉# or two others marker chars.␊ |
␉␉␉␉(␉␉␉␉␉␉# $2: Content␊ |
␉␉␉␉␉(?>␊ |
␉␉␉␉␉␉[^*]+?␉␉␉# Anthing not em markers.␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉␉␉␉␉␉# Balence any regular * emphasis inside.␊ |
␉␉␉␉␉␉\* (?=\S) (.+?) (?<=\S) \*␊ |
␉␉␉␉␉|␊ |
␉␉␉␉␉␉\*␉␉␉␉# Allow unbalenced as last resort.␊ |
␉␉␉␉␉)+?␊ |
␉␉␉␉)␊ |
␉␉␉␉(?<=\S) \*\*␉␉␉# End mark not preceded by whitespace.␊ |
␉␉␉}sx',␊ |
␉␉␉),␊ |
␉␉␉array(&$this, '_doItalicAndBold_strong_callback'), $text);␊ |
␉␉# Then <em>:␊ |
␉␉$text = preg_replace_callback(array(␊ |
␉␉␉'{ ( (?<![a-zA-Z0-9])(?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1(?![a-zA-Z0-9]) }sx',␊ |
␉␉␉'{ ( (?<!\*)\* ) (?=\S) (?! \1) (.+?) (?<=\S)(?<!\s\*) \1 }sx',␊ |
␉␉␉),␊ |
␉␉␉array(&$this, '_doItalicAndBold_em_callback'), $text);␊ |
␊ |
␉␉return $text;␊ |
␉}␊ |
␉var $em_relist = array(␊ |
␉␉'' => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![.,:;]\s)',␊ |
␉␉'*' => '(?<=\S|^)(?<!\*)\*(?!\*)',␊ |
␉␉'_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',␊ |
␉␉);␊ |
␉var $strong_relist = array(␊ |
␉␉'' => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![.,:;]\s)',␊ |
␉␉'**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',␊ |
␉␉'__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',␊ |
␉␉);␊ |
␉var $em_strong_relist = array(␊ |
␉␉'' => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![.,:;]\s)',␊ |
␉␉'***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',␊ |
␉␉'___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',␊ |
␉␉);␊ |
␊ |
␊ |
␉function formParagraphs($text) {␊ |
|
␉␉if (!empty($this->footnotes_ordered)) {␊ |
␉␉␉$text .= "\n\n";␊ |
␉␉␉$text .= "<div class=\"footnotes\">\n";␊ |
␉␉␉$text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";␊ |
␉␉␉$text .= "<hr". $this->empty_element_suffix ."\n";␊ |
␉␉␉$text .= "<ol>\n\n";␊ |
␉␉␉␊ |
␉␉␉$attr = " rev=\"footnote\"";␊ |
|
␉␉␉␉$footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', ␊ |
␉␉␉␉␉array(&$this, '_appendFootnotes_callback'), $footnote);␊ |
␉␉␉␉␊ |
␉␉␉␉$attr2 = str_replace("%%", ++$num, $attr);␊ |
␉␉␉␉$attr = str_replace("%%", ++$num, $attr);␊ |
␉␉␉␉$note_id = $this->encodeAttribute($note_id);␊ |
␉␉␉␉␊ |
␉␉␉␉# Add backlink to last paragraph; create new paragraph if needed.␊ |
␉␉␉␉$backlink = "<a href=\"#fnref:$note_id\"$attr2>↩</a>";␊ |
␉␉␉␉$backlink = "<a href=\"#fnref:$note_id\"$attr>↩</a>";␊ |
␉␉␉␉if (preg_match('{</p>$}', $footnote)) {␊ |
␉␉␉␉␉$footnote = substr($footnote, 0, -4) . " $backlink</p>";␊ |
␉␉␉␉} else {␊ |
|
␉␉␉␉$title = $this->encodeAttribute($title);␊ |
␉␉␉␉$attr .= " title=\"$title\"";␊ |
␉␉␉}␊ |
␉␉␉␊ |
␉␉␉$attr = str_replace("%%", $num, $attr);␊ |
␉␉␉$node_id = $this->encodeAttribute($node_id);␊ |
␉␉␉␊ |
␉␉␉return␊ |
␉␉␉␉"<sup id=\"fnref:$node_id\">".␊ |
|
␉function _stripAbbreviations_callback($matches) {␊ |
␉␉$abbr_word = $matches[1];␊ |
␉␉$abbr_desc = $matches[2];␊ |
␉␉$this->abbr_matches[] = preg_quote($abbr_word);␊ |
␉␉if ($this->abbr_word_re)␊ |
␉␉␉$this->abbr_word_re .= '|';␊ |
␉␉$this->abbr_word_re .= preg_quote($abbr_word);␊ |
␉␉$this->abbr_desciptions[$abbr_word] = trim($abbr_desc);␊ |
␉␉return ''; # String that will replace the block␊ |
␉}␊ |
|
␉#␊ |
␉# Find defined abbreviations in text and wrap them in <abbr> elements.␊ |
␉#␊ |
␉␉if ($this->abbr_matches) {␊ |
␉␉␉// cannot use the /x modifier because abbr_matches may ␊ |
␉␉␉// contain spaces:␊ |
␉␉if ($this->abbr_word_re) {␊ |
␉␉␉// cannot use the /x modifier because abbr_word_re may ␊ |
␉␉␉// contain significant spaces:␊ |
␉␉␉$text = preg_replace_callback('{'.␊ |
␉␉␉␉'(?<![\w\x1A])'.␊ |
␉␉␉␉'(?:'. implode('|', $this->abbr_matches) .')'.␊ |
␉␉␉␉'(?:'.$this->abbr_word_re.')'.␊ |
␉␉␉␉'(?![\w\x1A])'.␊ |
␉␉␉␉'}', ␊ |
␉␉␉␉array(&$this, '_doAbbreviations_callback'), $text);␊ |
|
␉␉␉if (empty($desc)) {␊ |
␉␉␉␉return $this->hashPart("<abbr>$abbr</abbr>");␊ |
␉␉␉} else {␊ |
␉␉␉␉$desc = htmlspecialchars($desc, ENT_NOQUOTES);␊ |
␉␉␉␉$desc = $this->encodeAttribute($desc);␊ |
␉␉␉␉return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");␊ |
␉␉␉}␊ |
␉␉} else {␊ |
|
Copyright and License␊ |
---------------------␊ |
␊ |
PHP Markdown & Extra␊ |
Copyright (c) 2004-2008 Michel Fortin ␊ |
<http://www.michelf.com/> ␊ |
PHP Markdown & Extra ␊ |
Copyright (c) 2004-2009 Michel Fortin ␊ |
<http://michelf.com/> ␊ |
All rights reserved.␊ |
␊ |
Based on Markdown ␊ |