<?php /*

 Composr
 Copyright (c) ocProducts, 2004-2016

 See text/EN/licence.txt for full licencing information.


 NOTE TO PROGRAMMERS:
   Do not edit this file. If you need to make changes, save your changed file to the appropriate *_custom folder
   **** If you ignore this advice, then your website upgrades (e.g. for bug fixes) will likely kill your changes ****

*/

/**
 * @license    http://opensource.org/licenses/cpal_1.0 Common Public Attribution License
 * @copyright  ocProducts Ltd
 * @package    core_rich_media
 */

/**
 * Standard code module initialisation function.
 * Loads the 'comcode_compiler' code file via require_code().
 *
 * @ignore
 */
function init__comcode_from_html()
{
    require_code('comcode_compiler');
}

/**
 * Turn a matched <img> tag into a Comcode [img] tag. preg_replace_callback callback, used by semihtml_to_comcode.
 * $matches[1] and $matches[3] are treated as attribute text, and $matches[2] as the image URL.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _img_tag_fixup($matches)
{
    // Join the two captured attribute fragments; a non-empty result gets a single leading space
    $leading = trim($matches[1]);
    $trailing = trim($matches[3]);
    $params = $leading . (($leading != '') ? ' ' : '') . $trailing;
    if ($params != '') {
        $params = ' ' . $params;
    }

    // Map HTML attributes onto [img] parameters
    $params = str_replace('alt="', 'param="', $params);
    $params = preg_replace('#style="[^"]*vertical-align: ([^;"]+)(;[^"]*)?;?"#i', 'align="${1}"', $params);

    // Throw away attributes that are not carried over to the [img] tag
    $params = str_replace(' class="c_img"', '', $params);
    $discard = array('border', 'height', 'hspace', 'ismap', 'longdesc', 'usemap', 'vspace', 'width', 'id', 'class', 'title', 'style', 'lang');
    foreach ($discard as $attribute_name) {
        $params = preg_replace('# ' . $attribute_name . '="[^"]*"#', '', $params);
    }
    $params = str_replace(' ismap', '', $params); // Value-less form

    // CKEditor allows us to specify the base, so we know get_base_url() is right (no need to look at the referer)
    $caller_url = get_base_url();

    // URLs containing these Tempcode symbols are left exactly as they are; anything else is qualified against the base
    $url = $matches[2];
    $has_symbol = (strpos($url, '{$FIND_SCRIPT') !== false) || (strpos($url, '{$IMG') !== false);
    if (!$has_symbol) {
        $url = qualify_url($url, $caller_url);
    }

    return '[img' . rtrim($params) . ']' . $url . '[/img]';
}

/**
 * Rewrite the src URL of a matched <img> tag, keeping it as HTML. preg_replace_callback callback, used by semihtml_to_comcode.
 * $matches[1] is the attribute text before src, $matches[2] the (entity-encoded) src value, $matches[3] the attribute text after it.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _img_tag_fixup_raw($matches)
{
    // CKEditor allows us to specify the base, so we know get_base_url() is right (no need to look at the referer)
    $caller_url = get_base_url();

    $url = html_entity_decode($matches[2], ENT_QUOTES, get_charset());

    // URLs containing these Tempcode symbols are left as they are; anything else is qualified against the base
    $has_symbol = (strpos($url, '{$FIND_SCRIPT') !== false) || (strpos($url, '{$IMG') !== false);
    if (!$has_symbol) {
        $url = qualify_url($url, $caller_url);
    }

    $escaped_url = escape_html($url);

    // Any trailing slash (and spaces before it) is removed, as the tag is always re-closed with ' />'
    $after_src = preg_replace('# */$#', '', $matches[3]);

    return '<img' . $matches[1] . ' src="' . $escaped_url . '"' . $after_src . ' />';
}

/**
 * Rewrite the href of a matched <a> tag via qualify_url(). preg_replace_callback callback, used by semihtml_to_comcode.
 * $matches[1] is the text before href, $matches[2] the (entity-encoded) href value, $matches[3] the text after it.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _a_tag_link_fixup($matches)
{
    // Work out what the link is relative to: the referer with its last path component removed if it looks like a URL, otherwise the base URL
    $referer = post_param_string('http_referer', cms_srv('HTTP_REFERER'));
    if (looks_like_url($referer)) {
        $caller_url = preg_replace('#/[^/]*$#', '', $referer);
    } else {
        $caller_url = get_base_url();
    }

    $href = html_entity_decode($matches[2], ENT_QUOTES, get_charset());
    $href = qualify_url($href, $caller_url);

    return '<a ' . $matches[1] . 'href="' . escape_html($href) . '"' . $matches[3] . '>';
}

/**
 * Used by semihtml_to_comcode to fix CSS colours away from RGB notation, into #rrggbb notation. preg_replace_callback callback
 * $matches[1] and $matches[5] are the text either side of the colour and are passed through unchanged; $matches[2], $matches[3] and $matches[4] are the decimal red, green and blue values.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _css_color_fixup($matches)
{
    $hex = '';
    for ($i = 2; $i <= 4; $i++) {
        // Clamp to the valid channel range, so an out-of-range value cannot produce something other than exactly two hex digits
        $channel = max(0, min(255, intval(trim($matches[$i]))));
        $hex .= str_pad(dechex($channel), 2, '0', STR_PAD_LEFT);
    }

    return $matches[1] . '#' . $hex . $matches[5];
}

/**
 * Swap every &quot; entity within the matched text for a single quote, so inline CSS with quotes uses single quotes. preg_replace_callback callback, used by semihtml_to_comcode.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _css_quot_fixup($matches)
{
    $whole_match = $matches[0];
    $single_quote = '\'';

    return str_replace('&quot;', $single_quote, $whole_match);
}

/**
 * Apply temporary ad hoc-escaping to a CDATA area (it is reverse converted later). preg_replace_callback callback
 * Only $matches[2] is escaped; $matches[1] and $matches[3] are put back around it unchanged.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _cdata_protect($matches)
{
    // The placeholders are closing tags, as those can't just be typed into the HTML normally (even in CDATA) - so they are safe unused strings
    $raw = array(' ', "\t", "\n", "\r", '&');
    $placeholders = array('</CDATA__space>', '</CDATA__tab>', '</CDATA__nl>', '</CDATA__lf>', '</CDATA__amp>');

    $protected = str_replace($raw, $placeholders, $matches[2]);

    return $matches[1] . $protected . $matches[3];
}

/**
 * Apply temporary ad hoc-escaping of angle brackets to the contents of a code tag (reversed by _codetag_unprotect). preg_replace_callback callback
 * Only $matches[2] is escaped; $matches[1] and $matches[3] are put back around it unchanged.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _codetag_protect($matches)
{
    $escaped = str_replace(array('<', '>'), array('___lt___', '___gt___'), $matches[2]);

    return $matches[1] . $escaped . $matches[3];
}

/**
 * Reverse the temporary ad hoc-escaping that _codetag_protect applies to the contents of a code tag. preg_replace_callback callback
 * Only $matches[2] is unescaped; $matches[1] and $matches[3] are put back around it unchanged.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _codetag_unprotect($matches)
{
    // Array-form str_replace works through the pairs in order, so ___lt___ is fully restored before ___gt___ is looked at
    $restored = str_replace(array('___lt___', '___gt___'), array('<', '>'), $matches[2]);

    return $matches[1] . $restored . $matches[3];
}

/**
 * Reorder XHTML attributes alphabetically, so our regexp's match better. preg_replace_callback callback
 * $matches[1] is the tag name and $matches[2] is everything else inside the angle brackets.
 * Only name="value" attributes (double-quoted, with a \w+ name) are recognised; any text ahead of the first recognised attribute is not carried into the output.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _reorder_xhtml_attributes($matches)
{
    $attribute_text = trim($matches[2]);

    // Detach any self-closing slash (with the space before it, if there is one); it is put back at the end
    $self_closing = (substr($attribute_text, -1) == '/');
    if ($self_closing) {
        $strip = (substr($attribute_text, -2) == ' /') ? 2 : 1;
        $attribute_text = substr($attribute_text, 0, strlen($attribute_text) - $strip);
    }

    // Split into: leading text, then alternating [name=, value"] pieces
    $pieces = preg_split('#\s(\w+=)\s*"#', ' ' . $attribute_text, -1, PREG_SPLIT_DELIM_CAPTURE);
    $pieces = array_map('trim', $pieces);
    array_shift($pieces); // Leading text, before the first attribute

    // Glue each name back onto its value, restoring the opening quote the split consumed
    $attributes = array();
    foreach (array_chunk($pieces, 2) as $pair) {
        $attributes[] = isset($pair[1]) ? ($pair[0] . '"' . $pair[1]) : $pair[0];
    }
    sort($attributes);

    return '<' . $matches[1] . ' ' . implode(' ', $attributes) . ($self_closing ? ' /' : '') . '>';
}

/**
 * Reorder style properties alphabetically, so our regexp's match better. preg_replace_callback callback
 * $matches[2] is the semicolon-separated property list; $matches[1] and $matches[3] are put back around it unchanged.
 * Blank entries are dropped and the remaining ones are re-joined with '; ' (no trailing semicolon).
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _reorder_css_properties($matches)
{
    $properties = array_map('trim', explode(';', $matches[2]));
    sort($properties);

    $kept = array();
    foreach ($properties as $property) {
        if ($property != '') {
            $kept[] = $property;
        }
    }

    return $matches[1] . implode('; ', $kept) . $matches[3];
}

/**
 * Run semihtml_to_comcode over the middle of a match. preg_replace_callback callback
 * If the converted text starts with [semihtml], it is returned alone with its first 10 and last 11 characters removed (presumably the [semihtml] and [/semihtml] tags - the ending is not checked). Otherwise it is returned between $matches[1] and $matches[3].
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _semihtml_to_comcode_wrap($matches)
{
    $converted = semihtml_to_comcode($matches[2]);

    if (substr($converted, 0, 10) != '[semihtml]') {
        return $matches[1] . $converted . $matches[3];
    }

    // 21 = 10 characters off the front plus 11 off the end
    return substr($converted, 10, strlen($converted) - 21);
}

/**
 * Extract underlying Comcode from an editor Comcode-management button, by entity-decoding $matches[1]. preg_replace_callback callback
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _debuttonise($matches)
{
    $encoded_comcode = $matches[1];
    $charset = get_charset();

    return html_entity_decode($encoded_comcode, ENT_QUOTES, $charset);
}

/**
 * Extract underlying Comcode from an editor XML tag. preg_replace_callback callback
 * $matches[1] is the Comcode tag name and $matches[2] (optional) is its attribute text.
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _detagonise($matches)
{
    $raw_attributes = isset($matches[2]) ? $matches[2] : '';

    // Entity-encoded quotes become backslash-escaped quotes, before all the other entities get decoded
    $escaped_attributes = str_replace('&quot;', '\"', $raw_attributes);
    $attributes = html_entity_decode($escaped_attributes, ENT_QUOTES, get_charset());

    // IDs aren't a real Comcode attribute
    $attributes = preg_replace('# id="[^"]*"#', '', $attributes);

    return '[' . $matches[1] . $attributes . ']';
}

/**
 * Extract underlying Tempcode directive from an editor XML tag. preg_replace_callback callback
 * $matches[1] (optional) is the tag's attribute text. The result is the entity-decoded value of its 'params' attribute, or a blank string if that attribute is missing or empty().
 *
 * @param  array $matches Array of matches
 * @return string Substituted text
 *
 * @ignore
 */
function _dedirectiveise($matches)
{
    $attributes_xml = isset($matches[1]) ? $matches[1] : '';

    // Parse out name="value" pairs (a later duplicate name overwrites an earlier one)
    $found = array();
    $num_found = preg_match_all('#\s+([\w\-]+)\s*=\s*"([^"]*)"#', $attributes_xml, $found);
    $by_name = array();
    $i = 0;
    while ($i < $num_found) {
        $by_name[$found[1][$i]] = $found[2][$i];
        $i++;
    }

    if (empty($by_name['params'])) {
        return '';
    }

    return html_entity_decode($by_name['params'], ENT_QUOTES, get_charset());
}

/**
 * Cleanup HTML coming out of the WYSIWYG editor, converting represented Comcode back to proper Comcode.
 * Handles: zero-width space markers, 'cms_keep_ui_controlled' buttons, 'cms_keep'/'cms_keep_block' kbd wrappers, <comcode-*> tags, and <tempcode> tags.
 *
 * @param  string $semihtml Semi-HTML (passed by reference, and altered in place)
 */
function remove_wysiwyg_comcode_markup(&$semihtml)
{
    // Our invisible characters isolating the cms Keep markers from style run-off (entity form, then raw UTF-8 byte form)
    $semihtml = str_replace('&#8203;', '', $semihtml);
    if (get_charset() == 'utf-8') {
        $semihtml = str_replace("\xe2\x80\x8b", '', $semihtml);
    }

    // Our button editing for embedded tags: swap each button for the decoded contents of its title attribute, repeating until nothing changes
    if (stripos($semihtml, '<input') !== false) {
        $button_regexp = '#<input [^>]*class="cms_keep_ui_controlled" [^>]*title="([^"]*)" [^>]*type="button" [^>]*value="[^"]*"[^>]*/?' . '>#siU';
        do {
            $previous = $semihtml;
            $semihtml = preg_replace_callback($button_regexp, '_debuttonise', $semihtml);
        } while ($semihtml != $previous);
    }

    // Our Comcode tag start/end markers: keep just what is inside the kbd element
    $kbd_rules = array(
        array('#^<kbd [^>]*class="(cms_keep|cms_keep_block)"[^>]*>(.*)</kbd>$#siU', '${2}'),
    );
    $semihtml = array_html_preg_replace('kbd', $kbd_rules, $semihtml);

    // Our wrapper tags for Comcode tags
    if (stripos($semihtml, '<comcode-') !== false) {
        init_valid_comcode_tags();
        require_code('comcode_renderer');
        _custom_comcode_import($GLOBALS['SITE_DB']);
        global $VALID_COMCODE_TAGS;
        foreach (array_keys($VALID_COMCODE_TAGS) as $tag) {
            $quoted_tag = preg_quote($tag, '#');
            $semihtml = preg_replace_callback('#<comcode-(' . $quoted_tag . ')( [^<>]*)?' . '>#', '_detagonise', $semihtml);
            $semihtml = preg_replace('#</comcode-' . $quoted_tag . '\s*>#', '[/' . $tag . ']', $semihtml);
        }
    }

    // Our wrapper tags for Tempcode directives
    if (stripos($semihtml, '<tempcode') !== false) {
        $semihtml = cms_preg_replace_callback_safe('#<tempcode( [^<>]*)' . '>\s*#', '_dedirectiveise', $semihtml);
        $semihtml = preg_replace('#</tempcode\s*>#', '{+END}', $semihtml);
    }
}

/**
 * Convert HTML headers (h1 to h4) to Comcode titles.
 *
 * @param  string $semihtml Semi-HTML
 * @param  boolean $forceful Whether to force conversion on all header tags, even if they don't match Comcode-style/simple headers exactly
 * @return string Semi-HTML, with headers converted to titles
 */
function convert_html_headers_to_titles($semihtml, $forceful)
{
    // Nothing that could be a header tag
    if (stripos($semihtml, '<h') === false) {
        return $semihtml;
    }

    // Level 1: the screen title forms, then a plain h1
    $title_1 = '[title="1"]${1}[/title]' . "\n";
    $h1_regexps = array(
        '#^\s*<h1 id="screen_title"[^<>]*>\s*<span class="inner">(.*)</span>\s*</h1>\s*$#siU',
        '#^\s*<h1 class="screen_title"[^<>]*>\s*<span class="inner">(.*)</span>\s*</h1>\s*$#siU',
        '#^\s*<h1 id="screen_title" class="screen_title">\s*<span class="inner">(.*)</span>\s*</h1>\s*$#siU',
        '#^\s*<h1 id="screen_title"[^<>]*>(.*)</h1>\s*$#siU',
        '#^\s*<h1 class="screen_title"[^<>]*>(.*)</h1>\s*$#siU',
        '#^\s*<h1 id="screen_title" class="screen_title"[^<>]*>(.*)</h1>\s*$#siU',
        '#^\s*<h1>(.*)</h1>\s*$#siU',
    );
    if ($forceful) {
        $h1_regexps[] = '#^\s*<h1[^<>]*>(.*)</h1>\s*$#siU';
    }
    $rules = array();
    foreach ($h1_regexps as $regexp) {
        $rules[] = array($regexp, $title_1);
    }
    $semihtml = array_html_preg_replace('h1', $rules, $semihtml);

    // An h1 carrying attributes right at the start of the text is converted regardless of $forceful
    $semihtml = preg_replace('#^\s*<h1[^>]+>(.*)</h1>\s*#siU', $title_1, $semihtml);

    // Levels 2 to 4
    foreach (array(2, 3, 4) as $level) {
        $n = strval($level);
        $title_n = '[title="' . $n . '"]${1}[/title]' . "\n";

        $rules = array(
            array('#^\s*<h' . $n . '><span class="inner">(.*)</span></h' . $n . '>\s*$#siU', $title_n),
            array('#^\s*<h' . $n . '>(.*)</h' . $n . '>\s*$#siU', $title_n),
        );
        if ($forceful) {
            $rules[] = array('#^\s*<h' . $n . '[^<>]*>(.*)</h' . $n . '>\s*$#siU', $title_n);
        }
        $semihtml = array_html_preg_replace('h' . $n, $rules, $semihtml);
    }

    return $semihtml;
}

/**
 * Convert HTML-filled Comcode to cleaner Comcode.
 * Only acts when the whole text is a single [semihtml] or [html] wrapper with no [semihtml] or [html] opening tag nested inside it; anything else is returned untouched.
 *
 * @param  LONG_TEXT $comcode The messy Comcode.
 * @return LONG_TEXT The cleaned Comcode.
 */
function force_clean_comcode($comcode)
{
    $captured = array();

    // Wrapped entirely in semihtml?
    $is_semihtml = (preg_match('#^\[semihtml\](.*)\[/semihtml\]$#s', $comcode, $captured) != 0);
    if ($is_semihtml) {
        $inner = $captured[1];
        $has_nesting = (strpos($inner, '[semihtml]') !== false) || (strpos($inner, '[html]') !== false);
        if (!$has_nesting) {
            return semihtml_to_comcode($inner, true);
        }
    }

    // Wrapped entirely in html?
    $is_html = (preg_match('#^\[html\](.*)\[/html\]$#s', $comcode, $captured) != 0);
    if ($is_html) {
        $inner = $captured[1];
        $has_nesting = (strpos($inner, '[semihtml]') !== false) || (strpos($inner, '[html]') !== false);
        if (!$has_nesting) {
            return html_to_comcode($inner, true);
        }
    }

    return $comcode;
}

/**
 * Strip down the contents of each media_set tag for easier WYSIWYG-editing.
 * Within every [media_set...]...[/media_set] span, div/br/figure tags are removed (their contents are kept) and figcaption elements are removed along with their contents.
 *
 * @param  LONG_TEXT $semihtml The Semi-HTML to be processed
 * @return LONG_TEXT The Semi-HTML, with media_set contents stripped down
 */
function wysiwygify_media_set($semihtml)
{
    // Media set contents doesn't need any divs, which get left from native attachments
    $offset = 0;
    while (true) {
        $start = strpos($semihtml, '[media_set', $offset);
        if ($start === false) {
            break;
        }

        // Search for the closing tag from the opening tag onwards, so a stray closing tag earlier in the text cannot stop processing
        $end = strpos($semihtml, '[/media_set]', $start);
        if ($end === false) {
            break;
        }

        $before = substr($semihtml, $start, $end - $start);

        // A regexp failure (null result) leaves the text as it was at that step, rather than wiping it out
        $after = preg_replace('#</?(div|br|figure)( [^<>]*)?' . '>#', '', $before);
        if ($after === null) {
            $after = $before;
        }
        $without_captions = preg_replace('#<figcaption( [^<>]*)?' . '>.*</figcaption>#Us', '', $after);
        if ($without_captions !== null) {
            $after = $without_captions;
        }

        $semihtml = substr($semihtml, 0, $start) . $after . substr($semihtml, $end);

        // Carry on from just inside the (shifted) closing tag
        $offset = $start + strlen($after) + 1;
    }

    return $semihtml;
}

/**
 * Convert Semi-HTML into comcode. Cleanup where possible
 *
 * @param  LONG_TEXT $semihtml The Semi-HTML to be converted
 * @param  boolean $force Whether to force full conversion regardless of settings
 * @param  boolean $quick Whether to trust the HTML is valid rather than cleaning it up (e.g. for Composr-generated HTML)
 * @return LONG_TEXT The equivalent Comcode
 */
function semihtml_to_comcode($semihtml, $force = false, $quick = false)
{
    // Optimisations
    $matches = array();
    if (preg_match('#^\[semihtml\]([^\[\]<>]*)\[\/semihtml\]$#', $semihtml, $matches) != 0) {
        return $matches[1];
    }
    if (preg_match('#^([^\[\]<>\{\}&]*)$#', $semihtml) != 0) {
        return $semihtml;
    }

    $semihtml = trim($semihtml);

    // Optimisation, not long enough to clean up
    if (cms_trim($semihtml, strlen($semihtml) < 30) === '') {
        return '';
    }

    $decoded = html_entity_decode($semihtml, ENT_QUOTES, get_charset());
    if ((strpos($decoded, '<') === false) && (strpos($decoded, '[') === false) && (strpos($decoded, '{') === false) && (strpos($decoded, '&') === false)) {
        return $decoded;
    }

    require_code('obfuscate');

    safe_ini_set('pcre.backtrack_limit', '10000000');

    // Special clean up we always do regardless...

    // Composr markers
    remove_wysiwyg_comcode_markup($semihtml);

    // Links should be kept from being base-URL-specific
    $semihtml = preg_replace('#(<[^<>]*)' . preg_quote(escape_html(get_base_url() . '/'), '#') . '([^<>]*>)#', '$1{$BASE_URL*}/$2', $semihtml);

    // Empty comments
    $semihtml = str_replace('<!-- >', '', $semihtml);

    // CKEditor gibberish
    $semihtml = preg_replace('#<span id="cke_bm_[^"]+" style="display: none;\s*">&nbsp;</span>#', '', $semihtml);

    // CKEditor may leave white-space on the end, we have to assume it was not intentional
    $semihtml = preg_replace('#(\[\w+)&nbsp;#', '${1} ', $semihtml);

    $semihtml = wysiwygify_media_set($semihtml);

    // ---

    // Maybe we don't do a conversion? If possible we want to avoid it because conversions are messy.
    if (((!$force) && (get_option('eager_wysiwyg') == '0') && (has_privilege(get_member(), 'allow_html'))) || (strpos($semihtml, '{$,page hint: no_smart_conversion}') !== false)) {
        // Resolve relative URLs
        $semihtml = preg_replace_callback('#<img([^>]*) src="([^"]*)"([^>]*) />#siU', '_img_tag_fixup_raw', $semihtml);
        $semihtml = preg_replace_callback('#<img([^>]*) src="([^"]*)"([^>]*)>#siU', '_img_tag_fixup_raw', $semihtml);

        // Preserve header formatting by moving it to a span
        $semihtml = preg_replace('#<h1[^>]* style="([^"<>]*)"[^>]*>\s*<span class="inner">(.*)</span>\s*</h1>#Us', '<h1><span class="inner"><span style="display: inline-block; ${1}">${2}</span></span></h1>', $semihtml);
        $semihtml = preg_replace('#<h1[^>]* style="([^"<>]*)"[^>]*>(.*)</h1>#Us', '<h1><span class="inner"><span style="display: block; ${1}">${2}</span></span></h1>', $semihtml);

        // We really need anything inside <kbd> to go back to [tt] so it doesn't get parsed within semihtml
        $array_html_preg_replace = array();
        $array_html_preg_replace[] = array('#^<kbd>(.*)</kbd>$#siU', "[tt]\${1}[/tt]");
        $semihtml = array_html_preg_replace('kbd', $array_html_preg_replace, $semihtml);

        if (strpos($semihtml, '[contents') !== false) { // Contents tag needs proper Comcode titles
            $semihtml = convert_html_headers_to_titles($semihtml, true);
        }

        // Is it really simple? It is if $count is zero (i.e. nothing fancy)...

        $count = 0;
        $count += substr_count($semihtml, '[/');
        $count += substr_count($semihtml, '@');
        $count += substr_count($semihtml, '{');
        $count += substr_count($semihtml, '[[');
        $count += substr_count($semihtml, '<h1');
        $_emoticons = $GLOBALS['FORUM_DRIVER']->find_emoticons();
        foreach (array_keys($_emoticons) as $emoticon_code) {
            $count += substr_count($semihtml, $emoticon_code);
        }
        if (strpos($semihtml, '<a ') === false) {
            $count += substr_count($semihtml, '://');
        }

        // Yes, so just dump it inside html (maximum purity of parsing)...

        if ($count == 0) {
            return ($semihtml == '') ? '' : ('[html]' . $semihtml . '[/html]');
        }

        // No, but maybe we can chop it around a bit...

        if (strpos($semihtml, 'data:') === false) {
            $count2 = substr_count($semihtml, '[/attachment]') + substr_count($semihtml, '<h1');

            // All HTML or attachments or headers, so we can encode mostly as 'html' (as opposed to 'semihtml'). Good purity of parsing
            if ($count2 == $count) {
                if ($semihtml != '') {
                    $semihtml = '[html]' . $semihtml . '[/html]';
                }
                $semihtml = preg_replace('#<h1[^>]*>\s*<span class="inner">(.*)</span>\s*</h1>#Us', '[/html][semihtml][title]${1}[/title][/semihtml][html]', $semihtml);
                $semihtml = preg_replace('#<h1[^>]*>(.*)</h1>#Us', '[/html][semihtml][title]${1}[/title][/semihtml][html]', $semihtml);
                $semihtml = str_replace('[attachment', '[/html][semihtml][attachment', str_replace('[/attachment]', '[/attachment][/semihtml][html]', $semihtml));
                $semihtml = str_replace('[/html][html]', '', $semihtml);
                $semihtml = str_replace('[html][/html]', '', $semihtml);
                return $semihtml;
            }
        }

        // Semihtml then...

        if ($semihtml != '') {
            $semihtml = '[semihtml]' . $semihtml . '[/semihtml]';
        }
        $semihtml = preg_replace('#<h1[^>]*>\s*<span class="inner">(.*)</span>\s*</h1>#Us', '[title]${1}[/title]', $semihtml);
        $semihtml = preg_replace('#<h1[^>]*>(.*)</h1>#Us', '[title]${1}[/title]', $semihtml);

        return $semihtml;
    }

    // Okay, do a conversion...

    if (!$quick) {
        require_code('xhtml');
        $semihtml = xhtmlise_html($semihtml, true); // Needed so we can parse it right
    }

    // Safety from if these are typed in (could cause problems)
    $semihtml = str_replace('[html' . ($force ? ']' : ''), $force ? '' : '[ html', $semihtml);
    $semihtml = str_replace('[semihtml' . ($force ? ']' : ''), $force ? '' : '[ semihtml', $semihtml);
    $semihtml = str_replace('[/html' . ($force ? ']' : ''), $force ? '' : '[ / html', $semihtml);
    $semihtml = str_replace('[/semihtml' . ($force ? ']' : ''), $force ? '' : '[ / semihtml', $semihtml);

    // This is useful for generally stripping sensitive information anyway. Should be null-op if anti-leech was on, but worth doing just-in-case.
    $semihtml = preg_replace('#&amp;keep_session=\w*(&amp;for_session=\w*)?#', '', $semihtml);

    // We must protect anything that is in CDATA from whitespace/entity cleanup (HTML or XHTML - we have to use lowest common denominator)
    $semihtml = preg_replace_callback('#(<script[^>]*>)(.*)(</script>)#siU', '_cdata_protect', $semihtml);
    $semihtml = preg_replace_callback('#(<style[^>]*>)(.*)(</style>)#siU', '_cdata_protect', $semihtml);
    $semihtml = preg_replace_callback('#(<textarea[^>]*>)(.*)(</textarea>)#siU', '_cdata_protect', $semihtml);
    $semihtml = preg_replace_callback('#(<pre[^>]*>)(.*)(</pre>)#siU', '_cdata_protect', $semihtml);
    $semihtml = preg_replace_callback('#(<![CDATA[)(.*)(]]>)#siU', '_cdata_protect', $semihtml);
    // And use same method to protect our code tags
    /* foreach (array_keys($GLOBALS['CODE_TAGS']) as $code_tag)
        $semihtml = preg_replace_callback('#(\[' . $code_tag . '[^\]]*\])(.*)(\[/' . $code_tag . '\])#siU', '_codetag_protect', $semihtml);
    Actually no, we don't want this. These tags are typed potentially to show HTML and thus the entities must get decoded
    */

    // Not full HTML
    $semihtml = preg_replace('#<head[^<>]*>.*</head>#Us', '', $semihtml);
    $semihtml = preg_replace('#</?(html|head|body)[^<>]*>#Us', '', $semihtml);

    // Cleanup from certain word processors
    // LibreOffice
    $semihtml = str_replace('<h2 class="western">', '<h2>', $semihtml);
    $semihtml = cms_preg_replace_safe('#</(ul|ol|h1|h2|h3|h4|h5|h6)>\s*<p style="margin-bottom:\s*0(cm|em|px)?">\s*&nbsp;\s*</p>\s*#Us', '</${1}>', $semihtml);
    $semihtml = preg_replace('#<p style="margin-bottom:\s*0(cm|em|px)?">\s*&nbsp;\s*</p>\s*#Us', '', $semihtml);
    $semihtml = preg_replace('#<li>\s*<p style="margin-bottom:\s*0(cm|em|px)?">(.*)</p>\s*</li>#Us', '<li>${2}</li>', $semihtml);
    $semihtml = preg_replace('#<p style="margin-bottom:\s*0(cm|em|px)?">(.*)</p>\s*<(ul|ol|h1|h2|h3|h4|h5|h6)>#Us', '${2}<${3}>', $semihtml);
    $semihtml = preg_replace('#(<style[^>]*>)(.*)(</style>)#siU', '', $semihtml); // We shouldn't allow this nested anyway (invalid XHTML), and word abuses it
    $semihtml = preg_replace('#<span class="Apple-style-span"[^>]*>(.*)</span>#siU', '${1}', $semihtml); // webkit
    $semihtml = preg_replace('#<meta[^>]*>#siU', '', $semihtml); // We shouldn't allow this nested anyway (invalid XHTML), and word abuses it
    $semihtml = preg_replace('#(<[^>]*) lang="[^"]*"#i', '${1}', $semihtml);
    $semihtml = preg_replace('#(<[^>]*) style="margin-right:\s*0\w*;?"#i', '${1}', $semihtml);
    $semihtml = preg_replace('#(<[^>]*) dir="' . do_lang('dir') . '"#i', '${1}', $semihtml);
    $semihtml = preg_replace_callback('#<[^>"]* style="([^">]*&quot;[^">]*)*"#i', '_css_quot_fixup', $semihtml);
    $semihtml = preg_replace('#<a name="OLE_LINK1">([^<]*)</a>#siU', '${1}', $semihtml);
    $semihtml = preg_replace('#(?U)(<[^>]* style="[^"]*)(?-U);?\s*page-break-after:\s*avoid;?"#is', '${1}"', $semihtml);
    $semihtml = str_replace('<place>', '', $semihtml);
    $semihtml = str_replace('</place>', '', $semihtml);
    $semihtml = preg_replace('#<link [^>]*href="file://[^"]*"[^>]*/>#sU', '', $semihtml);
    $semihtml = preg_replace('#<!--\[if(.*)-->#sU', '', $semihtml);
    $semihtml = preg_replace('#<!--(.*)\[endif(.*)-->#sU', '', $semihtml);
    $semihtml = str_replace('<!-- >', '', $semihtml);
    $semihtml = preg_replace('#</?[ovw]:[^>]*>#s', '', $semihtml);
    $semihtml = preg_replace('#(<[^>]*) [ovw]:[^>"]*"[^"]*"([^>]*>)#s', '${1}${2}', $semihtml);
    $semihtml = preg_replace('#</?st1:[^>]*>#', '', $semihtml); // Word smart tags
    $semihtml = str_replace('<br class="Apple-interchange-newline" />', '<br />', $semihtml);
    $semihtml = preg_replace('# class="Mso\w+"#', '', $semihtml);
    $semihtml = preg_replace('#margin-(top|bottom):\s*0cm#', '', $semihtml);
    $semihtml = str_replace('text-align:justify', '', $semihtml);
    $semihtml = str_replace(' type="disc"', '', $semihtml);
    $semihtml = str_replace(' type="1"', '', $semihtml);
    $semihtml = str_replace(' start="1"', '', $semihtml);
    $semihtml = preg_replace('#mso-\w+-font-family:\s*"[^"]*"#', '', $semihtml);
    $semihtml = preg_replace('#mso-[\w\-]+:[^;"\']*#', '', $semihtml);
    $semihtml = str_replace('text-autospace:none', '', $semihtml);
    $semihtml = preg_replace('#(<[^>]* align="right"[^>]*) style="(margin-right:\s*[\d\.]+pt;\s*)?text-align:\s*right[;\s]*"#is', '${1}', $semihtml); // trim off redundancy
    $semihtml = preg_replace('#(<[^>]* align="center"[^>]*) style="(margin-right:\s*[\d\.]+pt;\s*)?text-align:\s*center[;\s]*"#is', '${1}', $semihtml); // trim off redundancy
    // Clean some whitespace (they have a special Comcode meaning, but no special HTML meaning)
    $inline_elements = array(
        'font', 's', 'u', 'strike', 'span', 'abbr', 'acronym', 'cite',
        'code', 'dfn', 'em', 'strong', 'kbd', 'q', 'samp', 'var',
        'sub', 'sup', 'tt', 'del', 'ruby', 'a', 'bdo', 'img',
        'ins', 'param', 'textarea', 'button', 'input', 'select',
        'object', 'caption', 'label', 'b', 'i', 'small', 'big');
    $semihtml = cms_preg_replace_safe('#(<(' . implode('|', $inline_elements) . ')( [^>]*)?' . '>)\s+#', '${1}</CDATA__space>', $semihtml);
    $semihtml = cms_preg_replace_safe('#\s+(</(' . implode('|', $inline_elements) . ')>)#', '</CDATA__space>${1}', $semihtml);
    $semihtml = cms_preg_replace_safe('#([^\>\s])\s+(<(' . implode('|', $inline_elements) . ')( [^>]*)?' . '>)#', '${1}</CDATA__space>${2}', $semihtml);
    $semihtml = cms_preg_replace_safe('#(</(' . implode('|', $inline_elements) . ')>)\s+#', '${1}</CDATA__space>', $semihtml);
    $semihtml = cms_preg_replace_safe('#>\s+#', '>', $semihtml); // NB: Only non-inline, due to above CDATA__space
    $semihtml = cms_preg_replace_safe('#\s+<#', '<', $semihtml); // ditto
    $semihtml = cms_preg_replace_safe('#(\s)\s*#', '${1}', $semihtml);

    // Clean redundant CSS syntax
    do {
        $old = $semihtml;
        $semihtml = preg_replace('# style="([^"]*); ?; ?+[^"]*#', ' style="$1;', $semihtml);
    }
    while ($old != $semihtml);
    $semihtml = str_replace(' style=""', '', $semihtml);

    // Cleanup impossible stuff in code tags
    global $CODE_TAGS;
    foreach (array_keys($CODE_TAGS) as $tag) {
        $semihtml = comcode_preg_replace($tag, '#^(\[' . $tag . '\])(.*)(\[/' . $tag . '\])$#si', array('comcode_strip_html_tags'), $semihtml);
    }

    // Cleanup how blocks are converted into a line break model. We need to clean up the case where inline leads onto block, by adding a linebreak in-between. Note that this kind of break does not go *between* blocks, which is the reason we can't arbitrarily place it later on.
    $semihtml = cms_preg_replace_safe('#([^\s<>]|</(' . implode('|', $inline_elements) . ')>)(<(div|p))#', '${1}<br />${3}', $semihtml);

    // Reorder XHTML attributes alphabetically, so our regexp's match better
    $semihtml = preg_replace_callback('#<([^>\s]+)\s([^>]+)>#', '_reorder_xhtml_attributes', $semihtml);

    // Reorder style properties alphabetically, so our regexp's match better
    $semihtml = preg_replace_callback('#(<[^>]*style=")([^"]*)("[^>]*>)#', '_reorder_css_properties', $semihtml);

    $semihtml = str_replace('<p  />', '<br /><br />', str_replace('<p />', '<br /><br />', $semihtml));

    // Remove proprietary stylings put in by RTF->HTML conversions performed by certain browsers
    $old_semihtml = '';
    do {
        $old_semihtml = $semihtml;
        $semihtml = preg_replace('#(<[^>]* style="(?U)[^">]*(?-U))-\w+-[^";>]*(;\s*)?#s', '${1}', $semihtml);
    } while ($semihtml != $old_semihtml);

    // Perform lots of conversions. We can't convert everything. Sometimes we reverse-convert what Comcode forward-converts; sometimes we match generic HTML; sometimes we match Microsoft Word or Open Office; sometimes we do lossy match
    $semihtml = convert_html_headers_to_titles($semihtml, strpos($semihtml, '[contents') !== false);
    $array_html_preg_replace = array();
    $array_html_preg_replace[] = array('#^<span>(.*)</span>$#siU', '${1}');
    $array_html_preg_replace[] = array('#^<span( charset="[^"]*")?( content="[^"]*")?( name="[^"]*")?' . '>(.*)</span>$#siU', '${4}');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-family:\s*monospace;\s*?font-size:\s*[\d\.]*em;?">(.*)</span>$#siU', '[tt]${1}[/tt]');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-weight:\s*bold;?">(.*)</span>$#siU', '[b]${1}[/b]');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-style:\s*italic;?">(.*)</span>$#siU', '[i]${1}[/i]');
    $array_html_preg_replace[] = array('#^<span style="\s*?text-decoration:\s*underline;?">(.*)</span>$#siU', '[u]${1}[/u]');
    $array_html_preg_replace[] = array('#^<span( href="[^"]*")?( rel="[^"]*")?' . '>(.*)</span>$#siU', '${3}');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-weight:\s*bold;?">(.*)</span>$#siU', '[b]${1}[/b]');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-style:\s*italic;?">(.*)</span>$#siU', '[i]${1}[/i]');
    $array_html_preg_replace[] = array('#^<span style="\s*?text-decoration:\s*underline;?">(.*)</span>$#siU', '[u]${1}[/u]');
    $array_html_preg_replace[] = array('#^<span style="\s*?color:\s*?([^";]+);\s*?font-family:\s*?([^";]+);\s*?font-size:\s*?([^";]+);?"([^>]*)>(.*)</span>$#siU', '[font param="${2}" color="${1}" size="${3}"]${5}[/font]');
    $array_html_preg_replace[] = array('#^<span style="\s*?color:\s*?([^";]+);\s*?font-size:\s*?([^";]+);?"([^>]*)>(.*)</span>$#siU', '[font color="${1}" size="${2}"]${4}[/font]');
    $array_html_preg_replace[] = array('#^<span style="\s*?color:\s*?([^";]+);\s*?font-family:\s*?([^";]+);?"([^>]*)>(.*)</span>$#siU', '[font param="${2}" color="${1}"]${4}[/font]');
    $array_html_preg_replace[] = array('#^<span style="\s*?color:\s*?([^";]+);?\s*?"([^>]*)>(.*)</span>$#siU', '[font color="${1}"]${3}[/font]');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-family:\s*?([^";]+);\s*?font-size:\s*?([^";]+);?"([^>]*)>(.*)</span>$#siU', '[font param="${1}" size="${2}"]${4}[/font]');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-size:\s*?([0-9\.]+\w\w);?\s*?"([^>]*)>(.*)</span>$#siU', '[font size="${1}"]${3}[/font]');
    $array_html_preg_replace[] = array('#^<span style="\s*?font-family:\s*?([^";]+);?\s*?"([^>]*)>(.*)</span>$#siU', '[font param="${1}"]${3}[/font]');
    $complex_equivs = array('<span class="comcode_highlight">(.*)</span>' => 'highlight', '<span class="comcode_bold">(.*)</span>' => 'b', '<span class="comcode_italic">(.*)</span>' => 'i', '<span class="comcode_underline">(.*)</span>' => 'u');
    foreach ($complex_equivs as $from => $to) {
        $array_html_preg_replace[] = array('#^' . $from . '$#siU', '[' . $to . ']${1}[/' . $to . ']');
    }
    $semihtml = array_html_preg_replace('span', $array_html_preg_replace, $semihtml);
    if (stripos($semihtml, '<div') !== false) {
        $_array_html_preg_replace = array();
        foreach ($array_html_preg_replace as $i => $x) {
            $_array_html_preg_replace[$i] = array();
            $_array_html_preg_replace[$i][0] = str_replace('span', 'div', $x[0]);
            $_array_html_preg_replace[$i][1] = '<div>' . $x[1] . '</div>';
        }
        $semihtml = array_html_preg_replace('div', $_array_html_preg_replace, $semihtml);
    }
    if (stripos($semihtml, '<p') !== false) {
        $_array_html_preg_replace = array();
        foreach ($array_html_preg_replace as $i => $x) {
            $_array_html_preg_replace[$i] = array();
            $_array_html_preg_replace[$i][0] = str_replace('div', 'p', $x[0]);
            $_array_html_preg_replace[$i][1] = '<p>' . $x[1] . '</p>';
        }
        $semihtml = array_html_preg_replace('p', $_array_html_preg_replace, $semihtml);
    }
    $array_html_preg_replace = array();
    $array_html_preg_replace[] = array('#^<font>(.*)</font>$#siU', '${1}');
    $array_html_preg_replace[] = array('#^<font ([^>]*)size="(\d+)"([^>]*)>(.*)</font>$#siU', '[font ${1}${3} size="${2}of"]${4}[/font]');
    $array_html_preg_replace[] = array('#^<font([^>]*)>(.*)</font>$#siU', '[font${1}]${2}[/font]');
    $semihtml = array_html_preg_replace('font', $array_html_preg_replace, $semihtml);
    $semihtml = preg_replace_callback('#(\[font [^\]]*color=")rgb\((\s*\d+\s*),(\s*\d+\s*),(\s*\d+\s*)\)("[^\]]*\])#', '_css_color_fixup', $semihtml);
    $semihtml = preg_replace_callback('#<a ([^>]*)href="([^"]*)"([^>]*)>#', '_a_tag_link_fixup', $semihtml);
    require_code('obfuscate');
    if (stripos($semihtml, '<a') !== false) {
        $array_html_preg_replace = array();
        $array_html_preg_replace[] = array('#^<a ([^>]*)href="mailto:(?-U) ?(?U)([^"]+)"([^>]*)>(.*)</a>$#siU', '[email="${2}"]${4}[/email]');
        $array_html_preg_replace[] = array('#^<a ([^>]*)href="' . preg_quote(mailto_obfuscated(), '#') . '([^"]+)"([^>]*)>(.*)</a>$#siU', '[email="${4}"]${2}[/email]');
        $array_html_preg_replace[] = array('#^<a ([^>]*)href="([^"]+)"([^>]*) rel="([^"]*)" target="([^"]*)"([^>]*)>(.*)</a>$#siU', '[url="${2}" rel="${4}" target="${5}"]${7}[/url]');
        $array_html_preg_replace[] = array('#^<a ([^>]*)href="([^"]+)"([^>]*) target="([^"]*)"([^>]*)>(.*)</a>$#siU', '[url="${2}" target="${4}"]${6}[/url]');
        $array_html_preg_replace[] = array('#^<a ([^>]*)href="([^"]+)"([^>]*) rel="([^"]*)"([^>]*)>(.*)</a>$#siU', '[url="${2}" rel="${4}"]${6}[/url]');
        $array_html_preg_replace[] = array('#^<a ([^>]*)href="([^"]+)"([^>]*)>(.*)</a>$#siU', '[url="${2}"]${4}[/url]');
        $semihtml = array_html_preg_replace('a', $array_html_preg_replace, $semihtml);
    }
    if (stripos($semihtml, '<p') !== false) {
        $array_html_preg_replace = array();
        $array_html_preg_replace[] = array('#^<p class="msoNormal">\s*(.*)\s*</p>$#siU', '${1}<br />');
        $array_html_preg_replace[] = array('#^<p align="(\w+)" class="msoNormal">\s*(.*)\s*</p>$#siU', '[align="${1}"]${2}[/align]');
        $array_html_preg_replace[] = array('#^<p class="msoNormal" style="margin:\s*\d+pt 0[\w;]*">\s*(.*)\s*</p>$#siU', '<br />${1}<br />'); // Cleanup from Word
        $array_html_preg_replace[] = array('#^<p class="msoNormal" style="margin:\s*0[\w;]* 0[\w;]* 0[\w;]*">\s*(.*)\s*</p>$#siU', '${1}<br />'); // Cleanup from Word
        $array_html_preg_replace[] = array('#^<p style="margin:\s*\d+pt 0[\w;]*">\s*(.*)\s*</p>$#siU', '<br />${1}<br />'); // Cleanup from Word
        $array_html_preg_replace[] = array('#^<p style="margin:\s*0[\w;]* 0[\w;]* 0[\w;]*">\s*(.*)\s*</p>$#siU', '${1}<br />'); // Cleanup from Word
        $array_html_preg_replace[] = array('#^<p class="Mso\w*" style="[^"]*">\s*(.*)\s*</p>$#siU', '<br />${1}<br />'); // Aggressive cleanup from Word (it's here last because we want the nicer matches to get a chance to work instead. It's a shame we need to do this, as we are throwing away potentially important styling (although actually the spans etc far above will have got most of this - we only match p level styling here)- but Word throws so much into a mix it's impossible to "remove the wheat from the chaff". People will need to put it back in using the WYSIWYG editor directly.
        $array_html_preg_replace[] = array('#^<p>\s*(.*)\s*</p>$#siU', '${1}<br /><br />');
        $array_html_preg_replace[] = array('#^<p align="(\w+)">\s*(.*)\s*</p>$#siU', '[align="${1}"]${2}[/align]');
        $semihtml = array_html_preg_replace('p', $array_html_preg_replace, $semihtml);
    }
    $array_html_preg_replace = array();
    $array_html_preg_replace[] = array('#^<div align="justify">(.*)</div>$#siU', '[align="justify"]${1}[/align]');
    $array_html_preg_replace[] = array('#^<div style="text-align:\s*?justify;?">(.*)</div>$#siU', '[align="justify"]${1}[/align]');
    $complex_equivs = array('<div align="right">(.*)</div>' => 'right', '<div align="left">(.*)</div>' => 'left', '<div align="center">(.*)</div>' => 'center', '<div style="text-align:\s*?right;?">(.*)</div>' => 'right', '<div style="text-align:\s*?left;?">(.*)</div>' => 'left', '<div style="text-align:\s*?center;*">(.*)</div>' => 'center');
    foreach ($complex_equivs as $from => $to) {
        $array_html_preg_replace[] = array('#^' . $from . '$#siU', '[' . $to . ']${1}[/' . $to . ']');
    }
    $array_html_preg_replace[] = array('#^<div style="margin-left:\s*?(\d+)px;?">(.*)</div>$#siU', '[indent="${1}"]${2}[/indent]');
    $array_html_preg_replace[] = array('#^<div class="([^"]+)">(.*)</div>$#siU', '[surround="${1}"]${2}[/surround]');
    $array_html_preg_replace[] = array('#^<div>(.*)</div>$#siU', '${1}<br />');
    if (stripos($semihtml, '<div') !== false) {
        $semihtml = array_html_preg_replace('div', $array_html_preg_replace, $semihtml);
    }
    if (stripos($semihtml, '<span') !== false) {
        $_array_html_preg_replace = array();
        foreach ($array_html_preg_replace as $i => $x) {
            $_array_html_preg_replace[$i] = array();
            $_array_html_preg_replace[$i][0] = str_replace('div', 'span', $x[0]);
            $_array_html_preg_replace[$i][1] = $x[1];
        }
        $semihtml = array_html_preg_replace('span', $_array_html_preg_replace, $semihtml);
    }
    if (stripos($semihtml, '<p') !== false) {
        $_array_html_preg_replace = array();
        foreach ($array_html_preg_replace as $i => $x) {
            $_array_html_preg_replace[$i] = array();
            $_array_html_preg_replace[$i][0] = str_replace('div', 'p', $x[0]);
            $_array_html_preg_replace[$i][1] = str_replace('<br />', '<br /><br />', $x[1]);
        }
        $semihtml = array_html_preg_replace('p', $_array_html_preg_replace, $semihtml);
    }
    $array_html_preg_replace = array();
    $array_html_preg_replace[] = array('#^<kbd>(.*)</kbd>$#siU', "[tt]\${1}[/tt]");
    $semihtml = array_html_preg_replace('kbd', $array_html_preg_replace, $semihtml);
    $array_html_preg_replace = array();
    $array_html_preg_replace[] = array('#^<ul dir="ltr">(.*)</ul>$#siU', '[list]' . '${1}[/list]');
    $array_html_preg_replace[] = array('#^<ul>(.*)</ul>$#siU', '[list]' . '${1}[/list]');
    $semihtml = array_html_preg_replace('ul', $array_html_preg_replace, $semihtml);
    $array_html_preg_replace = array();
    $array_html_preg_replace[] = array('#^<ol>(.*)</ol>$#siU', '[list="1"]' . '${1}[/list]');
    $array_html_preg_replace[] = array('#^<ol style="list-style-type:\s*?([^";]*);?">(.*)(</ol>|<ol />)$#siU', '[list="${1}"]' . '${2}[/list]');
    $array_html_preg_replace[] = array('#^<ol type="([^"]*)">(.*)</ol>$#siU', '[list="${1}"]' . '${2}[/list]');
    $semihtml = array_html_preg_replace('ol', $array_html_preg_replace, $semihtml);
    $array_html_preg_replace = array();
    if (strpos($semihtml, '[list') !== false) { // Because not all HTML list tags will convert, e.g. if has CSS class on it
        $array_html_preg_replace[] = array('#^<li>(.*)</li>$#siU', '[*]${1}[/*]' . "\n");
        $semihtml = array_html_preg_replace('li', $array_html_preg_replace, $semihtml);
    }
    $semihtml = str_replace('<strong class="comcode_bold">', '<strong>', $semihtml);
    $semihtml = str_replace('<em class="comcode_italic">', '<em>', $semihtml);
    $equivs = array('blockquote' => 'indent', 'code' => 'code', 'tt' => 'tt', 'sub' => 'sub', 'sup' => 'sup', 'center' => 'center', '!abbr' => 'abbr', '!acronym' => 'acronym', 'address' => 'address', 'dfn' => 'dfn', 'cite' => 'cite', 'strong' => 'b', 'b' => 'b', 'em' => 'i', 'i' => 'i', 'u' => 'u', 'strike' => 's', 'del' => 'del', 'ins' => 'ins');
    foreach ($equivs as $from => $to) {
        if (stripos($semihtml, '<' . $from) !== false) {
            $array_html_preg_replace = array();
            if ($from[0] == '!') {
                $from = substr($from, 1);
                $array_html_preg_replace[] = array('#^<' . $from . '([^>]*)>(.*)</' . $from . '>$#siU', '[' . $to . '${1}]${2}[/' . $to . ']');
            } else {
                $array_html_preg_replace[] = array('#^<' . $from . '>(.*)</' . $from . '>$#siU', '[' . $to . ']${1}[/' . $to . ']');
            }
            $semihtml = array_html_preg_replace($from, $array_html_preg_replace, $semihtml);
        }
    }

    if (stripos($semihtml, '[font') !== false) {
        // Fonts that set nothing
        $test = preg_replace('#\[font param="verdana,arial,helvetica,sans-serif"#', '', $semihtml);
        $test = preg_replace('#\[font="verdana,arial,helvetica,sans-serif"#', '', $test);

        if ((strpos($test, '[font=') === false) && (strpos($test, '[font param=') === false)) {
            $semihtml = comcode_preg_replace('font', '#^\[font( param)?="verdana,arial,helvetica,sans-serif"](.*)\[/font\]$#si', '${2}', $semihtml);
            $semihtml = str_replace(' param="verdana,arial,helvetica,sans-serif"', '', $semihtml);
            $semihtml = str_replace('="verdana,arial,helvetica,sans-serif"', '', $semihtml);
        }
    }

    // Our cleanup loop. These optimisations trickle-through, as they depend on each other. We keep looping until we've done all we can.
    $old_semihtml = '';
    $text_formatting_tags = array('b', 'i', 'u', 'tt', 'font', 'title', 'center', 'left', 'right', 'color');
    do {
        $old_semihtml = $semihtml;

        // Empty tags
        $semihtml = preg_replace('#\<(\w+)\>\</\1\>#', '', $semihtml);
        if (stripos($semihtml, '[font') !== false) {
            $semihtml = preg_replace('#\[font[^\]]*\]\[/font\]#', '', $semihtml);
        }
        if (stripos($semihtml, '[b') !== false) {
            $semihtml = preg_replace('#\[b[^\]]*\]\[/b\]#', '', $semihtml);
        }
        if (stripos($semihtml, '[i') !== false) {
            $semihtml = preg_replace('#\[i[^\]]*\]\[/i\]#', '', $semihtml);
        }

        // Canonical order to make sure we can find pointless nestings. Unfortunately we can only bubble out one level due to constraints in our regexp checking (we need to make sure we don't cross-tags, but we can't in a regexp unless we make sure we have no nesting at all)
        foreach ($text_formatting_tags as $i => $tag) {
            foreach ($text_formatting_tags as $j => $tag_2) {
                if ($i < $j) {
                    $semihtml = comcode_preg_replace($tag_2, '#^(\[' . $tag_2 . '( [^\]]*)?\])(\[' . $tag . '( [^\]]*\])?)([^\[\]]*)(\[/' . $tag . '[^\]]*\])(\[/' . $tag_2 . '[^\]]*\])$#si', '${3}${1}${5}${7}${6}', $semihtml);
                }
            }
        }

        // Cleanup nested fonts
        $semihtml = preg_replace('#<span[^<>]*></span>#siU', '', $semihtml);
        if (stripos($semihtml, '[font') !== false) {
            $semihtml = comcode_preg_replace('font', '#^\[font([^\]]*)\](\s*)\[font([^\]]*)\](.*)\[/font\](\s*)\[/font\]$#si', '[font${1}${3}]${2}${4}${5}[/font]', $semihtml);
            $semihtml = preg_replace('#\[font ([^\]]*)face="([^"]*)"([^\]]*)face="([^"]*)"([^\]]*)\]#si', '[font ${1}${3}${5} face="${4}"]', $semihtml);
            $semihtml = preg_replace('#\[font ([^\]]*)size="([^"]*)"([^\]]*)size="([^"]*)"([^\]]*)\]#si', '[font ${1}${3}${5} size="${4}"]', $semihtml); // This is imperfect (due to relative font sizes), but at least it encourages cleanup
            $semihtml = preg_replace('#\[font ([^\]]*)color="([^"]*)"([^\]]*)color="([^"]*)"([^\]]*)\]#si', '[font ${1}${3}${5} color="${4}"]', $semihtml);
            $semihtml = preg_replace('#\[font ([^\]]*)param="([^"]*)"([^\]]*)param="([^"]*)"([^\]]*)\]#si', '[font ${1}${3}${5} param="${4}"]', $semihtml);
            $semihtml = preg_replace('#(\[font.*)(?-U)\s+(?U)(.*\])#U', '${1} ${2}', $semihtml); // safe because no whitespace runs can be expected within a font tag
        }

        // Cleanup other nestings / close then reopen patterns
        foreach (array('b', 'i', 'u', 'tt', 'font size="[^"]*"') as $tag) {
            $tagx = (strpos($tag, ' ') !== false) ? substr($tag, 0, strpos($tag, ' ')) : $tag;

            if (stripos($semihtml, '[' . $tagx) !== false) {
                $semihtml = comcode_preg_replace($tagx, '#^(\[' . $tag . '\])(.*)\\1(.*)\[/' . $tagx . '\](.*)\[/' . $tagx . '\]$#si', '${1}${2}${3}${4}[/' . $tagx . ']', $semihtml);

                $semihtml = cms_preg_replace_safe('#(\[' . $tag . '\])([^\[\]]*)\[/' . $tagx . '\]((&nbsp;|</CDATA\_\_space>|\s)*)\\1#si', '${1}${2}${3}', $semihtml); // Only works in simple case, not when there are tags nested within first tag. Can't use comcode_preg_replace as we are joining two tags (i.e. not operating over single bind)
            }
        }

        // Cleanup lines filled with spaces/font-junk
        foreach ($text_formatting_tags as $tag) {
            if (stripos($semihtml, '[' . $tag) !== false) {
                $semihtml = cms_preg_replace_safe('#(\[' . $tag . '[^\]]*\])((&nbsp;|</CDATA\_\_space>|\s|<br\s*/>|\n)*)#i', '${2}${1}', $semihtml); // Tag starting unnecessarily early -> Move it back
                $semihtml = cms_preg_replace_safe('#((&nbsp;|</CDATA\_\_space>|\s|<br\s*/>|\n)*)(\[/' . $tag . '\])#i', '${3}${1}', $semihtml); // Tag ending unnecessarily late -> Move it back
                $semihtml = cms_preg_replace_safe('#\[' . $tag . '[^\]]*\]((&nbsp;|</CDATA\_\_space>|\s|<br\s*/>|\n)*)\[/' . $tag . '\]#i', '${1}', $semihtml); // Tag wrapping whitespace -> White space
            }
        }
        $semihtml = cms_preg_replace_safe('#(&nbsp;|</CDATA\_\_space>|\s)*<br\s*/>#i', '<br />', $semihtml); // Spaces on end of line -> (Remove)
    } while (cms_preg_replace_safe('#(\s|<br[^<>]*>|&nbsp;)#i', '', $semihtml) != cms_preg_replace_safe('#(\s|<br[^<>]*>|&nbsp;)#i', '', $old_semihtml));

    // Undone center tagging
    $semihtml = comcode_preg_replace('left', '#^\[left\]\[center\](.*)\[/center\]\[/left\]$#si', '[left]${1}[/left]', $semihtml);
    $semihtml = comcode_preg_replace('right', '#^\[right\]\[center\](.*)\[/center\]\[/right\]$#si', '[right]${1}[/right]', $semihtml);
    $semihtml = comcode_preg_replace('center', '#^\[center\]\[left\](.*)\[/left\]\[/center\]$#si', '[center]${1}[/center]', $semihtml);
    $semihtml = comcode_preg_replace('center', '#^\[center\]\[right\](.*)\[/right\]\[/center\]$#si', '[center]${1}[/center]', $semihtml);

    // Clean redundant CSS syntax (again)
    do {
        $old = $semihtml;
        $semihtml = preg_replace('# style="([^"]*); ?; ?+[^"]*#', ' style="$1;', $semihtml);
    }
    while ($old != $semihtml);
    $semihtml = str_replace(' style=""', '', $semihtml);

    // Clean some now-empty span/p/align tags
    $semihtml = preg_replace('#<span( style="[^"]*")?' . '>&nbsp;</span>#', ' ', $semihtml);
    $semihtml = preg_replace('#<p( style="[^"]*")?' . '>\s*(&nbsp;)?\s*</p>#', '<br /><br />', $semihtml);
    $semihtml = preg_replace('#\[align="\w+"\]\s*(&nbsp;)?\s*\[/align\]#', '', $semihtml);

    // Cleanup list Comcode (nice and pretty)
    $semihtml = cms_preg_replace_safe('#(&nbsp;|</CDATA\_\_space>|\s|<br\s*/>|\n)*\[/\*\](&nbsp;|</CDATA\_\_space>|\s|<br\s*/>|\n)*#', '[/*]', $semihtml);
    $semihtml = cms_preg_replace_safe('#(&nbsp;|</CDATA\_\_space>|\s|<br\s*/>|\n)*\[\*\](&nbsp;|</CDATA\_\_space>|\s|<br\s*/>|\n)*#', '[*]', $semihtml);
    $semihtml = cms_preg_replace_safe('#\[/\*\]([^\s])#', '[/*]<cmsbr />${1}', $semihtml);
    $semihtml = cms_preg_replace_safe('#\[list\]([^\s])#', '[list]<cmsbr />${1}', $semihtml);

    // Cleanup various blocks where we can afford a blank line
    global $BLOCK_TAGS;
    foreach (array_keys($BLOCK_TAGS) as $tag) {
        if (strpos($semihtml, '[' . $tag) !== false) {
            $semihtml = preg_replace('#( |</CDATA\_\_space>)*(\[' . $tag . '[\] ])#', '${2}', $semihtml);
            $semihtml = preg_replace('#\[/' . $tag . '\](?!\[/)(?!<br)#', '[/' . $tag . ']' . (($tag == 'title') ? '<cmsbr /><cmsbr />' : '<cmsbr />'), $semihtml);
        }
    }

    // Remove our CDATA protections
    $semihtml = str_replace('</CDATA__space>', ' ', $semihtml);
    $semihtml = str_replace('</CDATA__tab>', "\t", $semihtml);
    $semihtml = str_replace('</CDATA__nl>', "\n", $semihtml);
    $semihtml = str_replace('</CDATA__lf>', "\r", $semihtml);
    $semihtml = str_replace('</CDATA__amp>', '&', $semihtml);

    // Tempcode escaping
    /* No - people should be able to type this if they want
    $semihtml = str_replace('{+', '\{+', $semihtml);
    $semihtml = str_replace('{$', '\{$', $semihtml);
    $semihtml = str_replace('{!', '\{!', $semihtml);
    */

    $semihtml = str_replace('[ html', '[html', $semihtml);
    $semihtml = str_replace('[ semihtml', '[semihtml', $semihtml);
    $semihtml = str_replace('[ / html', '[/html', $semihtml);
    $semihtml = str_replace('[ / semihtml', '[/semihtml', $semihtml);

    // People without comcode_dangerous have further cleanups, that might lose some quality...
    if ((!has_privilege(get_member(), 'allow_html')) || ($force)) {
        $semihtml2 = $semihtml;

        if (stripos($semihtml2, '<table') !== false) {
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<table summary="([^"]*)"([^>]*)>(.*)</table>$#siU', "\n{| \${2}\${3}\n\n|}\n");
            $array_html_preg_replace[] = array('#^<table([^>]*)>(.*)</table>$#siU', "\n{|\n\${2}\n\n|}\n");
            $semihtml2 = array_html_preg_replace('table', $array_html_preg_replace, $semihtml2);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<thead([^>]*)>(.*)</thead>$#siU', '${2}');
            $semihtml2 = array_html_preg_replace('thead', $array_html_preg_replace, $semihtml2);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<colgroup([^>]*)>(.*)</colgroup>$#siU', '');
            $semihtml2 = array_html_preg_replace('colgroup', $array_html_preg_replace, $semihtml2);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<tbody([^>]*)>(.*)</tbody>$#siU', '${2}');
            $semihtml2 = array_html_preg_replace('tbody', $array_html_preg_replace, $semihtml2);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<tfoot([^>]*)>(.*)</tfoot>$#siU', '');
            $semihtml2 = array_html_preg_replace('tfoot', $array_html_preg_replace, $semihtml2);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<tr([^>]*)>(.*)</tr>$#siU', "\n\n|-\n\${2}");
            $semihtml2 = array_html_preg_replace('tr', $array_html_preg_replace, $semihtml2);
            $semihtml2 = preg_replace("#\{\|(.*)\n+\t*\|-\n+#", "{|\${1}\n", $semihtml2);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<th([^>]*)>(.*)</th>$#siU', "\n\n! \${2}");
            $semihtml2 = array_html_preg_replace('th', $array_html_preg_replace, $semihtml2);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<td([^>]*)>(.*)</td>$#siU', "\n| \${2}");
            $semihtml2 = array_html_preg_replace('td', $array_html_preg_replace, $semihtml2);
        }
        if (stripos($semihtml2, '<span') !== false) {
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<span style="font-family: monospace;  font-size: 1.2em;">(.*)</span>$#siU', "[tt]\${1}[/tt]");
            $semihtml2 = array_html_preg_replace('span', $array_html_preg_replace, $semihtml2);
        }
        if (strpos($semihtml2, '[code') === false) {
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<pre[^>]*>(.*)</pre>$#siU', "[code]\${1}[/code]");
            $semihtml2 = array_html_preg_replace('pre', $array_html_preg_replace, $semihtml2);
        }
        if (stripos($semihtml, '<table') !== false) {
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<table([^>]*)>(.*)</table>$#siU', "<table class=\"bordered_table\">\${2}</table>");
            $semihtml = array_html_preg_replace('table', $array_html_preg_replace, $semihtml);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<tr([^>]*)>(.*)</tr>$#siU', "<tr>\${2}</tr>");
            $semihtml = array_html_preg_replace('tr', $array_html_preg_replace, $semihtml);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<th([^>]*)>(.*)</th>$#siU', "<th>\${2}</th>");
            $semihtml = array_html_preg_replace('th', $array_html_preg_replace, $semihtml);
            $array_html_preg_replace = array();
            $array_html_preg_replace[] = array('#^<td([^>]*)>(.*)</td>$#siU', "<td>\${2}</td>");
            $semihtml = array_html_preg_replace('td', $array_html_preg_replace, $semihtml);
        }
    } else {
        $semihtml2 = $semihtml;
    }

    // Cleanup impossible stuff in code tags
    foreach (array_keys($CODE_TAGS) as $tag) {
        $semihtml2 = comcode_preg_replace($tag, '#^(\[' . $tag . '\])([.\n]*)(\[/' . $tag . '\])$#i', array('_semihtml_to_comcode_wrap'), $semihtml2);
    }

    // These can only be used outside semihtml - so we do them in a copy of our output, and only use that copy if we find we are able to do a 100% Comcode conversion
    $semihtml2 = str_replace('<cmsbr />', "\n", $semihtml2);
    if (stripos($semihtml2, '<br') !== false) {
        $semihtml2 = str_replace('<br />', "\n", $semihtml2);
        $semihtml2 = str_replace('<br  />', "\n", $semihtml2);
        $semihtml2 = str_replace('<br>', "\n", $semihtml2);
    }
    if (stripos($semihtml2, '<hr') !== false) {
        $semihtml2 = str_replace('<hr width="100%" size="2" />', '<hr />', $semihtml2);
        $semihtml2 = str_replace('<hr size="2" width="100%" />', '<hr />', $semihtml2);
        $semihtml2 = str_replace('<hr width="100%" />', '<hr />', $semihtml2);
        $semihtml2 = str_replace("\n" . '<hr />', "\n---------------\n", $semihtml2);
        $semihtml2 = str_replace("\n" . '<hr>', "\n---------------\n", $semihtml2);
        $semihtml2 = preg_replace('#<hr\s*/>#', "\n---------------\n", $semihtml2);
        $semihtml2 = str_replace('<hr>', "\n---------------\n", $semihtml2);
    }

    // We transform any HTML in there to Comcode if we can
    if (stripos($semihtml2, '<img') !== false) {
        $emoticons = $GLOBALS['FORUM_DRIVER']->find_emoticons();
        foreach ($emoticons as $code => $imgcode) {
            if ($imgcode[0] == 'EMOTICON_IMG_CODE_THEMED') {
                $imgcode[1] = find_theme_image($imgcode[1], true);
                if ($imgcode[1] == '') {
                    continue; // Theme image gone missing
                }
            }
            $imgcode[1] = str_replace(get_base_url(), '', $imgcode[1]);

            $semihtml2 = preg_replace('#<img [^>]*src="[^"]*' . preg_quote(escape_html($imgcode[1]), '#') . '"[^>]*>([ \t])?[ \t]*#si', $code . '$1', $semihtml2);
        }

        if (stripos($semihtml2, '<img') !== false) {
            $semihtml2 = preg_replace_callback('#<img([^>]*) src="([^"]*)"([^>]*) />#siU', '_img_tag_fixup', $semihtml2);
            $semihtml2 = preg_replace_callback('#<img([^>]*) src="([^"]*)"([^>]*)>#siU', '_img_tag_fixup', $semihtml2);
        }
    }

    // Then, if there is no HTML left, we can avoid the 'semihtml' tag
    if (
        (strpos($semihtml2, '<') === false) && /* No remaining HTML tags */
        (strpos($semihtml2, '&#091;') === false) && (strpos($semihtml2, '&#123;') === false) /* No [ ] which will interfere with Comcode */
    ) {
        $semihtml2 = @html_entity_decode($semihtml2, ENT_NOQUOTES/*Quotes may interfere with Comcode if inside Comcode attributes, so leave as entities*/, get_charset());
        return $semihtml2;
    }

    // Oh well, we couldn't do a perfect conversion, so we'll have to use semihtml.

    if (stripos($semihtml, '<img') !== false) {
        $semihtml = preg_replace_callback('#<img([^>]*) src="([^"]*)"([^>]*) />#siU', '_img_tag_fixup', $semihtml);
        $semihtml = preg_replace_callback('#<img([^>]*) src="([^"]*)"([^>]*)>#siU', '_img_tag_fixup', $semihtml);
    }

    $semihtml = str_replace('<cmsbr />', "\n", $semihtml);

    // Make it look slightly reasonable first (to the reader of the Comcode)
    $semihtml = str_replace('<br  />', '<br />', $semihtml);
    $semihtml = str_replace('<br />', '<br />' . "\n", $semihtml);
    $semihtml = str_replace('</p>', '</p>' . "\n", $semihtml);
    $semihtml = str_replace('[/align]', '[/align]' . "\n", $semihtml);

    if (cms_trim($semihtml) == '') {
        return '';
    }

    return '[semihtml]' . /*apply_emoticons can cause problems inside Comcode tags*/($semihtml) . '[/semihtml]';
}

/**
 * preg_replace_callback callback: removes HTML tags from the middle capture of a match, leaving the first and third captures untouched.
 * Only p, br and div tags, plus the CDATA__space/CDATA__tab/CDATA__nl/CDATA__lf/CDATA__amp placeholder tags, are allowed to survive the stripping.
 *
 * @param  array $matches Matches
 * @return string Result
 */
function comcode_strip_html_tags($matches)
{
    // Tags that strip_tags must leave in place
    $kept_tags = '<p><br><div><CDATA__space><CDATA__tab><CDATA__nl><CDATA__lf><CDATA__amp>';

    $prefix = $matches[1];
    $cleaned_body = strip_tags($matches[2], $kept_tags);
    $suffix = $matches[3];

    return $prefix . $cleaned_body . $suffix;
}

/**
 * Run a regular expression replacement over individual Comcode elements, one balanced opener...closer span at a time.
 * Regular expressions cannot track nesting by themselves, so each candidate span is first checked to contain as many openers as closers before the pattern is applied to it alone.
 * After any replacement that alters the text, the scan restarts from scratch (all recorded offsets are stale by then). Scanning stops once a pass changes nothing beyond white-space, br tags and &nbsp; entities.
 * This is a good test case:
 * exit(comcode_preg_replace('test','#\[test\](.*)\[/test\]#','>${1}<','[test]x[test a]y[/test]z[/test]'));
 *
 * @param  string $element The element name to replace over
 * @param  string $pattern Pattern
 * @param  mixed $replacement Replacement (string or single element array specifying a function name)
 * @param  string $semihtml Haystack
 * @return string Result
 */
function comcode_preg_replace($element, $pattern, $replacement, $semihtml)
{
    $opener_stub = '[' . $element;

    // Nothing that even looks like our element: bail out cheaply
    if (strpos($semihtml, $opener_stub) === false) {
        return $semihtml;
    }

    $closer_literal = '[/' . $element . ']';
    $opener_regexp = '#\[' . $element . '[\s\]]#';
    $closer_regexp = '#\[/' . $element . '[\s\]]#';
    $filler_regexp = '#(\s|<br[^<>]*>|&nbsp;)#i'; // What we disregard when deciding whether a pass achieved anything
    $via_callback = is_array($replacement);

    do {
        $snapshot = $semihtml;

        // Locate every opener and closer of this element in the current text
        $hits = array();
        preg_match_all($opener_regexp, $semihtml, $hits, PREG_OFFSET_CAPTURE);
        $opener_hits = isset($hits[0]) ? $hits[0] : array();

        $hits = array();
        preg_match_all($closer_regexp, $semihtml, $hits, PREG_OFFSET_CAPTURE);
        $closer_hits = isset($hits[0]) ? $hits[0] : array();

        foreach ($opener_hits as $opener_hit) {
            $from = $opener_hit[1];

            foreach ($closer_hits as $closer_hit) {
                // A closer sitting before this opener cannot belong to it
                if ($closer_hit[1] < $from) {
                    continue;
                }

                $to = $closer_hit[1] + strlen($closer_hit[0]); // Offset just past the closer
                $candidate = substr($semihtml, $from, $to - $from);

                // Only a span with matching opener/closer counts is a properly bound element
                $num_openers = substr_count($candidate, $opener_stub . ' ') + substr_count($candidate, $opener_stub . ']');
                if ($num_openers != substr_count($candidate, $closer_literal)) {
                    continue;
                }

                if ($via_callback) {
                    $rewritten = cms_preg_replace_callback_safe($pattern, $replacement[0], $candidate);
                } else {
                    $rewritten = cms_preg_replace_safe($pattern, $replacement, $candidate);
                }
                $semihtml = substr($semihtml, 0, $from) . $rewritten . substr($semihtml, $to);

                if ($semihtml != $snapshot) {
                    break 2; // Text moved under us, so rescan from the top
                }
                break; // Bound for this opener found (nothing changed), so move on to the next opener
            }
        }

        $now_squashed = cms_preg_replace_safe($filler_regexp, '', $semihtml);
        $then_squashed = cms_preg_replace_safe($filler_regexp, '', $snapshot);
    } while ($now_squashed != $then_squashed);

    return $semihtml;
}

/**
 * Do some regular expression matches, locked correctly to single HTML elements. This is necessary to make sure nesting is handled correctly, which regular expressions cannot do on their own.
 * It is case-sensitive for performance reasons; lower-case tags have been the norm for a long time now. It also assumes there are no tabs within a tag definition.
 *
 * @param  string $element The element name to replace over
 * @param  array $array A list of pairs: Pattern, Replacement
 * @param  string $semihtml Haystack
 * @return string Result
 */
function array_html_preg_replace($element, $array, $semihtml)
{
    // Quick exit, for efficiency
    if (strpos($semihtml, '<' . $element) === false) {
        return $semihtml;
    }

    // See if we have no nesting (no nesting --> $easy_replace)
    $easy_replace = true;
    $on_closer = true;
    $pos = 0;
    do {
        $pos_opener_1 = strpos($semihtml, '<' . $element . '>', $pos);
        $pos_opener_2 = strpos($semihtml, '<' . $element . ' ', $pos);
        $pos_opener = ($pos_opener_1 !== false && ($pos_opener_2 === false || $pos_opener_1 < $pos_opener_2)) ? $pos_opener_1 : $pos_opener_2;
        if ($pos_opener === false) {
            break;
        }

        if ($pos == 0) { // First iteration is just to find first opener
            $pos = $pos_opener + 1;
            continue;
        }

        $pos_closer_1 = strpos($semihtml, '</' . $element . '>', $pos);
        $pos_closer_2 = strpos($semihtml, '</' . $element . ' ', $pos);
        $pos_closer = ($pos_closer_1 !== false && ($pos_closer_2 === false || $pos_closer_1 < $pos_closer_2)) ? $pos_closer_1 : $pos_closer_2;
        if ($pos_closer === false) {
            break;
        }

        if ($pos_opener < $pos_closer) {
            $easy_replace = false;
            break;
        }

        $pos = $pos_opener + 1;
    } while ($pos !== false);

    // Short way
    if ($easy_replace) {
        foreach ($array as $temp) {
            list($pattern, $replacement) = $temp;
            $semihtml = cms_preg_replace_safe(str_replace('$#', '#', str_replace('#^', '#', $pattern)), $replacement, $semihtml);
        }
        return $semihtml;
    }

    // Long way
    $old_semihtml = '';
    do {
        $old_semihtml = $semihtml;

        // Find offset of openers and closers
        $matches = array();
        $count = preg_match_all('#<(/?)' . $element . '[ >]#', $semihtml, $matches, PREG_OFFSET_CAPTURE);
        $tags = array();
        for ($i = 0; $i < $count; $i++) {
            $is_closer = ($matches[1][$i][0] == '/');
            $tags[] = array(
                $is_closer ? -1 : 1, // Balancer
                $matches[0][$i][1], // Offset
                strlen($matches[0][$i][0]), // Length
            );
        }
        $num_tags = count($tags);
        foreach ($array as $index => $temp) {
            list($pattern, $replacement) = $temp;
            foreach ($tags as $i => $tag) {
                if ($tag[0] == 1) {
                    $start = $tag[1];

                    // Find the matching end position
                    $end = null;
                    $balance = 0;
                    for ($j = $i ; $j < $num_tags; $j++) {
                        $balance += $tags[$j][0];
                        if ($balance == 0) {
                            $end = $tags[$j][1];
                            $length = $tags[$j][2];
                            break;
                        }
                    }
                    if ($end === null) {
                        break;
                    }

                    // Process segment
                    $segment = substr($semihtml, $start, $end + $length - $start);
                    $before = substr($semihtml, 0, $start);
                    $after = substr($semihtml, $end + $length);
                    $subbed = cms_preg_replace_safe($pattern . 'A', $replacement, $segment);
                    $semihtml = $before . $subbed . $after;
                    if ($semihtml != $old_semihtml) {
                        break 2; // We need to start again now as the offsets have all changed
                    }
                }
            }
            unset($array[$index]); // If we are going to recurse, we don't want extra work -- let's record that this one completed
        }
    } while (cms_preg_replace_safe('#(\s|<br[^<>]*>|&nbsp;)#i', '', $semihtml) != cms_preg_replace_safe('#(\s|<br[^<>]*>|&nbsp;)#i', '', $old_semihtml));

    return $semihtml;
}
