NYCPHP Meetup

NYPHP.org

[nycphp-talk] Converting the pesky MS Word quotes and other characters

Daniel Convissor danielc at analysisandsolutions.com
Wed Oct 29 23:55:26 EDT 2008


<?php
/**
 * Replaces "fancy" typographic characters (curly quotes, dashes, ellipsis,
 * bullets, vulgar fractions) and tabs with plain ASCII equivalents.
 *
 * Both the Windows-1252 single-byte forms and the UTF-8 multi-byte forms
 * of each character are recognised.
 *
 * The single-byte forms are only translated when the input is NOT valid
 * UTF-8.  In valid UTF-8 those byte values (0x85, 0x91-0x97, 0xBC-0xBE)
 * can only occur as continuation bytes of some other character, for
 * example the 0xBC in "u-umlaut" (0xC3 0xBC), so replacing them would
 * corrupt unrelated text.
 *
 * All characters are written as hex escapes rather than literal
 * characters, because Chrome (the REAL Chrome, as in Firefox
 * applications, not Google's usurpation) translates literal characters
 * in the source.
 *
 * If you need to see what characters are coming in, uncomment
 * the debug call to analyze_string_polaris() that exists at the top
 * of this method.
 *
 * @param mixed $in  the string to clean up, or an array of such strings
 *
 * @return mixed  the cleaned up string, or an array of cleaned up strings
 *                (keys preserved) if an array was passed in
 *
 * @see analyze_string_polaris()
 */
function filter_fancy_characters_polaris($in) {
    static $utf8_map, $legacy_map;

    // echo analyze_string_polaris($in);

    if (!isset($utf8_map)) {
        // Multi-byte (UTF-8) forms, plus the tab, which is safe to
        // translate in any encoding.
        $utf8_map = array(
            "\xE2\x80\x93" => '-',    // en dash
            "\xE2\x80\x94" => '--',   // em dash
            "\xE2\x80\x98" => "'",    // left single quote
            "\xE2\x80\x99" => "'",    // right single quote
            "\xE2\x80\x9C" => '"',    // left double quote
            "\xE2\x80\x9D" => '"',    // right double quote
            "\xE2\x80\xA6" => '...',  // ellipsis
            "\xE2\x80\xA2" => '*',    // bullet
            "\xC2\xBC"     => '1/4',
            "\xC2\xBD"     => '1/2',
            "\xC2\xBE"     => '3/4',
            "\x09"         => ' ',    // tab
        );

        // Windows-1252 / ISO-8859-1 single-byte forms.  The multi-byte
        // forms stay in this map as well: strtr() tries the longest key
        // first, so a UTF-8 sequence embedded in otherwise non-UTF-8
        // input is still replaced as a whole.
        $legacy_map = $utf8_map + array(
            "\x96" => '-',    // en dash
            "\x97" => '--',   // em dash
            "\x91" => "'",    // left single quote
            "\x92" => "'",    // right single quote
            "\x93" => '"',    // left double quote
            "\x94" => '"',    // right double quote
            "\x85" => '...',  // ellipsis
            "\x95" => '*',    // bullet
            "\xBC" => '1/4',
            "\xBD" => '1/2',
            "\xBE" => '3/4',
        );
    }

    // preg_replace() accepted an array of subjects, so keep doing that.
    if (is_array($in)) {
        return array_map(__FUNCTION__, $in);
    }

    $in = (string) $in;

    // An empty pattern with the "u" modifier only matches if the whole
    // subject is well-formed UTF-8.
    $is_utf8 = (preg_match('//u', $in) === 1);

    return strtr($in, $is_utf8 ? $utf8_map : $legacy_map);
}

/**
 * Builds a byte-by-byte report of a string for debugging odd user input
 *
 * Each byte of the input produces one entry showing the raw byte followed
 * by its hexadecimal, octal and decimal values.  Entries are terminated
 * by a line of dashes.  An empty input yields an empty report.
 *
 * @param string $in  the string to inspect
 *
 * @return string  the report
 *
 * @see filter_fancy_characters_polaris()
 */
function analyze_string_polaris($in) {
    $rows = array();
    $total = strlen($in);
    $pos = 0;

    while ($pos < $total) {
        $byte = $in[$pos];
        $code = ord($byte);

        $rows[] = '  Chr:' . $byte
            . '  Hex:' . dechex($code)
            . '  Oct:' . decoct($code)
            . '  Ord:' . $code
            . "\n-------\n";

        $pos++;
    }

    return implode('', $rows);
}
?>

-- 
 T H E   A N A L Y S I S   A N D   S O L U T I O N S   C O M P A N Y
            data intensive web and database programming
                http://www.AnalysisAndSolutions.com/
 4015 7th Ave #4, Brooklyn NY 11232  v: 718-854-0335 f: 718-854-0409



More information about the talk mailing list