/* Functions for the cleanerhtml plugin */

/**
 * Normalise inline font styling in an HTML fragment.
 *
 * - every CSS `color:` declaration becomes `color: #000000;`
 *   (`background-color:` and other `*-color:` properties are left alone),
 * - every `font-family:` declaration becomes `font-family: Arial,Helvetica,sans-serif;`,
 * - every `font-size:` declaration becomes `font-size: 1em;`,
 * - <strong>, <em>, <i> and <u> tags (opening and closing) are removed while
 *   their contents are kept.
 *
 * The work is done with regular expressions over the raw string, so text
 * outside of tags that happens to look like a declaration is rewritten too.
 *
 * @param {string} html HTML fragment to process.
 * @returns {string} The processed HTML.
 */
function unifyFonts(html) {
    var removeTags = ['strong', 'em', 'i', 'u'];
    // One font family name: either single-quoted or a bare name (which may
    // contain spaces and hyphens, e.g. "sans-serif").
    var family = "(?:'[^'\";<>]*'|[a-z0-9 -]+)";
    // A full, comma separated family list, so the whole declaration is
    // replaced instead of only its first entry.
    var fontFamilyRe = new RegExp(
        'font-family:[ ]?' + family + '(?:[ ]?,[ ]?' + family + ')*;?', 'gi');
    var i;

    // Replace color with black. The character in front of "color:" is only
    // inspected to rule out "background-color:" and friends; it is captured
    // and written back so an opening quote or separator is not swallowed.
    html = html.replace(/(^|[^a-z-])color:[ ]?[#a-z0-9(),.% ]+;?/gi, '$1color: #000000;');

    html = html.replace(fontFamilyRe, 'font-family: Arial,Helvetica,sans-serif;');

    // "%" is allowed so that percentage sizes are replaced completely.
    html = html.replace(/font-size:[ ]?[a-z0-9.% ]+;?/gi, 'font-size: 1em;');

    for (i = 0; i < removeTags.length; i++) {
        // Matches <tag>, <tag attr="...">, </tag> and </tag ...>. Requiring
        // whitespace or ">" right after the name keeps <img>, <ul>, </ul>,
        // </iframe>, </embed> etc. from being treated as <i>, <u> or <em>.
        html = html.replace(
            new RegExp('<\\/?' + removeTags[i] + '(?:\\s[^>]*)?>', 'gi'), '');
    }

    return html;
}

/**
 * Remove font/heading related markup from an HTML fragment.
 *
 * Always removes the tags h1-h6, font, pre and tt (opening and closing tags
 * only; the content between them is kept).
 *
 * When `tryHard` is truthy it additionally:
 * - removes the tags strong, em, u, i, span and div,
 * - deletes quoted style, class, bgcolor and border attributes,
 * - replaces paragraphs that contain only a space or `&nbsp;` with a space.
 *
 * @todo pull the tag / attribute lists out into options that can be passed.
 *
 * @param {string} html HTML fragment to clean.
 * @param {boolean} tryHard Enables the more aggressive clean-up described above.
 * @returns {string} The cleaned HTML.
 */
function cleanFonts(html, tryHard) {
    var removeTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'font', 'pre', 'tt'];
    var removeAttributes = ['style', 'class', 'bgcolor', 'border'];
    var attr;
    var i;

    if (tryHard) {
        removeTags.push('strong', 'em', 'u', 'i', 'span', 'div');

        for (i = 0; i < removeAttributes.length; i++) {
            attr = removeAttributes[i];
            // Two passes: double-quoted values, then single-quoted values.
            // Each value stops at its own quote character so that only this
            // one attribute is removed.
            html = html.replace(new RegExp(attr + '[ ]?=[ ]?"[^"]*"', 'gi'), '');
            html = html.replace(new RegExp(attr + "[ ]?=[ ]?'[^']*'", 'gi'), '');
        }

        // Remove various "line break" paragraphs. The first pattern is run
        // twice because removing an inner empty paragraph can leave behind an
        // enclosing one that only now matches.
        html = html.replace(/<p[ ]?> <\/p>\n/gi, ' ');
        html = html.replace(/<p[ ]?> <\/p>\n/gi, ' ');
        html = html.replace(/<p[ ]?>&nbsp;<\/p>/gi, ' ');
        html = html.replace(/<p[ ]?> <\/p>/gi, ' ');
    }

    for (i = 0; i < removeTags.length; i++) {
        // Matches <tag>, <tag attr="...">, </tag> and </tag ...>. Requiring
        // whitespace or ">" right after the name keeps <img>, <ul>, </ul>,
        // </iframe>, </embed> etc. from being treated as <i>, <u> or <em>.
        html = html.replace(
            new RegExp('<\\/?' + removeTags[i] + '(?:\\s[^>]*)?>', 'gi'), '');
    }

    return html;
}

/**
 * Strip Microsoft Word specific clutter from pasted HTML.
 *
 * The clean-up is a fixed sequence of regular-expression replacements and the
 * order of the steps matters (later steps rely on what earlier ones removed).
 *
 * @param {string} html HTML as pasted from Word.
 * @param {boolean} bIgnoreFont When truthy, `face` attributes and
 *     `font-family` style declarations are removed.
 * @param {boolean} bRemoveStyles When truthy, double-quoted `style`
 *     attributes are removed.
 * @param {boolean} bKeepStructure When truthy, headings are kept as plain
 *     `<hN>` tags. Otherwise headings are converted to
 *     `<div><b><font size="N">`, `<p>` becomes `<div>` and empty elements are
 *     removed.
 * @returns {string} The cleaned HTML.
 */
function cleanWord( html, bIgnoreFont, bRemoveStyles, bKeepStructure ) {
    var styleRegex ;
    var re ;
    var i ;

    // Empty <o:p> elements vanish, non-empty ones become a non-breaking space.
    html = html.replace(/<o:p>\s*<\/o:p>/g, '') ;
    html = html.replace(/<o:p>[\s\S]*?<\/o:p>/g, '&nbsp;') ;

    // Remove mso-xxx styles.
    html = html.replace( /\s*mso-[^:]+:[^;"]+;?/gi, '' ) ;

    // Remove margin styles.
    html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*;/gi, '' ) ;
    html = html.replace( /\s*MARGIN: 0cm 0cm 0pt\s*"/gi, "\"" ) ;

    html = html.replace( /\s*TEXT-INDENT: 0cm\s*;/gi, '' ) ;
    html = html.replace( /\s*TEXT-INDENT: 0cm\s*"/gi, "\"" ) ;

    // These three are only removed when they are the last declaration before
    // the closing double quote.
    html = html.replace( /\s*TEXT-ALIGN: [^\s;]+;?"/gi, "\"" ) ;

    html = html.replace( /\s*PAGE-BREAK-BEFORE: [^\s;]+;?"/gi, "\"" ) ;

    html = html.replace( /\s*FONT-VARIANT: [^\s;]+;?"/gi, "\"" ) ;

    html = html.replace( /\s*tab-stops:[^;"]*;?/gi, '' ) ;
    html = html.replace( /\s*tab-stops:[^"]*/gi, '' ) ;

    // Remove FONT face attributes.
    if ( bIgnoreFont )
    {
        html = html.replace( /\s*face="[^"]*"/gi, '' ) ;
        html = html.replace( /\s*face=[^ >]*/gi, '' ) ;

        html = html.replace( /\s*FONT-FAMILY:[^;"]*;?/gi, '' ) ;
    }

    // Remove Class attributes
    html = html.replace(/<(\w[^>]*) class=([^ |>]*)([^>]*)/gi, "<$1$3") ;

    // Remove styles.
    if ( bRemoveStyles )
        html = html.replace( /<(\w[^>]*) style="([^\"]*)"([^>]*)/gi, "<$1$3" ) ;

    // Remove style, meta and link tags
    html = html.replace( /<STYLE[^>]*>[\s\S]*?<\/STYLE[^>]*>/gi, '' ) ;
    html = html.replace( /<(?:META|LINK)[^>]*>\s*/gi, '' ) ;

    // Remove empty styles.
    html =  html.replace( /\s*style="\s*"/gi, '' ) ;

    // Spans holding only &nbsp; collapse to a space; empty spans are dropped.
    html = html.replace( /<SPAN\s*[^>]*>\s*&nbsp;\s*<\/SPAN>/gi, ' ' ) ;

    html = html.replace( /<SPAN\s*[^>]*><\/SPAN>/gi, '' ) ;

    // Remove Lang attributes
    html = html.replace(/<(\w[^>]*) lang=([^ |>]*)([^>]*)/gi, "<$1$3") ;

    // Unwrap attribute-less <span> and <font> elements.
    html = html.replace( /<SPAN\s*>([\s\S]*?)<\/SPAN>/gi, '$1' ) ;

    html = html.replace( /<FONT\s*>([\s\S]*?)<\/FONT>/gi, '$1' ) ;

    // Remove XML elements and declarations
    html = html.replace(/<\\?\?xml[^>]*>/gi, '' ) ;

    // Remove w: tags with contents.
    html = html.replace( /<w:[^>]*>[\s\S]*?<\/w:[^>]*>/gi, '' ) ;

    // Remove Tags with XML namespace declarations: <o:p><\/o:p>
    html = html.replace(/<\/?\w+:[^>]*>/gi, '' ) ;

    // Remove comments [SF BUG-1481861].
    html = html.replace(/<\!--[\s\S]*?-->/g, '' ) ;

    html = html.replace( /<(U|I|STRIKE)>&nbsp;<\/\1>/g, '&nbsp;' ) ;

    // Remove empty headings.
    html = html.replace( /<H\d>\s*<\/H\d>/gi, '' ) ;

    // Remove "display:none" tags.
    html = html.replace( /<(\w+)[^>]*\sstyle="[^"]*DISPLAY\s?:\s?none[\s\S]*?<\/\1>/ig, '' ) ;

    // Remove language tags
    html = html.replace( /<(\w[^>]*) language=([^ |>]*)([^>]*)/gi, "<$1$3") ;

    // Remove onmouseover and onmouseout events (from MS Word comments effect)
    html = html.replace( /<(\w[^>]*) onmouseover="([^\"]*)"([^>]*)/gi, "<$1$3") ;
    html = html.replace( /<(\w[^>]*) onmouseout="([^\"]*)"([^>]*)/gi, "<$1$3") ;

    if ( bKeepStructure )
    {
        // The original <Hn> tag sent from Word is something like this: <Hn style="margin-top:0px;margin-bottom:0px">
        html = html.replace( /<H(\d)([^>]*)>/gi, '<h$1>' ) ;

        // Word likes to insert extra <font> tags, when using MSIE. (Weird).
        html = html.replace( /<(H\d)><FONT[^>]*>([\s\S]*?)<\/FONT><\/\1>/gi, '<$1>$2</$1>' );
        html = html.replace( /<(H\d)><EM>([\s\S]*?)<\/EM><\/\1>/gi, '<$1>$2</$1>' );
    }
    else
    {
        html = html.replace( /<H1([^>]*)>/gi, '<div$1><b><font size="6">' ) ;
        html = html.replace( /<H2([^>]*)>/gi, '<div$1><b><font size="5">' ) ;
        html = html.replace( /<H3([^>]*)>/gi, '<div$1><b><font size="4">' ) ;
        html = html.replace( /<H4([^>]*)>/gi, '<div$1><b><font size="3">' ) ;
        html = html.replace( /<H5([^>]*)>/gi, '<div$1><b><font size="2">' ) ;
        html = html.replace( /<H6([^>]*)>/gi, '<div$1><b><font size="1">' ) ;

        html = html.replace( /<\/H\d>/gi, '</font></b></div>' ) ;

        // Transform <P> to <DIV>
        html = html.replace( /(<P)([^>]*>[\s\S]*?)(<\/P>)/gi, '<div$2</div>' ) ;

        // Remove empty tags (three times, just to be sure).
        // This also removes any empty anchor
        html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
        html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
        html = html.replace( /<([^\s>]+)(\s[^>]*)?>\s*<\/\1>/g, '' ) ;
    }

    // Style declarations to cut out of the remaining style attributes.
    // z-index accepts one or more digits so multi-digit values are removed
    // completely.
    styleRegex = [
        'z-index:[ ]?[0-9]+[;]?',
        'text-indent:[ ]?-[0-9a-z.]+[;]?',
        'margin: 0in 0in 0pt [0-9a-z. ]+[;]?',
        'margin: 0in 0in 0pt'
    ] ;

    for ( i = 0 ; i < styleRegex.length ; i++ )
    {
        // $1 = `style="`, $2 = declarations before the match, $3 = the
        // declaration to drop, $4 = everything after it up to and including
        // the closing double quote.
        re = new RegExp( '(style[ ]?=[ ]?[\'"])([^\'"]+)?(' + styleRegex[i] + ')(([^"\']+)?")', 'gi' ) ;

        // Only $1$2$4 are written back: group 5 is nested inside group 4, so
        // emitting it as well would repeat the trailing declarations after
        // the closing quote.
        html = html.replace( re, '$1$2$4' ) ;
    }

    return html ;
}

/**
 * Point file.php links and images at the given site and course.
 *
 * 1. `src` / `href` values that start with
 *    `http://moodle.eschoolconsultants.com/file.php/<digits>/` are rewritten
 *    to `<baseURL>file.php/<courseId>/` (the attribute is emitted as
 *    lower-case `src=` / `href=` followed by the original quote character).
 * 2. Every remaining `/file.php/<digits>/` path segment gets its numeric id
 *    replaced by `courseId`.
 *
 * @param {string} html HTML to process.
 * @param {(number|string)} courseId Course id to insert into file.php paths.
 * @param {string} baseURL Prefix inserted before `file.php/`; it is
 *     concatenated as-is, so it is expected to end with "/".
 * @returns {string} The rewritten HTML.
 */
function cleanImages(html, courseId, baseURL) {
    // Dots are escaped so that only the literal host name and "file.php"
    // match (an unescaped "." would accept any character).
    var productionRe = /(src|href)[ ]?=[ ]?(['"])http:\/\/moodle\.eschoolconsultants\.com\/file\.php\/[0-9]+\//gi;
    var coursePathRe = /\/file\.php\/[0-9]+\//gi;
    var coursePath = 'file.php/' + courseId + '/';

    // Replacer functions are used instead of replacement strings so that "$"
    // sequences inside baseURL / courseId are inserted literally.

    // replace all production images and links.
    html = html.replace(productionRe, function (match, attr, quote) {
        return attr.toLowerCase() + '=' + quote + baseURL + coursePath;
    });

    // replace all incorrect course ids.
    html = html.replace(coursePathRe, function () {
        return '/' + coursePath;
    });

    return html;
}

