Roxen.git / server / modules / tags / html_wash.pike

version» Context lines:

Roxen.git/server/modules/tags/html_wash.pike:1:   // This is a roxen module. Copyright © 2000 - 2009, Roxen IS.   //      #include <module.h>   inherit "module";    - constant cvs_version = "$Id: html_wash.pike,v 1.36 2009/11/20 00:17:20 mast Exp $"; + constant cvs_version = "$Id$";   constant thread_safe = 1;   constant module_type = MODULE_TAG;   constant module_name = "Tags: HTML washer";   constant module_doc =   #"<p>This module provides a &lt;wash-html&gt; tag that is perhaps most   useful for turning user freetext input from a form into HTML   intelligently; perhaps turning sections separated by more than one   newline into &lt;p&gt;paragraphs&lt;/p&gt;, filtering out or   explicitly allowing some HTML tags in the input</p>   
Roxen.git/server/modules/tags/html_wash.pike:77:    if(keep_attrs)    args &= (keep_attrs[tag] || ({ }));       return ({ replace(RXML.t_xml->format_tag(tag, args, 0, (close_tags?0:    RXML.FLAG_COMPAT_PARSE|    RXML.FLAG_EMPTY_ELEMENT)),    ({ "<",">" }), ({ "\0[","\0]" }) ) });    }       string filter_body(string s, array keep_tags, array keep_containers, -  string close_tags, string keep_attributes) +  string close_tags, string keep_attributes, string remove_unwanted_tags)    {    // Replace < and > with \1 and \2 in stead of quoting with &lt; and &gt; to    // be able regexp match on single characters.    // \0 is used to keep allowed tags.    s -= "\0";    s -= "\1";    s -= "\2";       mapping keep_attrs;    if(keep_attributes)
Roxen.git/server/modules/tags/html_wash.pike:107:    Parser.HTML parser = Parser.HTML();    parser->case_insensitive_tag(1);    parser->set_extra(close_tags, keep_attrs);       foreach(keep_tags, string tag)    parser->add_tag(tag, safe_tag);       foreach(keep_containers, string container)    parser->add_container(container, safe_container);    +  if(remove_unwanted_tags) +  parser->_set_tag_callback( lambda(Parser.HTML p, string str, mixed... extra) { +  return ""; +  }); +     return replace(parser->finish(s)->read(),    ({ "<", ">", "&", "\0[", "\0]" }),    ({ "\1", "\2", "&amp;", "<", ">" }));    }    -  string linkify(string s, string|void target) +  string linkify(string s, void|mapping attrs)    {    string fix_link(string l)    {    if (has_prefix(l, "http://") ||    has_prefix(l, "https://") ||    has_prefix(l, "ftp://") ||    has_prefix(l, "mailto:"))    return l;       if (has_prefix(l, "ftp."))    return "ftp://" + l;       return "http://"+l;    };    -  Parser.HTML parser = Parser.HTML(); +  string attrs_string = "";    -  +  if (attrs && sizeof(attrs)) { +  attrs_string = Roxen.make_tag_attributes(attrs); +  } +  +  Parser.HTML parser = Parser.HTML();    parser->add_container("a", lambda(Parser.HTML p, mapping args)    { return ({ p->current() }); });    parser->_set_data_callback(    lambda(Parser.HTML p, string data)    { return ({ utf8_to_string(link_regexp->    replace(string_to_utf8(data), lambda(string link)    {    link = fix_link(link); -  return "<a href='"+link+"'"+(target?" "+Roxen.make_tag_attributes((["target":target])):"")+">"+ -  link+"</a>"; +  return sprintf("<a href=\"%s\"%s>%s</a>", +  link, attrs_string, link);    }) ) }); });       string res = parser->finish(s)->read();    parser = 0; // Avoid trampoline garbage.    return res;    }       string remove_illegal_chars(string s)    {    string result = "";
Roxen.git/server/modules/tags/html_wash.pike:192:    result = unparagraphify(result);       if(args["unlinkify"])    result = unlinkify(result);       if(!args["keep-all"])    result = filter_body(result,    parse_arg_array(args["keep-tags"]),    parse_arg_array(args["keep-containers"]),    args["close-tags"], -  args["keep-attributes"]); +  args["keep-attributes"], +  args["remove-unwanted-tags"]);       if(args->paragraphify)    result = paragraphify(result);    -  if(args["linkify"]) -  result = linkify(result, args["link-target"]); +  if(args["linkify"]) { +  mapping attrs = ([]);    -  +  if (args["link-target"]) { +  attrs->target = args["link-target"]; +  } +  +  if (args["link-rel"]) { +  attrs->rel = args["link-rel"]; +  } +  +  result = linkify(result, attrs); +  } +     if (!args["keep-all"])    result = replace(result, ({ "\1", "\2" }), ({ "&lt;", "&gt;" }));       if(args["remove-illegal-xml-chars"])    result = remove_illegal_chars(result);       return 0;    }    }   
Roxen.git/server/modules/tags/html_wash.pike:221:    {    req_arg_types = ([ ]);    opt_arg_types = ([ "keep-all":RXML.t_text(RXML.PXml),    "keep-tags":RXML.t_text(RXML.PXml),    "keep-containers":RXML.t_text(RXML.PXml),    "keep-attributes":RXML.t_text(RXML.PXml),    "paragraphify":RXML.t_text(RXML.PXml),    "unparagraphify":RXML.t_text(RXML.PXml),    "linkify":RXML.t_text(RXML.PXml),    "link-target":RXML.t_text(RXML.PXml), +  "link-rel":RXML.t_text(RXML.PXml),    "unlinkify":RXML.t_text(RXML.PXml), -  "close-tags":RXML.t_text(RXML.PXml) ]); +  "close-tags":RXML.t_text(RXML.PXml), +  "remove-unwanted-tags":RXML.t_text(RXML.PXml) ]);      #define VALID_CHARS "[^ \t\n\r<>\"'`(){}|\1\2]"    link_regexp =    Regexp("(((http)|(https)|(ftp))://(" VALID_CHARS "+)(\\." VALID_CHARS "+)+)|"    "(((www)|(ftp))(\\." VALID_CHARS "+)+)");    }   }      // --------------------- Documentation -----------------------   
Roxen.git/server/modules/tags/html_wash.pike:310: Inside #if defined(manual)
     <ex><wash-html keep-containers=\"a,font\" keep-attributes=\"a:href\">    <a href=\"http://docs.roxen.com\">Roxen docs</a>    <font style=\"color:red;\">Text</font>   </wash-html></ex>      <p>Only the href attribute for the a tag is kept.</p>      </attr>    + <attr name='remove-unwanted-tags'><p> +  Removes tags not listed in keep-tags or keep-containers instead of HTML-escaping them. +  Container content is preserved. + </p> +  + <ex><wash-html keep-containers=\"div\"> +  <span style=\"color: blue;\">span content</span><div style=\"color: red;\">div content</div> + </wash-html></ex> + <ex><wash-html remove-unwanted-tags=\"yes\" keep-containers=\"div\"> +  <span style=\"color: blue;\">span content</span><div style=\"color: red;\">div content</div> + </wash-html></ex> + </attr> +    <attr name='linkify'><p>    Makes text that looks like it might be useful as a link, e.g.    http://www.roxen.com/, into a link. Text that starts with    \"http://\", \"https://\", \"ftp://\", \"www.\" or \"ftp.\" will be    converted to a clickable link with the text as the link label.</p>      <ex><wash-html linkify='a' keep-containers='a' keep-tags='br'>    <a href=\"http://docs.roxen.com\">Roxen docs</a><br />    http://pike.roxen.com<br />    www.roxen.com   </wash-html></ex>   </attr>      <attr name='link-target'><p>    If the linkify attribute is used, set the link target to this.</p>   </attr>    -  + <attr name='link-rel'><p> +  If the linkify attribute is used, set the link attribute \"rel\" to this.</p> + </attr> +    <attr name='unlinkify'><p>    Undo a linkify-conversion. Only links that have the same label as    their address will be converted to plain text.</p>      <ex><wash-html unlinkify='1' keep-tags='br' keep-containers='a'>    <a href=\"http://www.roxen.com\">http://www.roxen.com</a><br />    <a href=\"http://www.roxen.com\">Roxen IS</a>   </wash-html></ex>   </attr>