HTMLUtils#

NAME#

Kernel::System::HTMLUtils - creating and modifying html strings

DESCRIPTION#

A module for creating and modifying html strings.

PUBLIC INTERFACE#

new()#

Don’t use the constructor directly, use the ObjectManager instead:

my $HTMLUtilsObject = $Kernel::OM->Get('Kernel::System::HTMLUtils');

ToAscii()#

convert an HTML string to an ASCII string

my $Ascii = $HTMLUtilsObject->ToAscii( String => $String );

ToHTML()#

convert an ASCII string to an HTML string

my $HTMLString = $HTMLUtilsObject->ToHTML(
    String             => $String,
    ReplaceDoubleSpace => 0,        # replace "  " (two spaces) with "&nbsp;&nbsp;", optional 1 or 0 (defaults to 1)
);

DocumentComplete()#

check the given HTML string and add, e.g., <html> and <body> tags where needed

my $HTMLDocument = $HTMLUtilsObject->DocumentComplete(
    String  => $String,
    Charset => $Charset,
);

DocumentStrip()#

remove html document tags from string

my $HTMLString = $HTMLUtilsObject->DocumentStrip(
    String  => $String,
);

DocumentCleanup()#

perform some sanity checks on HTML content.

-  Replace MS Word 12 <p> and <div> elements with class "MsoNormal" by <br/>, because
   Word does not use them as real <p>/<div> blocks (margin:0cm; margin-bottom:.0001pt;).

-  Replace <blockquote> with
   "<div style="border:none;border-left:solid blue 1.5pt;padding:0cm 0cm 0cm 4.0pt" type="cite">"
   for cross mail client and browser compatibility.

-  If there is no HTML doctype present, inject the HTML5 doctype, because it is compatible with HTML4
   and causes the browsers to render the content in standards mode, which is safer.

   $HTMLBody = $HTMLUtilsObject->DocumentCleanup(
       String => $HTMLBody,
   );

TruncateBodyQuote()#

Truncates the document content to a limited number of lines.

$Body = $HTMLUtilsObject->TruncateBodyQuote(
    Body       => $Body,
    Limit      => 10000,
    HTMLOutput => 1|0,
);

LinkQuote()#

detect links in HTML code and add an <a href> tag around them if it is missing

my $HTMLWithLinks = $HTMLUtilsObject->LinkQuote(
    String    => $HTMLString,
    Target    => 'TargetName', # content of target="?", e. g. _blank
    TargetAdd => 1,            # add target="_blank" to all existing "<a href"
);

a string reference is also possible

my $HTMLWithLinksRef = $HTMLUtilsObject->LinkQuote(
    String => \$HTMLStringRef,
);

Safety()#

Removes/strips active HTML tags/add-ons (JavaScript, applets, embeds and objects) from HTML strings.

my %Safe = $HTMLUtilsObject->Safety(
    String         => $HTMLString,
    NoApplet       => 1,
    NoObject       => 1,
    NoEmbed        => 1,
    NoSVG          => 1,
    NoImg          => 1,
    NoIntSrcLoad   => 0,
    NoExtSrcLoad   => 1,
    NoJavaScript   => 1,
    ReplacementStr => 'string',          # optional, string to show instead of applet, object, embed, svg and img tags
);

a string reference is also possible

my %Safe = $HTMLUtilsObject->Safety(
    String       => \$HTMLStringRef,
    NoApplet     => 1,
    NoObject     => 1,
    NoEmbed      => 1,
    NoSVG        => 1,
    NoImg        => 1,
    NoIntSrcLoad => 0,
    NoExtSrcLoad => 1,
    NoJavaScript => 1,
);

returns

my %Safe = (
    String  => $HTMLString, # modified html string (scalar or ref)
    Replace => 1,           # info if something got replaced
);

EmbeddedImagesExtract()#

extracts embedded images with data-URLs from an HTML document.

$HTMLUtilsObject->EmbeddedImagesExtract(
    DocumentRef    => \$Body,
    AttachmentsRef => \@Attachments,
);

Returns nothing. If embedded images were found, these will be appended to the attachments list, and the image data URL will be replaced with a cid: URL in the document.