File: html2utf8

package info (click to toggle)
gfxboot 4.5.103-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 21,212 kB
  • sloc: asm: 12,810; perl: 5,188; ansic: 4,018; pascal: 2,266; makefile: 569; xml: 256; sh: 235
file content (18 lines) | stat: -rwxr-xr-x 227 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
#! /usr/bin/perl -i

use Encode;
use HTML::PullParser;

sub enc_ent;


# Filter every input line: each HTML character reference found on the line is
# replaced by whatever enc_ent() returns for it, and the line is printed.
# With the -i switch on the #! line, files named on the command line are
# rewritten in place.
#
# Only well-formed references are matched -- a named entity (&eacute;), a
# decimal reference (&#233;) or a hexadecimal one (&#xE9;).  A looser pattern
# such as /&[^;]+;/ would also swallow everything between a stray '&' and the
# next ';' on the line; since enc_ent() keeps only the first text token the
# parser reports, any markup inside such a span would be silently deleted.
while(<>) {
  s/(&(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#[xX][0-9A-Fa-f]+);)/enc_ent($1)/ge;
  print;
}


# enc_ent($text)
#
# Decode the HTML character references in $text and return the result as a
# UTF-8 encoded byte string.
#
# The text is run through HTML::PullParser with only the 'text' event enabled
# and 'dtext' (entity-decoded text) as the reported value; just the first
# token is used.  If the parser yields no token at all (e.g. empty or
# markup-only input), $text is returned unchanged instead of dying on an
# undefined token.
sub enc_ent
{
  # Work on a private copy: the caller passes $1 and @_ aliases it, so a
  # reference to $_[0] would point at a magical match variable.
  my ($text) = @_;

  my $token = HTML::PullParser->new(doc => \$text, text => 'dtext')->get_token;

  # Nothing to decode; hand the input back untouched.
  return $text unless $token;

  return encode_utf8($token->[0]);
}