File: txt2html.dict

package info (click to toggle)
txt2html 2.23-1
links: PTS
area: main
in suites: sarge
size: 552 kB
ctags: 135
sloc: perl: 2,993; makefile: 2
file content (174 lines) | stat: -rw-r--r-- 6,714 bytes
#
# Sample links dictionary file for HTML::TextToHTML
# http://www.katspace.com/tools/text_to_html
# http://txt2html.sourceforge.net/
# based on links dictionary for Seth Golub's txt2html
# http://www.aigeek.com/txt2html/
#
# This dictionary contains some patterns for converting obvious URLs,
# ftp sites, hostnames, email addresses and the like to hrefs.
#
# Original adapted from the html.pl package by Oscar Nierstrasz in
# the Software Archive of the Software Composition Group
# http://iamwww.unibe.ch/~scg/Src/
#

# Explicitly marked URLs.  Note that these three patterns look for the
# entity-escaped brackets (&lt; and &gt;), not for literal < and >.

# Some people even like to mark the URL label explicitly <URL:foo:label>
# $1 is the URL (becomes the HREF), $2 is the label (becomes the link text).
# The label may only contain letters, digits, apostrophes, parentheses
# and spaces.
/&lt;URL:([-\w\.\/:~_\@]+):([a-zA-Z0-9'() ]+)&gt;/ -h-> <A HREF="$1">$2</A>

# Some people like to mark URLs explicitly <URL:foo>
# Whitespace around the URL is accepted and dropped; the whole marker,
# brackets included, is replaced by the link.
/&lt;URL:\s*(\S+?)\s*&gt;/ -h-> <A HREF="$1">$1</A>

#  <http://site>
# Only http: URLs.  Unlike the URL: forms above, the brackets are written
# back (still escaped) around the link.
/&lt;(http:\S+?)\s*&gt;/ -h-> &lt;<A HREF="$1">$1</A>&gt;

# Urls: <service>:<rest-of-url>
#
# The target of every rule here is "$&", i.e. the matched text itself, so
# each URL becomes a link to itself.
#
# The http, shttp and https rules share one character class (including "@"
# for user@host URLs) and all require the last matched character to be a
# word character or "/".  That keeps sentence punctuation that directly
# follows a URL -- as in "see https://example.org/page." -- outside the link.

|snews:[\w\.]+|        -> $&
|news:[\w\.]+|         -> $&
|nntp:[\w/\.:+\-]+|    -> $&
|http:[\w/\.:\@+\-~\%#?=&;,]+[\w/]|  -> $&
|shttp:[\w/\.:\@+\-~\%#?=&;,]+[\w/]| -> $&
|https:[\w/\.:\@+\-~\%#?=&;,]+[\w/]| -> $&
|file:[\w/\.:+\-]+|     -> $&
|ftp:[\w/\.:+\-]+|      -> $&
|wais:[\w/\.:+\-]+|     -> $&
|gopher:[\w/\.:+\-]+|   -> $&
|telnet:[\w/\@\.:+\-]+|   -> $&


# catch some newsgroups to avoid confusion with sites:
#
# One rule per top-level hierarchy.  In every rule:
#   $1 = the single character in front of the group name.  It must not be
#        a word character or one of - / . : @ > and it is written back
#        unchanged in front of the link.  Because a character is required
#        here, a group name at the very start of the scanned text is not
#        matched by these rules.
#   $2 = the group name, used both as the news: target and as the link text.
#        Its last character must be a word character, "+" or "-", so a
#        trailing "." is left outside the link.
|([^\w\-/\.:\@>])(alt\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(bionet\.[\w\.+\-]+[\w+\-]+)| -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(bit\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(biz\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(clari\.[\w\.+\-]+[\w+\-]+)|  -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(comp\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(gnu\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(humanities\.[\w\.+\-]+[\w+\-]+)| 
          -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(k12\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(misc\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(news\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(rec\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(soc\.[\w\.+\-]+[\w+\-]+)|    -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(talk\.[\w\.+\-]+[\w+\-]+)|   -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(us\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(ch\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>
|([^\w\-/\.:\@>])(de\.[\w\.+\-]+[\w+\-]+)|     -h-> $1<A HREF="news:$2">$2</A>

# FTP locations (with directory):
# anonymous@<site>:<path>
#   $1 = the literal "anonymous@", written back as plain text before the link
#   $2 = site, $3 = optional whitespace after the colon, $4 = path
# The link points at ftp://$2/$4 and shows "$2:$4"; the whitespace captured
# in $3 is emitted after the link instead of inside it.
|(anonymous\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# ftp@<site>:<path>
# Same groups and same output as the anonymous@ rule, with "ftp@" as $1.
|(ftp\@)([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# Email address
# The link target is "mailto:" followed by the whole match ($&).  The domain
# must start with a letter or digit and end in a dot plus at least two
# letters; it is captured as $1 but the target does not use it.
|[a-zA-Z0-9_\+\-\.]+\@([a-zA-Z0-9][\w\.+\-]+\.[a-zA-Z]{2,})|
  -> mailto:$&

# <site>:<path>
#   $1 = the character in front of the site (not a word character and none
#        of - / . : @ >), written back unchanged
#   $2 = site, $3 = optional whitespace after the colon, $4 = path
# Produces the same ftp:// link as the anonymous@ / ftp@ forms, with the
# whitespace from $3 emitted after the link.
|([^\w\-/\.:\@>])([a-zA-Z][\w\.+\-]+\.[a-zA-Z]{2,}):(\s*)([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$4">$2:$4</A>$3

# NB: don't confuse an http server with a port number for
# an FTP location!
# NOTE(review): "host.example.org:8080" has the same <site>:<path> shape as
# the patterns in this group -- confirm how such text should be handled.
# internet number version: <internet-num>:<path>
# The first two numbers need at least two digits each; no whitespace is
# allowed after the colon here.  $1 = preceding character, $2 = address,
# $3 = path.
|([^\w\-/\.:\@])(\d{2,}\.\d{2,}\.\d+\.\d+):([\w\d+\-/\.]+)|
  -h-> $1<A HREF="ftp://$2/$3">$2:$3</A>

# telnet <site> <port>
# Matches the word "telnet", a host name of at least three dot-separated
# parts, whitespace, and a port of two to four digits.
#   $1 = host, $2 = inner repetition group (unused), $3 = port
# The word "telnet" is written back as plain text; the link text is
# "host port" and the target is telnet://host:port/.
|telnet ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})\s+(\d{2,4})|
  -h-> telnet <A HREF="telnet://$1:$3/">$1 $3</A>

# ftp <site>
# Same host shape as above.  The word "ftp" is written back as plain text
# and only the host ($1) is linked, to ftp://host/.
|ftp ([a-zA-Z][\w+\-]+(\.[\w\.+\-]+)+\.[a-zA-Z]{2,})|
  -h-> ftp <A HREF="ftp://$1/">$1</A>

# host with "ftp" in the machine name
#   $1 = what precedes the host: start of line, a leading <P>, or one
#        character that is not a word character and none of - / . : !
#   $2 = the host name (its first label contains "ftp")
#   $3 = optional text before "ftp" inside the first label (unused)
#   $4 = the character after the host (same exclusions as $1)
# $1 and $4 are written back unchanged around the link, so the output
# contains exactly the input text plus the anchor markup -- no extra words
# are added, matching the "www" host rule.
|(^|[^\w\d\-/\.:!]|^<P>)(([a-zA-Z][\w+\-]*)?ftp[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!])|
  -h-> $1<A HREF="ftp://$2/">$2</A>$4

# ftp.foo.net/blah/
# "ftp" followed by one or more ".label" parts (letters, digits, _ @ : -),
# then "/" and a run of non-whitespace.  The link target is the whole match
# with "ftp://" put in front of it.
|ftp(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> ftp://$&

# www.thehouse.org/txt2html/
# Same shape starting with "www"; the target is the whole match with
# "http://" put in front of it.
|www(\.[a-zA-Z0-9_\@:-]+)+/\S+| -> http://$&

# host with "www" in the machine name
#   $1 = what precedes the host: start of line, a leading <P>, or one
#        character that is not a word character and none of - / . : !
#   $2 = the host name (its first label contains "www")
#   $3 = optional text before "www" inside the first label (unused)
#   $4 = the character after the host; besides the exclusions used for $1
#        it must also not be "@"
# $1 and $4 are written back unchanged around the link to http://$2/.
|(^|[^\w\d\-/\.:!]|^<P>)(([a-zA-Z][\w+\-]*)?www[\w+\-]*\.[\w\.+\-]+\.[a-zA-Z]{2,})([^\w\d\-/\.:!\@])|
  -h-> $1<A HREF="http://$2/">$2</A>$4

# <site> <port>
# A host name of exactly three dot-separated parts followed by whitespace
# and a number of two to four digits becomes a telnet link.
#   $1 = host, $2 = port; link text is "host port".
# NOTE(review): nothing here checks that the number really is a port, so a
# three-part host name followed by any 2-4 digit number matches -- confirm
# this is acceptable for your input.
|([a-zA-Z][\w+\-]+\.[\w+\-]+\.[a-zA-Z]{2,})\s+(\d{2,4})|
  -h-> <A HREF="telnet://$1:$2/">$1 $2</A>

# just the site name: <site>
# But this gets mixed up with things like .tar.gz files!
# |([^\w\-/\.:\@>])([a-zA-Z][\w+\-]+(\.[\w+\-]+)+\.[a-zA-Z]{2,})|
#  -h-> $1<A HREF="http://$2">$2</A>/

# just internet numbers with port:
#   $1 = preceding character (not a word character and none of - / . : @),
#        written back unchanged
#   $2 = dotted quad, each part one to three digits (the values themselves
#        are not range-checked)
#   $3 = port, one to four digits
# Link text is "address port", target is telnet://address:port.
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})\s+(\d{1,4})|
  -h-> $1<A HREF="telnet://$2:$3">$2 $3</A>

# just internet numbers:
# Same $1 / $2 as above without a port; the address is linked to
# telnet://address.
|([^\w\-/\.:\@])(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})|
  -h-> $1<A HREF="telnet://$2">$2</A>


# (see "relative path") as used by Tom Fine
# /\(see \"([^\"]+)\"\)/  -> $1.html

# RFCs
# "RFC" followed by an optional single space and a number; $1 is the number.
# The target is the plain-text copy of that RFC on the RFC Editor site,
# which is the canonical public archive.
# NOTE(review): $1 is inserted exactly as written, so a reference with
# leading zeros (e.g. "RFC 0822") yields "rfc0822.txt" -- confirm whether
# that needs handling.
/RFC ?(\d+)/ -i-> https://www.rfc-editor.org/rfc/rfc$1.txt

# This would turn "f^H_o^H_o^H_" into "<U>foo</U>".  Gross, isn't it?
# Thanks to Mark O'Dell <emark@cns.caltech.edu> for fixing this. 
#
# /(.\\010_)+/ -he-> $tmp = $&;$tmp =~ s@\010_@@g;"<U>$tmp</U>"
# /(_\\010.)+/ -he-> $tmp = $&;$tmp =~ s@_\010@@g;"<U>$tmp</U>"
# /(.\^H_)+/ -he-> $tmp = $&;$tmp =~ s@\^H_@@g;"<U>$tmp</U>"
# /(_\^H.)+/ -he-> $tmp = $&;$tmp =~ s@_\^H@@g;"<U>$tmp</U>"

# Mark _underlined stuff_ as <U>underlined stuff</U>
# take account of possible trailing punctuation
#   $1 = the space, tab or newline required in front of the opening "_"
#   $2 = the underlined text: starts with a letter, ends with a letter, and
#        contains only letters, digits, spaces and "-" in between, so it is
#        at least two characters long
#   $3 = the character after the closing "_": whitespace or one of . ; : , ! ?
# $1 and $3 are written back unchanged around the <U> element.
/([ \t\n])_([a-z][a-z0-9 -]*[a-z])_([ \t\n\.;:,\!\?])/ -hi-> $1<U>$2</U>$3

# Mark *emphasized stuff* as <EM>emphasized stuff</EM>
# (conservative variant, disabled: letters, spaces and "-" only)
#/\B\*([a-z][a-z -]*[a-z])\*\B/ -hi-> <EM>$1</EM>

# Use this one instead if you want it to match more aggressively.
# The emphasized text ($1) must start with a word character or a quote and
# is at least two characters long; after that it may also contain
# whitespace and a range of punctuation.
/\*((\w|["'])+(\w|\s|[-!?,:;._\"\'<>#%&=+\/\$]+)+)\*/ -hi-> <EM>$1</EM>

# We also need a special case for *x*
# (the rule above needs at least two characters between the asterisks)
/\B\*([a-z])\*\B/ -hi-> <EM>$1</EM>

# Mark #bolded stuff# as <STRONG>bolded stuff</STRONG>
# (conservative variant, disabled: letters, spaces and "-" only)
# /\B#([a-z][a-z -]*[a-z])#\B/ -hi-> <STRONG>$1</STRONG>

# Use this one instead if you want it to match more aggressively.
# note that this doesn't use \w because we don't want to match numbers
# since #1 is a common enough usage.
# The bolded text ($1) starts with a letter, "-" or "_", may continue with
# those plus whitespace and some punctuation, and must end with a letter,
# "-", "_" or one of . ' " : ! ? -- so it is at least two characters long.
/\B#([a-z-_]([a-z-_]|\s|\!|\?|,|;|:|\'|\.)*([a-z-_]|\.|\'|\"|:|\!|\?))#\B/ -hi-> <STRONG>$1</STRONG>

# We also need a special case for #x#
# (the rule above needs at least two characters between the hash signs)
/\B#([a-z])#\B/ -hi-> <STRONG>$1</STRONG>


# Fixed phrases linked to home pages.  These rules use double quotes as the
# delimiter and the flags "io" instead of a /regex/ or |regex|.
# NOTE(review): per the txt2html dictionary documentation, a quoted key is
# presumably taken as a literal string, "i" ignores case and "o" links only
# the first occurrence -- confirm against the version in use.

# Seth and his amazing conversion program    :-)

"Seth Golub"  -io-> http://www.aigeek.com/
"txt2html"    -io-> http://txt2html.sourceforge.net/

# Kathryn and her amazing modules 8-)
"Kathryn Andersen"  -io-> http://www.katspace.com/
"HTML::TextToHTML"  -io-> http://www.katspace.com/tools/text_to_html/
"hypertoc"          -io-> http://www.katspace.com/tools/hypertoc/
"HTML::GenToc"      -io-> http://www.katspace.com/tools/hypertoc/

# End of sample dictionary