#
# FILE            $Id: docregex.py,v 1.5 1998/05/19 19:42:24 dlarsson Exp $
#
# DESCRIPTION     Parsing document markups.
#
# AUTHOR          SEISY/LKSB Daniel Larsson
#
# Copyright (C) ABB Industrial Systems AB, 1996
# Unpublished work.  All Rights Reserved.
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation, and that the name of ABB Industrial Systems
# not be used in advertising or publicity pertaining to
# distribution of the software without specific, written prior permission.
#
# ABB INDUSTRIAL SYSTEMS DISCLAIMS ALL WARRANTIES WITH REGARD TO
# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS, IN NO EVENT SHALL ABB INDUSTRIAL SYSTEMS BE LIABLE
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# $Log: /Gendoc/docregex.py $
# 
# 2     98-05-25 22:13 Daniel
# Updated code_regex.
# Revision 1.5  1998/05/19 19:42:24  dlarsson
# Fixed code_regex.
#
# Revision 1.4  1998/02/05 18:12:16  dlarsson
# Fixed hyperlink definition "link" -> [link] (this have been fixed before!?).
#
# Revision 1.3  1998/01/19 11:41:46  dlarsson
# Cosmetic changes
#
# Revision 1.2  1996/09/06 19:21:43  omfadmin
# Added groups to handle surrounding characters.
#
# Revision 1.1  1996/09/06  09:53:30  omfadmin
# Initial revision
#
#
#

"""Parse utilities for python docstrings.

This module contains parse functions and regular expressions
for markups valid in python docstrings (valid = supported by
gendoc).
"""


import regex
import string

# Every whitespace character known to the string module; spliced into
# the character classes of the hypertext definition pattern below.
wsp = string.whitespace

# Letters, digits and underscore.  No pattern visible in this file
# uses it.
letter_digit_ = string.letters + string.digits + '_'

# Punctuation and other characters that may follow a
# proper markup.
# The fragment is a single group matching exactly one space, tab,
# newline or one of , . : ; ! ?  It is appended to the inline markup
# patterns (strong, emphasis, hypertext link, code), so it is always
# the last group of those patterns.
# NOTE: '\(' and '\)' are grouping operators in the Emacs-style syntax
# that the regex module uses by default.
endm = '\([ \t\n,.:;!?]\)'

# Strong:
# format: "**strong text**"
# Groups: 1 = the space, tab or newline required before the markup,
#         2 = the strong text,
#         3 = the character following the markup (see endm).
# The text is at least two characters long, starts and ends with
# something other than a space or tab, and has no newline or '*'
# between its first and last character.
strong_regex=regex.compile("\([ \t\n]\)\*\*\([^ \t][^\n*]*[^ \t]\)\*\*" + endm)

# Emphasized:
# format: "*emphasized*"
# Same layout and text constraints as strong_regex, with a single
# '*' on each side instead of '**'.
# Groups: 1 = preceding space/tab/newline, 2 = the emphasized text,
#         3 = the character following the markup (see endm).
emph_regex = regex.compile("\([ \t\n]\)\*\([^ \t][^\n*]*[^ \t]\)\*" + endm)

# Bullet:
# format: "* bulleted list"
#         "- bulleted list"
#         "o bulleted list"
# Groups: 1 = optional leading whitespace, the bullet character
#             ('o', '*' or '-') and the whitespace that must follow it,
#         2 = the item text: everything after that, newlines included
#             (any character except NUL).

bullet_regex = regex.compile('\([ \t\n]*[o*-][ \t\n]+\)\([^\0]*\)')

# Numbered list:
# format: "1. some text"
#         "a. some text"
#         "(1) some text"
# NOTE(review): the third format used to read "1  some text", which
# none of the patterns below accept.  olp_regex writes its parentheses
# unescaped, and in the Emacs-style syntax used throughout this file
# unescaped parentheses are literal characters -- confirm that
# "(1) some text" is the intended form.

# Dotted label: digits or letters followed by '.', possibly repeated
# ("1.2."), then whitespace (newline allowed) and the item text.
# Groups: 1 = a label together with its dot, 2 = the label itself,
#         3 = the item text (any non-NUL characters, newlines
#             included; may be empty).
ol_regex = regex.compile('[ \t]*\(\([0-9]+\|[a-zA-Z]+\)\.\)+[ \t\n]+\([^\0]*\|$\)')

# Same label syntax as ol_regex, but only spaces/tabs may separate the
# label from the text, and the text (group 3) is matched with '.*' up
# to '$' -- presumably a single line; verify against the regex module.
ol2_regex = regex.compile('[ \t]*\(\([0-9]+\|[a-zA-Z]+\)\.\)+[ \t]+\(.*\)$')

# Parenthesized numeric label.  The only group (1) is the item text.
olp_regex = regex.compile('[ \t]*([0-9]+)[ \t\n]+\([^\0]*\|$\)')

# Definition List:
# format: "term -- definition text"
#
# Groups: 1 = the term (one or more characters, no newline),
#         2 = the definition text (any non-NUL characters, newlines
#             included).
# The '--' separator needs at least one space or tab before it and at
# least one space, tab or newline after it.
dl_regex = regex.compile('\([^\n]+\)[ \t]+--[ \t\n]+\([^\0]*\)')

# Hypertext link:
# format: '[this is a link]'
# Groups: 1 = the space, tab or newline required before the '[',
#         2 = the link text (may be empty; no ']' and no newline),
#         3 = the character following the ']' (see endm).
hypertext_regex = regex.compile('\([ \t\n]\)\[\([^]\n]*\)\]' + endm)

# Hypertext definition:
# format: '.. [this is a link] http://www.anne.elk'
# Groups: 1 = the link text between the brackets,
#         2 = the link target (a run of non-whitespace characters).
# The definition starts with '..' at '^' and must be terminated by a
# newline.

# Bracketed link text; the text itself (no ']' or newline) is a group.
hyperdef_word = '\[\\([^]\n]*\\)\]'
# One or more whitespace characters of any kind.
wspc = '['+wsp+']+'
# '..', whitespace, '[link text]', whitespace, target, newline.
hyperdef_str = '^\.\.'+wspc+hyperdef_word+wspc+'\\([^'+wsp+']*\\)\n'
hyperdef_regex = regex.compile(hyperdef_str)

# Example section:
# format: "Example:" or "Examples:", in any mix of upper and lower
# case, followed only by optional spaces, tabs or newlines before '$'.
# The pattern defines no groups.
#
example_regex = regex.compile('[Ee][Xx][Aa][Mm][Pp][Ll][Ee][Ss]?:[ \t\n]*$')


# A single newline character.
nl_regex=regex.compile('\n')

# Code:
# format: "'code'" (text enclosed in single quotes)
# Groups: 1 = whatever '.*' matches before the opening quote,
#         2 = the quoted code,
#         3 = the code after its first character (optional group; not
#             set when the code is a single character),
#         4 = the character following the closing quote (see endm).
# The code starts and ends with a character other than space, tab or
# "'", and contains no newline or "'" in between.
code_regex=regex.compile("\(.*\)'\([^ \t']\([^\n']*[^ \t']\)?\)'" + endm)