1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
|
# -*- coding: utf-8 -*- #
# frozen_string_literal: true
module Rouge
  module Lexers
    # Lexer for HTML documents.
    #
    # Markup is tokenized by the states defined below. The bodies of
    # <script> and <style> elements are delegated to the Javascript and CSS
    # lexers that are created in the `start` hook.
    class HTML < RegexLexer
      title "HTML"
      desc "HTML, the markup language of the web"
      tag 'html'
      filenames '*.htm', '*.html', '*.xhtml', '*.cshtml', '*.razor'
      mimetypes 'text/html', 'application/xhtml+xml'

      # Content-based detection hook.
      #
      # The checks are ordered: an HTML doctype wins even when the text also
      # starts with an XML declaration.
      #
      # @param text the source under analysis; must respond to `doctype?`
      #   and `=~`
      # @return [true, false, nil] true for an HTML doctype or an `<html`
      #   tag, false when the text starts with `<?xml`, nil otherwise
      def self.detect?(text)
        return true if text.doctype?(/\bhtml\b/i)
        return false if text =~ /\A<\?xml\b/
        return true if text =~ /<\s*html\b/
      end

      # Create the lexers used for embedded script and style content,
      # passing along this lexer's options.
      start do
        @javascript = Javascript.new(options)
        @css = CSS.new(options)
      end

      # Top level: text, entities, comments, preprocessor-like constructs
      # and the start of tags.
      state :root do
        rule %r/[^<&]+/m, Text
        rule %r/&\S*?;/, Name::Entity
        rule %r/<!DOCTYPE .*?>/im, Comment::Preproc
        rule %r/<!\[CDATA\[.*?\]\]>/m, Comment::Preproc
        rule %r/<!--/, Comment, :comment
        rule %r/<\?.*?\?>/m, Comment::Preproc # processing instructions, e.g. <?php ... ?>

        # <script ...>: :script_content is pushed first and :tag on top of
        # it, so the attributes are lexed by :tag and, once :tag pops at
        # `>`, lexing continues in :script_content.
        rule %r/<\s*script\s*/m do
          token Name::Tag
          @javascript.reset!
          push :script_content
          push :tag
        end

        # <style ...>: same stacking as for <script>. @lang is the lexer
        # that :style_content delegates to.
        rule %r/<\s*style\s*/m do
          token Name::Tag
          @css.reset!
          @lang = @css
          push :style_content
          push :tag
        end

        # Any other tag. The tag name itself is consumed in :tag_end /
        # :tag_start. The bare `<` rule matches every remaining `<`, so it
        # must stay last: a `<...` rule placed after it would never be tried.
        rule %r(</), Name::Tag, :tag_end
        rule %r/</, Name::Tag, :tag_start
      end

      # After `</`: an optional tag name, then whitespace and the closing `>`.
      state :tag_end do
        mixin :tag_end_end
        rule %r/[\p{L}:_-][\p{Word}\p{Cf}:.·-]*/ do
          token Name::Tag
          goto :tag_end_end
        end
      end

      # Remainder of a closing tag once its name has been consumed.
      state :tag_end_end do
        rule %r/\s+/, Text
        rule %r/>/, Name::Tag, :pop!
      end

      # After `<`: optional whitespace and the tag name, then continue in :tag.
      state :tag_start do
        rule %r/\s+/, Text
        rule %r/[\p{L}:_-][\p{Word}\p{Cf}:.·-]*/ do
          token Name::Tag
          goto :tag
        end

        # No tag name here: the empty pattern consumes nothing and hands
        # over to :tag.
        rule(//) { goto :tag }
      end

      # Inside `<!-- ... -->`.
      state :comment do
        rule %r/[^-]+/, Comment
        rule %r/-->/, Comment, :pop!
        rule %r/-/, Comment # a dash that does not start `-->`
      end

      # Inside an opening tag, after its name: attributes up to `>` or `/>`.
      state :tag do
        rule %r/\s+/m, Text
        # attribute name followed by `=`: its value is lexed in :attr
        rule %r/[\p{L}:_\[\]()*.-][\p{Word}\p{Cf}:.·\[\]()*-]*\s*=\s*/m, Name::Attribute, :attr
        # attribute without a value
        rule %r/[\p{L}:_*#-][\p{Word}\p{Cf}:.·*#-]*/, Name::Attribute
        rule %r(/?\s*>)m, Name::Tag, :pop!
      end

      # Attribute value: double-quoted, single-quoted or unquoted.
      state :attr do
        # TODO: are backslash escapes valid here?
        rule %r/"/ do
          token Str
          goto :dq
        end

        rule %r/'/ do
          token Str
          goto :sq
        end

        rule %r/[^\s>]+/, Str, :pop!
      end

      # Double-quoted attribute value, up to the closing quote.
      state :dq do
        rule %r/"/, Str, :pop!
        rule %r/[^"]+/, Str
      end

      # Single-quoted attribute value, up to the closing quote.
      state :sq do
        rule %r/'/, Str, :pop!
        rule %r/[^']+/, Str
      end

      # Body of a <script> element, delegated to the Javascript lexer until
      # the closing </script> tag.
      state :script_content do
        rule %r([^<]+) do
          delegate @javascript
        end

        rule %r(<\s*/\s*script\s*>)m, Name::Tag, :pop!

        # a `<` that does not start the closing tag belongs to the script
        rule %r(<) do
          delegate @javascript
        end
      end

      # Body of a <style> element, delegated to @lang until the closing
      # </style> tag.
      state :style_content do
        rule %r/[^<]+/ do
          delegate @lang
        end

        rule %r(<\s*/\s*style\s*>)m, Name::Tag, :pop!

        # a `<` that does not start the closing tag belongs to the stylesheet
        rule %r/</ do
          delegate @lang
        end
      end
    end
  end
end
|