File: email_parser.rb

package info (click to toggle)
ruby-parslet 1.6.1-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 908 kB
  • ctags: 473
  • sloc: ruby: 5,220; makefile: 2
file content (54 lines) | stat: -rw-r--r-- 1,441 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env ruby

# Example contributed by Hal Brodigan (postmodern). Thanks!

$:.unshift File.dirname(__FILE__) + "/../lib"
require 'parslet'
require 'parslet/convenience'

class EmailParser < Parslet::Parser
  rule(:space) { match('\s').repeat(1) }
  rule(:space?) { space.maybe }
  rule(:dash?) { match['_-'].maybe }

  rule(:at) {
    str('@') |
    (dash? >> (str('at') | str('AT')) >> dash?)
  }
  rule(:dot) {
    str('.') |
    (dash? >> (str('dot') | str('DOT')) >> dash?)
  }

  rule(:word) { match('[a-z0-9]').repeat(1).as(:word) >> space? }
  rule(:separator) { dot.as(:dot) >> space? | space }
  rule(:words) { word >> (separator >> word).repeat }

  rule(:email) {
    (words.as(:username) >> space? >> at >> space? >> words).as(:email)
  }

  root(:email)
end

class EmailSanitizer < Parslet::Transform
  rule(:dot => simple(:dot), :word => simple(:word)) { ".#{word}" }
  rule(:word => simple(:word)) { word }

  rule(:username => sequence(:username)) { username.join + "@" }
  rule(:username => simple(:username)) { username.to_s + "@" }

  rule(:email => sequence(:email)) { email.join }
end

parser = EmailParser.new
sanitizer = EmailSanitizer.new

input = ARGV[0] || begin
  default = "a.b.c.d@gmail.com"
  STDERR.puts "usage: #{$0} \"EMAIL_ADDR\""
  STDOUT.puts "since you haven't specified any EMAIL_ADDR, for testing purposes we're using #{default}"
  default
end

p sanitizer.apply(parser.parse_with_debug(input))