File: demo_tokenre4

package info (click to toggle)
runawk 1.6.1-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 712 kB
  • sloc: awk: 1,127; ansic: 736; sh: 420; makefile: 103
file content (27 lines) | stat: -rwxr-xr-x 678 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
#!/usr/bin/env runawk

#use "tokenre.awk"

# This demo extracts e-mail addresses and URLs from the input text and
# prints them.  Using tokenre.awk for the search was inspired by
# discussions with Vlad Shakhov.

# Input files for this demo: examples/demo_tokenre3.in*

BEGIN {
	# The patterns assembled here are deliberately simplified
	# illustrations; they make no attempt to follow the relevant RFCs.

	# Top-level domains accepted at the end of a host name.
	domains = "(com|org|net|ru|by|ua)"

	# One or more dot-terminated runs of non-space, non-punctuation
	# characters, followed by one of the domains above.
	host_re = "([^ [:punct:]]+[.])+" domains

	# E-mail alternative: local part, "@", host.
	email_re = "[^ [:punct:]]+@" host_re

	# URL alternative: scheme, "://", host, optional "/path".
	url_re = "(https?|ftp|dict)://" host_re "(/[^ [:punct:]]*)?"

	# A token is anything matched by either alternative.
	re = email_re "|" url_re
}

{
	# For every input record: let splitre (provided by tokenre.awk) fill
	# arr using the pattern built in BEGIN; its return value is used
	# here as the number of entries stored in arr.
	found = splitre($0, arr, re)

	# Join the entries with single spaces.
	line = ""
	sep = ""
	for (k = 1; k <= found; k++) {
		line = line sep arr[k]
		sep = " "
	}

	# Records without any entry produce no output at all.
	if (found)
		printf "%s\n", line
}