File: grep.rb

package info (click to toggle)
ruby-unicode-utils 1.4.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 1,988 kB
  • sloc: ruby: 1,877; makefile: 4
file content (23 lines) | stat: -rw-r--r-- 668 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# -*- encoding: utf-8 -*-

require "unicode_utils/codepoint"

module UnicodeUtils

  # Get an array of all Codepoint instances in Codepoint::RANGE whose
  # name matches regexp. Matching is always case insensitive, even if
  # regexp was built without the +i+ option; the extended (+x+) and
  # multiline (+m+) options of regexp are preserved.
  #
  #   require "unicode_utils/grep"
  #   UnicodeUtils.grep(/angstrom/) => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
  #
  # regexp:: a Regexp that is matched against each character name as
  #          returned by UnicodeUtils.char_name
  #
  # Returns an Array of Codepoint objects (empty if nothing matches).
  def grep(regexp)
    # TODO: enhance behaviour by searching aliases in NameAliases.txt
    unless regexp.casefold?
      # Rebuild the pattern with IGNORECASE added. Carry over the x and m
      # flags so that a pattern such as /latin \s+ small/x keeps its
      # meaning; rebuilding from the source alone would turn the
      # insignificant whitespace into literal characters.
      kept = regexp.options & (Regexp::EXTENDED | Regexp::MULTILINE)
      regexp = Regexp.new(regexp.source, kept | Regexp::IGNORECASE)
    end
    Codepoint::RANGE.select { |cp|
      regexp =~ UnicodeUtils.char_name(cp)
    }.map { |cp| Codepoint.new(cp) }
  end
  module_function :grep

end