File: grep.rb

package info (click to toggle)

ruby-unicode-utils 1.4.0-2

links: PTS, VCS
area: main
in suites: bookworm, bullseye, forky, sid, trixie
size: 1,988 kB
sloc: ruby: 1,877; makefile: 4

file content (23 lines) | stat: -rw-r--r-- 668 bytes

# -*- encoding: utf-8 -*-

require "unicode_utils/codepoint"

module UnicodeUtils

  # Get an array of all Codepoint instances in Codepoint::RANGE whose
  # name matches regexp. Matching is case insensitive.
  #
  # If regexp is not already case insensitive, an equivalent Regexp
  # with IGNORECASE added is used instead; all other options of the
  # given regexp (e.g. /x or /m) are carried over unchanged.
  #
  #   require "unicode_utils/grep"
  #   UnicodeUtils.grep(/angstrom/) => [#<U+212B "Å" ANGSTROM SIGN utf8:e2,84,ab>]
  def grep(regexp)
    # TODO: enhance behaviour by searching aliases in NameAliases.txt
    unless regexp.casefold?
      # Keep the caller's flags: rebuilding with IGNORECASE alone would
      # silently change the meaning of patterns written with /x or /m.
      regexp = Regexp.new(regexp.source, regexp.options | Regexp::IGNORECASE)
    end
    Codepoint::RANGE.select { |cp|
      regexp =~ UnicodeUtils.char_name(cp)
    }.map { |cp| Codepoint.new(cp) }
  end
  module_function :grep

end