// This file was generated AUTOMATICALLY from a template file Sat Jan 15 11:52:14 PST 2005 /* @(#)Character.java.template 1.7 03/01/13 * * Copyright 1994-2002 Sun Microsystems, Inc. All Rights Reserved. * * This software is the proprietary information of Sun Microsystems, Inc. * Use is subject to license terms. * */ package java.lang; /** * The Character class wraps a value of the primitive * type char in an object. An object of type * Character contains a single field whose type is * char. *

* In addition, this class provides several methods for determining * a character's category (lowercase letter, digit, etc.) and for converting * characters from uppercase to lowercase and vice versa. *

* Character information is based on the Unicode Standard, version 3.0. *

* The methods and data of class Character are defined by * the information in the UnicodeData file that is part of the * Unicode Character Database maintained by the Unicode * Consortium. This file specifies various properties including name * and general category for every defined Unicode code point or * character range. *

* The file and its description are available from the Unicode Consortium at: *

http://www.unicode.org *

* * @author Lee Boynton * @author Guy Steele * @author Akira Tanaka * @since 1.0 */ public final class Character extends Object implements java.io.Serializable, Comparable { /** * The minimum radix available for conversion to and from strings. * The constant value of this field is the smallest value permitted * for the radix argument in radix-conversion methods such as the * digit method, the forDigit * method, and the toString method of class * Integer. * * @see java.lang.Character#digit(char, int) * @see java.lang.Character#forDigit(int, int) * @see java.lang.Integer#toString(int, int) * @see java.lang.Integer#valueOf(java.lang.String) */ public static final int MIN_RADIX = 2; /** * The maximum radix available for conversion to and from strings. * The constant value of this field is the largest value permitted * for the radix argument in radix-conversion methods such as the * digit method, the forDigit * method, and the toString method of class * Integer. * * @see java.lang.Character#digit(char, int) * @see java.lang.Character#forDigit(int, int) * @see java.lang.Integer#toString(int, int) * @see java.lang.Integer#valueOf(java.lang.String) */ public static final int MAX_RADIX = 36; /** * The constant value of this field is the smallest value of type * char, '\u0000'. * * @since 1.0.2 */ public static final char MIN_VALUE = '\u0000'; /** * The constant value of this field is the largest value of type * char, '\uFFFF'. * * @since 1.0.2 */ public static final char MAX_VALUE = '\uffff'; /** * The Class instance representing the primitive type * char. * * @since 1.1 */ public static final Class TYPE = Class.getPrimitiveClass("char"); /* * Normative general types */ /* * General character types */ /** * General category "Cn" in the Unicode specification. * @since 1.1 */ public static final byte UNASSIGNED = 0; /** * General category "Lu" in the Unicode specification. 
* @since 1.1 */ public static final byte UPPERCASE_LETTER = 1; /** * General category "Ll" in the Unicode specification. * @since 1.1 */ public static final byte LOWERCASE_LETTER = 2; /** * General category "Lt" in the Unicode specification. * @since 1.1 */ public static final byte TITLECASE_LETTER = 3; /** * General category "Lm" in the Unicode specification. * @since 1.1 */ public static final byte MODIFIER_LETTER = 4; /** * General category "Lo" in the Unicode specification. * @since 1.1 */ public static final byte OTHER_LETTER = 5; /** * General category "Mn" in the Unicode specification. * @since 1.1 */ public static final byte NON_SPACING_MARK = 6; /** * General category "Me" in the Unicode specification. * @since 1.1 */ public static final byte ENCLOSING_MARK = 7; /** * General category "Mc" in the Unicode specification. * @since 1.1 */ public static final byte COMBINING_SPACING_MARK = 8; /** * General category "Nd" in the Unicode specification. * @since 1.1 */ public static final byte DECIMAL_DIGIT_NUMBER = 9; /** * General category "Nl" in the Unicode specification. * @since 1.1 */ public static final byte LETTER_NUMBER = 10; /** * General category "No" in the Unicode specification. * @since 1.1 */ public static final byte OTHER_NUMBER = 11; /** * General category "Zs" in the Unicode specification. * @since 1.1 */ public static final byte SPACE_SEPARATOR = 12; /** * General category "Zl" in the Unicode specification. * @since 1.1 */ public static final byte LINE_SEPARATOR = 13; /** * General category "Zp" in the Unicode specification. * @since 1.1 */ public static final byte PARAGRAPH_SEPARATOR = 14; /** * General category "Cc" in the Unicode specification. * @since 1.1 */ public static final byte CONTROL = 15; /** * General category "Cf" in the Unicode specification. * @since 1.1 */ public static final byte FORMAT = 16; /** * General category "Co" in the Unicode specification. 
* @since 1.1 */ public static final byte PRIVATE_USE = 18; /** * General category "Cs" in the Unicode specification. * @since 1.1 */ public static final byte SURROGATE = 19; /** * General category "Pd" in the Unicode specification. * @since 1.1 */ public static final byte DASH_PUNCTUATION = 20; /** * General category "Ps" in the Unicode specification. * @since 1.1 */ public static final byte START_PUNCTUATION = 21; /** * General category "Pe" in the Unicode specification. * @since 1.1 */ public static final byte END_PUNCTUATION = 22; /** * General category "Pc" in the Unicode specification. * @since 1.1 */ public static final byte CONNECTOR_PUNCTUATION = 23; /** * General category "Po" in the Unicode specification. * @since 1.1 */ public static final byte OTHER_PUNCTUATION = 24; /** * General category "Sm" in the Unicode specification. * @since 1.1 */ public static final byte MATH_SYMBOL = 25; /** * General category "Sc" in the Unicode specification. * @since 1.1 */ public static final byte CURRENCY_SYMBOL = 26; /** * General category "Sk" in the Unicode specification. * @since 1.1 */ public static final byte MODIFIER_SYMBOL = 27; /** * General category "So" in the Unicode specification. * @since 1.1 */ public static final byte OTHER_SYMBOL = 28; /** * General category "Pi" in the Unicode specification. * @since 1.4 */ public static final byte INITIAL_QUOTE_PUNCTUATION = 29; /** * General category "Pf" in the Unicode specification. * @since 1.4 */ public static final byte FINAL_QUOTE_PUNCTUATION = 30; /** * Error or non-char flag * @since 1.4 */ static final char CHAR_ERROR = '\uFFFF'; /** * Undefined bidirectional character type. Undefined char * values have undefined directionality in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_UNDEFINED = -1; /** * Strong bidirectional character type "L" in the Unicode specification. 
* @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; /** * Strong bidirectional character type "R" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; /** * Strong bidirectional character type "AL" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; /** * Weak bidirectional character type "EN" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; /** * Weak bidirectional character type "ES" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; /** * Weak bidirectional character type "ET" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; /** * Weak bidirectional character type "AN" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; /** * Weak bidirectional character type "CS" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; /** * Weak bidirectional character type "NSM" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; /** * Weak bidirectional character type "BN" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; /** * Neutral bidirectional character type "B" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; /** * Neutral bidirectional character type "S" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; /** * Neutral bidirectional character type "WS" in the Unicode specification. 
* @since 1.4 */ public static final byte DIRECTIONALITY_WHITESPACE = 12; /** * Neutral bidirectional character type "ON" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; /** * Strong bidirectional character type "LRE" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; /** * Strong bidirectional character type "LRO" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; /** * Strong bidirectional character type "RLE" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; /** * Strong bidirectional character type "RLO" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; /** * Weak bidirectional character type "PDF" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; // Maximum character handled by internal fast-path code which // avoids initializing large tables. // Note: performance of this "fast-path" code may be sub-optimal // in negative cases for some accessors due to complicated ranges. // Should revisit after optimization of table initialization. private static final int FAST_PATH_MAX = 255; /** * Instances of this class represent particular subsets of the Unicode * character set. The only family of subsets defined in the * Character class is

{@link Character.UnicodeBlock
     * UnicodeBlock}

. Other portions of the Java API may define other
     * subsets for their own purposes.
     *
     * @since 1.2
     */
    public static class Subset {

        /**
         * Name returned by toString(). It is assigned exactly once, in the
         * constructor, and is never null; it is declared final so that the
         * compiler enforces the single assignment.
         */
        private final String name;

        /**
         * Constructs a new Subset instance.
         *
         * @exception NullPointerException if name is null
         * @param  name  The name of this subset
         */
        protected Subset(String name) {
            if (name == null) {
                throw new NullPointerException("name");
            }
            this.name = name;
        }

        /**
         * Compares two Subset objects for equality.
         * This method returns true if and only if this and the argument
         * refer to the same object; since this method is final, this
         * guarantee holds for all subclasses.
         *
         * @param  obj  the object to compare with
         * @return true only when obj is this very instance
         */
        public final boolean equals(Object obj) {
            return (this == obj);
        }

        /**
         * Returns the standard hash code as defined by the
         * {@link Object#hashCode} method.  This method
         * is final in order to ensure that the equals and hashCode
         * methods will be consistent in all subclasses.
         *
         * @return the identity-based hash code inherited from Object
         */
        public final int hashCode() {
            return super.hashCode();
        }

        /**
         * Returns the name of this subset.
         *
         * @return the name passed to the constructor
         */
        public final String toString() {
            return name;
        }
    }

    /**
     * A family of character subsets representing the character blocks in the
     * Unicode specification.  Character blocks generally define characters
     * used for a specific script or purpose.  A character is contained by
     * at most one Unicode block.
     *
     * @since 1.2
     */
    public static final class UnicodeBlock extends Subset {

        // Private: the only instances are the constants declared in this class.
        private UnicodeBlock(String name) {
            super(name);
        }

        /**
         * Constant for the Unicode character block of the same name.
*/ public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN"), LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT"), LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A"), LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B"), IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS"), SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS"), COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"), GREEK = new UnicodeBlock("GREEK"), CYRILLIC = new UnicodeBlock("CYRILLIC"), ARMENIAN = new UnicodeBlock("ARMENIAN"), HEBREW = new UnicodeBlock("HEBREW"), ARABIC = new UnicodeBlock("ARABIC"), DEVANAGARI = new UnicodeBlock("DEVANAGARI"), BENGALI = new UnicodeBlock("BENGALI"), GURMUKHI = new UnicodeBlock("GURMUKHI"), GUJARATI = new UnicodeBlock("GUJARATI"), ORIYA = new UnicodeBlock("ORIYA"), TAMIL = new UnicodeBlock("TAMIL"), TELUGU = new UnicodeBlock("TELUGU"), KANNADA = new UnicodeBlock("KANNADA"), MALAYALAM = new UnicodeBlock("MALAYALAM"), THAI = new UnicodeBlock("THAI"), LAO = new UnicodeBlock("LAO"), TIBETAN = new UnicodeBlock("TIBETAN"), GEORGIAN = new UnicodeBlock("GEORGIAN"), HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO"), LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"), GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED"), GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION"), SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"), CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS"), COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"), LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS"), NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS"), ARROWS = new UnicodeBlock("ARROWS"), MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS"), MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"), CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES"), OPTICAL_CHARACTER_RECOGNITION = new 
UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"), ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"), BOX_DRAWING = new UnicodeBlock("BOX_DRAWING"), BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS"), GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES"), MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"), DINGBATS = new UnicodeBlock("DINGBATS"), CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"), HIRAGANA = new UnicodeBlock("HIRAGANA"), KATAKANA = new UnicodeBlock("KATAKANA"), BOPOMOFO = new UnicodeBlock("BOPOMOFO"), HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"), KANBUN = new UnicodeBlock("KANBUN"), ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"), CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY"), CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"), HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES"), SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA"), PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA"), CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"), ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"), ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"), COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS"), CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"), SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS"), ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"), HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"), SPECIALS = new UnicodeBlock("SPECIALS"); /** * Constant for the Unicode character block of the same name. 
* * @since 1.4 */ public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"), THAANA = new UnicodeBlock("THAANA"), SINHALA = new UnicodeBlock("SINHALA"), MYANMAR = new UnicodeBlock("MYANMAR"), ETHIOPIC = new UnicodeBlock("ETHIOPIC"), CHEROKEE = new UnicodeBlock("CHEROKEE"), UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"), OGHAM = new UnicodeBlock("OGHAM"), RUNIC = new UnicodeBlock("RUNIC"), KHMER = new UnicodeBlock("KHMER"), MONGOLIAN = new UnicodeBlock("MONGOLIAN"), BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"), CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"), KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"), IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"), BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"), CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"), YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"), YI_RADICALS = new UnicodeBlock("YI_RADICALS"); private static final char blockStarts[] = { '\u0000', // Basic Latin '\u0080', // Latin-1 Supplement '\u0100', // Latin Extended-A '\u0180', // Latin Extended-B '\u0250', // IPA Extensions '\u02B0', // Spacing Modifier Letters '\u0300', // Combining Diacritical Marks '\u0370', // Greek '\u0400', // Cyrillic '\u0500', // unassigned '\u0530', // Armenian '\u0590', // Hebrew '\u0600', // Arabic '\u0700', // Syriac '\u0750', // unassigned '\u0780', // Thaana '\u07C0', // unassigned '\u0900', // Devanagari '\u0980', // Bengali '\u0A00', // Gurmukhi '\u0A80', // Gujarati '\u0B00', // Oriya '\u0B80', // Tamil '\u0C00', // Telugu '\u0C80', // Kannada '\u0D00', // Malayalam '\u0D80', // Sinhala '\u0E00', // Thai '\u0E80', // Lao '\u0F00', // Tibetan '\u1000', // Myanmar '\u10A0', // Georgian '\u1100', // Hangul Jamo '\u1200', // Ethiopic '\u1380', // unassigned '\u13A0', // Cherokee '\u1400', // Unified Canadian Aboriginal Syllabics 
'\u1680', // Ogham '\u16A0', // Runic '\u1700', // unassigned '\u1780', // Khmer '\u1800', // Mongolian '\u18B0', // unassigned '\u1E00', // Latin Extended Additional '\u1F00', // Greek Extended '\u2000', // General Punctuation '\u2070', // Superscripts and Subscripts '\u20A0', // Currency Symbols '\u20D0', // Combining Marks for Symbols '\u2100', // Letterlike Symbols '\u2150', // Number Forms '\u2190', // Arrows '\u2200', // Mathematical Operators '\u2300', // Miscellaneous Technical '\u2400', // Control Pictures '\u2440', // Optical Character Recognition '\u2460', // Enclosed Alphanumerics '\u2500', // Box Drawing '\u2580', // Block Elements '\u25A0', // Geometric Shapes '\u2600', // Miscellaneous Symbols '\u2700', // Dingbats '\u27C0', // unassigned '\u2800', // Braille Patterns '\u2900', // unassigned '\u2E80', // CJK Radicals Supplement '\u2F00', // Kangxi Radicals '\u2FE0', // unassigned '\u2FF0', // Ideographic Description Characters '\u3000', // CJK Symbols and Punctuation '\u3040', // Hiragana '\u30A0', // Katakana '\u3100', // Bopomofo '\u3130', // Hangul Compatibility Jamo '\u3190', // Kanbun '\u31A0', // Bopomofo Extended '\u31C0', // unassigned '\u3200', // Enclosed CJK Letters and Months '\u3300', // CJK Compatibility '\u3400', // CJK Unified Ideographs Extension A '\u4DB6', // unassigned '\u4E00', // CJK Unified Ideographs '\uA000', // Yi Syllables '\uA490', // Yi Radicals '\uA4D0', // unassigned '\uAC00', // Hangul Syllables '\uD7A4', // unassigned '\uD800', // Surrogates '\uE000', // Private Use '\uF900', // CJK Compatibility Ideographs '\uFB00', // Alphabetic Presentation Forms '\uFB50', // Arabic Presentation Forms-A '\uFE00', // unassigned '\uFE20', // Combining Half Marks '\uFE30', // CJK Compatibility Forms '\uFE50', // Small Form Variants '\uFE70', // Arabic Presentation Forms-B '\uFEFF', // Specials '\uFF00', // Halfwidth and Fullwidth Forms '\uFFF0', // Specials '\uFFFE', // non-characters }; private static final UnicodeBlock[] blocks = { 
BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A, LATIN_EXTENDED_B, IPA_EXTENSIONS, SPACING_MODIFIER_LETTERS, COMBINING_DIACRITICAL_MARKS, GREEK, CYRILLIC, null, ARMENIAN, HEBREW, ARABIC, SYRIAC, null, THAANA, null, DEVANAGARI, BENGALI, GURMUKHI, GUJARATI, ORIYA, TAMIL, TELUGU, KANNADA, MALAYALAM, SINHALA, THAI, LAO, TIBETAN, MYANMAR, GEORGIAN, HANGUL_JAMO, ETHIOPIC, null, CHEROKEE, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM, RUNIC, null, KHMER, MONGOLIAN, null, LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED, GENERAL_PUNCTUATION, SUPERSCRIPTS_AND_SUBSCRIPTS, CURRENCY_SYMBOLS, COMBINING_MARKS_FOR_SYMBOLS, LETTERLIKE_SYMBOLS, NUMBER_FORMS, ARROWS, MATHEMATICAL_OPERATORS, MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES, OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS, BOX_DRAWING, BLOCK_ELEMENTS, GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS, DINGBATS, null, BRAILLE_PATTERNS, null, CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS, null, IDEOGRAPHIC_DESCRIPTION_CHARACTERS, CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA, BOPOMOFO, HANGUL_COMPATIBILITY_JAMO, KANBUN, BOPOMOFO_EXTENDED, null, ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, null, CJK_UNIFIED_IDEOGRAPHS, YI_SYLLABLES, YI_RADICALS, null, HANGUL_SYLLABLES, null, SURROGATES_AREA, PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS, ALPHABETIC_PRESENTATION_FORMS, ARABIC_PRESENTATION_FORMS_A, null, COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS, SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B, SPECIALS, HALFWIDTH_AND_FULLWIDTH_FORMS, SPECIALS, null, }; /** * Returns the object representing the Unicode block containing the * given character, or null if the character is not a * member of a defined block. 
* * @param c The character in question * @return The UnicodeBlock instance representing the * Unicode block of which this character is a member, or * null if the character is not a member of any * Unicode block */ public static UnicodeBlock of(char c) { int top, bottom, current; bottom = 0; top = blockStarts.length; current = top/2; // invariant: top > current >= bottom && ch >= unicodeBlockStarts[bottom] while (top - bottom > 1) { if (c >= blockStarts[current]) { bottom = current; } else { top = current; } current = (top + bottom) / 2; } return blocks[current]; } } /** * The value of the Character. * * @serial */ private char value; /** use serialVersionUID from JDK 1.0.2 for interoperability */ private static final long serialVersionUID = 3786198910865385080L; /** * Constructs a newly allocated Character object that * represents the specified char value. * * @param value the value to be represented by the * Character object. */ public Character(char value) { this.value = value; } /** * Returns the value of this Character object. * @return the primitive char value represented by * this object. */ public char charValue() { return value; } /** * Returns a hash code for this Character. * @return a hash code value for this object. */ public int hashCode() { return (int)value; } /** * Compares this object against the specified object. * The result is true if and only if the argument is not * null and is a Character object that * represents the same char value as this object. * * @param obj the object to compare with. * @return true if the objects are the same; * false otherwise. */ public boolean equals(Object obj) { if (obj instanceof Character) { return value == ((Character)obj).charValue(); } return false; } /** * Returns a String object representing this * Character's value. The result is a string of * length 1 whose sole component is the primitive * char value represented by this * Character object. * * @return a string representation of this object. 
*/
    public String toString() {
        return String.valueOf(value);
    }

    /**
     * Returns a String object representing the
     * specified char.  The result is a string of length
     * 1 consisting solely of the specified char.
     *
     * @param  c  the char to be converted
     * @return the string representation of the specified char
     * @since  1.4
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }

    /**
     * Determines if the specified character is a lowercase character.
     *

* A character is lowercase if its general category type, provided * by Character.getType(ch), is * LOWERCASE_LETTER. *

* The following are examples of lowercase characters: *

     * a b c d e f g h i j k l m n o p q r s t u v w x y z
     * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 
     * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE'
     * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6'
     * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF'
     *

Many other Unicode characters are lowercase too. *

*
     * @param   ch   the character to be tested.
     * @return  true if the character is lowercase;
     *          false otherwise.
     * @see     java.lang.Character#isUpperCase(char)
     * @see     java.lang.Character#isTitleCase(char)
     * @see     java.lang.Character#toLowerCase(char)
     * @see     java.lang.Character#getType(char)
     */
    public static boolean isLowerCase(char ch) {
        // Values up to FAST_PATH_MAX go to the Latin-1 data class; all others to CharacterData.
        return (ch <= FAST_PATH_MAX)
                ? CharacterDataLatin1.isLowerCase(ch)
                : CharacterData.isLowerCase(ch);
    }

    /**
     * Determines if the specified character is an uppercase character.
     *

* A character is uppercase if its general category type, provided by * Character.getType(ch), is UPPERCASE_LETTER. *

* The following are examples of uppercase characters: *

     * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
     * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7'
     * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF'
     * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8'
     * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE'
     *

Many other Unicode characters are uppercase too.

* * @param ch the character to be tested. * @return true if the character is uppercase; * false otherwise. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#toUpperCase(char) * @see java.lang.Character#getType(char) * @since 1.0 */ public static boolean isUpperCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isUpperCase(ch); } else { return CharacterData.isUpperCase(ch); } } /** * Determines if the specified character is a titlecase character. *

* A character is a titlecase character if its general * category type, provided by Character.getType(ch), * is TITLECASE_LETTER. *

* Some characters look like pairs of Latin letters. For example, there * is an uppercase letter that looks like "LJ" and has a corresponding * lowercase letter that looks like "lj". A third form, which looks like "Lj", * is the appropriate form to use when rendering a word in lowercase * with initial capitals, as for a book title. *

* These are some of the Unicode characters for which this method returns * true: *

LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON *
LATIN CAPITAL LETTER L WITH SMALL LETTER J *
LATIN CAPITAL LETTER N WITH SMALL LETTER J *
LATIN CAPITAL LETTER D WITH SMALL LETTER Z *

Many other Unicode characters are titlecase too.

* * @param ch the character to be tested. * @return true if the character is titlecase; * false otherwise. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toTitleCase(char) * @see java.lang.Character#getType(char) * @since 1.0.2 */ public static boolean isTitleCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isTitleCase(ch); } else { return CharacterData.isTitleCase(ch); } } /** * Determines if the specified character is a digit. *

* A character is a digit if its general category type, provided * by Character.getType(ch), is * DECIMAL_DIGIT_NUMBER. *

* Some Unicode character ranges that contain digits: *

'\u0030' through '\u0039', * ISO-LATIN-1 digits ('0' through '9') *
'\u0660' through '\u0669', * Arabic-Indic digits *
'\u06F0' through '\u06F9', * Extended Arabic-Indic digits *
'\u0966' through '\u096F', * Devanagari digits *
'\uFF10' through '\uFF19', * Fullwidth digits *

*
     * Many other character ranges contain digits as well.
     *
     * @param   ch   the character to be tested.
     * @return  true if the character is a digit;
     *          false otherwise.
     * @see     java.lang.Character#digit(char, int)
     * @see     java.lang.Character#forDigit(int, int)
     * @see     java.lang.Character#getType(char)
     */
    public static boolean isDigit(char ch) {
        // Values up to FAST_PATH_MAX go to the Latin-1 data class; all others to CharacterData.
        return (ch <= FAST_PATH_MAX)
                ? CharacterDataLatin1.isDigit(ch)
                : CharacterData.isDigit(ch);
    }

    /**
     * Determines if a character is defined in Unicode.
     *

* A character is defined if at least one of the following is true: *

It has an entry in the UnicodeData file. *
It has a value in a range defined by the UnicodeData file. *

* * @param ch the character to be tested * @return true if the character has a defined meaning * in Unicode; false otherwise. * @see java.lang.Character#isDigit(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#isUpperCase(char) * @since 1.0.2 */ public static boolean isDefined(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isDefined(ch); } else { return CharacterData.isDefined(ch); } } /** * Determines if the specified character is a letter. *

* A character is considered to be a letter if its general * category type, provided by Character.getType(ch), * is any of the following: *

UPPERCASE_LETTER *
LOWERCASE_LETTER *
TITLECASE_LETTER *
MODIFIER_LETTER *
OTHER_LETTER *

* * Not all letters have case. Many characters are * letters but are neither uppercase nor lowercase nor titlecase. * * @param ch the character to be tested. * @return true if the character is a letter; * false otherwise. * @see java.lang.Character#isDigit(char) * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isJavaLetter(char) * @see java.lang.Character#isJavaLetterOrDigit(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#isUnicodeIdentifierStart(char) * @see java.lang.Character#isUpperCase(char) */ public static boolean isLetter(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isLetter(ch); } else { return CharacterData.isLetter(ch); } } /** * Determines if the specified character is a letter or digit. *

* A character is considered to be a letter or digit if either * Character.isLetter(char ch) or * Character.isDigit(char ch) returns * true for the character. * * @param ch the character to be tested. * @return true if the character is a letter or digit; * false otherwise. * @see java.lang.Character#isDigit(char) * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isJavaLetter(char) * @see java.lang.Character#isJavaLetterOrDigit(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.0.2 */ public static boolean isLetterOrDigit(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isLetterOrDigit(ch); } else { return CharacterData.isLetterOrDigit(ch); } } /** * Determines if the specified character is permissible as the first * character in a Java identifier. *

* A character may start a Java identifier if and only if * one of the following is true: *

{@link #isLetter(char) isLetter(ch)} returns true *
{@link #getType(char) getType(ch)} returns LETTER_NUMBER *
ch is a currency symbol (such as "$") *
ch is a connecting punctuation character (such as "_"). *

* * @param ch the character to be tested. * @return true if the character may start a Java * identifier; false otherwise. * @see java.lang.Character#isJavaLetterOrDigit(char) * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isUnicodeIdentifierStart(char) * @since 1.02 * @deprecated Replaced by isJavaIdentifierStart(char). */ public static boolean isJavaLetter(char ch) { return isJavaIdentifierStart(ch); } /** * Determines if the specified character may be part of a Java * identifier as other than the first character. *

* A character may be part of a Java identifier if and only if any * of the following are true: *

it is a letter *
it is a currency symbol (such as '$') *
it is a connecting punctuation character (such as '_') *
it is a digit *
it is a numeric letter (such as a Roman numeral character) *
it is a combining mark *
it is a non-spacing mark *
isIdentifierIgnorable returns * true for the character. *

* * @param ch the character to be tested. * @return true if the character may be part of a * Java identifier; false otherwise. * @see java.lang.Character#isJavaLetter(char) * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @see java.lang.Character#isIdentifierIgnorable(char) * @since 1.02 * @deprecated Replaced by isJavaIdentifierPart(char). */ public static boolean isJavaLetterOrDigit(char ch) { return isJavaIdentifierPart(ch); } /** * Determines if the specified character is * permissible as the first character in a Java identifier. *

* A character may start a Java identifier if and only if * one of the following conditions is true: *

{@link #isLetter(char) isLetter(ch)} returns true *
{@link #getType(char) getType(ch)} returns LETTER_NUMBER *
ch is a currency symbol (such as "$") *
ch is a connecting punctuation character (such as "_"). *

* * @param ch the character to be tested. * @return true if the character may start a Java identifier; * false otherwise. * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isUnicodeIdentifierStart(char) * @since 1.1 */ public static boolean isJavaIdentifierStart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isJavaIdentifierStart(ch); } else { return CharacterData.isJavaIdentifierStart(ch); } } /** * Determines if the specified character may be part of a Java * identifier as other than the first character. *

* A character may be part of a Java identifier if any of the following * are true: *

it is a letter *
it is a currency symbol (such as '$') *
it is a connecting punctuation character (such as '_') *
it is a digit *
it is a numeric letter (such as a Roman numeral character) *
it is a combining mark *
it is a non-spacing mark *
isIdentifierIgnorable returns * true for the character *

* * @param ch the character to be tested. * @return true if the character may be part of a * Java identifier; false otherwise. * @see java.lang.Character#isIdentifierIgnorable(char) * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.1 */ public static boolean isJavaIdentifierPart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isJavaIdentifierPart(ch); } else { return CharacterData.isJavaIdentifierPart(ch); } } /** * Determines if the specified character is permissible as the * first character in a Unicode identifier. *

* A character may start a Unicode identifier if and only if * one of the following conditions is true: *

{@link #isLetter(char) isLetter(ch)} returns true *
{@link #getType(char) getType(ch)} returns * LETTER_NUMBER. *

* @param ch the character to be tested. * @return true if the character may start a Unicode * identifier; false otherwise. * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.1 */ public static boolean isUnicodeIdentifierStart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isUnicodeIdentifierStart(ch); } else { return CharacterData.isUnicodeIdentifierStart(ch); } } /** * Determines if the specified character may be part of a Unicode * identifier as other than the first character. *

* A character may be part of a Unicode identifier if and only if * one of the following statements is true: *

it is a letter *
it is a connecting punctuation character (such as '_') *
it is a digit *
it is a numeric letter (such as a Roman numeral character) *
it is a combining mark *
it is a non-spacing mark *
isIdentifierIgnorable returns * true for this character. *

* * @param ch the character to be tested. * @return true if the character may be part of a * Unicode identifier; false otherwise. * @see java.lang.Character#isIdentifierIgnorable(char) * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isUnicodeIdentifierStart(char) * @since 1.1 */ public static boolean isUnicodeIdentifierPart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isUnicodeIdentifierPart(ch); } else { return CharacterData.isUnicodeIdentifierPart(ch); } } /** * Determines if the specified character should be regarded as * an ignorable character in a Java identifier or a Unicode identifier. *

* The following Unicode characters are ignorable in a Java identifier * or a Unicode identifier: *

ISO control characters that are not whitespace *
- '\u0000' through '\u0008' *
- '\u000E' through '\u001B' *
- '\u007F' through '\u009F' *
* *
all characters that have the FORMAT general * category value *

* * @param ch the character to be tested. * @return true if the character is an ignorable control * character that may be part of a Java or Unicode identifier; * false otherwise. * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.1 */ public static boolean isIdentifierIgnorable(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isIdentifierIgnorable(ch); } else { return CharacterData.isIdentifierIgnorable(ch); } } /** * Converts the character argument to lowercase using case * mapping information from the UnicodeData file. *

* Note that * Character.isLowerCase(Character.toLowerCase(ch)) * does not always return true for some ranges of * characters, particularly those that are symbols or ideographs. * * @param ch the character to be converted. * @return the lowercase equivalent of the character, if any; * otherwise, the character itself. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toTitleCase(char) * @see java.lang.Character#toUpperCase(char) */ public static char toLowerCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toLowerCase(ch); } else { return CharacterData.toLowerCase(ch); } } /** * Converts the character argument to uppercase using case mapping * information from the UnicodeData file. *

* Note that * Character.isUpperCase(Character.toUpperCase(ch)) * does not always return true for some ranges of * characters, particularly those that are symbols or ideographs. * * @param ch the character to be converted. * @return the uppercase equivalent of the character, if any; * otherwise, the character itself. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toLowerCase(char) * @see java.lang.Character#toTitleCase(char) */ public static char toUpperCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toUpperCase(ch); } else { return CharacterData.toUpperCase(ch); } } /** * Converts the character argument to titlecase using case mapping * information from the UnicodeData file. If a character has no * explicit titlecase mapping and is not itself a titlecase char * according to UnicodeData, then the uppercase mapping is * returned as an equivalent titlecase mapping. If the * char argument is already a titlecase * char, the same char value will be * returned. *

* Note that * Character.isTitleCase(Character.toTitleCase(ch)) * does not always return true for some ranges of * characters. * * @param ch the character to be converted. * @return the titlecase equivalent of the character, if any; * otherwise, the character itself. * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#toLowerCase(char) * @see java.lang.Character#toUpperCase(char) * @since 1.0.2 */ public static char toTitleCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toTitleCase(ch); } else { return CharacterData.toTitleCase(ch); } } /** * Returns the numeric value of the character ch in the * specified radix. *

* <p>
     * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
     * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
     * value of <code>ch</code> is not a valid digit in the specified
     * radix, <code>-1</code> is returned. A character is a valid digit
     * if at least one of the following is true:
     * <ul>
     * <li>The method <code>isDigit</code> is <code>true</code> of the character
     *     and the Unicode decimal digit value of the character (or its
     *     single-character decomposition) is less than the specified radix.
     *     In this case the decimal digit value is returned.
     * <li>The character is one of the uppercase Latin letters
     *     <code>'A'</code> through <code>'Z'</code> and its code is less than
     *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>ch&nbsp;- 'A'&nbsp;+&nbsp;10</code>
     *     is returned.
     * <li>The character is one of the lowercase Latin letters
     *     <code>'a'</code> through <code>'z'</code> and its code is less than
     *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
     *     In this case, <code>ch&nbsp;- 'a'&nbsp;+&nbsp;10</code>
     *     is returned.
     * </ul>
     *
     * @param   ch      the character to be converted.
     * @param   radix   the radix.
     * @return  the numeric value represented by the character in the
     *          specified radix.
     * @see     java.lang.Character#forDigit(int, int)
     * @see     java.lang.Character#isDigit(char)
     */
    public static int digit(char ch, int radix) {
        return (ch <= FAST_PATH_MAX)
            ? CharacterDataLatin1.digit(ch, radix)
            : CharacterData.digit(ch, radix);
    }

    /**
     * Returns the <code>int</code> value that the specified Unicode
     * character represents. For example, the character
     * <code>'&#92;u216C'</code> (the roman numeral fifty) will return
     * an int with a value of 50.
     *

* The letters A-Z in their uppercase ('\u0041' through * '\u005A'), lowercase * ('\u0061' through '\u007A'), and * full width variant ('\uFF21' through * '\uFF3A' and '\uFF41' through * '\uFF5A') forms have numeric values from 10 * through 35. This is independent of the Unicode specification, * which does not assign numeric values to these char * values. *

* If the character does not have a numeric value, then -1 is returned. * If the character has a numeric value that cannot be represented as a * nonnegative integer (for example, a fractional value), then -2 * is returned. * * @param ch the character to be converted. * @return the numeric value of the character, as a nonnegative int * value; -2 if the character has a numeric value that is not a * nonnegative integer; -1 if the character has no numeric value. * @see java.lang.Character#forDigit(int, int) * @see java.lang.Character#isDigit(char) * @since 1.1 */ public static int getNumericValue(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.getNumericValue(ch); } else { return CharacterData.getNumericValue(ch); } } /** * Determines if the specified character is ISO-LATIN-1 white space. * This method returns true for the following five * characters only: * * * * * * * * * * * *
'\t' '\u0009' HORIZONTAL TABULATION
'\n' '\u000A' NEW LINE
'\f' '\u000C' FORM FEED
'\r' '\u000D' CARRIAGE RETURN
' ' '\u0020' SPACE
* * @param ch the character to be tested. * @return true if the character is ISO-LATIN-1 white * space; false otherwise. * @see java.lang.Character#isSpaceChar(char) * @see java.lang.Character#isWhitespace(char) * @deprecated Replaced by isWhitespace(char). */ public static boolean isSpace(char ch) { return (ch <= 0x0020) && (((((1L << 0x0009) | (1L << 0x000A) | (1L << 0x000C) | (1L << 0x000D) | (1L << 0x0020)) >> ch) & 1L) != 0); } /** * Determines if the specified character is a Unicode space character. * A character is considered to be a space character if and only if * it is specified to be a space character by the Unicode standard. This * method returns true if the character's general category type is any of * the following: *

* <ul>
     * <li> <code>SPACE_SEPARATOR</code>
     * <li> <code>LINE_SEPARATOR</code>
     * <li> <code>PARAGRAPH_SEPARATOR</code>
     * </ul>
     *
     * @param   ch      the character to be tested.
     * @return  <code>true</code> if the character is a space character;
     *          <code>false</code> otherwise.
     * @see     java.lang.Character#isWhitespace(char)
     * @since   1.1
     */
    public static boolean isSpaceChar(char ch) {
        return (ch <= FAST_PATH_MAX)
            ? CharacterDataLatin1.isSpaceChar(ch)
            : CharacterData.isSpaceChar(ch);
    }

    /**
     * Determines if the specified character is white space according to Java.
     * A character is a Java whitespace character if and only if it satisfies
     * one of the following criteria:
     *

It is a Unicode space character (SPACE_SEPARATOR, * LINE_SEPARATOR, or PARAGRAPH_SEPARATOR) * but is not also a non-breaking space ('\u00A0', * '\u2007', '\u202F'). *
It is '\u0009', HORIZONTAL TABULATION. *
It is '\u000A', LINE FEED. *
It is '\u000B', VERTICAL TABULATION. *
It is '\u000C', FORM FEED. *
It is '\u000D', CARRIAGE RETURN. *
It is '\u001C', FILE SEPARATOR. *
It is '\u001D', GROUP SEPARATOR. *
It is '\u001E', RECORD SEPARATOR. *
It is '\u001F', UNIT SEPARATOR. *

* * @param ch the character to be tested. * @return true if the character is a Java whitespace * character; false otherwise. * @see java.lang.Character#isSpaceChar(char) * @since 1.1 */ public static boolean isWhitespace(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isWhitespace(ch); } else { return CharacterData.isWhitespace(ch); } } /** * Determines if the specified character is an ISO control * character. A character is considered to be an ISO control * character if its code is in the range '\u0000' * through '\u001F' or in the range * '\u007F' through '\u009F'. * * @param ch the character to be tested. * @return true if the character is an ISO control character; * false otherwise. * * @see java.lang.Character#isSpaceChar(char) * @see java.lang.Character#isWhitespace(char) * @since 1.1 */ public static boolean isISOControl(char ch) { return (ch <= 0x009F) && ((ch <= 0x001F) || (ch >= 0x007F)); } /** * Returns a value indicating a character's general category. * * @param ch the character to be tested. * @return a value of type int representing the * character's general category. 
* @see java.lang.Character#COMBINING_SPACING_MARK * @see java.lang.Character#CONNECTOR_PUNCTUATION * @see java.lang.Character#CONTROL * @see java.lang.Character#CURRENCY_SYMBOL * @see java.lang.Character#DASH_PUNCTUATION * @see java.lang.Character#DECIMAL_DIGIT_NUMBER * @see java.lang.Character#ENCLOSING_MARK * @see java.lang.Character#END_PUNCTUATION * @see java.lang.Character#FINAL_QUOTE_PUNCTUATION * @see java.lang.Character#FORMAT * @see java.lang.Character#INITIAL_QUOTE_PUNCTUATION * @see java.lang.Character#LETTER_NUMBER * @see java.lang.Character#LINE_SEPARATOR * @see java.lang.Character#LOWERCASE_LETTER * @see java.lang.Character#MATH_SYMBOL * @see java.lang.Character#MODIFIER_LETTER * @see java.lang.Character#MODIFIER_SYMBOL * @see java.lang.Character#NON_SPACING_MARK * @see java.lang.Character#OTHER_LETTER * @see java.lang.Character#OTHER_NUMBER * @see java.lang.Character#OTHER_PUNCTUATION * @see java.lang.Character#OTHER_SYMBOL * @see java.lang.Character#PARAGRAPH_SEPARATOR * @see java.lang.Character#PRIVATE_USE * @see java.lang.Character#SPACE_SEPARATOR * @see java.lang.Character#START_PUNCTUATION * @see java.lang.Character#SURROGATE * @see java.lang.Character#TITLECASE_LETTER * @see java.lang.Character#UNASSIGNED * @see java.lang.Character#UPPERCASE_LETTER * @since 1.1 */ public static int getType(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.getType(ch); } else { return CharacterData.getType(ch); } } /** * Determines the character representation for a specific digit in * the specified radix. If the value of radix is not a * valid radix, or the value of digit is not a valid * digit in the specified radix, the null character * ('\u0000') is returned. *

* The radix argument is valid if it is greater than or * equal to MIN_RADIX and less than or equal to * MAX_RADIX. The digit argument is valid if * 0 <=digit < radix. *

* If the digit is less than 10, then * '0' + digit is returned. Otherwise, the value * 'a' + digit - 10 is returned. * * @param digit the number to convert to a character. * @param radix the radix. * @return the char representation of the specified digit * in the specified radix. * @see java.lang.Character#MIN_RADIX * @see java.lang.Character#MAX_RADIX * @see java.lang.Character#digit(char, int) */ public static char forDigit(int digit, int radix) { if ((digit >= radix) || (digit < 0)) { return '\0'; } if ((radix < MIN_RADIX) || (radix > MAX_RADIX)) { return '\0'; } if (digit < 10) { return (char)('0' + digit); } return (char)('a' - 10 + digit); } /** * Returns the Unicode directionality property for the given * character. Character directionality is used to calculate the * visual ordering of text. The directionality value of undefined * char values is DIRECTIONALITY_UNDEFINED. * * @param ch char for which the directionality property * is requested. * @return the directionality property of the char value. 
* * @see Character#DIRECTIONALITY_UNDEFINED * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR * @see Character#DIRECTIONALITY_ARABIC_NUMBER * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR * @see Character#DIRECTIONALITY_NONSPACING_MARK * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR * @see Character#DIRECTIONALITY_WHITESPACE * @see Character#DIRECTIONALITY_OTHER_NEUTRALS * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT * @since 1.4 */ public static byte getDirectionality(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.getDirectionality(ch); } else { return CharacterData.getDirectionality(ch); } } /** * Determines whether the character is mirrored according to the * Unicode specification. Mirrored characters should have their * glyphs horizontally mirrored when displayed in text that is * right-to-left. For example, '\u0028' LEFT * PARENTHESIS is semantically defined to be an opening * parenthesis. This will appear as a "(" in text that is * left-to-right but as a ")" in text that is right-to-left. * * @param ch char for which the mirrored property is requested * @return true if the char is mirrored, false * if the char is not mirrored or is not defined. 
* @since 1.4 */ public static boolean isMirrored(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isMirrored(ch); } else { return CharacterData.isMirrored(ch); } } /** * Compares two Character objects numerically. * * @param anotherCharacter the Character to be compared. * @return the value 0 if the argument Character * is equal to this Character; a value less than * 0 if this Character is numerically less * than the Character argument; and a value greater than * 0 if this Character is numerically greater * than the Character argument (unsigned comparison). * Note that this is strictly a numerical comparison; it is not * locale-dependent. * @since 1.2 */ public int compareTo(Character anotherCharacter) { return this.value - anotherCharacter.value; } /** * Compares this Character object to another object. * If the object is a Character, this function * behaves like compareTo(Character). Otherwise, it * throws a ClassCastException (as * Character objects are comparable only to other * Character objects). * * @param o the Object to be compared. * @return the value 0 if the argument is a Character * numerically equal to this Character; a value less than * 0 if the argument is a Character numerically * greater than this Character; and a value greater than * 0 if the argument is a Character numerically * less than this Character. * @exception ClassCastException if the argument is not a * Character. * @see java.lang.Comparable * @since 1.2 */ public int compareTo(Object o) { return compareTo((Character)o); } /** * Converts the character argument to uppercase using case mapping * information from the UnicodeData file. *

* * @param ch the char to be converted. * @return either the uppercase equivalent of the character, if * any, or an error flag (Character.CHAR_ERROR) * that indicates that a 1:M char mapping exists. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toLowerCase(char) * @see java.lang.Character#toTitleCase(char) * @since 1.4 */ static char toUpperCaseEx(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toUpperCaseEx(ch); } else { return CharacterData.toUpperCaseEx(ch); } } /** * Converts the char argument to uppercase using case * mapping information from the SpecialCasing file in the Unicode * specification. If a character has no explicit uppercase * mapping, then the char itself is returned in the * char[]. * * @param ch the char to uppercase * @return a char[] with the uppercased character. * @since 1.4 */ static char[] sharpsMap = new char[] {'S', 'S'}; static char[] toUpperCaseCharArray(char ch) { char[] upperMap = {ch}; if (ch <= FAST_PATH_MAX) { if (ch == '\u00DF') { upperMap = sharpsMap; } // else ch -> ch } else { int location = findInCharMap(ch); if (location != -1) { upperMap = CharacterData.charMap[location][1]; } } return upperMap; } /** * Finds the character in the uppercase mapping table. * * @param ch the char to search * @return the index location ch in the table or -1 if not found * @since 1.4 */ static int findInCharMap(char ch) { int top, bottom, current; bottom = 0; top = CharacterData.charMap.length; current = top/2; // invariant: top > current >= bottom && ch >= CharacterData.charMap[bottom][0] while (top - bottom > 1) { if (ch >= CharacterData.charMap[current][0][0]) { bottom = current; } else { top = current; } current = (top + bottom) / 2; } if (ch == CharacterData.charMap[current][0][0]) return current; else return -1; } }

/*
 * Characters accepted by the deprecated Character.isSpace(char):
 *
 *   '\t'   U+0009   HORIZONTAL TABULATION
 *   '\n'   U+000A   NEW LINE
 *   '\f'   U+000C   FORM FEED
 *   '\r'   U+000D   CARRIAGE RETURN
 *   ' '    U+0020   SPACE
 */