'\" t
.\" Title: unicode_category_lookup
.\" Author: Sam Varshavchik
.\" Generator: DocBook XSL Stylesheets v1.79.1
.\" Date: 03/11/2017
.\" Manual: Courier Unicode Library
.\" Source: Courier Unicode Library
.\" Language: English
.\"
.TH "UNICODE_CATEGORY_LOOKUP" "3" "03/11/2017" "Courier Unicode Library" "Courier Unicode Library"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.\" http://bugs.debian.org/507673
.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\" -----------------------------------------------------------------
.\" * set default formatting
.\" -----------------------------------------------------------------
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l
.\" -----------------------------------------------------------------
.\" * MAIN CONTENT STARTS HERE *
.\" -----------------------------------------------------------------
.SH "NAME"
unicode_category_lookup, unicode_isalnum, unicode_isalpha, unicode_isblank, unicode_isdigit, unicode_isgraph, unicode_islower, unicode_ispunct, unicode_isspace, unicode_isupper \- unicode character categorization
.SH "SYNOPSIS"
.sp
.ft B
.nf
#include <courier\-unicode\&.h>
.fi
.ft
.HP \w'uint32_t\ unicode_category_lookup('u
.BI "uint32_t unicode_category_lookup(char32_t\ " "c" ");"
.HP \w'int\ unicode_isalnum('u
.BI "int unicode_isalnum(char32_t\ " "c" ");"
.HP \w'int\ unicode_isalpha('u
.BI "int unicode_isalpha(char32_t\ " "c" ");"
.HP \w'int\ unicode_isblank('u
.BI "int unicode_isblank(char32_t\ " "c" ");"
.HP \w'int\ unicode_isdigit('u
.BI "int unicode_isdigit(char32_t\ " "c" ");"
.HP \w'int\ unicode_isgraph('u
.BI "int unicode_isgraph(char32_t\ " "c" ");"
.HP \w'int\ unicode_islower('u
.BI "int unicode_islower(char32_t\ " "c" ");"
.HP \w'int\ unicode_ispunct('u
.BI "int unicode_ispunct(char32_t\ " "c" ");"
.HP \w'int\ unicode_isspace('u
.BI "int unicode_isspace(char32_t\ " "c" ");"
.HP \w'int\ unicode_isupper('u
.BI "int unicode_isupper(char32_t\ " "c" ");"
.SH "DESCRIPTION"
.PP
\fBunicode_category_lookup\fR() looks up the
\m[blue]\fBunicode character\*(Aqs categorization\fR\m[]\&\s-2\u[1]\d\s+2\&.
\fBunicode_category_lookup\fR() returns a 32 bit value\&. The value\*(Aqs
UNICODE_CATEGORY_1
bits specify the first level of the unicode character\*(Aqs category, with
UNICODE_CATEGORY_2,
UNICODE_CATEGORY_3, and
UNICODE_CATEGORY_4
bits specifying the 2nd, 3rd, and 4th level, if given\&. A value of 0 for each corresponding bit set indicates that no category is specified for this level, for this character; otherwise the possible values are defined in
<courier\-unicode\&.h>\&.
.PP
The remaining functions implement comparable equivalents of their non\-unicode versions in the standard C library, as follows:
.PP
\fBunicode_isalnum\fR()
.RS 4
Returns non\-0 for all
\fBunicode_isalpha\fR() or
\fBunicode_isdigit\fR()\&.
.RE
.PP
\fBunicode_isalpha\fR()
.RS 4
Returns non\-0 for all
UNICODE_CATEGORY_1_LETTER\&.
.RE
.PP
\fBunicode_isblank\fR()
.RS 4
Returns non\-0 for
TAB, and all
UNICODE_CATEGORY_2_SPACE\&.
.RE
.PP
\fBunicode_isdigit\fR()
.RS 4
Returns non\-0 for all
UNICODE_CATEGORY_1_NUMBER
|
UNICODE_CATEGORY_2_DIGIT, only (no third categories)\&.
.RE
.PP
\fBunicode_isgraph\fR()
.RS 4
Returns non\-0 for all codepoints above
SPACE
which are not
\fBunicode_isspace\fR()\&.
.RE
.PP
\fBunicode_islower\fR()
.RS 4
Returns non\-0 for all
\fBunicode_isalpha\fR() for which the character is equal to
\fBunicode_lc\fR(3)
of itself\&.
.RE
.PP
\fBunicode_ispunct\fR()
.RS 4
Returns non\-0 for all
UNICODE_CATEGORY_1_PUNCTUATION\&.
.RE
.PP
\fBunicode_isspace\fR()
.RS 4
Returns non\-0 for \fBunicode_isblank\fR() or for unicode characters with linebreaking properties of
BK,
CR,
LF,
NL, and
SP\&.
.RE
.PP
\fBunicode_isupper\fR()
.RS 4
Returns non\-0 for all
\fBunicode_isalpha\fR() for which the character is equal to
\fBunicode_uc\fR(3)
of itself\&.
.RE
.SH "SEE ALSO"
.PP
\fBcourier-unicode\fR(7),
\fBunicode_convert_tocase\fR(3)\&.
.SH "AUTHOR"
.PP
\fBSam Varshavchik\fR
.RS 4
Author
.RE
.SH "NOTES"
.IP " 1." 4
unicode character's categorization
.RS 4
\%http://unicode.org/notes/tn36/
.RE