.\" Copyright (C) 2001 Information-technology Promotion Agency (IPA) .\" Copyright (C) 2001-2011 .\" National Institute of Advanced Industrial Science and Technology (AIST) .\" This file is part of the m17n library documentation. .\" Permission is granted to copy, distribute and/or modify this document .\" under the terms of the GNU Free Documentation License, Version 1.2 or .\" any later version published by the Free Software Foundation; with no .\" Invariant Section, no Front-Cover Texts, .\" and no Back-Cover Texts. A copy of the license is included in the .\" appendix entitled "GNU Free Documentation License". .TH "M-text" 3m17n "12 Jan 2011" "Version 1.6.2" "The m17n Library" \" -*- nroff -*- .ad l .nh .SH NAME M\-text \- M\-text objects and API for them. .SS "Typedefs" .in +1c .ti -1c .RI "typedef struct \fBMText\fP \fBMText\fP" .br .RI "\fIType of \fIM-texts\fP. \fP" .in -1c .SS "Enumerations" .in +1c .ti -1c .RI "enum \fBMTextFormat\fP { \fBMTEXT_FORMAT_US_ASCII\fP, \fBMTEXT_FORMAT_UTF_8\fP, \fBMTEXT_FORMAT_UTF_16LE\fP, \fBMTEXT_FORMAT_UTF_16BE\fP, \fBMTEXT_FORMAT_UTF_32LE\fP, \fBMTEXT_FORMAT_UTF_32BE\fP, \fBMTEXT_FORMAT_MAX\fP }" .br .RI "\fIEnumeration for specifying the format of an M-text. \fP" .ti -1c .RI "enum \fBMTextLineBreakOption\fP { \fBMTEXT_LBO_SP_CM\fP = 1, \fBMTEXT_LBO_KOREAN_SP\fP = 2, \fBMTEXT_LBO_AI_AS_ID\fP = 4, \fBMTEXT_LBO_MAX\fP }" .br .RI "\fIEnumeration for specifying a set of line breaking options. \fP" .in -1c .SS "Functions" .in +1c .ti -1c .RI "int \fBmtext_line_break\fP (\fBMText\fP *mt, int pos, int option, int *after)" .br .RI "\fIFind a linebreak position of an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext\fP ()" .br .RI "\fIAllocate a new M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_from_data\fP (const void *data, int nitems, enum \fBMTextFormat\fP format)" .br .RI "\fIAllocate a new M-text with specified data. 
\fP" .ti -1c .RI "void * \fBmtext_data\fP (\fBMText\fP *mt, enum \fBMTextFormat\fP *fmt, int *nunits, int *pos_idx, int *unit_idx)" .br .RI "\fIGet information about the text data in M-text. \fP" .ti -1c .RI "int \fBmtext_len\fP (\fBMText\fP *mt)" .br .RI "\fINumber of characters in M-text. \fP" .ti -1c .RI "int \fBmtext_ref_char\fP (\fBMText\fP *mt, int pos)" .br .RI "\fIReturn the character at the specified position in an M-text. \fP" .ti -1c .RI "int \fBmtext_set_char\fP (\fBMText\fP *mt, int pos, int c)" .br .RI "\fIStore a character into an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_cat_char\fP (\fBMText\fP *mt, int c)" .br .RI "\fIAppend a character to an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_dup\fP (\fBMText\fP *mt)" .br .RI "\fICreate a copy of an M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_cat\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fIAppend an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_ncat\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fIAppend a part of an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_cpy\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fICopy an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_ncpy\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fICopy the first few characters of an M-text to another. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_duplicate\fP (\fBMText\fP *mt, int from, int to)" .br .RI "\fICreate a new M-text from a part of an existing M-text. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_copy\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2, int from, int to)" .br .RI "\fICopy characters in the specified range into an M-text. \fP" .ti -1c .RI "int \fBmtext_del\fP (\fBMText\fP *mt, int from, int to)" .br .RI "\fIDelete characters in the specified range destructively. \fP" .ti -1c .RI "int \fBmtext_ins\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2)" .br .RI "\fIInsert an M-text into another M-text. 
\fP" .ti -1c .RI "int \fBmtext_insert\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2, int from, int to)" .br .RI "\fIInsert sub-text of an M-text into another M-text. \fP" .ti -1c .RI "int \fBmtext_ins_char\fP (\fBMText\fP *mt, int pos, int c, int n)" .br .RI "\fIInsert a character into an M-text. \fP" .ti -1c .RI "int \fBmtext_replace\fP (\fBMText\fP *mt1, int from1, int to1, \fBMText\fP *mt2, int from2, int to2)" .br .RI "\fIReplace sub-text of M-text with another. \fP" .ti -1c .RI "int \fBmtext_character\fP (\fBMText\fP *mt, int from, int to, int c)" .br .RI "\fISearch a character in an M-text. \fP" .ti -1c .RI "int \fBmtext_chr\fP (\fBMText\fP *mt, int c)" .br .RI "\fIReturn the position of the first occurrence of a character in an M-text. \fP" .ti -1c .RI "int \fBmtext_rchr\fP (\fBMText\fP *mt, int c)" .br .RI "\fIReturn the position of the last occurrence of a character in an M-text. \fP" .ti -1c .RI "int \fBmtext_cmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fICompare two M-texts character-by-character. \fP" .ti -1c .RI "int \fBmtext_ncmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fICompare initial parts of two M-texts character-by-character. \fP" .ti -1c .RI "int \fBmtext_compare\fP (\fBMText\fP *mt1, int from1, int to1, \fBMText\fP *mt2, int from2, int to2)" .br .RI "\fICompare specified regions of two M-texts. \fP" .ti -1c .RI "int \fBmtext_spn\fP (\fBMText\fP *mt, \fBMText\fP *accept)" .br .RI "\fISearch an M-text for a set of characters. \fP" .ti -1c .RI "int \fBmtext_cspn\fP (\fBMText\fP *mt, \fBMText\fP *reject)" .br .RI "\fISearch an M-text for the complement of a set of characters. \fP" .ti -1c .RI "int \fBmtext_pbrk\fP (\fBMText\fP *mt, \fBMText\fP *accept)" .br .RI "\fISearch an M-text for any of a set of characters. \fP" .ti -1c .RI "\fBMText\fP * \fBmtext_tok\fP (\fBMText\fP *mt, \fBMText\fP *delim, int *pos)" .br .RI "\fILook for a token in an M-text. 
\fP" .ti -1c .RI "int \fBmtext_text\fP (\fBMText\fP *mt1, int pos, \fBMText\fP *mt2)" .br .RI "\fILocate an M-text in another. \fP" .ti -1c .RI "int \fBmtext_search\fP (\fBMText\fP *mt1, int from, int to, \fBMText\fP *mt2)" .br .RI "\fILocate an M-text in a specific range of another. \fP" .ti -1c .RI "int \fBmtext_casecmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2)" .br .RI "\fICompare two M-texts ignoring cases. \fP" .ti -1c .RI "int \fBmtext_ncasecmp\fP (\fBMText\fP *mt1, \fBMText\fP *mt2, int n)" .br .RI "\fICompare initial parts of two M-texts ignoring cases. \fP" .ti -1c .RI "int \fBmtext_case_compare\fP (\fBMText\fP *mt1, int from1, int to1, \fBMText\fP *mt2, int from2, int to2)" .br .RI "\fICompare specified regions of two M-texts ignoring cases. \fP" .ti -1c .RI "int \fBmtext_lowercase\fP (\fBMText\fP *mt)" .br .RI "\fILowercase an M-text. \fP" .ti -1c .RI "int \fBmtext_titlecase\fP (\fBMText\fP *mt)" .br .RI "\fITitlecase an M-text. \fP" .ti -1c .RI "int \fBmtext_uppercase\fP (\fBMText\fP *mt)" .br .RI "\fIUppercase an M-text. \fP" .in -1c .SS "Variables" .in +1c .ti -1c .RI "\fBMSymbol\fP \fBMlanguage\fP" .br .in -1c .SS "Variables: Default Endian of UTF-16 and UTF-32" .in +1c .ti -1c .RI "enum \fBMTextFormat\fP \fBMTEXT_FORMAT_UTF_16\fP" .br .RI "\fIVariable of value MTEXT_FORMAT_UTF_16LE or MTEXT_FORMAT_UTF_16BE. \fP" .ti -1c .RI "const int \fBMTEXT_FORMAT_UTF_32\fP" .br .RI "\fIVariable of value MTEXT_FORMAT_UTF_32LE or MTEXT_FORMAT_UTF_32BE. \fP" .in -1c .SH "Detailed Description" .PP M\-text objects and API for them. In the m17n library, text is represented as an object called \fIM\-text\fP rather than as a C\-string (\fCchar *\fP or \fCunsigned char *\fP). An M\-text is a sequence of characters whose length is equal to or greater than 0, and can be coined from various character sources, e.g. C\-strings, files, character codes, etc. .PP M\-texts are more useful than C\-strings in the following respects. 
.PP .PD 0 .IP "\(bu" 2 M\-texts can handle a mixture of characters of various scripts, including all Unicode characters and more. This is an indispensable facility when handling multilingual text. .PP .PD 0 .IP "\(bu" 2 Each character in an M\-text can have properties called \fItext\fP \fIproperties\fP. Text properties store various kinds of information attached to parts of an M\-text to provide application programs with a unified view of that information. As rich information can be stored in M\-texts in the form of text properties, functions in application programs can be simple. .PP In addition, the library provides many functions to manipulate an M\-text just the same way as a C\-string. .SH "Typedef Documentation" .PP .SS "typedef struct \fBMText\fP \fBMText\fP" .PP Type of \fIM\-texts\fP. The type \fBMText\fP is for an \fIM\-text\fP object. Its internal structure is concealed from application programs. .SH "Enumeration Type Documentation" .PP .SS "enum \fBMTextFormat\fP" .PP Enumeration for specifying the format of an M\-text. The enum \fBMTextFormat\fP is used as an argument of the \fBmtext_from_data()\fP function to specify the format of data from which an M\-text is created. .PP \fBEnumerator: \fP .in +1c .TP \fB\fIMTEXT_FORMAT_US_ASCII \fP\fP US\-ASCII encoding .TP \fB\fIMTEXT_FORMAT_UTF_8 \fP\fP UTF\-8 encoding .TP \fB\fIMTEXT_FORMAT_UTF_16LE \fP\fP UTF\-16LE encoding .TP \fB\fIMTEXT_FORMAT_UTF_16BE \fP\fP UTF\-16BE encoding .TP \fB\fIMTEXT_FORMAT_UTF_32LE \fP\fP UTF\-32LE encoding .TP \fB\fIMTEXT_FORMAT_UTF_32BE \fP\fP UTF\-32BE encoding .TP \fB\fIMTEXT_FORMAT_MAX \fP\fP .SS "enum \fBMTextLineBreakOption\fP" .PP Enumeration for specifying a set of line breaking options. The enum \fBMTextLineBreakOption\fP is to control the line breaking algorithm of the function \fBmtext_line_break()\fP by specifying the logical\-or of its members in the argument \fIoption\fP. 
.PP \fBEnumerator: \fP .in +1c .TP \fB\fIMTEXT_LBO_SP_CM \fP\fP Specify the legacy support for space character as base for combining marks. See section 8.3 of UAX#14. .TP \fB\fIMTEXT_LBO_KOREAN_SP \fP\fP Specify to use space characters for line breaking Korean text. .TP \fB\fIMTEXT_LBO_AI_AS_ID \fP\fP Specify to treat characters of ambiguous line\-breaking class as of ideographic line\-breaking class. .TP \fB\fIMTEXT_LBO_MAX \fP\fP .SH "Variable Documentation" .PP .SS "enum \fBMTextFormat\fP \fBMTEXT_FORMAT_UTF_16\fP" .PP Variable of value MTEXT_FORMAT_UTF_16LE or MTEXT_FORMAT_UTF_16BE. The global variable \fBMTEXT_FORMAT_UTF_16\fP is initialized to \fBMTEXT_FORMAT_UTF_16LE\fP on a 'Little Endian' system (storing words with the least significant byte first), and to \fBMTEXT_FORMAT_UTF_16BE\fP on a 'Big Endian' system (storing words with the most significant byte first). .PP \fBSEE ALSO\fP .RS 4 \fBmtext_from_data()\fP .RE .PP .SS "const int \fBMTEXT_FORMAT_UTF_32\fP" .PP Variable of value MTEXT_FORMAT_UTF_32LE or MTEXT_FORMAT_UTF_32BE. The global variable \fBMTEXT_FORMAT_UTF_32\fP is initialized to \fBMTEXT_FORMAT_UTF_32LE\fP on a 'Little Endian' system (storing words with the least significant byte first), and to \fBMTEXT_FORMAT_UTF_32BE\fP on a 'Big Endian' system (storing words with the most significant byte first). .PP \fBSEE ALSO\fP .RS 4 \fBmtext_from_data()\fP .RE .PP .SS "\fBMSymbol\fP \fBMlanguage\fP" .PP The symbol whose name is 'language'. .SH "Author" .PP Generated automatically by Doxygen for The m17n Library from the source code. .SH COPYRIGHT Copyright (C) 2001 Information\-technology Promotion Agency (IPA) .br Copyright (C) 2001\-2011 National Institute of Advanced Industrial Science and Technology (AIST) .br Permission is granted to copy, distribute and/or modify this document under the terms of the GNU Free Documentation License .