'\" t .\" Title: unicode_convert .\" Author: Sam Varshavchik .\" Generator: DocBook XSL Stylesheets vsnapshot .\" Date: 11/25/2020 .\" Manual: Courier Unicode Library .\" Source: Courier Unicode Library .\" Language: English .\" .TH "UNICODE_CONVERT" "3" "11/25/2020" "Courier Unicode Library" "Courier Unicode Library" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" http://bugs.debian.org/507673 .\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .ie \n(.g .ds Aq \(aq .el .ds Aq ' .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" unicode_u_ucs4_native, unicode_u_ucs2_native, unicode_convert_init, unicode_convert, unicode_convert_deinit, unicode_convert_tocbuf_init, unicode_convert_tou_init, unicode_convert_fromu_init, unicode_convert_uc, unicode_convert_tocbuf_toutf8_init, unicode_convert_tocbuf_fromutf8_init, unicode_convert_toutf8, unicode_convert_fromutf8, unicode_convert_tobuf, unicode_convert_tou_tobuf, unicode_convert_fromu_tobuf \- unicode character set conversion .SH "SYNOPSIS" .sp .ft B .nf #include extern const char unicode_u_ucs4_native[]; extern const char unicode_u_ucs2_native[]; .fi .ft .HP \w'unicode_convert_handle_t\ unicode_convert_init('u .BI "unicode_convert_handle_t unicode_convert_init(const\ char\ *" "src_chset" ", const\ char\ *" "dst_chset" ", void\ *" "cb_arg" ");" .HP 
\w'int\ unicode_convert('u .BI "int unicode_convert(unicode_convert_handle_t\ " "handle" ", const\ char\ *" "text" ", size_t\ " "cnt" ");" .HP \w'int\ unicode_convert_deinit('u .BI "int unicode_convert_deinit(unicode_convert_handle_t\ " "handle" ", int\ *" "errptr" ");" .HP 8 .BI "unicode_convert_handle_t unicode_convert_tocbuf_init( const\ char\ *" "src_chset" ", const\ char\ *" "dst_chset" ", char\ **" "cbufptr_ret" ", size_t\ *" "cbufsize_ret" ", int\ " "nullterminate" ");" .HP 8 .BI "unicode_convert_handle_t unicode_convert_tocbuf_toutf8_init( const\ char\ *" "src_chset" ", char\ **" "cbufptr_ret" ", size_t\ *" "cbufsize_ret" ", int\ " "nullterminate" ");" .HP 8 .BI "unicode_convert_handle_t unicode_convert_tocbuf_fromutf8_init( const\ char\ *" "dst_chset" ", char\ **" "cbufptr_ret" ", size_t\ *" "cbufsize_ret" ", int\ " "nullterminate" ");" .HP 8 .BI "unicode_convert_handle_t unicode_convert_tou_init( const\ char\ *" "src_chset" ", char32_t\ **" "ucptr_ret" ", size_t\ *" "ucsize_ret" ", int\ " "nullterminate" ");" .HP 8 .BI "unicode_convert_handle_t unicode_convert_fromu_init( const\ char\ *" "dst_chset" ", char\ **" "cbufptr_ret" ", size_t\ *" "cbufsize_ret" ", int\ " "nullterminate" ");" .HP \w'int\ unicode_convert_uc('u .BI "int unicode_convert_uc(unicode_convert_handle_t\ " "handle" ", const\ char32_t\ *" "text" ", size_t\ " "cnt" ");" .HP \w'char\ *unicode_convert_toutf8('u .BI "char *unicode_convert_toutf8(const\ char\ *" "text" ", const\ char\ *" "charset" ", int\ *" "error" ");" .HP \w'char\ *unicode_convert_fromutf8('u .BI "char *unicode_convert_fromutf8(const\ char\ *" "text" ", const\ char\ *" "charset" ", int\ *" "error" ");" .HP \w'char\ *unicode_convert_tobuf('u .BI "char *unicode_convert_tobuf(const\ char\ *" "text" ", const\ char\ *" "charset" ", const\ char\ *" "dstcharset" ", int\ *" "error" ");" .HP \w'int\ unicode_convert_tou_tobuf('u .BI "int unicode_convert_tou_tobuf(const\ char\ *" "text" ", size_t\ " "text_l" ", const\ char\ *" "charset" ", 
char32_t\ **" "uc" ", size_t\ *" "ucsize" ", int\ *" "error" ");" .HP \w'int\ unicode_convert_fromu_tobuf('u .BI "int unicode_convert_fromu_tobuf(const\ char32_t\ *" "utext" ", size_t\ " "utext_l" ", const\ char\ *" "charset" ", char\ **" "c" ", size_t\ *" "csize" ", int\ *" "error" ");" .SH "DESCRIPTION" .PP \fIunicode_u_ucs4_native\fR[] contains the string \(lqUCS\-4BE\(rq or \(lqUCS\-4LE\(rq, matching the native char32_t endianness\&. .PP \fIunicode_u_ucs2_native\fR[] contains the string \(lqUCS\-2BE\(rq or \(lqUCS\-2LE\(rq, matching the native char32_t endianness\&. .PP \fBunicode_convert_init\fR(), \fBunicode_convert\fR(), and \fBunicode_convert_deinit\fR() are an adaptation of the \m[blue]\fB\fBiconv\fR(3)\fR\m[]\&\s-2\u[1]\d\s+2 API that uses the same calling convention as the other algorithms in this unicode library, with some value\-added features\&. These functions use \fBiconv\fR(3) to effect the actual character set conversion\&. .PP \fBunicode_convert_init\fR() returns a non\-NULL handle for the requested conversion, or NULL if the requested conversion is not available\&. \fBunicode_convert_init\fR() takes a pointer to the output function that receives converted character text\&. The output function receives a pointer to the converted character text, and the number of characters in the converted text\&. The output function gets repeatedly called, until it receives the entire converted text\&. .PP The character text to convert gets passed, repeatedly, to \fBunicode_convert\fR()\&. Each call to \fBunicode_convert\fR() results in the output function getting invoked, zero or more times, with each successive part of the converted text\&. Finally, \fBunicode_convert_deinit\fR() stops the conversion and deallocates the conversion handle\&. 
.PP It\*(Aqs possible that a call to \fBunicode_convert_deinit\fR() results in some additional calls to the output function, passing the remaining, final parts of the converted text, before \fBunicode_convert_deinit\fR() deallocates the handle, and returns\&. .PP The output function should return 0 normally\&. A non\-0 return indicates an error condition\&. \fBunicode_convert_deinit\fR() returns non\-zero if any previous invocation of the output function returned non\-zero (this includes any invocations of the output function resulting from this call, or prior \fBunicode_convert\fR() calls), or 0 if all invocations of the output function returned 0\&. .PP If the \fIerrptr\fR is not NULL, *\fIerrptr\fR gets set to non\-zero if there were any conversion errors \-\- if there was any text that could not be converted to the destination character set\&. .PP \fBunicode_convert\fR() also returns non\-zero if it calls the output function and it returns non\-zero, however the conversion handle remains allocated, so \fBunicode_convert_deinit\fR() must still be called, to clean that up\&. .SS "Collecting converted text into a buffer" .PP Call \fBunicode_convert_tocbuf_init\fR() instead of \fBunicode_convert_init\fR(), then call \fBunicode_convert\fR() and \fBunicode_convert_deinit\fR() normally\&. The parameters to \fBunicode_convert_init\fR() specify the source and the destination character sets\&. \fBunicode_convert_tocbuf_toutf8_init\fR() is just an alias that specifies UTF\-8 as the destination character set\&. \fBunicode_convert_tocbuf_fromutf8_init\fR() is just an alias that specifies UTF\-8 as the source character set\&. .PP These functions supply an output function that collects the converted text into a malloc()ed buffer\&. If \fBunicode_convert_deinit\fR() returns 0, *\fIcbufptr_ret\fR gets initialized to a malloc()ed buffer, and the number of converted characters, the size of the malloc()ed buffer, get placed into *\fIcbufsize_ret\fR\&. 
.if n \{\ .sp .\} .RS 4 .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .br .ps +1 \fBNote\fR .ps -1 .br .PP If the converted string is an empty string, *\fIcbufsize_ret\fR gets set to 0, but *\fIcbufptr_ret\fR still gets initialized (to a dummy malloced buffer)\&. .sp .5v .RE .PP A non\-zero \fInullterminate\fR places a trailing \e0 character after the converted string (this is included in *\fIcbufsize_ret\fR)\&. .SS "Converting between character sets and unicode" .PP \fBunicode_convert_tou_init\fR() converts character text into a char32_t buffer\&. It works just like \fBunicode_convert_tocbuf_init\fR(), except that only the source character set gets specified and the output buffer is a char32_t buffer\&. \fInullterminate\fR terminates the converted unicode characters with a U+0000\&. .PP \fBunicode_convert_fromu_init\fR() converts char32_ts to the output character set, and also works like \fBunicode_convert_tocbuf_init\fR()\&. Additionally, in this case, \fBunicode_convert_uc\fR() works just like \fBunicode_convert\fR() except that the input sequence is a char32_t sequence, and the count parameter is the number of unicode characters\&. .SS "One\-shot conversions" .PP \fBunicode_convert_toutf8\fR() converts the specified text in the specified character set into a UTF\-8 string, returning a malloced buffer\&. If \fIerror\fR is not NULL, even if \fBunicode_convert_toutf8\fR() returns a non\-NULL value, *\fIerror\fR gets set to a non\-zero value if a character conversion error has occurred, and some characters could not be converted\&. .PP \fBunicode_convert_fromutf8\fR() does a similar conversion from UTF\-8 \fItext\fR to the specified character set\&. .PP \fBunicode_convert_tobuf\fR() does a similar conversion between two different character sets\&. .PP \fBunicode_convert_tou_tobuf\fR() calls \fBunicode_convert_tou_init\fR(), feeds the character string through \fBunicode_convert\fR(), then calls \fBunicode_convert_deinit\fR()\&. 
If this function returns 0, *\fIuc\fR and *\fIucsize\fR are set to a malloced buffer+size holding the unicode char array\&. .PP \fBunicode_convert_fromu_tobuf\fR() calls \fBunicode_convert_fromu_init\fR(), feeds the unicode array through \fBunicode_convert_uc\fR(), then calls \fBunicode_convert_deinit\fR()\&. If this function returns 0, *\fIc\fR and *\fIcsize\fR are set to a malloced buffer+size holding the char array\&. .SH "SEE ALSO" .PP \fBcourier-unicode\fR(7), \fBunicode_convert_tocase\fR(3), \fBunicode_default_chset\fR(3)\&. .SH "AUTHOR" .PP \fBSam Varshavchik\fR .RS 4 Author .RE .SH "NOTES" .IP " 1." 4 \fBiconv\fR(3) .RS 4 \%http://manpages.courier-mta.org/htmlman3/iconv.3.html .RE