'\" t
.\" Title: unicode::wordbreak
.\" Author: Sam Varshavchik
.\" Generator: DocBook XSL Stylesheets v1.79.1
.\" Date: 03/11/2017
.\" Manual: Courier Unicode Library
.\" Source: Courier Unicode Library
.\" Language: English
.\"
.TH "UNICODE::WORDBREAK" "3" "03/11/2017" "Courier Unicode Library" "Courier Unicode Library"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.\" http://bugs.debian.org/507673
.\" http://lists.gnu.org/archive/html/groff/2009-02/msg00013.html
.\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.ie \n(.g .ds Aq \(aq
.el .ds Aq '
.\" -----------------------------------------------------------------
.\" * set default formatting
.\" -----------------------------------------------------------------
.\" disable hyphenation
.nh
.\" disable justification (adjust text to left margin only)
.ad l
.\" -----------------------------------------------------------------
.\" * MAIN CONTENT STARTS HERE *
.\" -----------------------------------------------------------------
.SH "NAME"
unicode::wordbreak_callback_base, unicode::wordbreakscan \- unicode word\-breaking rules
.SH "SYNOPSIS"
.sp
.nf
#include <courier\-unicode\&.h>
class wordbreak : public unicode::wordbreak_callback_base {
public:
using unicode::wordbreak_callback_base::operator<<;
using unicode::wordbreak_callback_base::operator();
int callback(bool flag)
{
// \&.\&.\&.
}
};
char32_t c;
std::u32string buf;
wordbreak compute_wordbreak;
compute_wordbreak << c;
compute_wordbreak(buf);
compute_wordbreak(buf\&.begin(), buf\&.end());
compute_wordbreak\&.finish();
// \&.\&.\&.
unicode::wordbreakscan scan;
scan << c;
size_t nchars=scan\&.finish();
.fi
.SH "DESCRIPTION"
.PP
unicode::wordbreak_callback_base
is a C++ binding for the unicode word\-breaking rule implementation described in
\fBunicode_word_break\fR(3)\&.
.PP
Subclass
unicode::wordbreak_callback_base
and implement
callback(), the virtual method inherited from
unicode::wordbreak_callback_base\&. The
callback() function receives the output of the word\-breaking algorithm, namely a
bool
indicating whether a word break exists before the unicode character in the underlying input sequence\&.
.PP
callback() should return 0\&. A non\-zero return value reports an error and stops the word\-breaking algorithm\&. See
\fBunicode_word_break\fR(3)
for more information\&.
.PP
The input unicode characters for the word\-breaking algorithm are provided by the
<<
operator, one unicode character at a time; or by the
()
operator, passing either a container, or a beginning and an ending iterator value for an input sequence of unicode characters\&.
finish() indicates the end of the unicode character sequence\&.
.PP
unicode::wordbreakscan
is a C++ binding for the
\fBunicode_wbscan_init\fR(),
\fBunicode_wbscan_next\fR() and
\fBunicode_wbscan_end\fR()
functions described in
\fBunicode_word_break\fR(3)\&. Its
<<
operator takes the unicode characters one at a time, and
finish() returns the number of characters before the first unicode word break\&. The
<<
operator returns a
bool
indicating whether the first word break has already been found, in which case further calls are not necessary\&.
.SH "SEE ALSO"
.PP
\fBcourier-unicode\fR(7),
\fBunicode_word_break\fR(3)\&.
.SH "AUTHOR"
.PP
\fBSam Varshavchik\fR
.RS 4
Author
.RE