.\" Man page generated from reStructuredText.
.
.
.nr rst2man-indent-level 0
.
.\" rstReportMargin: debugging helper. When invoked it outputs its first
.\" argument and the value of the an-margin register, the current
.\" rst2man-indent-level, the margin saved for that level, and the margins
.\" saved for levels 0, 1 and 2. The only uses visible in this file are the
.\" commented-out calls inside INDENT.
.de1 rstReportMargin
\\$1 \\n[an-margin]
level \\n[rst2man-indent-level]
level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
-
\\n[rst2man-indent0]
\\n[rst2man-indent1]
\\n[rst2man-indent2]
..
.\" INDENT <amount>: opens a relative indent (.RS) using the first argument,
.\" then records the an-margin register in rst2man-indent<level> for the
.\" current rst2man-indent-level, and finally increments that level so a
.\" nested INDENT stores its margin in the next register.
.de1 INDENT
.\" .rstReportMargin pre:
. RS \\$1
. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
. nr rst2man-indent-level +1
.\" .rstReportMargin post:
..
.\" UNINDENT: closes the innermost relative indent (.RE), decrements
.\" rst2man-indent-level, and then sets the indentation (.in) to the value
.\" stored in rst2man-indent<level> for the decremented level, expressed in
.\" basic units (the trailing u).
.de UNINDENT
. RE
.\" indent \\n[an-margin]
.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
.nr rst2man-indent-level -1
.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
..
.TH "MONGOC_AGGREGATE" "3" "May 07, 2024" "1.27.1" "libmongoc"
.sp
This document provides a number of practical examples that display the capabilities of the aggregation framework.
.sp
The \fI\%Aggregations using the Zip Codes Data Set\fP examples use a publicly available data set of all zipcodes and populations in the United States. These data are available at: \fI\%zips.json\fP\&.
.SH REQUIREMENTS
.sp
Let\(aqs check if everything is installed.
.sp
Use the following command to load the zips.json data set into a mongod instance:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
$ mongoimport \-\-drop \-d test \-c zipcodes zips.json
.EE
.UNINDENT
.UNINDENT
.sp
Let\(aqs use the MongoDB shell to verify that everything was imported successfully.
.INDENT 0.0
.INDENT 3.5
.sp
.EX
$ mongo test
connecting to: test
> db.zipcodes.count()
29467
> db.zipcodes.findOne()
{
      \(dq_id\(dq : \(dq35004\(dq,
      \(dqcity\(dq : \(dqACMAR\(dq,
      \(dqloc\(dq : [
              \-86.51557,
              33.584132
      ],
      \(dqpop\(dq : 6055,
      \(dqstate\(dq : \(dqAL\(dq
}
.EE
.UNINDENT
.UNINDENT
.SH AGGREGATIONS USING THE ZIP CODES DATA SET
.sp
Each document in this collection has the following form:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
{
  \(dq_id\(dq : \(dq35004\(dq,
  \(dqcity\(dq : \(dqAcmar\(dq,
  \(dqstate\(dq : \(dqAL\(dq,
  \(dqpop\(dq : 6055,
  \(dqloc\(dq : [\-86.51557, 33.584132]
}
.EE
.UNINDENT
.UNINDENT
.sp
In these documents:
.INDENT 0.0
.IP \(bu 2
The \fB_id\fP field holds the zipcode as a string.
.IP \(bu 2
The \fBcity\fP field holds the city name.
.IP \(bu 2
The \fBstate\fP field holds the two letter state abbreviation.
.IP \(bu 2
The \fBpop\fP field holds the population.
.IP \(bu 2
The \fBloc\fP field holds the location as a \fB[longitude, latitude]\fP array.
.UNINDENT
.SH STATES WITH POPULATIONS OVER 10 MILLION
.sp
To get all states with a population of 10 million or more, use the following aggregation pipeline:
.sp
aggregation1.c
.INDENT 0.0
.INDENT 3.5
.sp
.EX
#include <mongoc/mongoc.h>
#include <stdio.h>

/*
 * Runs the example aggregation on the given collection and prints each
 * resulting document to stdout as canonical extended JSON, one per line.
 * A cursor error, if any, is reported on stderr.
 */
static void
print_pipeline (mongoc_collection_t *collection)
{
   /*
    * Two stages: $group sums pop per state into total_pop, then $match
    * keeps the groups whose total_pop is at least 10000000.
    */
   bson_t *stages = BCON_NEW (\(dqpipeline\(dq, \(dq[\(dq,
      \(dq{\(dq, \(dq$group\(dq, \(dq{\(dq, \(dq_id\(dq, \(dq$state\(dq, \(dqtotal_pop\(dq, \(dq{\(dq, \(dq$sum\(dq, \(dq$pop\(dq, \(dq}\(dq, \(dq}\(dq, \(dq}\(dq,
      \(dq{\(dq, \(dq$match\(dq, \(dq{\(dq, \(dqtotal_pop\(dq, \(dq{\(dq, \(dq$gte\(dq, BCON_INT32 (10000000), \(dq}\(dq, \(dq}\(dq, \(dq}\(dq,
   \(dq]\(dq);
   mongoc_cursor_t *results = mongoc_collection_aggregate (collection, MONGOC_QUERY_NONE, stages, NULL, NULL);
   const bson_t *current;
   bson_error_t failure;

   /* Walk the cursor; each JSON string is freed right after it is printed. */
   while (mongoc_cursor_next (results, &current)) {
      char *json = bson_as_canonical_extended_json (current, NULL);

      printf (\(dq%s\en\(dq, json);
      bson_free (json);
   }

   /* After the loop, report any error recorded on the cursor. */
   if (mongoc_cursor_error (results, &failure)) {
      fprintf (stderr, \(dqCursor Failure: %s\en\(dq, failure.message);
   }

   mongoc_cursor_destroy (results);
   bson_destroy (stages);
}

/*
 * Example entry point: parses the connection URI, creates a client, runs
 * print_pipeline () on the zipcodes collection of the test database, and
 * releases everything it created. Returns EXIT_SUCCESS, or EXIT_FAILURE when
 * the URI cannot be parsed or the client cannot be created.
 */
int
main (void)
{
   mongoc_client_t *client;
   mongoc_collection_t *collection;
   const char *uri_string = \(dqmongodb://localhost:27017/?appname=aggregation\-example\(dq;
   mongoc_uri_t *uri;
   bson_error_t error;

   mongoc_init ();

   uri = mongoc_uri_new_with_error (uri_string, &error);
   if (!uri) {
      fprintf (stderr,
               \(dqfailed to parse URI: %s\en\(dq
               \(dqerror message:       %s\en\(dq,
               uri_string,
               error.message);
      /* Pair the mongoc_init () above with a cleanup on this path too. */
      mongoc_cleanup ();
      return EXIT_FAILURE;
   }

   client = mongoc_client_new_from_uri (uri);
   if (!client) {
      /* The URI was created successfully, so release it before leaving. */
      fprintf (stderr, \(dqfailed to create client for URI: %s\en\(dq, uri_string);
      mongoc_uri_destroy (uri);
      mongoc_cleanup ();
      return EXIT_FAILURE;
   }

   /* 2 selects the newer error API (MONGOC_ERROR_API_VERSION_2 in libmongoc). */
   mongoc_client_set_error_api (client, 2);
   collection = mongoc_client_get_collection (client, \(dqtest\(dq, \(dqzipcodes\(dq);

   print_pipeline (collection);

   mongoc_uri_destroy (uri);
   mongoc_collection_destroy (collection);
   mongoc_client_destroy (client);

   mongoc_cleanup ();

   return EXIT_SUCCESS;
}

.EE
.UNINDENT
.UNINDENT
.sp
You should see a result like the following:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
{ \(dq_id\(dq : \(dqPA\(dq, \(dqtotal_pop\(dq : 11881643 }
{ \(dq_id\(dq : \(dqOH\(dq, \(dqtotal_pop\(dq : 10847115 }
{ \(dq_id\(dq : \(dqNY\(dq, \(dqtotal_pop\(dq : 17990455 }
{ \(dq_id\(dq : \(dqFL\(dq, \(dqtotal_pop\(dq : 12937284 }
{ \(dq_id\(dq : \(dqTX\(dq, \(dqtotal_pop\(dq : 16986510 }
{ \(dq_id\(dq : \(dqIL\(dq, \(dqtotal_pop\(dq : 11430472 }
{ \(dq_id\(dq : \(dqCA\(dq, \(dqtotal_pop\(dq : 29760021 }
.EE
.UNINDENT
.UNINDENT
.sp
The above aggregation pipeline is built from two pipeline operators: \fB$group\fP and \fB$match\fP\&.
.sp
The \fB$group\fP pipeline operator requires an \fB_id\fP field, where we specify the grouping; the remaining fields specify how to generate a composite value and must use one of the group aggregation functions: \fB$addToSet\fP, \fB$first\fP, \fB$last\fP, \fB$max\fP, \fB$min\fP, \fB$avg\fP, \fB$push\fP, \fB$sum\fP\&. The \fB$match\fP pipeline operator syntax is the same as the read operation query syntax.
.sp
The \fB$group\fP process reads all documents and for each state it creates a separate document, for example:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
{ \(dq_id\(dq : \(dqWA\(dq, \(dqtotal_pop\(dq : 4866692 }
.EE
.UNINDENT
.UNINDENT
.sp
The \fBtotal_pop\fP field uses the \fB$sum\fP aggregation function to sum the values of all \fBpop\fP fields in the source documents.
.sp
Documents created by \fB$group\fP are piped to the \fB$match\fP pipeline operator. It returns the documents whose \fBtotal_pop\fP field is greater than or equal to 10 million.
.SH AVERAGE CITY POPULATION BY STATE
.sp
To get the first three states with the greatest average population per city, use the following aggregation:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
/*
 * Stages: sum pop per (state, city), average those sums per state,
 * sort by that average in descending order, and keep the first 3.
 */
pipeline = BCON_NEW (\(dqpipeline\(dq, \(dq[\(dq,
   \(dq{\(dq, \(dq$group\(dq, \(dq{\(dq, \(dq_id\(dq, \(dq{\(dq, \(dqstate\(dq, \(dq$state\(dq, \(dqcity\(dq, \(dq$city\(dq, \(dq}\(dq, \(dqpop\(dq, \(dq{\(dq, \(dq$sum\(dq, \(dq$pop\(dq, \(dq}\(dq, \(dq}\(dq, \(dq}\(dq,
   \(dq{\(dq, \(dq$group\(dq, \(dq{\(dq, \(dq_id\(dq, \(dq$_id.state\(dq, \(dqavg_city_pop\(dq, \(dq{\(dq, \(dq$avg\(dq, \(dq$pop\(dq, \(dq}\(dq, \(dq}\(dq, \(dq}\(dq,
   \(dq{\(dq, \(dq$sort\(dq, \(dq{\(dq, \(dqavg_city_pop\(dq, BCON_INT32 (\-1), \(dq}\(dq, \(dq}\(dq,
   \(dq{\(dq, \(dq$limit\(dq, BCON_INT32 (3), \(dq}\(dq,
\(dq]\(dq);
.EE
.UNINDENT
.UNINDENT
.sp
This aggregate pipeline produces:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
{ \(dq_id\(dq : \(dqDC\(dq, \(dqavg_city_pop\(dq : 303450.0 }
{ \(dq_id\(dq : \(dqFL\(dq, \(dqavg_city_pop\(dq : 27942.29805615551 }
{ \(dq_id\(dq : \(dqCA\(dq, \(dqavg_city_pop\(dq : 27735.341099720412 }
.EE
.UNINDENT
.UNINDENT
.sp
The above aggregation pipeline is built from three pipeline operators: \fB$group\fP, \fB$sort\fP and \fB$limit\fP\&.
.sp
The first \fB$group\fP operator creates the following documents:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
{ \(dq_id\(dq : { \(dqstate\(dq : \(dqWY\(dq, \(dqcity\(dq : \(dqSmoot\(dq }, \(dqpop\(dq : 414 }
.EE
.UNINDENT
.UNINDENT
.sp
Note that the \fB$group\fP operator can\(aqt use nested documents except in the \fB_id\fP field.
.sp
The second \fB$group\fP uses these documents to create the following documents:
.INDENT 0.0
.INDENT 3.5
.sp
.EX
{ \(dq_id\(dq : \(dqFL\(dq, \(dqavg_city_pop\(dq : 27942.29805615551 }
.EE
.UNINDENT
.UNINDENT
.sp
These documents are sorted by the \fBavg_city_pop\fP field in descending order. Finally, the \fB$limit\fP pipeline operator returns the first 3 documents from the sorted set.
.SH AUTHOR
MongoDB, Inc
.SH COPYRIGHT
2017-present, MongoDB, Inc
.\" Generated by docutils manpage writer.
.