2

I have an issue with nested aggregation and filter, basically without filter it returns sum for global scope but with nested doc_count is OK but sum is always 0, here is query I am trying to run:

{
    "query": {
        "nested": { 
          "path": "skills.tree",
          "query": {
            "bool" : {
              "must" : [
                {"match": {"leaf0": "Management"}},
                {"match": {"leaf1": "Financial"}}
              ]
            }
          }
        }
      },
    "aggs": {
        "by_org": {
              "terms": { 
                "field":    "org"
              },
              "aggs": {
                "sum_weight0-filtered": {
                  "filter": {
                    "nested": {
                      "path": "skills.tree",
                      "query": {
                        "bool" : {
                          "must" : [
                            {"match": {"leaf0": "Management"}},
                            {"match": {"leaf1": "Financial"}}
                          ]
                        }
                      }
                    }
                  },
                  "aggs":{
                    "sum0":{
                      "sum": { 
                        "field": "skills.tree.weight0"
                      }
                    },
                    "sum1":{
                      "sum": { 
                        "field": "skills.tree.weight1"
                      }
                    }
                  }
                }
              }
        }
    }
}

and below is a sample output:

{
   "took": 978,
   "timed_out": false,
   "_shards": {
      "total": 50,
      "successful": 50,
      "failed": 0
   },
   "hits": {
      "total": 11337,
      "max_score": 0,
      "hits": []
   },
   "aggregations": {
      "by_org": {
         "buckets": [
            {
               "key": "Aetna",
               "doc_count": 1888,
               "sum_weight0-filtered": {
                  "doc_count": 1888,
                  "sum0": {
                     "value": 0
                  },
                  "sum1": {
                     "value": 0
                  }
               }
            },
            {
               "key": "AECOM",
               "doc_count": 1085,
               "sum_weight0-filtered": {
                  "doc_count": 1085,
                  "sum0": {
                     "value": 0
                  },
                  "sum1": {
                     "value": 0
                  }
               }
            }
....

and here is part schema:

'skills'        => array(
                                'properties'    => array(
                                    'tree'  => array(
                                        'type'  => 'nested',
                                        'properties'    => array(
                                            'leaf0' => array(
                                                "type"      => "multi_field",
                                                "fields"    => array(
                                                    "leaf0"=> array(
                                                        "type"  => "string",
                                                        "index" => "not_analyzed"
                                                    ),
                                                    "search"        => array(
                                                        "type"  => "string",
                                                        "index" => "analyzed"
                                                    )
                                                )
                                            ),
                                            'leaf1' => array(
                                                "type"      => "multi_field",
                                                "fields"    => array(
                                                    "leaf1"=> array(
                                                        "type"  => "string",
                                                        "index" => "not_analyzed"
                                                    ),
                                                    "search"        => array(
                                                        "type"  => "string",
                                                        "index" => "analyzed"
                                                    )
                                                )
                                            ),
                                            'leaf2' => array(
                                                "type"      => "multi_field",
                                                "fields"    => array(
                                                    "leaf2"=> array(
                                                        "type"  => "string",
                                                        "index" => "not_analyzed"
                                                    ),
                                                    "search"        => array(
                                                        "type"  => "string",
                                                        "index" => "analyzed"
                                                    )
                                                )
                                            ),
                                            'leaf3' => array(
                                                "type"      => "multi_field",
                                                "fields"    => array(
                                                    "leaf3"=> array(
                                                        "type"  => "string",
                                                        "index" => "not_analyzed"
                                                    ),
                                                    "search"        => array(
                                                        "type"  => "string",
                                                        "index" => "analyzed"
                                                    )
                                                )
                                            ),
                                            'leaf4' => array(
                                                "type"      => "multi_field",
                                                "fields"    => array(
                                                    "leaf4"=> array(
                                                        "type"  => "string",
                                                        "index" => "not_analyzed"
                                                    ),
                                                    "search"        => array(
                                                        "type"  => "string",
                                                        "index" => "analyzed"
                                                    )
                                                )
                                            ),
                                            'leaf5' => array(
                                                "type"      => "multi_field",
                                                "fields"    => array(
                                                    "leaf5"=> array(
                                                        "type"  => "string",
                                                        "index" => "not_analyzed"
                                                    ),
                                                    "search"        => array(
                                                        "type"  => "string",
                                                        "index" => "analyzed"
                                                    )
                                                )
                                            ),
                                            'weight1' => array(
                                                'type'      => 'integer',
                                            ),
                                            'weight2' => array(
                                                'type'      => 'integer',
                                            ),
                                            'weight3' => array(
                                                'type'      => 'integer',
                                            ),
                                            'weight4' => array(
                                                'type'      => 'integer',
                                            ),
                                            'weight5' => array(
                                                'type'      => 'integer',
                                            )
                                        )
                                    )

The problem is in regards to sum0 and sum1 they all return 0 despite values being in there (it works on higher scope (no filter)). What am I doing wrong here?

2 Answers 2

1

The nested filter that you have applied , applies only to the condition and not where the aggregation will look for the values in subsequent aggregations. This means that the sum value is present in the nested object and not on the parent document and hence you got 0. Now if you use nested aggregation to ask ES to do aggregation on nested objects , it should work -

{
  "query": {
    "nested": {
      "path": "skills.tree",
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "leaf0": "Management"
              }
            },
            {
              "match": {
                "leaf1": "Financial"
              }
            }
          ]
        }
      }
    }
  },
  "aggs": {
    "by_org": {
      "terms": {
        "field": "org"
      },
      "aggs": {
        "sum_weight0-filtered": {
          "filter": {
            "nested": {
              "path": "skills.tree",
              "query": {
                "bool": {
                  "must": [
                    {
                      "match": {
                        "leaf0": "Management"
                      }
                    },
                    {
                      "match": {
                        "leaf1": "Financial"
                      }
                    }
                  ]
                }
              }
            }
          },
          "aggs": {
            "nestedAgg": {
              "nested": {
                "path": "skills.tree"
              },
              "aggs": {
                "sum0": {
                  "sum": {
                    "field": "skills.tree.weight0"
                  }
                },
                "sum1": {
                  "sum": {
                    "field": "skills.tree.weight1"
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
Sign up to request clarification or add additional context in comments.

4 Comments

Thanks but for starters you have syntax errors, nestedAgg should have 1 more level called "nested" which then wraps all beneath saying that it still doesn't work, I mean it sums up all numbers but from global scope, so it ignores "field": "org" scope and rather than summing up within each org it goes outside which is my original issue.
pls try now. made changes
Still not right, basically aggregation runs against full index rather than buckets: "aggregations": { "by_org": { "buckets": [ { "key": "Company", "doc_count": 3171, "sum_weight0-filtered": { "doc_count": 3171, "nestedAgg": { "doc_count": 60117, "sum0": { "value": 343885 }, "sum1": { "value": 93162 } }
Here is a screen shot so you can see what I mean by full index - postimg.org/image/4006mqtu9 doc_count under tree is not right
1

The issue may simply be how you are accessing the nested fields, specifically that you have to direct those match statements against the search subfields of leaf0 and leaf1 - based upon your mapping definition, the subfields are ones that are actually analyzed. With that in mind, try the following:

{
    "query": {
        "nested": {
          "path": "skills.tree",
          "query": {
            "bool" : {
              "must" : [
                {"match": {"tree.leaf0.search": "Management"}},
                {"match": {"tree.leaf1.search": "Financial"}}
              ]
            } 
          }
        }
      },  
    "aggs": {
        "by_org": {
              "terms": {
                "field":    "org"
              },
              "aggs": {
                "sum_weight0-filtered": {
                  "filter": {
                    "nested": {
                      "path": "skills.tree",
                      "query": {
                        "bool" : {
                          "must" : [
                            {"match": {"tree.leaf0.search": "Management"}},
                            {"match": {"tree.leaf1.search": "Financial"}}
                          ]
                        } 
                      }
                    }
                  },  
                  "aggs":{
                    "tree" : {
                      "nested" : {"path" : "skills.tree"},
                      "aggs" : {
                        "sum0" : {
                          "sum": {
                            "field": "tree.weight0"
                          }
                        },
                        "sum1": {
                          "sum": {
                            "field": "tree.weight1"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
    }
}

I got this to work with a tiny contrived test set of data - it may be worth noting that I directed the query against the index at large and not against a particular document type (since in your originally posted query, the nested paths appear to be "fully" qualified).

2 Comments

thanks, but same issue as with another answer, the tree aggregation doesn't respect filter and runs against full index rather than just within buckets
Here is a screen shot so you can see what I mean by full index - postimg.org/image/4006mqtu9 doc_count under tree is not right

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.