2

I use Elasticsearch to store my biological data.

I am trying to write a query with filtered aggregations, but the returned data is not what I want. The problem comes from the fact that each specimen has a "d_" attribute which is an array. I need to aggregate over only some elements of this array, but I have not managed to filter them.

// I edited the data manually to make it easier to understand, so it may contain some typos.

Example of my data :

   [    {
        "_index": "botanique",
        "_type": "specimens",
        "_id": "227CB8A3E2834AAEB50B1ECF6B672180",
        "_score": 1,
        "_source": {
            ....
            "d_": [
                {     // -------------- dont want this
                    "taxonid": "BB7C33A3126648D095BEDDABB0BD2758",
                    "scientificname": "Lastreopsis effusa",
                    "scientificnameauthorship": "(Sw.) Tindale"
                },
                {    // -------------- want this
                    "taxonid": "704FC303D7F74C02912D0FEB5C6FC55D",
                    "scientificname": "Parapolystichum effusum",
                    "scientificnameauthorship": "(sw.) copel."
                }
            ]
        }
    } , {
        "_index": "botanique",
        "_type": "specimens",
        "_id": "11A22DE8E4AD45BBAC7783E508079DCD",
        "_score": 1,
        "_source": {
            ....
            "d_": [
                {     // -------------- want this
                    "taxonid": "A94D243348DF4CAD926B6C3965D948A3",
                    "scientificname": "Parapolystichum effusum",
                    "scientificnameauthorship": "(Sw.) Ching",
                }                   ,
                {    // -------------- dont want this
                    "taxonid": "B01A89AA961A46F2984722C311DC2BDD",
                    "scientificname": "Lastreopsis effusa",
                    "scientificnameauthorship": "(willd. ex schkuhr) proctor"
                }
            ]
        }
    },{
        "_index": "botanique",
        "_type": "specimens",
        "_id": "1647F5E23D304EFAAB9D3E3BE80FD3CE",
        "_score": 1,
        "_source": {
            ...
            "d_": [
                {    // -------------- want this
                    "taxonid": "D70C4478D2B0437AA940994E98D696C5",
                    "scientificname": "Parapolystichum effusum",
                    "scientificnameauthorship": "(Sw.) Ching"
                } ,
                {    // -------------- dont want this
                    "taxonid": "011E5DA526FC4098953DBD1F9E5F4424",
                    "scientificname": "Lastreopsis effusa",
                    "scientificnameauthorship": "(Sw.) Tindale",
                }
            ]
        }
    }
]

For example, I want an aggregation on all the "d_.scientificnameauthorship" and "d_.taxonid" values where "d_.scientificname" equals "parapolystichum effusum". So I would expect to get, for "scientificnameauthorship", "(sw.) copel." and "(Sw.) Ching", but not "(willd. ex schkuhr) proctor". I have not managed to do this...

My query :

{
  "_source": ["d_" ],
  "size": 3,
  "query": {
    "filtered": {"filter": {"bool": {"must": [{"term": {
                "d_.scientificname": "parapolystichum effusum"
    }}] } }}
  },
  "aggs": {
    "scientificname": {
      "terms": {
        "field": "d_.scientificname",
        "size": 1,
        "include": {
          "pattern": "parapolystichum effusum",
          "flags": "CANON_EQ|CASE_INSENSITIVE"
        }
      },
      "aggs": {
        "scientificnameauthorship": {
          "terms": {
            "field": "d_.scientificnameauthorship",
            "size": 10
          }
        }
      }
    }
  }
}

The returned data includes every "scientificnameauthorship" of the specimens:

{
    "aggregations": {
        "scientificname": {
            "buckets": [{
                "key": "parapolystichum effusum",
                "doc_count": 269,
                "scientificnameauthorship": {
                    "buckets": [
                        {   // ------ want this 
                            "key": "(sw.) ching",
                            "doc_count": 269
                        }                        ,
                        {   // ------ want this 
                            "key": "(sw.) copel.",
                            "doc_count": 34
                        }                        , 
                        {   // ------ dont want this 
                            "key": "(sw.) tindale",
                            "doc_count": 262
                        }                        ,
                        {   // ------ dont want this 
                            "key": "(willd. ex schkuhr) proctor",
                            "doc_count": 7
                        }                        ,
                        {   // ------ dont want this 
                            "key": "fée",
                            "doc_count": 2
                        }
                    ]
                }
            }]
        }
    }
}
  1. How should I change the aggs query?
  2. How can I get only one item of the array in the hits?

I would like to get this:

{   
    "hits": {
        "total": 269,
        "max_score": 1,
        "hits": [
            {
                "_index": "botanique",
                "_type": "specimens",
                "_id": "1647F5E23D304EFAAB9D3E3BE80FD3CE",
                "_score": 1,
                "_source": {
                    ...
                    "d_": [{    // -------------- want this
                            "taxonid": "D70C4478D2B0437AA940994E98D696C5",
                            "scientificname": "Parapolystichum effusum",
                            "scientificnameauthorship": "(Sw.) Ching"
                        }]
                }                       
            }
    }
}

Instead of this :

{   
    "hits": {
        "total": 269,
        "max_score": 1,
        "hits": [
            {
                "_index": "botanique",
                "_type": "specimens",
                "_id": "1647F5E23D304EFAAB9D3E3BE80FD3CE",
                "_score": 1,
                "_source": {
                    ...
                    "d_": [
                        {    // -------------- want this
                            "taxonid": "D70C4478D2B0437AA940994E98D696C5",
                            "scientificname": "Parapolystichum effusum",
                            "scientificnameauthorship": "(Sw.) Ching"
                        } ,
                        {    // -------------- dont want this
                            "taxonid": "011E5DA526FC4098953DBD1F9E5F4424",
                            "scientificname": "Lastreopsis effusa",
                            "scientificnameauthorship": "(Sw.) Tindale",
                        }
                    ]
                }
            }
    }
}

thank you very much

// EDIT 1

I also tried putting a filter in the aggs like this, but it doesn't work:

{
    "query": {
        "filtered": {"filter": {"bool": {"must": [{"term": {
                    "d_.scientificname": "parapolystichum effusum"
        }}] } }}
    },
    "aggs" : {
        "scientificname" : {
            "filter" : {"term": {
                    "d_.scientificname": "parapolystichum effusum"
            }},
            "aggs": {
                "scientificnameauthorship": {
                  "terms": {
                    "field": "d_.scientificnameauthorship",
                    "size": 10
                  }
                }
              }
        }
    }
}

2 Answers 2

0

You can use a nested aggregation as the parent aggregator. Then, inside the parent aggregator, add a filter aggregation to filter the array (list data), and attach a child terms aggregation to it. See https://www.elastic.co/guide/en/elasticsearch/reference/1.4/search-aggregations-bucket-nested-aggregation.html. Sample query:

"filteredaggs" : {
          "nested" : {
            "path" : "D_"
          },
          "aggs" : {
            "maxdays" : {
              "filter" : {
                "terms" : {
                  "scientificname" : ["xyz", "pqr"]
                }
              },
              "aggs" : {
                "myfinalaggregator" : {
                  "terms" : {
                    "field" : "scientificnameauthorship"
                  }
                }
              }
            }
          }
        }

Hope this will work for you.

Sign up to request clarification or add additional context in comments.

3 Comments

Thanks for the help. I'm trying your example but I get an error: AggregationExecutionException[[nested] nested path [d_] is not nested. Do you have any idea?
I see that the mapping needs to have ' "type" : "nested" '. I didn't set this. I will update the mapping (it will take a few days because I can't edit the Elastic server myself).
Oh, yes, I forgot to mention that: the mapping should be of type nested.
0

Finally found the answer, thanks to user3775217 for pointing out "nested":

{
    "_source" : false,
    "size" : 0,
    "query" : {
        "filtered" : {
            "filter" : {
                "bool" : {
                    "must" : [{
                            "nested" : {
                                "path" : "d_",
                                "query" : {
                                    "bool" : {
                                        "must" : [{
                                                "wildcard" : {
                                                    "d_.scientificname" : {
                                                        "value" : "parapolystichum effusum*"
                                                    }
                                                }
                                            }
                                        ]
                                    }
                                }
                            }
                        }
                    ]
                }
            }
        }
    },
    "aggs" : {
        "general" : {
            "nested" : {"path" : "d_"},
            "aggs" : {
                "scientificname" : {
                    "terms" : {
                        "field" : "d_.scientificname",
                        "size" : 20,
                        "include" : {
                            "pattern" : "parapolystichum effusum*",
                            "flags" : "CANON_EQ|CASE_INSENSITIVE"
                        }
                    },
                    "aggs" : {
                        "scientificnameauthorship" : {
                            "terms" : {
                                "field" : "d_.scientificnameauthorship",
                                "size" : 10
                            }
                        }
                    }
                }
            }
        }
    }
}

Any suggestions are welcome, especially a shorter working answer.

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.