[why] get multi-field aggregation count for overview of report page.
[example] es = ESQuery.new query_items = { 'os' => 'openeuler' } field1 = 'suite' field2 = 'job_state' es.query_fields(field1, field2, query_items) [input]: field1, field2, query_items (optional for query_items, default no scope limitation) [output]: {"build-pkg"=> {"failed"=>3804, "finished"=>800, "incomplete"=>196, "submit"=>136, "OOM"=>11}, "cci-depends"=>{"finished"=>580, "failed"=>218, "incomplete"=>1}, "iperf"=>{"finished"=>136, "failed"=>57}, "spinlock"=>{"finished"=>52, "failed"=>1}, ... }
Signed-off-by: Lu Kaiyi 2392863668@qq.com --- lib/es_query.rb | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+)
diff --git a/lib/es_query.rb b/lib/es_query.rb index 88a420f..d881f6b 100644 --- a/lib/es_query.rb +++ b/lib/es_query.rb @@ -87,6 +87,52 @@ class ESQuery
result end + + # select field1, field2, count(*) from index where query_items group by field1, field2 + # example: + # es = ESQuery.new + # query_items = { + # 'os' => 'openeuler' + # } + # field1 = 'suite' + # field2 = 'job_state' + # es.query_fields(field1, field2, query_items) + # input: + # field1, field2, query_items + # (optional for query_items, default no scope limitation) + # output: + # {"build-pkg"=> + # {"failed"=>3804, "finished"=>800, "incomplete"=>196, "submit"=>136, "OOM"=>11}, + # "cci-depends"=>{"finished"=>580, "failed"=>218, "incomplete"=>1}, + # "iperf"=>{"finished"=>136, "failed"=>57}, + # "spinlock"=>{"finished"=>52, "failed"=>1}, + # ... + # } + def query_fields(field1, field2, query_items = {}) + query = { + query: { + bool: { + must: build_multi_field_subquery_body(query_items) + } + }, + aggs: { + "all_#{field1}" => { + terms: { field: field1, size: 1000 }, + aggs: { + "all_#{field2}" => { + terms: { field: field2, size: 1000 } + } + } + } + }, + size: 0 + } + es_result = @client.search(index: @index + '*', body: query)['aggregations']["all_#{field1}"]['buckets'] + es_result.sort_by! { |h| h['doc_count'] } + es_result.reverse! + + parse_fields(es_result, field2) + end end
# Range Query Example: @@ -146,3 +192,16 @@ def assign_desc_body(keyword) }] } end + +def parse_fields(es_result, field) + result_hash = {} + es_result.each do |result| + key = result['key'] + all_field = result["all_#{field}"]['buckets'] + field_count = {} + all_field.map { |item| field_count[item['key']] = item['doc_count'] } + result_hash[key] = field_count unless field_count.empty? + end + + result_hash +end
On Tue, Dec 29, 2020 at 03:23:16PM +0800, Lu Kaiyi wrote:
[why] get multi-field aggregation count for overview of report page.
[example] es = ESQuery.new query_items = { 'os' => 'openeuler' } field1 = 'suite' field2 = 'job_state' es.query_fields(field1, field2, query_items) [input]: field1, field2, query_items (optional for query_items, default no scope limitation) [output]: {"build-pkg"=> {"failed"=>3804, "finished"=>800, "incomplete"=>196, "submit"=>136, "OOM"=>11}, "cci-depends"=>{"finished"=>580, "failed"=>218, "incomplete"=>1}, "iperf"=>{"finished"=>136, "failed"=>57}, "spinlock"=>{"finished"=>52, "failed"=>1}, ... }
Signed-off-by: Lu Kaiyi 2392863668@qq.com
lib/es_query.rb | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+)
diff --git a/lib/es_query.rb b/lib/es_query.rb index 88a420f..d881f6b 100644 --- a/lib/es_query.rb +++ b/lib/es_query.rb @@ -87,6 +87,52 @@ class ESQuery
result
end
- # select field1, field2, count(*) from index where query_items group by field1, field2
- # example:
- # es = ESQuery.new
- # query_items = {
- # 'os' => 'openeuler'
- # }
- # field1 = 'suite'
- # field2 = 'job_state'
- # es.query_fields(field1, field2, query_items)
# es = ESQuery.new # query_items = { # 'os' => 'openeuler' # } # field1 = 'suite' # field2 = 'job_state' # es.query_fields(field1, field2, query_items)
Do we need to explain basic Ruby knowledge in a comment? Could these lines be deleted?
- # input:
- # input:
- # field1, field2, query_items
- # (optional for query_items, default no scope limitation)
- # output:
- # {"build-pkg"=>
- # {"failed"=>3804, "finished"=>800, "incomplete"=>196, "submit"=>136, "OOM"=>11},
- # "cci-depends"=>{"finished"=>580, "failed"=>218, "incomplete"=>1},
- # "iperf"=>{"finished"=>136, "failed"=>57},
- # "spinlock"=>{"finished"=>52, "failed"=>1},
- # ...
- # }
- def query_fields(field1, field2, query_items = {})
- query = {
query: {
bool: {
must: build_multi_field_subquery_body(query_items)
}
},
aggs: {
"all_#{field1}" => {
terms: { field: field1, size: 1000 },
aggs: {
"all_#{field2}" => {
terms: { field: field2, size: 1000 }
}
}
}
},
What if three levels of aggregation are needed? How about taking aggs as an input parameter, so that the caller builds the aggregation?
size: 0
- }
- es_result = @client.search(index: @index + '*', body: query)['aggregations']["all_#{field1}"]['buckets']
- es_result.sort_by! { |h| h['doc_count'] }
- es_result.reverse!
Why sort and reverse here? What if the caller does not need these operations?
- parse_fields(es_result, field2)
Do we need to parse the result in such a low-level library?
Thanks, Weitao
- end
end
# Range Query Example: @@ -146,3 +192,16 @@ def assign_desc_body(keyword) }] } end
+def parse_fields(es_result, field)
- result_hash = {}
- es_result.each do |result|
- key = result['key']
- all_field = result["all_#{field}"]['buckets']
- field_count = {}
- all_field.map { |item| field_count[item['key']] = item['doc_count'] }
- result_hash[key] = field_count unless field_count.empty?
- end
- result_hash
+end
2.23.0