Before:
When comparing by dimension, we first query 100 (the default size)
jobs by filters, then group these jobs by dimension. But if one kind of
job has been run too many times, other kinds of jobs may not be fetched at all.
After:
First query which distinct values of the dimension exist in ES. Taking
dimension = 'os' as an example, the result can be
["openeuler", "archlinux", "centos", "debian", "kylin", "archLinux"]
Then query 100 jobs for each os value, combine the results, and group all of these jobs.
Signed-off-by: Li Yuanchao <lyc163mail(a)163.com>
---
lib/compare.rb | 2 +-
lib/matrix2.rb | 3 +-
src/lib/web_backend.rb | 64 ++++++++++++++++++++++++++++--------------
3 files changed, 45 insertions(+), 24 deletions(-)
diff --git a/lib/compare.rb b/lib/compare.rb
index db264f8..da761ef 100644
--- a/lib/compare.rb
+++ b/lib/compare.rb
@@ -69,7 +69,7 @@ end
def create_groups_matrices_list(conditions, dims)
es = ESQuery.new(ES_HOST, ES_PORT)
query_results = es.multi_field_query(conditions)
- combine_group_query_data(query_results, dims)
+ combine_group_query_data(query_results['hits']['hits'], dims)
end
# -------------------------------------------------------------------------------------------
diff --git a/lib/matrix2.rb b/lib/matrix2.rb
index 2507b77..b6cfa79 100644
--- a/lib/matrix2.rb
+++ b/lib/matrix2.rb
@@ -109,9 +109,8 @@ end
# group2_key => {...}
# ...
# }
-def combine_group_query_data(query_data, dims)
+def combine_group_query_data(job_list, dims)
suites_list = []
- job_list = query_data['hits']['hits']
groups = auto_group(job_list, dims)
groups.each do |group_key, value|
if value.empty?
diff --git a/src/lib/web_backend.rb b/src/lib/web_backend.rb
index c4182a3..a680b8a 100644
--- a/src/lib/web_backend.rb
+++ b/src/lib/web_backend.rb
@@ -34,7 +34,7 @@ ALL_FIELDS = FIELDS + NOT_SHOW_FIELDS
NOT_NEED_EXIST_FIELDS = %w[error_ids upstream_repo].freeze
PREFIX_SEARCH_FIELDS = ['tbox_group'].freeze
ES_CLIENT = Elasticsearch::Client.new(url: "http://#{ES_HOST}:#{ES_PORT}")
-COMPARE_RECORDS_NUMBER = 50
+COMPARE_RECORDS_NUMBER = 100
def es_query(query)
ES_CLIENT.search index: 'jobs*', body: query
@@ -148,30 +148,21 @@ def get_es_must(params)
must
end
-def do_get_groups_matrices(query, dimension, total, size, from)
- result = es_query(query)
- matrices, suites_list = combine_group_query_data(result, dimension)
- while matrices.empty?
- from += size
- break if from > total
-
- query[:from] = from
- result = es_query(query)
- matrices, suites_list = combine_group_query_data(result, dimension)
- end
- [matrices, suites_list]
+def get_dimension_list(dimension)
+ query = { size: 0, aggs: { dims: { terms: { size: 10000, field: dimension[0] } } } }
+ buckets = es_query(query)['aggregations']['dims']['buckets']
+ dimension_list = []
+ buckets.each { |dims_agg| dimension_list << dims_agg['key'] }
+ return dimension_list
end
-def get_groups_matrices(conditions, dimension, must, size, from)
- must += build_multi_field_subquery_body(conditions)
- count_query = { query: { bool: { must: must } } }
- total = es_count(count_query)
- return {} if total < 1
-
+def query_dimension(dim_field, dim_value, must, size, from)
+ must_dim = Array.new(must)
+ must_dim << { term: { dim_field => dim_value } }
query = {
query: {
bool: {
- must: must
+ must: must_dim
}
},
size: size,
@@ -180,8 +171,39 @@ def get_groups_matrices(conditions, dimension, must, size, from)
start_time: { order: 'desc' }
}]
}
+ es_query(query)['hits']['hits']
+end
+
+def get_dimension_job_list(dimension, must, size, from)
+ dimension_list = get_dimension_list(dimension)
+ job_list = []
+ dimension_list.each do |dim|
+ job_list += query_dimension(dimension[0], dim, must, size, from)
+ end
+ job_list
+end
+
+def do_get_groups_matrices(must, dimension, total, size, from)
+ job_list = get_dimension_job_list(dimension, must, size, from)
+
+ matrices, suites_list = combine_group_query_data(job_list, dimension)
+ while matrices.empty?
+ from += size
+ break if from > total
+
+ job_list = get_dimension_job_list(dimension, must, size, from)
+ matrices, suites_list = combine_group_query_data(job_list, dimension)
+ end
+ [matrices, suites_list]
+end
+
+def get_groups_matrices(conditions, dimension, must, size, from)
+ must += build_multi_field_subquery_body(conditions)
+ count_query = { query: { bool: { must: must } } }
+ total = es_count(count_query)
+ return {} if total < 1
- do_get_groups_matrices(query, dimension, total, size, from)
+ do_get_groups_matrices(must, dimension, total, size, from)
end
def get_compare_body(params)
--
2.23.0