# Source code for src.data._utils
import pandas as pd
def query_es(client, body=None, index_query='*', scroll='2s'):
    """
    Query an Elasticsearch cluster and return every matching document.

    The indices matching ``index_query`` are resolved first; each one is
    then searched separately and paged through with the Scroll API until a
    page comes back empty.

    Parameters
    ----------
    client : elasticsearch.Elasticsearch
        Client used for the ``indices.get_alias``, ``search`` and ``scroll``
        requests.
    body : dict or NoneType, default=None
        The search definition using the Query DSL.
    index_query : str, default='*'
        Search over indices matching passed index query.
    scroll : str, default='2s'
        Length of time to keep search context.

    Returns
    -------
    df : DataFrame
        Pandas DataFrame with one row per hit, built from each hit's
        ``_source``. Empty when nothing matches.

    Notes
    -----
    The scroll contexts opened here are not cleared explicitly; they are
    left to expire after the ``scroll`` timeout.
    """
    rows = []

    # Pass the pattern by keyword: recent elasticsearch-py releases accept
    # API parameters by keyword only, and older ones accept `index=` too.
    indices = client.indices.get_alias(index=index_query)

    for index in indices:
        # The first page comes from search(); it also opens the scroll context.
        resp = client.search(
            index=index,
            body=body,
            scroll=scroll,
        )
        hits = resp['hits']['hits']

        # An empty page marks the end of the result set for this index.
        while hits:
            # Keep only the document bodies rather than the full hit envelopes.
            rows.extend(hit['_source'] for hit in hits)
            # Always continue from the most recently returned scroll id.
            resp = client.scroll(
                scroll_id=resp['_scroll_id'],
                scroll=scroll,
            )
            hits = resp['hits']['hits']

    return pd.DataFrame(rows)