aparo / pyes

Python connector for ElasticSearch - the pythonic way to use ElasticSearch
BSD 3-Clause "New" or "Revised" License
607 stars 270 forks source link

ES5 returning data in scroll for the first data response along with scroll_id #518

Open spendyala opened 6 years ago

spendyala commented 6 years ago
 def _do_search(self, auto_increment=False):
        """Load one chunk of results into ``self._results`` and post-process it.

        When no scroll is open (``self.scroller_id is None``) a regular search
        is issued through ``self._search_raw``. If the response carries a
        ``_scroll_id`` it is remembered, so the following calls continue that
        scroll through ``self.connection.search_scroll`` instead.

        ``self._post_process_query()`` runs after every fetch, including the
        response that opens a scroll, so hits returned together with the
        scroll id are not skipped.

        :param auto_increment: if true, advance ``self.start`` by
            ``self.chuck_size`` before issuing a non-scroll search.
        """
        # Restart the iteration position for the chunk about to be loaded.
        self.iterpos = 0

        if self.scroller_id is not None:
            # A scroll is already open: ask for its next page.
            try:
                self._results = self.connection.search_scroll(
                    self.scroller_id,
                    self.scroller_parameters.get("scroll", "10m"))
                self.scroller_id = self._results['_scroll_id']
            except ReduceSearchPhaseException:
                # HACK: presumably raised once the scroll has no hits left
                # (TODO confirm); keep the previous response object and just
                # empty its hit list.
                self._results['hits']['hits'] = []
        else:
            if auto_increment:
                self.start += self.chuck_size

            self._results = self._search_raw(self.start, self.chuck_size)

            # The search above has already run; only now are the scan/scroll
            # settings moved out of query_params into scroller_parameters.
            search_type = self.query_params.pop("search_type", None)
            if search_type == "scan":
                self.scroller_parameters['search_type'] = "scan"
                if 'scroll' in self.query_params:
                    self.scroller_parameters['scroll'] = self.query_params.pop('scroll')
                if 'size' in self.query_params:
                    self.chuck_size = self.scroller_parameters['size'] = self.query_params.pop('size')

            if '_scroll_id' in self._results:
                # The response opened a scroll: keep its id for later calls.
                self.scroller_id = self._results['_scroll_id']

        self._post_process_query()

ES5 returns results in the first bulk of data of a scroll search, along with the scroll_id.