varepsilon / clickmodels

ClickModels is a small set of Python scripts for the user click models initially developed at Yandex. A Click Model is a probabilistic graphical model used to predict search engine click data from past observations. This project aims to cover the click models used in Information Retrieval (see next README.md) and is intended to be easy to read and easy to modify. If it's not, please let me know how to improve it :)
BSD 3-Clause "New" or "Revised" License
238 stars 71 forks source link

UBM model doesn't update across iterations? #8

Closed jimmy-walker closed 5 years ago

jimmy-walker commented 5 years ago

I am reading the code, and the UBM code below looks weird to me. `alphaFractions` is re-initialized at the start of each iteration, with every entry set to `[1.0, 2.0]`. But the code then assigns each iteration's `alphaFractions` result directly to `self.alpha`: `self.alpha[i][q][url] = new_alpha`. Does this mean that it will never change across iterations? Should it instead be initialized like this at the start of each iteration? `alphaFractions = copy.deepcopy(self.alpha)`

        # Each pass of this loop is one E-step / M-step round. The *Fractions
        # accumulators are rebuilt from scratch every pass, but the E-step fills
        # them using the current self.alpha / self.gamma / self.e, so the values
        # written back in the M-step do depend on the previous pass.
        for iteration_count in xrange(self.config.get('MAX_ITERATIONS', MAX_ITERATIONS)):
            # Per-query list of p_I__C_G[True] values; refilled on every pass.
            self.queryIntentsWeights = defaultdict(lambda: [])
            # not like in DBN! xxxFractions[0] is a numerator while xxxFraction[1] is a denominator
            # Every accumulator starts at [1.0, 2.0]. alphaFractions is lazy (defaultdict),
            # so only (intent, query, url) triples touched in the E-step exist in it.
            alphaFractions = dict((i, [defaultdict(lambda: [1.0, 2.0]) for q in xrange(max_query_id)]) for i in possibleIntents)
            # gammaFractions / eFractions are dense: entries that receive no E-step
            # contribution are still written back in the M-step as 1.0 / 2.0 = 0.5.
            gammaFractions = [[[[1.0, 2.0] \
                for d in xrange(self.config.get('MAX_DOCS_PER_QUERY', MAX_DOCS_PER_QUERY))] \
                    for r in xrange(self.config.get('MAX_DOCS_PER_QUERY', MAX_DOCS_PER_QUERY))] \
                        for g in xrange(self.gammaTypesNum)]
            if self.explorationBias:
                eFractions = [[1.0, 2.0] \
                        for p in xrange(self.config.get('MAX_DOCS_PER_QUERY', MAX_DOCS_PER_QUERY))]
            # E-step
            for s in sessions:
                query = s.query
                # With ignoreLayout an all-False layout is handed to getGamma; the
                # exploration-bias code below still reads s.layout directly.
                layout = [False] * len(s.layout) if self.ignoreLayout else s.layout
                if self.explorationBias:
                    # True when at least one position has both a truthy layout flag and a truthy click.
                    explorationBiasPossible = any((l and c for (l, c) in zip(s.layout, s.clicks)))
                    # -1 when no entry of s.layout[:-1] is truthy, otherwise the index
                    # of the first truthy entry of s.layout.
                    firstVerticalPos = -1 if not any(s.layout[:-1]) else [k for (k, l) in enumerate(s.layout) if l][0]
                # p_I__C_G maps each intent value (False / True) to this session's weight for it.
                if self.ignoreIntents:
                    p_I__C_G = {False: 1.0, True: 0}
                else:
                    # a and b are normalised below so the two weights sum to 1.
                    # NOTE(review): _getSessionProb is not visible here; presumably the
                    # session likelihood under the current parameters -- confirm.
                    a = self._getSessionProb(s) * (1 - s.intentWeight)
                    b = 1 * s.intentWeight
                    p_I__C_G = {False: a / (a + b), True: b / (a + b)}
                self.queryIntentsWeights[query].append(p_I__C_G[True])
                # Rank of the most recent click seen so far in this session (-1 = none yet).
                prevClick = -1
                for rank, c in enumerate(s.clicks):
                    url = s.results[rank]
                    for intent in possibleIntents:
                        # `a` is reused: from here on it is the current alpha for this
                        # (intent, query, url), not the session weight computed above.
                        a = self.alpha[intent][query][url]
                        if self.explorationBias and explorationBiasPossible:
                            e = self.e[firstVerticalPos]
                        if c == 0:
                            # No click at this rank: numerators get fractional contributions
                            # computed from the current alpha and gamma.
                            g = self.getGamma(self.gamma, rank, prevClick, layout, intent)
                            gCorrection = 1
                            # NOTE(review): `k` is not bound by any enclosing loop here. The only
                            # visible binding is the list-comprehension variable in the
                            # firstVerticalPos expression, which leaks into this scope in
                            # Python 2 (and is unset when that comprehension did not run).
                            # `rank` may be what is intended -- confirm before relying on this.
                            if self.explorationBias and explorationBiasPossible and not s.layout[k]:
                                gCorrection = 1 - e
                                g *= gCorrection
                            alphaFractions[intent][query][url][0] += a * (1 - g) / (1 - a * g) * p_I__C_G[intent]
                            # g / gCorrection undoes the scaling applied to g just above.
                            self.getGamma(gammaFractions, rank, prevClick, layout, intent)[0] += g / gCorrection * (1 - a) / (1 - a * g) * p_I__C_G[intent]
                            if self.explorationBias and explorationBiasPossible:
                                eFractions[firstVerticalPos][0] += (e if s.layout[k] else e / (1 - a * g)) * p_I__C_G[intent]
                        else:
                            # Click at this rank: alpha and gamma numerators get the full intent weight.
                            alphaFractions[intent][query][url][0] += 1 * p_I__C_G[intent]
                            self.getGamma(gammaFractions, rank, prevClick, layout, intent)[0] += 1 * p_I__C_G[intent]
                            if self.explorationBias and explorationBiasPossible:
                                eFractions[firstVerticalPos][0] += (e if s.layout[k] else 0) * p_I__C_G[intent]
                        # Denominators grow by the intent weight whether or not there was a click.
                        alphaFractions[intent][query][url][1] += 1 * p_I__C_G[intent]
                        self.getGamma(gammaFractions, rank, prevClick, layout, intent)[1] += 1 * p_I__C_G[intent]
                        if self.explorationBias and explorationBiasPossible:
                            eFractions[firstVerticalPos][1] += 1 * p_I__C_G[intent]
                    # Updated only after all intents have been processed for this rank,
                    # so every intent sees the same prevClick.
                    if c != 0:
                        prevClick = rank
            # Without PRETTY_LOG, emit a progress marker after each step.
            if not self.config.get('PRETTY_LOG', PRETTY_LOG):
                sys.stderr.write('E')
            # M-step
            # Each parameter becomes numerator / denominator of its accumulator;
            # the squared change of every updated parameter is summed along the way.
            sum_square_displacement = 0.0
            for i in possibleIntents:
                for q in xrange(max_query_id):
                    # Only urls touched in the E-step are present, so other alpha entries keep their value.
                    for url, aF in alphaFractions[i][q].iteritems():
                        new_alpha = aF[0] / aF[1]
                        sum_square_displacement += (self.alpha[i][q][url] - new_alpha) ** 2
                        self.alpha[i][q][url] = new_alpha
            for g in xrange(self.gammaTypesNum):
                for r in xrange(self.config.get('MAX_DOCS_PER_QUERY', MAX_DOCS_PER_QUERY)):
                    for d in xrange(self.config.get('MAX_DOCS_PER_QUERY', MAX_DOCS_PER_QUERY)):
                        gF = gammaFractions[g][r][d]
                        new_gamma = gF[0] / gF[1]
                        sum_square_displacement += (self.gamma[g][r][d] - new_gamma) ** 2
                        self.gamma[g][r][d] = new_gamma
            if self.explorationBias:
                for p in xrange(self.config.get('MAX_DOCS_PER_QUERY', MAX_DOCS_PER_QUERY)):
                    new_e = eFractions[p][0] / eFractions[p][1]
                    sum_square_displacement += (self.e[p] - new_e) ** 2
                    self.e[p] = new_e
            if not self.config.get('PRETTY_LOG', PRETTY_LOG):
                sys.stderr.write('M\n')
            # Square root of the summed squared changes; the sum is not divided
            # by the number of parameters before taking the root.
            rmsd = math.sqrt(sum_square_displacement)
            # PRETTY_LOG prints a compact "N.." counter; otherwise a full line with the displacement.
            if self.config.get('PRETTY_LOG', PRETTY_LOG):
                sys.stderr.write('%d..' % (iteration_count + 1))
            else:
                print >>sys.stderr, 'Iteration: %d, ERROR: %f' % (iteration_count + 1, rmsd)
jimmy-walker commented 5 years ago

Sorry, I misunderstood it. I appreciate your work!