Closed linubajy closed 4 years ago
Closing because this is a duplicate of https://github.com/allenai/allennlp/issues/4280
Closing because this is a duplicate of #4280
I haven't received an answer to either issue.
Closing because this is a duplicate of #4280
I haven't received an answer to either issue.
Is your question here different from your question in #4280? As far as I can tell, you've asked the same question twice, which is why I closed this issue.
I have built a QA system using one of the AllenNLP pretrained BiDAF models. Is there any way that I can get the accuracy (or any other metric) of the answer that the model has extracted using the BiDAF model? Attaching the code below. Any small help would be appreciated.
` from rake_nltk import Rake from string import punctuation from nltk.corpus import stopwords from allennlp.predictors.predictor import Predictor import spacy import wikipedia import re import requests from requests_html import HTMLSession from bs4 import BeautifulSoup import traceback from nltk.stem import SnowballStemmer from nltk.util import ngrams from math import log10 from flask import Flask, request, jsonify, render_template from gevent.pywsgi import WSGIServer import time import multiprocessing as mp from gtts import gTTS import os
# --- Module-level setup: NLP pipelines, AllenNLP predictors, shared globals ---
NLP = spacy.load('en_core_web_md')
stop = stopwords.words('english')
symbol = r"""!#$%^&*();:\n\t\\"!{}[]<>-\?"""
stemmer = SnowballStemmer('english')
wikipedia.set_rate_limiting(True)
session = HTMLSession()
results = 5

# Prefer a local model archive; fall back to downloading the public one.
# BUG FIX: the pasted code used bare `except:`, which also swallows
# KeyboardInterrupt/SystemExit — narrowed to Exception.
try:
    predictor = Predictor.from_path("bidaf-model-2017.09.15-charpad.tar.gz")
except Exception:
    predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/bidaf-elmo-model-2018.11.30-charpad.tar.gz")

try:
    srl = Predictor.from_path('srl-model-2018.05.25.tar.gz')
except Exception:
    srl = Predictor.from_path('https://s3-us-west-2.amazonaws.com/allennlp/models/bert-base-srl-2019.06.17.tar.gz')

# Keyword extractor; question words are added to the stopword list so they
# are not treated as content words downstream.
key = Rake(min_length=1, stopwords=stop, punctuations=punctuation, max_length=6)
wh_words = "who|what|how|where|when|why|which|whom|whose|explain".split('|')
stop.extend(wh_words)
# NOTE: the original assigned `session = HTMLSession()` a second time here;
# the duplicate assignment was redundant and has been removed.
output = mp.Queue()
def termFrequency(term, doc):
    """Return the normalized frequency of (stemmed) *term* in *doc*.

    The document is lower-cased, stripped of bracket characters, stemmed
    token by token, re-joined, and the count of *term* is divided by the
    length of the normalized document.

    NOTE(review): the pasted snippet was mangled by markdown — it computed
    `term_in_document` and `len_of_document` but never returned anything.
    The return below reconstructs the standard TF ratio; confirm against
    the original file.
    """
    # BUG FIX: the pasted pattern '[[]{}()]' only matched the literal
    # sequence "[{}]"; a character class stripping all bracket characters
    # was almost certainly intended.
    normalized = re.sub(r'[\[\]{}()]', '', doc.lower()).split()
    normalized = [stemmer.stem(token) for token in normalized]
    dl = len(normalized)  # token count (kept from original; unused below)
    normalized = ' '.join(normalized)
    term_in_document = normalized.count(term)
    len_of_document = len(normalized)
    # Guard against an empty document to avoid ZeroDivisionError.
    return term_in_document / len_of_document if len_of_document else 0
def inverseDocumentFrequency(term, allDocs):
    """Return the smoothed IDF of *term* over *allDocs*, or 0 if absent.

    Three-word terms whose first and last tokens are both content words
    (i.e. not in the module-level `stop` list) receive a 1.5x boost.
    """
    num_docs_with_given_term = 0
    for doc in allDocs:
        if term in doc:
            num_docs_with_given_term += 1
    if num_docs_with_given_term > 0:
        total_num_docs = len(allDocs)
        # +1 smoothing on the numerator, as in the original.
        idf_val = log10((total_num_docs + 1) / num_docs_with_given_term)
        term_split = term.split()
        if len(term_split) == 3:
            # Boost when both outer words of a trigram are non-stopwords.
            if len([term_split[i] for i in [0, 2] if term_split[i] not in stop]) == 2:
                return idf_val * 1.5
        return idf_val
    else:
        return 0


def sent_formation(question, answer):
    """Re-case *question* so nouns/proper nouns are title-cased.

    NOTE(review): markdown stripped underscores from the pasted source, so
    the identifiers below are reconstructed — verify against the original
    file.  The *answer* parameter is unused in the visible portion; the
    function appears truncated (no return statement survives the paste).
    """
    tags_doc = NLP(question)
    tags_doc_cased = NLP(question.title())
    # BUG FIX: the paste built dicts named `tags_dictcased`/`tagsdict` but
    # read `tags_dict` (NameError), and keyed them on the bound method
    # `i.lower` rather than the lowered token text.
    tags_dict_cased = {tok.text.lower(): tok.pos_ for tok in tags_doc_cased}
    tags_dict = {tok.text.lower(): tok.pos_ for tok in tags_doc}
    question_cased = []
    # Walk the question minus its final character (presumably '?').
    for word in question[:-1].split():
        if tags_dict[word.lower()] == 'PROPN' or tags_dict[word.lower()] == 'NOUN':
            question_cased.append(word.title())
        else:
            question_cased.append(word.lower())
    question_cased.append('?')
    question_cased = ' '.join(question_cased)
    # Free the spaCy docs/dicts once the recased question is built.
    del tags_dict, tags_doc, tags_doc_cased
class extractAnswer:
    """Answer-extraction helper wrapping the Wikipedia/AllenNLP pipeline.

    NOTE(review): only the constructor and two class-level attributes are
    visible in this paste; methods such as `extractAnswer_model` and `wiki`
    (called from the Flask routes) are referenced elsewhere but not shown.
    """

    # Characters to strip from scraped text.
    symbol = """!#$%^&*();:\n\t\\"!{}[]<>-\?"""
    # Cache of Wikipedia disambiguation titles already encountered.
    Disambiguation_title = {}

    def __init__(self):
        # BUG FIX: markdown stripped the dunder underscores — the pasted
        # code defined `init`, which Python never calls on construction.
        # Wikipedia exceptions treated as recoverable lookup failures.
        self.wiki_error = (wikipedia.exceptions.DisambiguationError,
                           wikipedia.exceptions.HTTPTimeoutError,
                           wikipedia.exceptions.WikipediaException)
        self.article_title = None
extractor = extractAnswer()
# BUG FIX: markdown stripped the dunders — Flask must be constructed with
# __name__, not the undefined `name`.
app = Flask(__name__)


@app.route("/", methods=["POST", "get"])
@app.route("/ans")
def ans():
    """Answer a question from a passage or from Wikipedia.

    Query params: `question` (required), `topic` (optional hint), and
    `passage` (optional; when given, extraction runs on it directly).
    Synthesizes the answer to welcome.mp3 via gTTS when one is found.

    NOTE(review): the pasted handler ends after saving the mp3 without
    returning a response — the original presumably rendered a template
    with the answer; confirm against the full source.
    """
    start = time.time()
    question = request.args.get('question')
    topic = request.args.get('topic')
    passage = request.args.get('passage')
    if not question:
        # No question yet: show the input form.
        return render_template('p.html')
    if not topic:
        topic = ''
    if passage:
        answer = extractor.extractAnswer_model(passage, question)
    else:
        answer, title = extractor.wiki(question, topic)
    end = time.time()
    if answer:
        mytext = str(answer)
        language = 'en'
        myobj = gTTS(text=mytext, lang=language, slow=False)
        myobj.save("welcome.mp3")


prevName = 'welcome.mp3'
@app.route("/audio_del/", methods=["POST", "get"])
def audio_del():
    """Render the landing page (placeholder for audio-file cleanup)."""
    return render_template('p.html')
@app.route("/audio_play/", methods=["POST", "get"])
def audio_play():
    """Play the generated speech file via mpg321, then render the page."""
    os.system("mpg321 welcome.mp3")
    return render_template('white.html')
# BUG FIX: markdown stripped the dunders — the guard must compare
# __name__ against "__main__", otherwise the server never starts.
if __name__ == "__main__":
    PORT = 7091
    # Serve the Flask app with gevent's production WSGI server.
    HTTP_SERVER = WSGIServer(('0.0.0.0', PORT), app)
    print('Running on', PORT, '...')
    HTTP_SERVER.serve_forever()
`