Closed vieyahn2017 closed 3 months ago
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
默认输入是
parse_console.py
把xxxx产生的题目console.txt,去噪转化为exam.txt
内容为:
1
在对n个元素进行快速排序的过程中,最坏情况下需要进行多少趟排序?
A. n
B. n-1
C. n/2
D. logn
2
以下哪一项不属于工程化软件开发所面临的根本性困难?
A. 软件是庞大的逻辑产品
B. 软件开发工具所提供的开发支持弱
C. 软件的复杂程度高
D. 软件系统的问题空间和解空间之间的巨大鸿沟
"""
import sys
import time
import datetime
import json
import re
import codecs
import difflib
import traceback
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# that the implicit str <-> unicode conversions in this script use UTF-8.
# NOTE(review): neither the reload() builtin nor sys.setdefaultencoding exists
# on Python 3, so these two lines fail there.
reload(sys)
sys.setdefaultencoding('utf8')
def string_similar(s1, s2):
    """Return a similarity score between two strings.

    The score is difflib.SequenceMatcher(...).quick_ratio(), a float between
    0 and 1 where 1 means the two strings contain the same characters.

    On Python 2, unicode arguments are first converted with str() so that both
    sides are compared as the same string type.  On Python 3 the ``unicode``
    name does not exist and the arguments are compared unchanged.

    :param s1: first string
    :param s2: second string
    :return: similarity ratio as a float
    """
    try:
        unicode_type = unicode  # noqa: F821 -- only defined on Python 2
    except NameError:
        unicode_type = None
    if unicode_type is not None:
        if isinstance(s1, unicode_type):
            s1 = str(s1)
        if isinstance(s2, unicode_type):
            s2 = str(s2)
    return difflib.SequenceMatcher(None, s1, s2).quick_ratio()
# Disabled alternative: the same kind of timestamp, shifted one hour into the past.
#strtime = (datetime.datetime.now() - datetime.timedelta(hours=1)).strftime("%Y%m%d%H%M%S")
# Timestamp computed once when the module is loaded; the functions below use it
# as the prefix of the file names they write.
strtime = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
class Question(object):
    """One exam question with its options, answer and bookkeeping fields.

    subjectType is always compared through str(): "1" is single choice, "2" is
    multiple choice and "3" is true/false.  make_question_with_head() stores 0
    because it cannot tell the type from the text alone.
    """

    def __init__(self, subjectTitle, optionList=None, subjectType="1", answer="", subjectId="", lineNo=0):
        """Store the question fields.

        :param subjectTitle: question text
        :param optionList: list of {"optionId": ..., "optionTitle": ...} dicts;
            None (the default) means "no options"
        :param subjectType: "1", "2", "3" (or 0 for unknown)
        :param answer: answer text / option id(s); empty when unknown
        :param subjectId: id of the question in the source system, if any
        :param lineNo: ordinal of the question in the exam being searched
        """
        self.subjectTitle = subjectTitle
        # A fresh list per instance: a shared mutable default would leak
        # options from one question into the next.
        self.optionList = [] if optionList is None else optionList
        self.subjectType = subjectType
        self.subjectId = subjectId
        self.answer = answer
        self.lineNo = lineNo
        self.language = ""

    def set_answer(self, answer):
        """Replace the stored answer."""
        self.answer = answer

    def set_language(self, language):
        """Replace the stored language tag."""
        self.language = language

    def set_lineno(self, lineNo):
        """Replace the stored exam ordinal."""
        self.lineNo = lineNo

    def get_json(self, is_print=False):
        """Serialize the question to an indented JSON object string.

        lineNo is not part of the output.

        :param is_print: when true, also print the JSON text
        :return: the JSON text
        """
        json_item = {
            "subjectTitle": self.subjectTitle,
            "subjectType": self.subjectType,
            "answer": self.answer,
            "language": self.language,
            "optionList": self.optionList,
            "subjectId": self.subjectId,
        }
        item_json = json.dumps(json_item, sort_keys=True, indent=4, ensure_ascii=False)
        if is_print:
            print(item_json)
        return item_json

    @staticmethod
    def load_from_json(item):
        """Build a Question from a decoded JSON object (a dict).

        Missing keys become None, except a missing "optionList", which becomes
        an empty list.  "language" is only applied when it is non-empty.
        """
        question = Question(
            item.get("subjectTitle"),
            item.get("optionList"),
            item.get("subjectType"),
            item.get("answer"),
            item.get("subjectId"),
        )
        question_language = item.get("language")
        if question_language:
            question.set_language(question_language)
        return question

    def print_question(self):
        """Print the question title followed by the text of its answer.

        For type "3" the raw answer is printed; for type "1" the title of the
        option whose id equals the answer; for type "2" every option whose id
        is among the answers (letters such as "ACD", or ids joined by "&").
        """
        print("----------")
        if str(self.lineNo) != "0":
            print(self.lineNo)
        # NOTE(review): as written this replace() swaps a space for a space.
        # Presumably the first argument was a non-breaking space or "&nbsp;"
        # before the file was pasted -- confirm against the real source.
        print(self.subjectTitle.replace(" ", " "))
        question_type = str(self.subjectType)
        if question_type == "3":
            print(self.answer)
        elif question_type == "1":
            for option_item in self.optionList:
                if option_item.get("optionId") == self.answer:
                    print(option_item.get("optionTitle"))
                    break
        elif question_type == "2":
            question_answer = str(self.answer)
            if re.match(r'[A-N]+$', question_answer):
                # Letter form ("ACD"): every character is one option id.
                answers = list(question_answer)
                print("answer: {}".format(question_answer))
            else:
                # Id form ("123&456"): ids are joined with "&".
                answers = question_answer.split("&")
                print("answer: {}".format(answers))
            for option_item in self.optionList:
                if option_item.get("optionId") in answers:
                    print(option_item.get("optionTitle"))

    def is_similar_question(self, other):
        """Score how closely this question matches ``other`` (0 to 3).

        self is the question being looked up (has lineNo, no answer); other
        is a question from the bank (has answer, no lineNo).

        * Different subjectType (when self has one) -> 0.
        * Both have a language and they differ -> 0.
        * Both have a subjectId and they are equal -> +1.
        * Title similarity (0..1) is added.
        * Type "3" has no options, so the option part counts as a full +1.
        * If this question's option ids do not include "A" (numeric ids),
          +1 is added when both questions have the same set of option ids.
        * Otherwise the option part is the average, over this question's
          options, of the best title similarity found among other's options.

        Roughly: 3 is an exact match, 2 is the same text without a matching
        subjectId (a bank entry worth updating), lower is a new question.
        """
        similarity = 0
        if self.subjectType and str(self.subjectType) != str(other.subjectType):
            return similarity
        if self.language and other.language and str(self.language) != str(other.language):
            return similarity
        if self.subjectId and other.subjectId and str(self.subjectId) == str(other.subjectId):
            similarity += 1

        # Title similarity.
        similarity += string_similar(self.subjectTitle, other.subjectTitle)
        if str(self.subjectType) == "3":
            return similarity + 1

        # Option similarity.
        own_option_ids = set(item.get("optionId") for item in self.optionList)
        if "A" not in own_option_ids:
            other_option_ids = set(item.get("optionId") for item in other.optionList)
            if own_option_ids == other_option_ids:
                similarity += 1
            return similarity

        similarity_options = 0
        for option_item in self.optionList:
            option_item_title = option_item.get("optionTitle")
            scores = [
                string_similar(option_item_title, option_other.get("optionTitle"))
                for option_other in other.optionList
            ]
            # No candidates on the other side contributes 0 for this option.
            similarity_options += max(scores) if scores else 0
        if len(self.optionList) > 0:
            similarity_options = similarity_options / len(self.optionList)
        similarity += similarity_options
        return similarity

    def search_my_answer(self, questions_bank=None):
        """Look this question up in the bank and return the first match.

        The bank is scanned in order and the first entry whose similarity
        exceeds a threshold wins; the matched bank entry gets this question's
        lineNo.

        :param questions_bank: iterable of bank Question objects
        :return: (2, bank_question) for a full match (> 2.8),
                 (1, bank_question) for a match that needs updating (> 1.8),
                 (0, self) when nothing matches
        """
        for question in questions_bank or []:
            similarity = self.is_similar_question(question)
            if similarity > 2.8:
                question.set_lineno(self.lineNo)
                return 2, question
            if similarity > 1.8:
                question.set_lineno(self.lineNo)
                return 1, question
        return 0, self

    @staticmethod
    def load_all_questions(print_type="none", questions_json_file="all.json"):
        """Load the question bank from a JSON file holding a list of objects.

        :param print_type: "json" prints every question as JSON, "line" prints
            it through print_question(); anything else prints nothing per item
        :param questions_json_file: path of the bank file
        :return: list of Question objects
        """
        load_questions = []
        with open(questions_json_file, mode='r') as json_file:
            load_dict = json.load(json_file)
        for item in load_dict:
            question = Question.load_from_json(item)
            load_questions.append(question)
            if print_type == "json":
                question.get_json(True)
            elif print_type == "line":
                question.print_question()
        print("load_all_questions: ")
        print(len(load_questions))
        return load_questions

    @staticmethod
    def append_to_all_questions(questions_json_file="all.json"):
        """Placeholder for updating the bank file in place.

        Not implemented: it only prints a marker.  New questions are written
        to separate files by append_new_questions() instead.
        """
        print("append_to_all_questions: ")

    @staticmethod
    def append_new_questions(append_questions, suffix="append.json"):
        """Write questions to a new "<timestamp>-<suffix>" file.

        The file is a fragment, not a complete JSON document: it starts with a
        line holding a single "," followed by the JSON objects separated by
        ",", with no comma after the last one.

        :param append_questions: iterable of Question objects
        :param suffix: trailing part of the output file name
        """
        outputs = [","]
        for question_item in append_questions:
            outputs.append(question_item.get_json(False) + ",")
        outputs[-1] = outputs[-1][:-1]  # drop the final ","
        strtime3 = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        file3name = "{}-{}".format(strtime3, suffix)
        # "with" closes the file even when the write fails.
        with open(file3name, mode='w') as file3:
            file3.write("\n".join(outputs))

    @staticmethod
    def _find_option_start(content_lines):
        """Return the index of the first option line (the "A" option).

        An option line starts with "A" followed by "、", ".", "," or ",".
        "A" followed by a space only counts when the next line starts with
        "B".  When no option line is found the index of the last line is
        returned.
        """
        last_index = len(content_lines) - 1
        for index, line in enumerate(content_lines):
            if line.startswith(("A、", "A.", "A,", "A,")):
                return index
            # A bare "A " is ambiguous, so require a following "B" line.
            if line.startswith("A ") and index < last_index \
                    and content_lines[index + 1].startswith("B"):
                return index
        return last_index

    @staticmethod
    def _split_options_and_answer(content_lines, option_start):
        """Split the lines from ``option_start`` on into options and answer.

        The last line is the answer only when it consists of letters A..N;
        otherwise the answer is "" and the last line is treated as an option.
        Blank lines are skipped.  The option id is the first character of the
        option line.

        :return: (optionList, answer)
        """
        answer = content_lines[-1]
        if re.match(r'[A-N]+$', answer):
            option_lines = content_lines[option_start:-1]
        else:
            answer = ""
            option_lines = content_lines[option_start:]
        optionList = [
            {"optionId": line[0], "optionTitle": line}
            for line in option_lines
            if line.strip()
        ]
        return optionList, answer

    @staticmethod
    def make_question(content_lines):
        """Build one Question from the text lines of a question with answer.

        Expected layout: one or more title lines, the option lines and a
        final answer line, for example::

            Which statement is correct?
            A. first
            B. second
            C. third
            D. fourth
            B

        A final line of "对" or "错" marks a true/false question (type "3")
        whose title is every preceding line.  A one-letter answer gives type
        "1"; anything else gives type "2" with the letters joined by "&"
        ("ABD" -> "A&B&D").  Multi-line titles are joined with "\\r\\n".

        :param content_lines: list of text lines
        :return: a Question, or None when ``content_lines`` is empty
        """
        if not content_lines:
            return None
        answer = content_lines[-1]
        if answer == "对" or answer == "错":
            # True/false question: no options at all.
            return Question("\r\n".join(content_lines[:-1]), [], "3", answer)

        option_start = Question._find_option_start(content_lines)
        optionList, answer = Question._split_options_and_answer(content_lines, option_start)
        title = "\r\n".join(content_lines[:option_start])
        if len(answer) == 1:
            subjectType = "1"  # single choice
        else:
            subjectType = "2"  # multiple choice
            answer = "&".join(answer)
        return Question(title, optionList, subjectType, answer)

    @staticmethod
    def make_question_with_head(content_lines):
        """Build one Question from lines that start with a number line.

        Expected layout: a line holding the question number, the title
        line(s) and the option lines, normally without an answer::

            9
            Which diagram cannot be used?
            A. first
            B. second

        A block of only number + title (optionally followed by one empty
        line) becomes a true/false question (type "3").  Otherwise the type
        is unknown and subjectType is set to 0.

        :param content_lines: list of text lines; the first is the head line
        :return: a Question, or None when ``content_lines`` is empty
        """
        if not content_lines:
            return None
        lineNo = 0
        head = content_lines[0].strip()
        if head.isdigit():
            lineNo = head
        # A single title line is treated as a true/false question.
        if len(content_lines) == 2:
            return Question(content_lines[1], [], "3", "", "", lineNo)
        if len(content_lines) == 3 and content_lines[-1] == "":
            return Question(content_lines[1], [], "3", "", "", lineNo)

        option_start = Question._find_option_start(content_lines)
        optionList, answer = Question._split_options_and_answer(content_lines, option_start)
        # Everything between the head line and the first option is the title.
        title = "\r\n".join(content_lines[1:option_start])
        return Question(title, optionList, 0, answer, "", lineNo)
def get_question_lines_1():
    """Read questions.txt, back it up, and return its question blocks.

    The text is split wherever two or more consecutive newlines occur.  An
    unmodified copy of the input is written to "<strtime>-questions.txt".

    :return: list of question text blocks
    """
    # "with" closes each handle even if the read or the write raises.
    with open("questions.txt", mode='r') as source_file:
        txt = source_file.read()
    questions = re.split("\n\n+", txt)
    # Back up questions.txt.
    with open("{}-questions.txt".format(strtime), mode='w') as backup_file:
        backup_file.write(txt)
    return questions
def get_question_lines_2():
    """Read questions.txt, drop noise lines, back up the result, split it.

    Compared with get_question_lines_1, lines that contain only whitespace
    (other than a bare newline) and lines that contain only digits (index
    lines such as "5" or "77") are removed first.  The filtered lines are
    written to "<strtime>-questions.txt" and the filtered text is split
    wherever two or more consecutive newlines occur.

    :return: list of question text blocks
    """
    def filter_line_func(line):
        # Bare empty lines are kept: they separate the questions.
        if line == "" or line == "\n":
            return True
        linestrip = line.strip()
        if linestrip == "":
            return False
        if linestrip.isdigit():
            return False
        return True

    with open("questions.txt", mode='r') as source_file:
        txt_lines = source_file.readlines()
    # Build a real list: the result is used twice (join + writelines), which
    # an iterator such as Python 3's filter() would not survive.
    txt_new_lines = [line for line in txt_lines if filter_line_func(line)]
    txt = "".join(txt_new_lines)
    questions = re.split("\n\n+", txt)
    # Back up the filtered questions.txt.
    with open("{}-questions.txt".format(strtime), mode='w') as backup_file:
        backup_file.writelines(txt_new_lines)
    return questions
def get_question_lines_of_exams(exam_file="exam.txt"):
    """Read the exam text file and return its question blocks.

    The text is split wherever two or more consecutive newlines occur.  The
    file is expected to end with a couple of blank lines, which produces one
    trailing empty block; the printed count subtracts 1 to compensate.

    :param exam_file: path of the exam text file
    :return: list of question text blocks (including the trailing empty one)
    """
    # "with" closes the handle even if the read raises.
    with open(exam_file, mode='r') as source_file:
        txt = source_file.read()
    questions = re.split("\n\n+", txt)
    print("get_question_lines_of_exams: {}".format(len(questions) - 1))
    return questions
def main_search_answer_txt():
    """Look up every question of exam.txt in the question bank.

    Matched questions are printed with their answer.  Questions that match
    nothing are exported to a "<timestamp>-unmatched.json" file.  A block
    that fails to parse or search is reported and skipped.
    """
    questions_bank = Question.load_all_questions()
    questions_updates = []
    questions_unmatched = []
    for question in get_question_lines_of_exams():
        if question == "":
            continue
        try:
            question_item = Question.make_question_with_head(question.split("\n"))
            if not question_item:
                continue
            answer_result, matched_question = question_item.search_my_answer(questions_bank)
            if answer_result <= 0:
                questions_unmatched.append(question_item)
                continue
            # Result 1 or 2: an entry of the bank matched.
            matched_question.print_question()
            if answer_result == 1:
                questions_updates.append(matched_question)
        except Exception:
            print(">>> error occur\n")
            traceback.print_exc()
            print(question)

    if questions_updates:
        print("\n==========\nappend:")
        print(len(questions_updates))
        # Exporting to append.json is switched off in txt mode: the questions
        # carry no subjectId here, so every match would end up in that file.
    if questions_unmatched:
        print("\n==========\nunmatched:")
        print(len(questions_unmatched))
        Question.append_new_questions(questions_unmatched, suffix="unmatched.json")
def main_search_answer_json():
    """Look up every question of exam.json in the question bank.

    Same flow as main_search_answer_txt, but the exam is a JSON list of
    question objects (empty entries are skipped).  Matched questions are
    printed with their answer; partial matches are exported to
    "<timestamp>-append.json" and questions that match nothing to
    "<timestamp>-unmatched.json".
    """
    questions_bank = Question.load_all_questions()
    questions_updates = []
    questions_unmatched = []
    questions_json_file = "exam.json"
    load_questions = []
    with open(questions_json_file, mode='r') as json_file:
        load_dict = json.load(json_file)
    for item in load_dict:
        if item:
            load_questions.append(Question.load_from_json(item))
    print("load_questions: at main_search_answer_json")
    print(len(load_questions))

    for i, question_item in enumerate(load_questions):
        question_item.set_lineno(i + 1)
        try:
            answer_result, matched_question = question_item.search_my_answer(questions_bank)
            if answer_result > 0:  # 1 or 2: an entry of the bank matched
                matched_question.print_question()
                if answer_result == 1:
                    questions_updates.append(matched_question)
            else:
                questions_unmatched.append(question_item)
        except Exception:
            print(">>> error occur\n")
            traceback.print_exc()
            # Report the question that actually failed, not a variable left
            # over from the loading loop.
            print(question_item.subjectTitle)

    if questions_updates:
        print("\n==========\nappend:")
        print(len(questions_updates))
        Question.append_new_questions(questions_updates, suffix="append.json")
    if questions_unmatched:
        print("\n==========\nunmatched:")
        print(len(questions_unmatched))
        Question.append_new_questions(questions_unmatched, suffix="unmatched.json")
# Script entry: the txt-based search is disabled and the JSON-based search runs.
# main_search_answer_txt()
main_search_answer_json()
# NOTE(review): the process exits here, so nothing below this line runs when
# the file is executed as one script -- presumably several scripts were pasted
# one after another; confirm.
sys.exit(0)
def main_parse_question_to_json():
    """Convert questions.txt into a "<strtime>-exam.json" file.

    Every question block returned by get_question_lines_2() is parsed with
    Question.make_question(), printed as JSON, and collected.  The output
    file holds a JSON list of the collected objects.  A block that fails to
    parse is reported and skipped.
    """
    items_json = []
    for question in get_question_lines_2():
        if question == "":
            continue
        try:
            question_item = Question.make_question(question.split("\n"))
            if question_item:
                items_json.append(question_item.get_json(True))
        except Exception:
            print(">>> error occur\n")
            traceback.print_exc()
            print(question)

    file2name = "{}-exam.json".format(strtime)
    # Join with "," between the objects only: a comma after the last object
    # would make the file invalid JSON.
    # To force UTF-8 output: open(file2name, mode='w', encoding='utf-8') on
    # Python 3, or codecs.open(file2name, 'w', 'utf-8') on Python 2.7.
    with open(file2name, mode='w') as file2:
        file2.write("[\n" + ",\n".join(items_json) + "\n]")


main_parse_question_to_json()
def test_load_append():
    """Manual check: load the bank, echoing each entry as JSON, then re-export it."""
    # Passing "line" instead of "json" echoes the entries via print_question().
    bank_questions = Question.load_all_questions("json")
    Question.append_new_questions(bank_questions)


# test_load_append()
def test_similar():
    """Manual check of Question.is_similar_question on three sample questions.

    Prints the score of a bank-style question (numeric option ids, answer set)
    against the same question in exam style (letter ids, shuffled options, no
    subjectId), and against an unrelated question.
    """
    def build(subject_id, title, answer, options):
        # All three samples are single-choice questions (subjectType "1").
        return Question.load_from_json({
            "answer": answer,
            "optionList": [
                {"optionId": option_id, "optionTitle": option_title}
                for option_id, option_title in options
            ],
            "subjectId": subject_id,
            "subjectTitle": title,
            "subjectType": "1",
        })

    question1 = build("2928644", "对需求管理,理解不正确的是( C )", "8569657", [
        ("8569655", "A. 纯软件需求决策由软件RMT负责"),
        ("8569656", "B. 产品软硬件结合需求决策由对应产品SPDT RMT负责"),
        ("8569657", "C. 需求冲突时可在CCB会议仲裁"),
        ("8569658", "D. 产品RAT负责各产品系列的软硬结合需求分析"),
    ])
    question2 = build("", "对需求管理,理解不正确的是()", "", [
        ("A", "A. 纯软件需求决策由软件RMT负责"),
        ("C", "C. 产品软硬件结合需求决策由对应产品SPDT RMT负责"),
        ("D", "D. 需求冲突时可在CCB会议仲裁"),
        ("B", "B. 产品RAT负责各产品系列的软硬结合需求分析"),
    ])
    question3 = build("1723992", "输入校验不可以防止以下哪种漏洞?", "", [
        ("4926519", "A. XSS"),
        ("4926520", "B. SQL注入"),
        ("4926521", "C. XML注入"),
        ("4926522", "D. CSRF"),
    ])

    for candidate in (question2, question3):
        print(question1.is_similar_question(candidate))


# test_similar()
/**
 * Join the titles of all options into one text block, one title per line,
 * without a trailing newline.
 *
 * @param {Array} optionList objects carrying an `optionTitle` property
 * @returns {string} the titles separated by "\n" ("" for an empty list)
 */
var getOptionList = function(optionList) {
    var lines = optionList.map(function (option) {
        return option.optionTitle + "\n";
    });
    // Drop the newline that follows the last title.
    return lines.join("").slice(0, -1);
};
// Plain-text dump: for every question print its 1-based number, its title,
// its option titles and a blank line.
// The question array is read from the scope attached to the #boxscroll4 element.
var examins = angular.element(document.querySelector("#boxscroll4")).scope().examinquestionsAll;
for (var i=0,len=examins.length; i<len; i++)
{
console.log(i+1);
// The title is unwrapped with $$unwrapTrustedValue() before printing.
console.log(examins[i].subjectTitle.$$unwrapTrustedValue());
console.log(getOptionList(examins[i].optionList));
console.log("\n");
}
// Marker printed between the plain-text dump above and the JSON dump below.
console.log("\n\n__txt_json__\n\n");
// Same lookup as above, repeated before the JSON dump.
var examins = angular.element(document.querySelector("#boxscroll4")).scope().examinquestionsAll;
/**
 * Render an option list as the text of a JSON array of
 * {"optionId": "...", "optionTitle": "..."} objects.
 *
 * Both values are written as JSON strings via JSON.stringify, so quotes,
 * backslashes and line breaks inside a title are escaped correctly.  An
 * empty list renders as an empty JSON array.
 *
 * @param {Array} optionList objects carrying `optionId` and `optionTitle`
 * @returns {string} JSON text of the array
 */
var makeJsonOfOptionList = function(optionList) {
    var entries = optionList.map(function (option) {
        var optionId = JSON.stringify(String(option.optionId));
        var optionTitle = JSON.stringify(String(option.optionTitle));
        return '{\n"optionId": ' + optionId + ',\n"optionTitle": ' + optionTitle + '\n}';
    });
    // Separators go between entries only, so no trailing comma needs trimming.
    return "[\n" + entries.join(",\n") + "\n]";
};
/**
 * Render one question as the text of a JSON object followed by a comma.
 *
 * The values are inserted as given: `subjectTitle` must already be escaped
 * for use inside a JSON string and `optionList` must already be JSON text.
 * The "answer" field is always empty.  The "language" field is only written
 * when `language` is truthy.
 *
 * @returns {string} JSON object text ending in "},"
 */
var makeJsonOfQuestion = function (subjectId, subjectTitle, subjectType, optionList, language) {
    var fields = [
        `"subjectId": "${subjectId}"`,
        `"subjectTitle": "${subjectTitle}"`,
        `"subjectType": "${subjectType}"`,
        `"answer": ""`
    ];
    if (language) {
        fields.push(`"language": "${language}"`);
    }
    fields.push(`"optionList":\n${optionList}`);
    return "{\n" + fields.join(",\n") + "\n},";
};
// JSON dump: print "[", one JSON object per question, then "]".
// Language tag written into every question (assigned without var/let/const).
language="java";
console.log("[\n");
for (var i=0, len=examins.length; i<len; i++)
{
var subjectId = examins[i].subjectId;
// Unwrap the title and escape its double quotes for use inside a JSON string.
var subjectTitle = examins[i].subjectTitle.$$unwrapTrustedValue().replace(/"/g, "\\\"");
var subjectType = examins[i].subjectType;
var optionList = makeJsonOfOptionList(examins[i].optionList);
// Every object printed here ends with a comma, including the last one.
console.log(makeJsonOfQuestion(subjectId, subjectTitle, subjectType, optionList, language));
}
console.log("]\n");
all.json
[
{
"answer": "A",
"optionList": [
{
"optionId": "A",
"optionTitle": "A.同一问题的不同表相"
},
{
"optionId": "B",
"optionTitle": "B.不同问题的同一表相"
},
{
"optionId": "C",
"optionTitle": "C.不同问题的不同表相"
},
{
"optionId": "D",
"optionTitle": "D.以上都不是"
}
],
"subjectId": "",
"subjectTitle": "设计模式一般用来解决什么样的问题()",
"subjectType": 1
},
{
"answer": "D",
"optionList": [
{
"optionId": "A",
"optionTitle": "A.系统的维护与开发"
},
{
"optionId": "B",
"optionTitle": "B.对象组合与类的继承"
},
{
"optionId": "C",
"optionTitle": "C.系统架构与系统开发"
},
{
"optionId": "D",
"optionTitle": "D.系统复用与系统扩展"
}
],
"subjectId": "",
"subjectTitle": "设计模式的两大主题是()",
"subjectType": 1
},
{
"answer": "A",
"optionList": [
{
"optionId": "A",
"optionTitle": "A. 两者都 满足开闭原则:简单工厂以if else 方式创建对象,增加需求看看 时候会修改源代码 "
},
{
"optionId": "B",
"optionTitle": "B. 简单工厂对具体产品的创建类别和创建时机的判断是混合在一起的,这点在工厂方法模式中已经克服"
},
{
"optionId": "C",
"optionTitle": "C. 不能形成简单工厂的继承结构"
},
{
"optionId": "D",
"optionTitle": "D. 在工厂方法模式中,对于存在继承等级结构的产品树,产品的创建是通过相应等级结构的工厂创建的。"
}
],
"subjectId": "",
"subjectTitle": "下列关于简单工厂模式与工厂方法模式表述错误的是()",
"subjectType": 1
},
{
"answer": "CD",
"optionList": [
{
"optionId": "A",
"optionTitle": "A. 抽象工厂(Abstract Factory)"
},
{
"optionId": "B",
"optionTitle": "B. 适配器(Adapter)"
},
{
"optionId": "C",
"optionTitle": "C. 观察者(Observer)"
},
{
"optionId": "D",
"optionTitle": "D. 职责链(Chain of Responsibility)"
}
],
"subjectId": "",
"subjectTitle": "设计模式一般被分为三大类,其中行为型模式用于描述程序在运行时复杂的流程控制,即描述多个类或对象之间怎样相互协作共同完成单个对象都无法单独完成的任务,它涉及算法与对象间职责的分配,下列模式中属于行为模式的是()",
"subjectType": 2
},
{
"answer": "ACD",
"optionList": [
{
"optionId": "A",
"optionTitle": "A. 结构型模式可以在不破坏类封装性的基础上,实现新的功能"
},
{
"optionId": "B",
"optionTitle": "B. 结构型模式主要用于创建一组对象"
},
{
"optionId": "C",
"optionTitle": "C. 结构型模式可以创建一组类的统一访问接口"
},
{
"optionId": "D",
"optionTitle": "D. 结构型模式可以在不破坏类封装性的基础上,使得类可以通不曾估计到的系统进行交互"
}
],
"subjectId": "",
"subjectTitle": "设计模式中关于结构型模式,以下说法正确的是",
"subjectType": 2
},
{
"answer": "BD",
"optionList": [
{
"optionId": "A",
"optionTitle": "A. 基本类型偏执(Primitive Obsession)"
},
{
"optionId": "B",
"optionTitle": "B. 散弹式修改(Shotgun Surgery)"
},
{
"optionId": "C",
"optionTitle": "C. 过长参数列表(Long Parameter List)"
},
{
"optionId": "D",
"optionTitle": "D. 发散式变化(Divergent Change)"
}
],
"subjectId": "",
"subjectTitle": "面向对象设计的基本原则中的开闭原则指出软件要对扩展开放,对修改关闭。违反开闭原则可能造成的坏味道有",
"subjectType": 2
}
]
hhh