I appreciate your work Fast-Detect-Gpt published in ICLR 2024. However, I have a problem when using your methods in our setting.
I try to detect a piece of text without knowing the potential model as shown in your online demo(http://region-9.autodl.pro:21504/#/show), and run your method with the scoring model and reference model being gpt-neo-2.7B as in your demo, and I use your local ref path in your github repo.
However, the results are different — both the crit and the prob — and I don't know where the problem is.
Here are my codes:
import torch
import json
import glob
import os
import time
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
class ProbEstimator:
    """Maps a Fast-DetectGPT criterion value to a fake-text probability
    using reference criterion distributions loaded from JSON result files.

    NOTE(review): the original class also defines ``crit_to_prob`` (called
    later in this script); it appears to have been lost in the paste.
    """

    # BUG FIX: markdown stripped the double underscores from __init__,
    # the '*' from the glob pattern, and the '*' from `len(...) * 2`.
    def __init__(self, ref_path):
        """Load real/fake criterion samples from every *.json under ref_path."""
        self.real_crits = []   # criterion values measured on human-written text
        self.fake_crits = []   # criterion values measured on model-generated text
        for result_file in glob.glob(os.path.join(ref_path, '*.json')):
            with open(result_file, 'r') as fin:
                res = json.load(fin)
            self.real_crits.extend(res['predictions']['real'])
            self.fake_crits.extend(res['predictions']['samples'])
        print(f'ProbEstimator: total {len(self.real_crits) * 2} samples.')
# NOTE(review): this first copy of the script is truncated by the paste —
# `start`, `get_sampling_discrepancy_analytic`, and `crit` are used but never
# defined above this point. The complete version appears further below.
end = time.time()
print(f'loading time: {end-start}')

name = "sampling_discrepancy_analytic"
criterion_fn = get_sampling_discrepancy_analytic
prob_estimator = ProbEstimator('fast_detect_gpt_result')

# the passage whose machine-generated probability we want to estimate
text = '在智能手机终端市场需求疲软,出货量增长乏力的背景下,折叠屏如同一道曙光,照亮手机市场,成为智能手机市场唯一还在增长的细分品类。根据IDC统计数据,2022年国内折叠屏市场继续维持稳定增长态势,全年出货量达到近330万台,同比增长高达118%,2023年一季度国内折叠屏手机实现出货101.5万台,较2022年同期增至52.86%。\n数据显示,2019年国内折叠屏手机市场规模约为\n28.28亿元,2022年已增长至127.49亿元。在直屏智能手机的硬件配置和功能体验进入瓶颈期,智能手机市场陷入低迷状态之时,折叠屏手机的创新技术成熟,在保持一定便利性的同时,还很好地解决了屏幕尺寸受限,因此才能逆势上扬,市场表现愈发火热。\n从竞争格局来看,自折叠屏手机面世,市场呈快速发展趋势,各大主流品牌纷纷在此领域投入布局,我国如今的折叠屏手机市场,呈现群雄逐鹿的状态。在2022年国内折叠屏手机行业中,华为、三星和OPPO排名前三。华为作为入局最早的厂商之一,一直都是折叠屏技术研发的主力,占据我国折叠屏市场份额的47.4%,成为该领域中最畅销的手机品牌。\n华经产业研究院研究团队使用桌面研究与定量调查、定性分析相结合的方式,全面客观的剖析折叠屏手机行业发展的总体市场容量、产业链、竞争格局、经营特性、盈利能力和商业模式等。科学使用SCP模型、SWOT、PEST、回归分析、SPACE矩阵等研究模型与方法综合分析折叠屏手机行业市场环境、产业政策、竞争格局、技术革新、市场风险、行业壁垒、机遇以及挑战等相关因素。根据折叠屏手机行业的发展轨迹及实践经验,精心研究编制《2023-2028年中国折叠屏手机行业发展监测及投资前景展望报告》,为企业、科研、投资机构等单位投资决策、战略规划、产业研究提供重要参考。'

# estimate the probability of machine generated text
# BUG FIX: the line above was pasted without its leading '#', making it a
# syntax error.
print(crit)
end = time.time()
print(f'inference time: {end-start}')
prob = prob_estimator.crit_to_prob(crit)
print(f'Fast-DetectGPT criterion is {crit:.4f}, suggesting that the text has a probability of {prob * 100:.0f}% to be fake.')
I appreciate your work Fast-Detect-Gpt published in ICLR 2024. However, I have a problem when using your methods in our setting.
I try to detect a piece of text without knowing the potential model as shown in your online demo(http://region-9.autodl.pro:21504/#/show), and run your method with the scoring model and reference model being gpt-neo-2.7B as in your demo, and I use your local ref path in your github repo.
However, the results are different — both the crit and the prob — and I don't know where the problem is.
Here are my codes:
import torch import json import glob import os import time import numpy as np from transformers import AutoModelForCausalLM, AutoTokenizer
class ProbEstimator:
    """Maps a Fast-DetectGPT criterion value to a fake-text probability
    using reference criterion distributions loaded from JSON result files.

    NOTE(review): the script later calls ``prob_estimator.crit_to_prob``,
    but that method is missing from this paste — it was likely truncated.
    """

    # BUG FIX: markdown stripped the double underscores from __init__,
    # the '*' from the glob pattern, and the '*' from `len(...) * 2`;
    # the class was also collapsed onto a single line.
    def __init__(self, ref_path):
        """Load real/fake criterion samples from every *.json under ref_path."""
        self.real_crits = []   # criterion values measured on human-written text
        self.fake_crits = []   # criterion values measured on model-generated text
        for result_file in glob.glob(os.path.join(ref_path, '*.json')):
            with open(result_file, 'r') as fin:
                res = json.load(fin)
            self.real_crits.extend(res['predictions']['real'])
            self.fake_crits.extend(res['predictions']['samples'])
        print(f'ProbEstimator: total {len(self.real_crits) * 2} samples.')
def get_samples(logits, labels):
    """Draw 10000 token samples per position from the model distribution.

    Args:
        logits: (1, seq_len, vocab) next-token logits.
        labels: (1, seq_len) token ids; only validated, not used for sampling.

    Returns:
        LongTensor of shape (1, seq_len, 10000) of sampled token ids.
    """
    # BUG FIX (formatting): this definition was collapsed onto one line in
    # the paste, which is not valid Python; logic is unchanged.
    assert logits.shape[0] == 1
    assert labels.shape[0] == 1
    nsamples = 10000
    lprobs = torch.log_softmax(logits, dim=-1)
    distrib = torch.distributions.categorical.Categorical(logits=lprobs)
    # sample -> (nsamples, 1, seq_len); permute -> (1, seq_len, nsamples)
    samples = distrib.sample([nsamples]).permute([1, 2, 0])
    return samples
def get_likelihood(logits, labels):
    """Mean log-likelihood of `labels` under `logits`.

    Args:
        logits: (1, seq_len, vocab) next-token logits.
        labels: (1, seq_len) or (1, seq_len, 1) token ids.

    Returns:
        Tensor of shape (1, 1): per-token log-probabilities averaged over
        the sequence dimension.
    """
    # BUG FIX (formatting): this definition was collapsed onto one line in
    # the paste, which is not valid Python; logic is unchanged.
    assert logits.shape[0] == 1
    assert labels.shape[0] == 1
    # add the trailing index dim expected by gather when labels is 2-D
    labels = labels.unsqueeze(-1) if labels.ndim == logits.ndim - 1 else labels
    lprobs = torch.log_softmax(logits, dim=-1)
    log_likelihood = lprobs.gather(dim=-1, index=labels)
    return log_likelihood.mean(dim=1)
def get_sampling_discrepancy(logits_ref, logits_score, labels):
    """Sampling-based discrepancy criterion.

    NOTE(review): only the argument validation and the vocabulary-size
    warning survived this paste — the computation that follows in the
    original implementation is missing, so as written the function
    returns None. Compare against the upstream repository.
    """
    # BUG FIX (formatting): restored from a collapsed/unindented paste.
    assert logits_ref.shape[0] == 1
    assert logits_score.shape[0] == 1
    assert labels.shape[0] == 1
    if logits_ref.size(-1) != logits_score.size(-1):
        # reference and scoring models must share a vocabulary for the
        # discrepancy to be meaningful
        print(f"WARNING: vocabulary size mismatch {logits_ref.size(-1)} vs {logits_score.size(-1)}.")
def get_sampling_discrepancy_analytic(logits_ref, logits_score, labels):
    """Analytic (closed-form) sampling-discrepancy criterion.

    NOTE(review): only the argument validation and the vocabulary-size
    warning survived this paste — the analytic computation that follows
    in the original implementation is missing, so as written the function
    returns None, which would make `crit` None downstream. Compare against
    the upstream repository.
    """
    # BUG FIX (formatting): restored from a collapsed/unindented paste.
    assert logits_ref.shape[0] == 1
    assert logits_score.shape[0] == 1
    assert labels.shape[0] == 1
    if logits_ref.size(-1) != logits_score.size(-1):
        # reference and scoring models must share a vocabulary for the
        # discrepancy to be meaningful
        print(f"WARNING: vocabulary size mismatch {logits_ref.size(-1)} vs {logits_score.size(-1)}.")
# --- model setup ---------------------------------------------------------
# NOTE(review): 'gpt_neo_2.7B' looks like a local checkpoint directory; the
# Hugging Face hub id is 'EleutherAI/gpt-neo-2.7B'. Confirm this points at
# the same weights used by the online demo, otherwise crit/prob will differ.
scoring_model_name = 'gpt_neo_2.7B'
reference_model_name = 'gpt_neo_2.7B'
start = time.time()

device = 'cuda:0'

scoring_tokenizer = AutoTokenizer.from_pretrained(scoring_model_name, padding_side='right')
scoring_tokenizer.pad_token_id = scoring_tokenizer.eos_token_id
scoring_model = AutoModelForCausalLM.from_pretrained(scoring_model_name).to(device)
scoring_model.eval()
if reference_model_name != scoring_model_name:
    reference_tokenizer = AutoTokenizer.from_pretrained(reference_model_name, padding_side='right')
    # BUG FIX: the reference model must be moved to the same device as the
    # tokenized inputs; without .to(device) the later forward pass would
    # fail with a CPU/CUDA mismatch whenever the two model names differ.
    reference_model = AutoModelForCausalLM.from_pretrained(reference_model_name).to(device)
    reference_model.eval()
# evaluate criterion
# BUG FIX: the line above was pasted without its leading '#', making it a
# syntax error; the two statements below were collapsed onto single lines.
end = time.time()
print(f'loading time: {end-start}')

name = "sampling_discrepancy_analytic"
criterion_fn = get_sampling_discrepancy_analytic
prob_estimator = ProbEstimator('fast_detect_gpt_result')
# the passage (Chinese market-report prose) whose machine-generated
# probability we want to estimate; the '\n' escapes mark paragraph breaks
text = '在智能手机终端市场需求疲软,出货量增长乏力的背景下,折叠屏如同一道曙光,照亮手机市场,成为智能手机市场唯一还在增长的细分品类。根据IDC统计数据,2022年国内折叠屏市场继续维持稳定增长态势,全年出货量达到近330万台,同比增长高达118%,2023年一季度国内折叠屏手机实现出货101.5万台,较2022年同期增至52.86%。\n数据显示,2019年国内折叠屏手机市场规模约为\n28.28亿元,2022年已增长至127.49亿元。在直屏智能手机的硬件配置和功能体验进入瓶颈期,智能手机市场陷入低迷状态之时,折叠屏手机的创新技术成熟,在保持一定便利性的同时,还很好地解决了屏幕尺寸受限,因此才能逆势上扬,市场表现愈发火热。\n从竞争格局来看,自折叠屏手机面世,市场呈快速发展趋势,各大主流品牌纷纷在此领域投入布局,我国如今的折叠屏手机市场,呈现群雄逐鹿的状态。在2022年国内折叠屏手机行业中,华为、三星和OPPO排名前三。华为作为入局最早的厂商之一,一直都是折叠屏技术研发的主力,占据我国折叠屏市场份额的47.4%,成为该领域中最畅销的手机品牌。\n华经产业研究院研究团队使用桌面研究与定量调查、定性分析相结合的方式,全面客观的剖析折叠屏手机行业发展的总体市场容量、产业链、竞争格局、经营特性、盈利能力和商业模式等。科学使用SCP模型、SWOT、PEST、回归分析、SPACE矩阵等研究模型与方法综合分析折叠屏手机行业市场环境、产业政策、竞争格局、技术革新、市场风险、行业壁垒、机遇以及挑战等相关因素。根据折叠屏手机行业的发展轨迹及实践经验,精心研究编制《2023-2028年中国折叠屏手机行业发展监测及投资前景展望报告》,为企业、科研、投资机构等单位投资决策、战略规划、产业研究提供重要参考。'
# tokenize the text and compute the detection criterion
tokenized = scoring_tokenizer(text, return_tensors="pt", padding=True, return_token_type_ids=False).to(device)
labels = tokenized.input_ids[:, 1:]
with torch.no_grad():
    # BUG FIX: the tokenizer output (a BatchEncoding mapping) must be
    # unpacked with ** — markdown stripped the asterisks from the pasted
    # code. Passing the mapping positionally feeds it as `input_ids`,
    # producing wrong logits; this is the most likely reason the crit and
    # prob differ from the online demo.
    logits_score = scoring_model(**tokenized).logits[:, :-1]
    if reference_model_name == scoring_model_name:
        # same model scores and references: reuse the logits
        logits_ref = logits_score
    else:
        tokenized = reference_tokenizer(text, return_tensors="pt", padding=True, return_token_type_ids=False).to(device)
        assert torch.all(tokenized.input_ids[:, 1:] == labels), "Tokenizer is mismatch."
        logits_ref = reference_model(**tokenized).logits[:, :-1]
    crit = criterion_fn(logits_ref, logits_score, labels)
# estimate the probability of machine generated text
# BUG FIX: the line above was pasted without its leading '#', making it a
# syntax error; the statements below were collapsed onto one line.
print(crit)
end = time.time()
print(f'inference time: {end-start}')
# NOTE(review): crit_to_prob is not defined on the ProbEstimator pasted
# above — it appears to have been truncated from the original class.
prob = prob_estimator.crit_to_prob(crit)
print(f'Fast-DetectGPT criterion is {crit:.4f}, suggesting that the text has a probability of {prob * 100:.0f}% to be fake.')