deepseek-ai / DeepSeek-Coder

DeepSeek Coder: Let the Code Write Itself
https://coder.deepseek.com/
MIT License
6.83k stars 472 forks source link

Random Real Users Data Leak #76

Open Wontfallo opened 11 months ago

Wontfallo commented 11 months ago

SO WEIRD n the middle of writing an example game of SNAKE the model started writing source code from, some of whom I've been able to identify/validate as real Users. From what I only assume came from the training data. It did this for 7+ continuous minutes running on my local machine 33B-instruct.Q5_K_M
But why would it do that, and how? Has anyone else seen this before?


# Game over function
def game_over():
    my_font = pygame.font.SysFont('times new roman', 90)
    game_over_surface = my_font.render('Your Score is : ' + str(score), True, red)
    game_over_rect = game_over_surface.get_rect()
    game_over_rect.midtop = (window_x/2, window_y/4)
    game_window.blit(game_over_surface, game_over_rect)
    pygame.display.flip()
    time.sleep(2)
    pygame.quit()
    sys.exit()

# Main function
while True:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            pygame.quit()
            sys.exit()
        # checking if key is pressed
        elif event.type == pygame.KEYDOWN:
            if event.key == pygame.K_UP:
                change_to = 'UP'
            if event.key == pygame.K_DOWN:
                change_to = 'DOWN'
            if event.key == pygame.K_LEFT:
                change_to = 'LEFT'
            if event.key == pygame.K_RIGHT:
                change_to = 'RIGHT'

    # Making sure the snake doesn't move in the opposite direction instantaneously
    if change_to == 'UP' and direction != 'DOWN':
        direction = 'UP'
    if change_to == 'DOWN' and direction != 'UP':
        direction = 'DOWN'
    if change_to == 'LEFT' and direction != 'RIGHT':
        direction = 'LEFT'
    if change_to == 'RIGHT' and direction != 'LEFT':
        direction = 'RIGHT'

    # Moving the snake
    if direction == 'UP':
        snake_position[1] -= 10
    if direction == 'DOWN':
        snake_position[1] += 10
    if direction == 'LEFT':
        snake_position[0] -= 10
    if direction == 'RIGHT':
        snake_position[0] += 10

    # Snake body growing mechanism if fruits and score
    # snake_body.insert(0, list(snake_position))
    # if snake_position[0] == fruit_position[0] and snake_position[1] == fruit_position[1]:
        #score += 1
        #fruit_spawn = False
    #else:
        #snake_body.pop()

    # if not fruit_spawn:
        #fruit_position = [random.randrange(1, (screen_width//10)) * 10, random.randrange(1, (screen_height//10)) * 10]

    # fruit_spawn = True
    # screen.fill((0,0,0))
    # for pos in snake_body:
        # pygame.draw.rect(screen, green, pygame.Rect(pos[0], pos[1], 10, 10))
    # pygame.draw.rect(screen, white, pygame.Rect(fruit_position[0], fruit_position[1], 10, 10))

    # Clock and Refresh rate
    # pygame.display.update()
    # fps.tick(snake_speed)def find_max(nums):
    max_number = nums[0]
    for number in nums:
        if number > max_number:
            max_number = number
    return max_number

print(find_max([1,2,3,4]))#coding:utf-8
from django.db import models

class Article(models.Model):
    title = models.CharField(u'标题',max_length=256)
    content = models.TextField(u'内容')
    pub_date = models.DateTimeField(u'发表时间',auto_now_add=True,editable=True)
    update_time = models.DateTimeField(u'更新时间',auto_now=True,null=True)# Generated by Django 2.1 on 2018-03-4 07:35

from django.db import migrations, models
import django.utils.timezone

class Migration(migrations.Migration):

    dependencies = [
        ('TestModel', '0002_auto_2018034_1519'),
    ]

    operations = [
        migrations.AddField(
            model_name='test',
            name='age',
            field=models.IntegerField(default=django.utils.timezone.now),
            preserve_default=False,
        ),
        migrations.AlterField(
            model_name='test',
            name='address',
            field=models.CharField(max_length=200),
        ),
    ]# Generated by Django 2.1 on 2018-03-4 07:19

from django.db import migrations, models

class Migration(migrations.Migration):

    dependencies = [
        ('TestModel', '0001_initial'),
    ]

    operations = [
        migrations.AlterField(
            model_name='test',
            name='address',
            field=models.CharField(max_length=20),
        ),
        migrations.AlterModelTable(
            name='test',
            table='mytest',
        ),
    ]# -*- coding: utf-8 -*-
from django.http import HttpResponse,Http404
from django.shortcuts import render_to_response
from django.template import loader,Context
import datetime

def hello(request):
    return HttpResponse("Hello world")
def current_datetime(request):
    now = datetime.datetime.now()
    t = loader.get_template('current_datetime.html')
    c = Context({'current_date': now})
    # html = t.render(c)
    # return HttpResponse(html)
    return render_to_response('current_datetime.html', {'current_date': now})
def hours_ahead(request, offset):
    try:
        offset = int(offset)
    except ValueError:
        raise Http404()
    dt = datetime.datetime.now() + datetime.timedelta(hours=offset)
    return render_to_response('hours_ahead.html', {'hour_offset': offset,'next_time': dt})# -*- coding: utf-8 -*-
"""
Created on Fri Jun 23 15:49:07 2017

@author: Administrator
"""
def hello():
    return "hello"# -*- coding: utf-8 -*-
"""
Created on Wed Jun  7 16:34:15 2017

@author: Administrator
"""
from selenium import webdriver
import time
class Login():
    def __init__(self,browser):
        self.brower = webdriver.Firefox() #��览器选择火��

    #登录
    def user_login(self,username,password):
        url='https://passport.cnblogs.com/user/signin'
        self.brower.get(url)#打开网址获取源码

        time.sleep(3) #等待3秒

        #使用css选择器,寻找元素并将值赋予username,password
        username_element = self.brower.find_element_by_id('loginName')
        username_element.clear()#清除输入框的内容
        username_element.send_keys(username) #获取用户名

        password_element = self.brower.find_element_by_id('loginPassword')
        password_element.clear()
        password_element.send_keys(password) #获取密码

        code = input("验证码:")#手动输入验证码
        self.brower.find_element_by_id('captcha').send_keys(code)#将验证码赋予相应元素

        #登陆按��点击事件,注意,webdriver的click()��数执行的是一个javascript程序,所以执行速度比python快很多。
        self.brower.find_element_by_id('signin').click()

    def __del__(self):
        time.sleep(5)
        self.brower.close() #关闭��览器

if __name__ == "__main__":
    user = '用户名' #输入登录信息,这里省略了。
    password="密码"

    login = Login("firefox")#选择火����览器
    login.user_login(user,password)# -*- coding: utf-8 -*-
import scrapy
from TencentJob.items import TencentjobItem

class TencentSpider(scrapy.Spider):
    name = 'Tencent'
    #allowed_domains = ['tencent.com']
    baseURL="http://hr.tencent.com/position.php?&start=" 
    offset = 0#设置起始页面
    start_urls = [baseURL + str(offset)]
    #start_urls=['http://hr.tencent.com/position.php?&start=1']

    def parse(self, response):

        for each in response.xpath("//tr[@class='even']|//tr[@class='odd']"):#获取所有的工作信息
            #初始化模型对应的item对象
            item = TencentjobItem()
            # 职位名称
            item['positionname'] = each.xpath("./td[1]/a/text()").extract()[0]
            # 详情连接
            item['positionlink'] = each.xpath("./td[1]/a/@href").extract()[0]
            # 职位类别
            item['positionType']=each.xpath("./td[2]/text()").extract()[0]
            #招聘人数
            item['peopleNum'] = each.xpath("./td[3]/text()").extract()[0]
            # 工作地点
            item["workLoc"]=each.xpath("./td[4]/text()").extract()[0]
            # 发布时间
            item["publishTime"] = each.xpath("./td[5]/text()").extract()[0]

            yield item#返回给管道处理,同时还会回来执行后面的代码

        #if self.offset < 1680:
            #self.offset += 10
            #url = self.baseURL + str(self.offset)
            #yield scrapy.Request(url, callback = self.parse)
        if len(response.xpath("//a[@class='noactive' and @id='next']"))==0:
            url=response.xpath("//a[@id='next']/@href").extract()[0]#获取下一页的链接
            yield scrapy.Request(self.baseURL+str(url),callback=self.parse)# -*- coding: utf-8 -*-
"""
Created on Sat Dec 29 16:57:40 2018

@author: lenovo
"""

import pandas as pd
from sklearn.feature_extraction import DictVectorizer
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import recall_score,precision_score
import numpy as np
df = pd.read_csv('http://blog.yhat.com/static/misc/data/TitanicSurvivors.csv')
X=df[['Pclass','Fare']]
y=df['Survived']
clf = DecisionTreeClassifier(random_state=241)
clf.fit(X, y)
importances = clf.feature_importances_
print(importances)# -*- coding: utf-8 -*-
"""
Created on Fri Oct 19 07:53:26 2018

@author: lenovo
"""

def read(filename):
    file = open("C:/Users/lenovo/Desktop/draft/"+filename,"r")
    text=file.read()
    print(text)
    file.close()

read('test1.txt')# -*- coding: utf-8 -*-
"""
Created on Mon Feb 25 09:37:06 2019

@author: lenovo
"""
from sklearn import datasets,linear_model
import matplotlib.pyplot as plt
import numpy as np
diabetes =datasets.load_diabetes()
diabetes_X=diabetes.data[:,np.newaxis,2] #只使用一个特征
print(diabetes_X)
#将数据分为训练集和测试集
diabetes_X_train=diabetes_X[:-20]
diabetes_X_test=diabetes_X[-20:]
diabetes_y_train=diabetes.target[:-20]
diabetes_y_test=diabetes.target[-20:]
#创建线性回归模型
regr = linear_model.LinearRegression()
regr.fit(diabetes_X_train, diabetes_y_train)
print('Coefficients:\n', regr.coef_)
mean_square=np.mean((regr.predict(diabetes_X_test)-diabetes_y_test)**2)
print("Residual sum of squares: %.2f"% mean_square) #残差平方和
plt.scatter(diabetes_X_test, diabetes_y_test,  color='black')
plt.plot(diabetes_X_test, regr.predict(diabetes_X_test), color='blue',linewidth=3)
plt.show()# -*- coding: utf-8 -*-
"""
Created on Thu Apr  4 16:19:02 2019

@author: lenovo
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
#加载数据集
data=pd.read_csv("train.csv")
print(data)
#选择特征
features = ['PassengerId','Pclass','Sex','Age','SibSp','Parch','Fare'] #需要的特征
X=data[features] 
y=data.Survived #目标
#将年龄和性别转换为数字
X['Sex'].replace('male',0,inplace=True)
X['Sex'].replace('female',1,inplace=True)
print(X)
age_mean = X['Age'].mean() #求平均年龄
X['Age'] = X['Age'].fillna(age_mean) #用平均年龄替换NaN的数据
print(X.isnull().sum())#判断是否有缺失值
#划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# 创建SVM分类器
clf = svm.SVC(kernel='linear') # 线性核
# 使用训练集训练模型
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("准确率:",metrics.accuracy_score(y_test, y_pred))import pandas as pd
from sklearn.preprocessing import LabelEncoder  # 用于将文本转换为数字形式
from sklearn.model_selection import train_test_split  # 用于拆分数据集
from sklearn.tree import DecisionTreeClassifier, export_graphviz  # 决策树算法
# from sklearn.externals.six import StringIO # 这个包是Python2和3的兼容性工具,用于字符串I/O
from IPython.display import Image  # 用于显示图片
import pydotplus  # 生成决策树图像的库

col_names = ['outlook', 'temperature', 'humidity', 'windy', 'play']  # 定义列名,最后一列为结果
weather = pd.read_csv("weather.csv", header=None, names=col_names)  # 读取数据集
print(weather)
# 将文本转换为数字形式
weather_encoded = weather.apply(LabelEncoder().fit_transform)
print(weather_encoded)

# 划分训练集和测试集
X_train, X_test, Y_train, Y_test = train_test_split(weather_encoded.iloc[:, 0:4], weather_encoded.iloc[:, -1], test_size=0.3, random_state=100)
# 创建决策树分类器对象
clf = DecisionTreeClassifier()
# 训练决策树分类器
clf = clf.fit(X_train,Y_train)

# 预测测试集结果
y_pred = clf.predict(X_test)
print("Test set:", y_pred)

dot_data = StringIO()
export_graphviz(clf, out_file=dot_data, filled=True, rounded=True, special_characters=True, feature_names=weather.columns[:-1], class_names=weather.play.unique())
print(dot_data.getvalue())
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
graph.write_png('weather.png')
Image(graph.create_png())import torch
import torch.nn as nn

class CustomModel(nn.Module):
    def __init__(self, num_layers: int, input_size: int, hidden_size: int, output_size: int):
        super().__init__()

        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Define the layers of the model
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers)
        self.fc1 = nn.Linear(hidden_size, 256)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, output_size)

    def forward(self, x):
        h0 = torch.zeros(self.num_layers, x.size(1), self.hidden_size).to(x.device) 
        c0 = torch.zeros(self.num_layers, x.size(1), self.hidden_size).to(x.device)

        out, _ = self.lstm(x, (h0, c0))  
        # Take the last output of LSTM layer
        out = self.fc1(out[:, -1, :])
        out = self.relu(out)
        out = self.dropout(out)
        out = self.fc2(out)

        return outimport numpy as np
import torch
from typing import Tuple, List

class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, data: np.ndarray, targets: np.ndarray, sequence_length: int = 48):
        self.data = data # input features
        self.targets = targets  # output targets

        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.data) - self.sequence_length

    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        x = self.data[index : index + self.sequence_length]  # input sequence of length sequence_length
        y = self.targets[index + self.sequence_length]  # target to predict for the last element in input sequence

        return torch.from_numpy(x).float(), torch.tensor(y, dtype=torch.long)# -*- coding: utf-8 -*-
"""
Created on Tue Oct 25 19:06:37 2022

@author: luis_
"""
import pandas as pd
import numpy as np
from sklearn import preprocessing, linear_model
from sklearn.metrics import r2_score

# Importing data and converting categorical to numerical values
df = pd.read_csv('datasets/automobileEDA.csv')
print(df)

df['drive-wheels'].unique()
pd.get_dummies(['fuel-type']) # convert 'drive-wheels' column to numerical values
df[['fwd', 'rwd']] = pd.get_dummies(df['drive-wheels']) # add the columns 'fwd' and 'rwd'
print(df)

# Creating a linear regression model
lm = linear_model.LinearRegression()
X = df[['highway-mpg', 'engine-size']]
Y = df['price']

lm.fit(X, Y)
print("The R2 value is: ", lm.score(X, Y)) # The R2 value tells us how close the data are to the fitted regression line

# Predicting price of car
predict = lm.predict([[30, 1500]])
print("Predicted price: ", predict)import os
from PIL import Image
path_in = 'C:/Users/luis_/Desktop/dataset' # path to the original dataset

i = 0
for filename in os.listdir(path_in): # for each image in the directory, convert it into RGB format and save it as a new file
    img = Image.open(os.path.join(path_in,filename)) 
    if img is not None:
        rgb_img = img.convert('RGB')
        rgb_img.save(f'C:/Users/luis_/Desktop/dataset/{i}.jpg') # save the image as jpg
        i += 1import torchvision
from torch import nn, optim
from torchvision import transforms

# Define transformations for the training and test sets
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5), (0.5)), # Normalization is necessary to ensure that all input data is on a similar scale 
     ])

# Download and load the training data
trainset = torchvision.datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = torchvision.datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

# Define the model architecture (2 hidden layers with 1024 and 512 units respectively), with ReLU activation functions
model = nn.Sequential(nn.Linear(784, 1024), # input layer -> first hidden layer
                      nn.ReLU(),
                      nn.Linear(1024, 512), # second hidden layer
                      nn.ReLU(),
                      nn.Linear(512, 10)) # output layer

# Define the loss function and optimizer (stochastic gradient descent with learning rate of 0.01)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

epochs = 5 # number of training iterations (epochs)
for e in range(epochs): 
    running_loss = 0 # variable to keep track of the total loss for each epoch
    for images, labels in trainloader: 
        # Flatten MNIST images into a 784 long vector
        images = images.view(images.shape[0], -1)

        # Training pass
        optimizer.zero_grad() # reset the gradients after each training batch

        output = model(images) # forward propagation: make a prediction for the current batch
        loss = criterion(output, labels) # calculate the loss between the predictions and true values

        # Backward propagation: compute gradient of the loss with respect to all parameters
        loss.backward()

        # Update weights using gradients to reduce the loss 
        optimizer.step()

        running_loss += loss.item() # add up the total loss for each epoch
    else:
        print(f"Training loss: {running_loss/len(trainloader)}")

# Testing model performance on test data 
correct_count, all_count = 0, 0 # initialize counters to keep track of the number of correct and total predictions
for images,labels in testloader:
    for i in range(len(labels)):
        img = images[i].view(1, 784)
        with torch.no_grad(): # turn off gradients to speed up testing (forward propagation only)
            logps = model(img)

        ps = torch.exp(logps) # get the class probabilities using softmax function 
        probab = list(ps.numpy()[0])
        pred_label = probab.index(max(probab))
        true_label = labels.numpy()[i]
        if(true_label == pred_label): # count correct predictions 
            correct_count += 1
        all_count += 1

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))You are an AI programming assistant, and I'm here to help you with your coding questions. If you have any questions about Python or any other coding languages, feel free to ask.`
LeVuMinhHuy commented 11 months ago

creepy