Open testpppppp opened 1 year ago
https://zhuanlan.zhihu.com/p/62521862
# 最长公共子串
dp[i][j] = dp[i - 1][j - 1] + 1 else 0
# 最长公共子序列,不用连续
dp = [[0 for i in range(len(s2) + 1)] for j in range(len(s1) + 1)]
lcs = ''
for i in range(1, len(s1) + 1):
for j in range(1, len(s2) + 1):
if s1[i - 1] == s2[j - 1]:
dp[i][j] = dp[i-1][j-1] + 1
else:
dp[i][j] = max(dp[i-1][j], dp[i][j-1])
# 最长递增子序列
def lengthOfLIS(self, nums: List[int]) -> int:
if not nums: return 0
dp = [1] * len(nums)
for i in range(len(nums)):
for j in range(i):
if nums[j] < nums[i]: # 如果要求非严格递增,将此行 '<' 改为 '<=' 即可。
dp[i] = max(dp[i], dp[j] + 1)
return max(dp)
# 编辑距离 https://blog.51cto.com/u_15408171/6869532
不同之处在于,LCS对两个的长度差异不敏感,编辑距离对两者的长度差异敏感。 LCS衡量了两者的重合度,编辑距离衡量了两者的长度和重合度。 对编辑距离的增删代价取0,改操作换成相同奖励,就是LCS。
if word1[i - 1] == word2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
else:
dp[i][j] = min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1])+1
https://zhuanlan.zhihu.com/p/35643721
C = [3,2,6,7,1,4,9,5]
V = [6,3,5,8,3,1,6,9]
Count = [3,5,1,9,3,5,6,8]#每种物品的实现
target = 20
F = [0 for i in range(0,target+1)]
n = len(C)
def CompleteBackPack(cost,value):
for i in range(cost,target+1):
F[i] = max(F[i],F[i-cost]+value)
def OneZeroBackPack(cost,value):
for i in reversed(range(cost,target+1)):
F[i] = max(F[i],F[i-cost]+value)
def MultipleBackPack(cost,value,count):
if (cost * count) >= target:#当该种物品的个数乘以体积大于背包容量,视为有无限个即完全背包
CompleteBackPack(C[i],V[i])
return
temp_count = 1 #以上情况不满足,转化为以下情况,具体参考《背包九讲》多重背包的时间优化
while(temp_count<count):
OneZeroBackPack(temp_count*cost,temp_count*value)
count = count - temp_count
temp_count = temp_count * 2 #转化为1,2,4
OneZeroBackPack(count*cost,count*value)#9个中剩下两个
for i in range(0,n):
MultipleBackPack(C[i],V[i],Count[i])
print (F[target])
int binary_search(int[] nums, int target) {
int left = 0, right = nums.length - 1;
while(left <= right) {
int mid = left + (right - left) / 2;
if (nums[mid] < target) {
left = mid + 1;
} else if (nums[mid] > target) {
right = mid - 1;
} else if(nums[mid] == target) {
// 直接返回
return mid;
}
}
// 直接返回
return -1;
}
int left_bound(int[] nums, int target) {
int left = 0, right = nums.length - 1;
while (left <= right) {
int mid = left + (right - left) / 2;
if (nums[mid] < target) {
left = mid + 1;
} else if (nums[mid] > target) {
right = mid - 1;
} else if (nums[mid] == target) {
// 别返回,锁定左侧边界
right = mid - 1;
}
}
// 判断 target 是否存在于 nums 中
if (left < 0 || left >= nums.length) {
return -1;
}
// 判断一下 nums[left] 是不是 target
return nums[left] == target ? left : -1;
}
int right_bound(int[] nums, int target) {
int left = 0, right = nums.length - 1;
while (left <= right) {
int mid = left + (right - left) / 2;
if (nums[mid] < target) {
left = mid + 1;
} else if (nums[mid] > target) {
right = mid - 1;
} else if (nums[mid] == target) {
// 别返回,锁定右侧边界
left = mid + 1;
}
}
// 由于 while 的结束条件是 right == left - 1,且现在在求右边界
// 所以用 right 替代 left - 1 更好记
if (right < 0 || right >= nums.length) {
return -1;
}
return nums[right] == target ? right : -1;
}
def buildMaxHeap(arr):
import math
for i in range(math.floor(len(arr)/2),-1,-1):
heapify(arr,i)
def heapify(arr, i):
left = 2*i+1
right = 2*i+2
largest = i
if left < arrLen and arr[left] > arr[largest]:
largest = left
if right < arrLen and arr[right] > arr[largest]:
largest = right
if largest != i:
swap(arr, i, largest)
heapify(arr, largest)
def swap(arr, i, j):
arr[i], arr[j] = arr[j], arr[i]
def heapSort(arr):
global arrLen
arrLen = len(arr)
buildMaxHeap(arr)
for i in range(len(arr)-1,0,-1):
swap(arr,0,i)
arrLen -=1
heapify(arr, 0)
return arr
import random
import sys
sys.setrecursionlimit(10000000) #设置系统最大递归深度
def quick_sort(data, left, right):
if left < right:
mid = partition(data, left, right) # mid返回的是上一个用来排序那个数的下标
quick_sort(data, left, mid - 1)
quick_sort(data, mid + 1,right)
# 每执行一次partition函数都可以实现将某个数左边都比这个数小右边都比这个数大
def partition(data, left, right):
tmp = data[left]
while left < right:
while left < right and data[right] >= tmp: # 从右向左找小于tmp的数放到左边空位置
right -= 1
# data[left] = data[right] # 将右边小于tmp值得数放到左边空位置
while left < right and data[left] <= tmp: # 从左向右找到大于tmp的值放到右边空位置
left += 1
data[right] = data[left] # 将右边大于tmp值得数放到右边空位置
data[left] = tmp
return left
data = list(range(100))
random.shuffle(data) #将有序列表打乱
quick_sort(data, 0, len(data) - 1)
print(data)
ref https://www.kaggle.com/code/bigironsphere/loss-function-library-keras-pytorch
# encoding: utf-8
import numpy
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.ops.focal_loss import sigmoid_focal_loss
class DiceLoss(nn.Module):
def __init__(self, weight=None, size_average=True):
super(DiceLoss, self).__init__()
def forward(self, inputs, targets, smooth=1):
# comment out if your model contains a sigmoid or equivalent activation layer
inputs = F.sigmoid(inputs)
# flatten label and prediction tensors
inputs = inputs.view(-1)
targets = targets.view(-1)
intersection = (inputs * targets).sum()
dice = (2. * intersection + smooth) / (inputs.sum() + targets.sum() + smooth)
return 1 - dice
class DiceBCELoss(nn.Module):
def __init__(self, weight=None, size_average=True):
super(DiceBCELoss, self).__init__()
def forward(self, inputs, targets, smooth=1):
# comment out if your model contains a sigmoid or equivalent activation layer
inputs = F.sigmoid(inputs)
# flatten label and prediction tensors
inputs = inputs.view(-1)
targets = targets.view(-1)
intersection = (inputs * targets).sum()
dice_loss = 1 - (2. * intersection + smooth) / (inputs.sum() + targets.sum() + smooth)
BCE = F.binary_cross_entropy(inputs, targets, reduction='mean')
Dice_BCE = BCE + dice_loss
return Dice_BCE
# ALPHA = 0.8
# GAMMA = 2
class FocalLoss(nn.Module):
def __init__(self, alpha=0.25, gamma=2):
super(FocalLoss, self).__init__()
self.alpha = alpha
self.gamma = gamma
def forward(self, inputs: torch.Tensor, targets: torch.Tensor):
# target 的需要进行onehot编码
if inputs.shape != targets.shape:
batch_size, num_classes = inputs.shape
targets = torch.nn.functional.one_hot(targets, num_classes)
targets = targets.to(inputs.device, dtype=torch.float)
# mean > sum
return sigmoid_focal_loss(inputs, targets, alpha=self.alpha, gamma=self.gamma, reduction="mean")
# ALPHA = 0.5
# BETA = 0.5
class TverskyLoss(nn.Module):
def __init__(self, weight=None, size_average=True):
super(TverskyLoss, self).__init__()
def forward(self, inputs, targets, smooth=1, alpha=0.5, beta=0.5):
# comment out if your model contains a sigmoid or equivalent activation layer
inputs = F.sigmoid(inputs)
# flatten label and prediction tensors
inputs = inputs.view(-1)
targets = targets.view(-1)
# True Positives, False Positives & False Negatives
TP = (inputs * targets).sum()
FP = ((1 - targets) * inputs).sum()
FN = (targets * (1 - inputs)).sum()
Tversky = (TP + smooth) / (TP + alpha * FP + beta * FN + smooth)
return 1 - Tversky
# ALPHA = 0.5
# BETA = 0.5
# GAMMA = 1
class FocalTverskyLoss(nn.Module):
def __init__(self, weight=None, size_average=True):
super(FocalTverskyLoss, self).__init__()
def forward(self, inputs, targets, smooth=1, alpha=0.5, beta=0.5, gamma=1):
# comment out if your model contains a sigmoid or equivalent activation layer
inputs = F.sigmoid(inputs)
# flatten label and prediction tensors
inputs = inputs.view(-1)
targets = targets.view(-1)
# True Positives, False Positives & False Negatives
TP = (inputs * targets).sum()
FP = ((1 - targets) * inputs).sum()
FN = (targets * (1 - inputs)).sum()
Tversky = (TP + smooth) / (TP + alpha * FP + beta * FN + smooth)
FocalTversky = (1 - Tversky) ** gamma
return FocalTversky
class AddTransTextLoss(nn.Module):
def __init__(self, weight=0.5):
super(AddTransTextLoss, self).__init__()
self.loss_fn = torch.nn.CrossEntropyLoss()
self.weight = weight
def forward(self, inputs, inputs2, targets):
"""
@inputs: 对应原始输出样本
@inputs2:对应的原始输出正样本
"""
main_loss = self.loss_fn(inputs, targets) # 计算主要的损失
auxi_loss = self.loss_fn(inputs2, targets) # 计算辅助的损失: 利用翻译结果进行分类
tot_loss = main_loss + auxi_loss * self.weight
return tot_loss, auxi_loss
class ContrastiveLoss(nn.Module):
def __init__(self, num_labels, weight=0.5, temp=0.05):
super(ContrastiveLoss, self).__init__()
self.main_loss_fn = torch.nn.CrossEntropyLoss()
self.auxi_loss_fn = torch.nn.BCEWithLogitsLoss()
self.weight = weight
self.num_labels = num_labels
self.temp = temp
def forward(self, inputs, targets, embs, embs2):
"""
@input: logits
@target: labels
@embeds: 向量表征
@embs2: 对应的正样本的表征
"""
main_loss = self.main_loss_fn(inputs, targets) # 计算主要的损失
deivce = main_loss.device
# 少样本可能需要辅助正样本
embs = torch.concat([embs, embs2], dim=0)
targets = torch.concat([targets, targets], dim=0)
# 进行正则化
batch_size, emb_size = embs.shape
norm_embs = F.normalize(embs, dim=1, p=2)
sim_score = torch.matmul(norm_embs, norm_embs.T)
sim_score = sim_score - torch.eye(batch_size).to(deivce) * 1e12 # 自己和自己相似度最小
sim_score = sim_score / self.temp # 温度参数进行处理
# 计算label
one_hot = F.one_hot(targets, self.num_labels).detach().cpu()
multi_labels = torch.matmul(one_hot, one_hot.T) # 是一个mutil label问题
mask = 1 - torch.eye(batch_size) # 自己不能和自己计算相似性
multi_labels = (mask * multi_labels).to(deivce)
auxi_loss = self.auxi_loss_fn(sim_score, multi_labels)
# print(main_loss, auxi_loss)
tot_loss = main_loss + auxi_loss * self.weight
return tot_loss, auxi_loss
if __name__ == '__main__':
import numpy as np
loss_func = ContrastiveLoss(num_labels=55)
inputs = torch.FloatTensor(np.random.randn(10, 55))
targets = torch.LongTensor(np.random.randint(0, 55, 10))
print(targets)
loss = loss_func(inputs, targets, inputs, inputs)
print(loss)
def findPeakElement(self, nums: List[int]) -> int:
left, right = 0, len(nums) - 1
while left < right:
mid = (left + right) >> 1
if nums[mid] > nums[mid + 1]:
right = mid
else:
left = mid + 1
return left
# 单调队列 https://zhuanlan.zhihu.com/p/447209490
# 单调队列主要是为了求滑动窗口最大/最小值。单调队列是双端队列(首尾两边都可以append和pop)。具体而言,我们会在单调队列的队尾pop和append,会在队首pop。队首的元素是我们需要的最值(这一点非常重要),最大值就递减反之递增
# https://leetcode.cn/problems/sliding-window-maximum/
def maxSlidingWindow(self, nums: List[int], k: int) -> List[int]:
n = len(nums)
q = collections.deque()
for i in range(k):
while q and nums[i] >= nums[q[-1]]:
q.pop()
q.append(i)
ans = [nums[q[0]]]
for i in range(k, n):
while q and nums[i] >= nums[q[-1]]:
q.pop()
q.append(i)
while q[0] <= i - k:
q.popleft()
ans.append(nums[q[0]])
return ans
arr = [2, 3, 7, 9, 5, 1, 6, 4, 3]
k = 3
# Find the maximum element in each sliding window of size k
result = max_sliding_window(arr, k)
# 单调栈
# 单调栈主要用来求每一个当前数左右最近的比他小/大的数
# Largest Rectangle in Histogram
def largestRectangleArea(self, heights: List[int]) -> int:
maxArea = 0
stack = []
for index , height in enumerate(heights):
start = index
while start and stack[-1][1] > height:
i , h = stack.pop()
maxArea = max(maxArea , (index-i)*h)
start = i
stack.append((start , height))
for index , height in stack:
maxArea = max(maxArea , (len(heights)-index)*height)
return maxArea
# [Maximal Rectangle](https://leetcode.com/problems/maximal-rectangle/)
def maximalRectangle(self, matrix: List[List[str]]) -> int:
def mah(heights: List[int]) -> int:
st=[]
maxArea=0
for bar in heights+[-1]:
step=0
while st and st[-1][1]>=bar:
w,h=st.pop()
step+=w
maxArea=max(maxArea,step*h)
st.append((step+1,bar))
return maxArea
n,m=len(matrix),len(matrix[0])
ans=0
for i in range(n):
for j in range(m):
if matrix[i][j]=='1':
matrix[i][j]=1
else:
matrix[i][j]=0
if i>0 and matrix[i][j]!=0:
matrix[i][j]=matrix[i-1][j]+matrix[i][j]
ans=max(ans,mah(matrix[i]))
return ans
# 最近公共祖先
TreeNode* lowestCommonAncestor(TreeNode* root, TreeNode* p, TreeNode* q) {
if (!root || root == p || root == q) return root;
TreeNode *left = lowestCommonAncestor(root->left, p, q);
TreeNode *right = lowestCommonAncestor(root->right, p, q);
if (left && right) return root;
return left ? left : right;
}
# 双指针 接雨水
def trap(self, height: List[int]) -> int:
left, right = 0, len(height) - 1
l_max, r_max = 0, 0
res = 0
while left < right:
l_max = max(l_max, height[left])
r_max = max(r_max, height[right])
if l_max < r_max:
res += l_max - height[left]
left += 1
else:
res += r_max - height[right]
right -= 1
return res
# 双指针 几根柱子之间最大面积
def maxArea(self, height: List[int]) -> int:
left, right = 0, len(height) - 1
res = 0
while left < right:
# [left, right] 之间的矩形面积
cur_area = min(height[left], height[right]) * (right - left)
res = max(res, cur_area)
# 双指针技巧,移动较低的一边
if height[left] < height[right]:
left += 1
else:
right -= 1
return res
# [乘积最大子数组](https://leetcode.cn/problems/maximum-product-subarray)
#
幻觉原因
llama2-4K,gpt3-16K 预训练长度和推理不一致外推性/速度问题
# https://www.tensorflow.org/addons/api_docs/python/tfa/losses/contrastive_loss
# https://towardsdatascience.com/creating-custom-loss-functions-using-tensorflow-2-96c123d5ce6c
def contrastive_loss(y_true, y_pred, margin=0.5):
square_pred = tf.square(y_pred)
margin_square = tf.square(tf.maximum(margin - y_pred, 0))
return tf.reduce_mean((1-y_true) * square_pred + (y_true) * margin_square)
loss = contrastive_loss(y_true, y_pred, 0.5)
# return tf.reduce_sum(loss * is_contra)/tf.reduce_sum(is_contra)
return tf.reduce_mean(loss)
你对这个工作职位的理解 你工作倾向 你讨厌的工作习惯是什么 不同工作你care不同点是什么
work card strap why leave
expectation
do difficult(tech hard/no resource; read paper and )
shortcoming
Team Understanding
motivation strong interest in joining team your understand of tis role what makes this role attractive to you
how would you describe ur work/leadership style
your lead experience
what unique engineering challenges you foresee in TnS
Your expectation for this role, what you want to learn from it.
clip
https://www.cnblogs.com/chester-cs/p/17478159.html https://github.com/openai/CLIP/blob/main/clip/model.py https://github.com/moein-shariatnia/OpenAI-CLIP/blob/master/CLIP.py
剑指offer 经典
https://zhuanlan.zhihu.com/p/453204032
二分
https://leetcode.cn/problems/find-first-and-last-position-of-element-in-sorted-array 在排序数组中查找元素的第一个和最后一个位置 left_bound right_bound https://leetcode.cn/problems/search-a-2d-matrix-ii/solutions/ #搜索二维矩阵
https://leetcode.cn/problems/search-in-rotated-sorted-array/ # 搜索旋转排序数组(关键点在和开头结尾的比一下)
-欧拉距离