Open azl397985856 opened 2 years ago
Problem Link
Ideas
Complexity: hash table and bucket
Code
from collections import Counter
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
res = []
if not s or not words: return []
word_len = len(words[0])
words_len = word_len * len(words)
n = len(s)
word_dict = Counter(words)
for i in range(0,n - words_len +1):
word_tmp = s[i:i+ words_len]
count = []
for j in range(0,words_len, word_len):
count.append(word_tmp[j:j+word_len])
if Counter(count) == word_dict:
res.append(i)
return res
1.生成单词记录表 单词:次数
2.遍历滑动窗口[0,单词长度x单词个数],滑动窗口子串[0,单词长度]记录表 与单词记录表 一一匹配,单词不存在/次数超过则不匹配; 若遍历到最后,则该滑动窗口符合题意
/**
* @param {string} s
* @param {string[]} words
* @return {number[]}
*/
var findSubstring = function(s, words) {
const ret = []
// 统计出现的次数
const map = {}
for (let i = 0; i < words.length; i++) {
const word = words[i]
map[word] = map[word] || 0
map[word]++
}
const sLen = s.length
const wordLen = words[0].length
const wordsNum = words.length
const wordStrLen = wordLen * wordsNum
for (let start = 0; start < sLen - wordStrLen + 1; start++) {
// 当前窗口,当前用于判断的子串
const subStr = s.slice(start, start + wordStrLen)
// 记录个数
const subMap = {}
let subStart = 0
// 处理是否符合,循环中会因不符合时跳出处理,故循环后要判断是否符合
for (; subStart < subStr.length; subStart += wordLen) {
const word = subStr.slice(subStart, subStart + wordLen)
if (!(word in map)) break
subMap[word] = subMap[word] || 0
subMap[word]++
if (subMap[word] > map[word]) break
}
// 上面循环处理了不成立的情况,当subStart在子串的末尾
// 说明当前子串已经遍历完,且当前子串符合
if (subStart === subStr.length) {
ret.push(start)
}
}
return ret
};
时间复杂度:O(n)
空间复杂度:O(m)
滑动窗口+哈希表 参考了这个链接 具体的实现方式是:
Counter
记录所有words出现的次数wordLen
和words的个数wordNum
,这两者相乘后就是我们的window_size
s[idx:idx+wordLen]
得到当前window里的wordsubWords
哈希表来记录当前word出现的次数subWords[word]+=1
idx == i+ window_size
那么意味着当前window 可以是一个solution,cnt +=1
class Solution(object):
def findSubstring(self, s, words):
"""
:type s: str
:type words: List[str]
:rtype: List[int]
"""
allWords = collections.Counter(words)
wordNum = len(words)
wordLen = len(words[0])
window_size = wordNum*wordLen
res = []
for i in range(len(s) - window_size+1 ):
subWords = collections.defaultdict(int)
idx = i
while idx< i+ window_size:
curWord = s[idx: idx+wordLen]
if curWord not in allWords or subWords[curWord] == allWords[curWord]:
break
subWords[curWord] += 1
idx += wordLen
if idx == i+ window_size:
res.append(i)
return res
时间:len(s) = n, len(words) = m, len(words[0]) = h
遍历了每个char ,O(n), 每次最多遍历 m次,因为对哈希表操作均为 O(1) , 所以 总时间为O(nm)
空间:两个哈希表,O(hm) 因为h必然是小于30,所以可以视为O(m)
Hashmap +Sliding Window
def findSubstring(self, s: str, words: List[str]) -> List[int]:
if not words:
return []
m = len(words)
n = len(words[0])
r=0
l=0
res=[]
Counter=collections.Counter(words)
while l<=len(s)-m*n:
subwords=collections.defaultdict(int)
r=l
while r<l+m*n:
substr=s[r:r+n]
if substr not in Counter or Counter[substr]==subwords[substr]:
break
subwords[substr]+=1
r+=n
if r==l+m*n:
res.append(l)
l+=1
return res
时间复杂度: O(N*M) M is the number of the words 空间复杂度: O(M)
官方题解
使用语言:Python3
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
from collections import Counter
if not s or not words:return []
one_word = len(words[0])
all_len = len(words) * one_word
n = len(s)
words = Counter(words)
res = []
for i in range(0, n - all_len + 1):
tmp = s[i:i+all_len]
c_tmp = []
for j in range(0, all_len, one_word):
c_tmp.append(tmp[j:j+one_word])
if Counter(c_tmp) == words:
res.append(i)
return res
复杂度分析 时间复杂度:O(n) 空间复杂度:O(n)
python
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
n = len(s)
n_words = len(words)
l_words = len(words[0])
cnt_words = collections.Counter(words)
res = []
for i in range(n - n_words * l_words + 1):
cnt = {}
j = 0
while j < n_words:
word = s[i+j*l_words:i+(j+1)*l_words]
if word not in words:
break
cnt[word] = cnt.get(word, 0) + 1
if cnt[word] > cnt_words[word]:
break
j += 1
if j == n_words:
res.append(i)
return res
Python3 Code:
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
from collections import Counter
if not s or not words: return[]
one_word = len(words[0])
all_len = len(words)*one_word
n = len(s)
words =Counter(words)
res = []
for i in range(0,n-all_len+1):
temp = s[i:i+all_len]
c_temp =[]
for j in range(0,all_len,one_word):
c_temp.append(temp[j:j+one_word])
if words == Counter(c_temp):
res.append(i)
return res
复杂度分析
令 n 为数组长度。
class Solution: def findSubstring(self, s: str, words: List[str]) -> List[int]: from collections import Counter if not s or not words: return[] one_word = len(words[0]) all_len = len(words)*one_word n = len(s) words =Counter(words) res = [] for i in range(0,n-all_len+1): temp = s[i:i+all_len] c_temp =[] for j in range(0,all_len,one_word): c_temp.append(temp[j:j+one_word]) if words == Counter(c_temp): res.append(i) return res
Python3 Code:
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
from collections import Counter
if not s or not words:return []
one_word = len(words[0])
word_num = len(words)
n = len(s)
words = Counter(words)
res = []
for i in range(0, one_word):
cur_cnt = 0
left = i
right = i
cur_Counter = Counter()
while right + one_word <= n: # 哈希表 + 双指针的精髓是这两个while
w = s[right:right + one_word]
right += one_word
cur_Counter[w] += 1
cur_cnt += 1
while cur_Counter[w] > words[w]:
left_w = s[left:left+one_word]
left += one_word
cur_Counter[left_w] -= 1
cur_cnt -= 1
if cur_cnt == word_num : # 对比word的数量
res.append(left)
return res
复杂度分析
令 n 为数组长度。
这道 hard 题目居然被我做出来了!,虽然耗时比较长!
滑动窗口 + 哈希表。滑动窗口的大小为 k*len。
最关键的部分在于滑动窗口移动的时候,每次获取 len 长度的子串,并判断这个子串是否在 words 中,并构建子串出现次数的哈希表,同时要求子串出现的次数不能大于原来 words 中单词出现的次数。
时间复杂度:$O((n-k len) k)$,n 是字符串s的长度,k是words中单词的个数,len是每个单词的长度。
代码第二次修改参考了西法的剪枝代码,之前自己用嵌套 if 判断实在太傻了。
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
unordered_map<string, int> freq;
// 计算字符串数组中每个单词出现的频率
for(auto s1: words){
freq[s1]++;
}
vector<int> ret;
int len = words[0].size();
for(int i=0; i< s.size()-len*words.size()+1; i++){
int pos = i;
int num = 1;
unordered_map<string, int> freq2;
while(num <= words.size()){
auto target = s.substr(pos, len);
if(freq.count(target) == 0) break; // 剪枝
freq2[target]++; // 滑动窗口中子串出现次数+1
if(freq2[target] > freq[target]) break; // 剪枝
pos += len;
num++;
}
if(num-1 == words.size()) ret.push_back(i);
}
return ret;
}
};
Each word in the words will be exactly same length, but the same words could occur more than once, so we need to use a hash map to record the number of times a word appears in the words list.
Once we have the hashmap, we can get the length of the slide window which we would use to process through the original string s.
While iterate s, we slide the window by each character, in each window, we check if all the words appears exactly the same times as the word list, if so we have found one of the solutions.
class Solution {
public:
bool helper(string str, unordered_map<string, int> counter, int len) {
for (int i = 0; i < str.length(); i = i + len) {
string sub = str.substr(i, len);
if (counter.count(sub)) {
if (--counter[sub] == 0)
counter.erase(sub);
} else {
return false;
}
}
return counter.size() == 0;
}
vector<int> findSubstring(string s, vector<string>& words) {
vector<int> result;
std::unordered_map<string, int> counter;
for (string& word : words)
counter[word]++;
int window_size = words.size() * words[0].length();
for (int i = 0; i + window_size <= s.length(); i++) {
if (helper(s.substr(i, window_size), counter, words[0].length()))
result.push_back(i);
}
return result;
}
};
O(n * m), m is the size of the words
O(m) for the hash table to store the word frequence.
滑动窗口
const findSubstring = (s, words) => {
if (!s || !words || !words.length) return [];
let windows = {},
needs = {},
oneWordLen = words[0].length;
for (let w of words) {
needs[w] ? needs[w]++ : needs[w] = 1;
}
let l = 0,
r = 0,
count = 0,
needsKeyLen = Object.keys(needs).length,
ans = [];
for (let i = 0, i < oneWordLen; i++) {
windows = {};
r = l = i;
count = 0;
while(r <= s.length - oneWordLen) {
let w1 = s.slice(r, r + onwWordLen);
if (!needs[w1]) {
windows = {};
l = r;
count = 0;
continue;
}
windows[w1] ? windows[w1]++ : windows[w1] = 1;
if (windows[w1] === needs[w1]) count++;
while (count === needsKenLen) {
if (r - 1 === oneWordLen * words.length) ans.push(1);
let w2 = s.slice(l, l + oneWordLen);
l += oneWordLen;
if (needs[w2]) {
windows[w2]--;
if (windows[w2] < needs[w2]) count--;
}
}
}
}
return ans;
}
补卡
hashmap
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
res = []
wordsCount = collections.Counter(words)
wordLen, wordNum = len(words[0]), len(words)
subLen = wordLen * wordNum
for i in range(len(s) - subLen + 1):
subString = s[i : i+subLen]
tempCount = collections.Counter()
for j in range(0, subLen, wordLen):
word = subString[j : j+wordLen]
if word not in wordsCount:
break
tempCount.update({word : 1})
if tempCount[word] > wordsCount[word]:
break
if j == subLen - wordLen:
res.append(i)
return res
Time: O(nmk) Space: O(n)
滑动窗口+哈希表
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
hashmap1=dict()
k=len(words[0])
length=len(words)*k
res=[]
for i in range(len(words)):
if words[i] in hashmap1:
hashmap1[words[i]]+=1
else:
hashmap1[words[i]]=1
left=0
while(left<len(s)-length+1):
win=s[left:left+length]
j=0
chai=[]
hashmap2=dict()
while(j<length):
word=win[j:j+k]
if word not in hashmap1.keys():
break
if word not in hashmap2.keys():
hashmap2[word]=1
else:
hashmap2[word]+=1
if hashmap2[word]>hashmap1[word]:
break
if j==length-k:
res.append(left)
j+=k
left+=1
return res
nested iteration, 因为题目规定好words中的单词长度都是相同的,所以可以single_word的长度跳着排查。 当出现次数超过的时候,记得删除然后把指针往后移动即可。
class Solution:
def findSubstring(self, s: str, words: List[str]) -> List[int]:
word_set = Counter(words)
s_len = len(words[0]) # 因为 same length, 每s_len 步数往后跳
res = []
for i in range(s_len):
count, start = len(words), i
match_set = collections.defaultdict(int)
for j in range(i,len(s),s_len):
temp_string = s[j:j+s_len]
if temp_string in word_set:
match_set[temp_string] += 1
count -= 1
while match_set[temp_string] > word_set[temp_string]:
# 这个词出现次数超过了
match_set[s[start:start+s_len]] -= 1
start += s_len
count += 1
if count == 0:
res.append(start)
else:
# 复原count, temp_string不在, 直接跳过s_len个
count,start = len(words), j+s_len
match_set = collections.defaultdict(int)
return res
time complexity: O(N*M) M为words[0] 的长度
space complexity: O(N)
def findSubstring(self, s: str, words: List[str]) -> List[int]: allWords = collections.Counter(words) wordNum = len(words) wordLen = len(words[0]) res = [] for i in range(len(s) - wordNum wordLen + 1): subWords = collections.defaultdict(int) index = i while index < i + wordNum wordLen: curWord = s[index: index + wordLen] if curWord not in allWords or subWords[curWord] == allWords[curWord]: break subWords[curWord] += 1 index += wordLen if index == i + wordNum * wordLen: res.append(i) return res
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
// get size and num of given words
int word_size = words[0].size();
int word_num = words.size();
unordered_map<string, int> m1;
// get frequency of word appeared in words.
for(auto& word:words){m1[word]++;}
//results
vector<int> results;
// ensure all character can be the first word
for(int i=0; i < word_size; i++){
unordered_map<string, int>m2;
// two pointers, and count is the sign.
int left = i;
int right = left;
int count = 0;
// start searching
while(right + word_size <= s.size()){
// get part string and need to move right side.
string ss = s.substr(right, word_size);
right += word_size;
// ensure words are required words.
// bad case first
if(m1.count(ss)==0){
m2.clear();
left = right;
count = 0;
}
else{
// mark inside map
m2[ss]++;
count += 1;
// pop words until the num is correct
while(m2.at(ss) > m1.at(ss)){
string t = s.substr(left, word_size);
m2[t]--;
left+=word_size;
count--;
}
// mark pos;
if(count==word_num) results.push_back(left);
}
}
}
return results;
}
};
Time:O(N*M)(M为单词长度) Space:O(N)
vector<int> findSubstring(string str, vector<string> words) {
vector<int> res;
unordered_map<string,int> wordsDict;
for (auto v : words) {
if (wordsDict.find(v) == wordsDict.end())
wordsDict[v] = 0;
else
++wordsDict[v];
}
int wordsSize = words.size();
int wordLen = words.front().size();
for (int i = 0; i < (str.size() - wordsSize * wordLen + 1); ++i) {
int curIdx = i;
string cur = str.substr(curIdx, wordLen);
unordered_map<string,int> tmpDict(wordsDict);
while (tmpDict.find(cur) != tmpDict.end()) {
if (tmpDict[cur] == 0)
tmpDict.erase(cur);
else
--tmpDict[cur];
curIdx += wordLen;
cur = str.substr(curIdx, wordLen);
}
if (tmpDict.empty())
res.push_back(i);
}
return res;
}
30. 串联所有单词的子串
入选理由
暂无
题目地址
https://leetcode-cn.com/problems/substring-with-concatenation-of-all-words
前置知识
题目描述
注意子串要与 words 中的单词完全匹配,中间不能有其他字符,但不需要考虑 words 中单词串联的顺序。
示例 1: 输入: s = "barfoothefoobarman", words = ["foo","bar"] 输出:[0,9] 解释: 从索引 0 和 9 开始的子串分别是 "barfoo" 和 "foobar" 。 输出的顺序不重要, [9,0] 也是有效答案。 示例 2:
输入: s = "wordgoodgoodgoodbestword", words = ["word","good","best","word"] 输出:[]