azl397985856 commented 1 week ago

面试题 17.17 多次搜索

入选理由

暂无

题目地址

https://leetcode-cn.com/problems/multi-search-lcci

前置知识

字符串匹配

Trie

题目描述


给定一个较长字符串 big 和一个包含较短字符串的数组 smalls，设计一个方法，根据 smalls 中的每一个较短字符串，对 big 进行搜索。输出 smalls 中的字符串在 big 里出现的所有位置 positions，其中 positions[i]为 smalls[i]出现的所有位置。

示例：

输入： big = "mississippi" smalls = ["is","ppi","hi","sis","i","ssippi"] 输出： [[1,4],[8],[],[3],[1,4,7,10],[5]] 提示：

0 <= len(big) <= 1000 0 <= len(smalls[i]) <= 1000 smalls 的总字符数不会超过 100000。你可以认为 smalls 中没有重复字符串。所有出现的字符均为英文小写字母。

Martina001 commented 1 week ago

class Solution {
    public int[][] multiSearch(String big, String[] smalls) {
        // 思路：将smalls中的字符串构建出一个前缀树 写一个search(word)方法 查询树中是否有word的前缀 并将其返回
        // 这个和之前的实现前缀树的不同点在于
        // 之前是startWithPrefix判断树中是否有字符串的前缀为prefix；这次是要返回树中word的所有前缀，并且树节点的val存储树中字符串值
        Trie root = new Trie();
        for (String word : smalls) {
            root.insert(word);
        }
        // resMap存储每个smalls中word在target中的开始索引值们
        Map<String, List<Integer>> resMap = new HashMap();
        int n = big.length();
        for (int i = 0; i < n; i++) {
            String target = big.substring(i);
            List<String> prefixs = root.searchPrefixListOfTarget(target);
            for (String word : prefixs) {
                // 为啥getOrDefault没用
                // resMap.getOrDefault(word, new ArrayList()).add(i);
                if(!resMap.containsKey(word)){
                    resMap.put(word, new ArrayList<>());
                }
                resMap.get(word).add(i);
            }
        }

        // 把resMap转成对应的数组
        // 不能直接初始化res长度为new int[smalls.length][n] 不然就会被赋初始值0
        int[][] res = new int[smalls.length][];
        for (int i = 0; i < smalls.length; i++) {
            String word = smalls[i];
            List<Integer> indexs = resMap.get(word);
            if (indexs == null || indexs.size() == 0) {
                res[i] = new int[0];
                continue;
            }
            int j = 0;
            res[i] = new int[indexs.size()];
            for (int index : indexs) {
                res[i][j++] = index;
            }
        }
        return res;
    }

    class Trie {
        String val;
        Trie[] child;

        public Trie() {
            child = new Trie[26];
        }

        void insert(String word) {
            Trie cur = this;
            for (char c : word.toCharArray()) {
                if (cur.child[c - 'a'] == null) {
                    cur.child[c - 'a'] = new Trie();
                }
                cur = cur.child[c - 'a'];
            }
            // 节点中存储word字符串
            cur.val = word;
        }

        // 获取 在target中是前缀的当前树中的字符串
        List<String> searchPrefixListOfTarget(String target) {
            Trie cur = this;
            List<String> res = new ArrayList();
            for (char c : target.toCharArray()) {
                // 如果发现前缀不匹配了 直接break
                if (cur.child[c - 'a'] == null) {
                    break;
                }
                cur = cur.child[c - 'a'];
                if (cur.val != null && cur.val != "") {
                    res.add(cur.val);
                }
            }
            return res;
        }
    }
}

CathyShang commented 1 week ago

smalls构建前缀树，big子集遍历

import collections
class Trie:
    def __init__(self, words):
        self.d = {}
        # t = self.d 不能写在循环外面，一次到底了
        for word in words:
            t = self.d # 回到头部，回到字典嵌套第一层而不是尾层
            for w in word:
                if w not in t:
                    t[w] = {}
                t = t[w]
            t['end'] = word 
        # 不止一个根的前缀树

    def search(self, s):
        t = self.d
        res = []
        for w in s:
            if w not in t:
                break
            t = t[w]
            if 'end' in t:
                res.append(t['end'])
        return res

class Solution:
    def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
        node = Trie(smalls)
        dic = collections.defaultdict(list)

        for i in range(len(big)):
            match_words = node.search(big[i:])
            for word in match_words:
                dic[word].append(i)

        ans = []
        for word in smalls:
            ans.append(dic[word])
        return ans

时间复杂度: $O(n*m)$ n表示big的长度

空间复杂度: $O(m*k)$ m表示smalls的长度，k表示smalls中最长字符串的长度，最坏的情况时都匹配上

smallppgirl commented 1 week ago

class Trie:
    def __init__(self, words):
        self.d = {}
        for word in words:
            t = self.d
            for w in word:
                if w not in t:
                    t[w] = {}
                t = t[w]
            t['end'] = word

    def search(self, s):
        t = self.d
        res = []
        for w in s:
            if w not in t:
                break
            t = t[w]
            if 'end' in t:
                res.append(t['end'])
        return res

class Solution:
    def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
        trie = Trie(smalls)
        hit = collections.defaultdict(list)

        for i in range(len(big)):
            matchs = trie.search(big[i:])
            for word in matchs:
                hit[word].append(i)

        res = []
        for word in smalls:
            res.append(hit[word])
        return res

zhiyuanpeng commented 1 week ago

class Trie:
    def __init__(self, words):
        self.d = {}
        for word in words:
            t = self.d
            for w in word:
                if w not in t:
                    t[w] = {}
                t = t[w]
            t['end'] = word

    def search(self, s):
        t = self.d
        res = []
        for w in s:
            if w not in t:
                break
            t = t[w]
            if 'end' in t:
                res.append(t['end'])
        return res

class Solution:
    def multiSearch(self, big: str, smalls: List[str]) -> List[List[int]]:
        trie = Trie(smalls)
        hit = collections.defaultdict(list)

        for i in range(len(big)):
            matchs = trie.search(big[i:])
            for word in matchs:
                hit[word].append(i)

        res = []
        for word in smalls:
            res.append(hit[word])
        return res

Dtjk commented 1 week ago

class Solution { public: struct Trie{ int smallIndex; Trie next[26]; Trie(){ smallIndex=-1; memset(next,0,sizeof(next)); } }; vector<vector> res; Trie root=new Trie(); void insert(string s,int s_index){ Trie* node=root; for(auto ch:s){ if(node->next[ch-'a']==NULL){ node->next[ch-'a']=new Trie(); } node=node->next[ch-'a']; } node->smallIndex=s_index; }

void search(string subBig,int index){ 
    Trie* node=root;
    for(auto ch:subBig){
        if(node->next[ch-'a']==NULL) return;
        node=node->next[ch-'a'];
        if(node->smallIndex!=-1){ 
            res[node->smallIndex].push_back(index);
        }           
    }
}

vector<vector<int>> multiSearch(string big, vector<string>& smalls) {
    res.resize(smalls.size());
    for(int i=0;i<smalls.size();i++){ 
        insert(smalls[i],i);
    }
    for(int i=0;i<big.size();i++){ 
        string subBig=big.substr(i);
        search(subBig,i);
    }
    return res;
}

};

leetcode-pp / 91alg-13-daily-check

【Day 75 】2024-06-21 - 面试题 17.17 多次搜索 #76

面试题 17.17 多次搜索

入选理由

题目地址

前置知识

题目描述