leetcode819：最常见的单词（most_common_word_819）

carloscn commented 1 year ago

问题描述

给定一个段落 (paragraph) 和一个禁用单词列表 (banned)。返回出现次数最多，同时不在禁用列表中的单词。

题目保证至少有一个词不在禁用列表中，而且答案唯一。

禁用列表中的单词用小写字母表示，不含标点符号。段落中的单词不区分大小写。答案都是小写字母。

示例：

输入: paragraph = "Bob hit a ball, the hit BALL flew far after it was hit." banned = ["hit"] 输出: "ball" 解释: "hit" 出现了3次，但它是一个禁用的单词。 "ball" 出现了2次 (同时没有其他单词出现2次)，所以它是段落里出现次数最多的，且不在禁用列表中的单词。注意，所有这些单词在段落里不区分大小写，标点符号需要忽略（即使是紧挨着单词也忽略，比如 "ball,"）， "hit"不是最终的答案，虽然它出现次数更多，但它在禁用单词列表中。

提示：

1 <= 段落长度 <= 1000 0 <= 禁用单词个数 <= 100 1 <= 禁用单词长度 <= 10 答案是唯一的, 且都是小写字母 (即使在 paragraph 里是大写的，即使是一些特定的名词，答案都是小写的。) paragraph 只包含字母、空格和下列标点符号!?',;. 不存在没有连字符或者带有连字符的单词。单词里只包含字母，不会出现省略号或者其他标点符号。

来源：力扣（LeetCode）链接：https://leetcode.cn/problems/most-common-word

carloscn commented 1 year ago

问题分析

这个题的rust版本非常方便，使用闭包和迭代器很快就把输入的数组按照条件分开。而C语言，需要自己造轮子，一步步的对字符串进行拆分，最后转为字符串数组。

C语言版本

static inline char spec_convert(char a)
{
    if (('!' == a) ||
        ('?' == a) ||
        (';' == a) ||
        ('.' == a) ||
        (' ' == a) ||
        (',' == a) ||
        ('\'' == a)) {
        return '\0';
    } else if ((a < 'Z') || (a > 'A')) {
        return utils_conv_lowercase(a);
    }

    return a;
}

static bool is_in_bans(const char *in, STRLIST_T *banlist)
{
    size_t i = 0;
    size_t in_len = strlen(in);
    size_t len = strlist_get_size(banlist);

    if (in_len == 0) {
        return true;
    }

    for (i = 0; i < len; i ++) {
        if (strcmp(in, strlist_get_str_at(banlist, i)) == 0) {
            LOG("the in is %s, bans is %s\n", in, strlist_get_str_at(banlist, i));
            return true;
        }
    }

    return false;
}

static size_t max_count(STRLIST_T *list)
{
    char *target_str = NULL;
    size_t count = 0;
    size_t max_count = 0;
    size_t i = 0, j = 0;
    size_t index = 0;
    size_t sz = strlist_get_size(list);

    for (i = 0; i < sz; i ++) {
        target_str = strlist_get_str_at(list, i);
        for (j = 0; j <= i; j ++) {
            if (strcmp(target_str, strlist_get_str_at(list, j)) == 0) {
                count ++;
            }
        }
        if (count > max_count) {
            max_count = count;
            index = i;
        }
        count = 0;
    }

    return index;
}

static int32_t most_common_word(const char *paragraph, STRLIST_T *banlist, char *out)
{
    int32_t ret = 0;
    size_t in_len = 0;
    size_t i = 0;
    STRLIST_T *paralist = NULL;
    char *dup_para = NULL;
    char c = 0;
    char *dup_para_header = NULL;
    size_t max_index = 0;

    UTILS_CHECK_PTR(paragraph);
    UTILS_CHECK_PTR(banlist);
    UTILS_CHECK_PTR(out);
    UTILS_CHECK_LEN(in_len = strlen(paragraph));

    paralist = strlist_malloc();
    UTILS_CHECK_PTR(paralist);

    dup_para = strdup(paragraph);
    UTILS_CHECK_PTR(dup_para);

    dup_para_header = dup_para;
    for (i = 0; i < in_len; i ++) {
        c = spec_convert(dup_para[i]);
        dup_para[i] = c;
        if (c == '\0') {
            if (is_in_bans(dup_para_header, banlist) == false) {
                strlist_add(paralist, dup_para_header);
                LOG("add str to list: %s\n", dup_para_header);
            }
            dup_para_header = dup_para + i + 1;
        }
    }

    strlist_infolog(paralist);

    max_index = max_count(paralist);
    strcpy(out, strlist_get_str_at(paralist, max_index));

finish:
    strlist_free(paralist);
    UTILS_SAFE_FREE(dup_para);
    return ret;
}

Rust版本

fn most_common_word(paragraph:&String, banlist:&Vec<String>) -> Result<String, &'static str>
{
    let mut count:usize = 0;
    let mut max_count:usize = 0;
    let mut index:usize = 0;
    let mut max_index:usize = 0;
    let param:String = paragraph.to_lowercase();
    let mut param_vec:Vec<&str> =  param.split(' ')
        .map(|x:&str| {
            x.trim_end_matches(|a:char| ('!' == a) ||
                                        ('?' == a) ||
                                        (';' == a) ||
                                        ('.' == a) ||
                                        (' ' == a) ||
                                        (',' == a) ||
                                        ('\'' == a))
            }
        ).collect();
    param_vec = param_vec.iter().filter(|&x| !(*banlist).contains(&x.to_string())).cloned().collect();

    for e in &param_vec {
        for k in &param_vec {
            if **e == **k {
                count += 1;
            }
        }
        if count > max_count {
            max_count = count;
            max_index = index;
        }
        count = 0;
        index += 1;
    }

    Ok(param_vec[max_index].to_string())
}

carloscn commented 1 year ago

code

https://review.gerrithub.io/c/carloscn/structstudy/+/550162 https://github.com/carloscn/structstudy/commit/426025984c6981ab2f3fdbc8f41da551deb104a4

carloscn / structstudy