NewBornRustacean / mark

MARK; Morphological Analysis with Rust for Korean
2 stars 1 forks source link

와.. 챗지피티 개쩌네요 코드 바로 써줌ㄷㄷㄷ #12

Open NewBornRustacean opened 1 year ago

NewBornRustacean commented 1 year ago
use std::collections::{HashMap, VecDeque};

// Trie node structure
#[derive(Default)]
struct TrieNode {
    children: HashMap<char, usize>,
    failure: Option<usize>,
    output: Vec<String>,
}

// Aho-Corasick Automaton structure
struct AhoCorasick {
    trie: Vec<TrieNode>,
}

impl AhoCorasick {
    fn new() -> Self {
        AhoCorasick {
            trie: vec![TrieNode::default()],
        }
    }

    fn add_pattern(&mut self, pattern: String) {
        let mut current = 0; // Start from the root node
        for ch in pattern.chars() {
            let child = self.trie[current].children.entry(ch).or_insert_with(|| {
                self.trie.push(TrieNode::default());
                self.trie.len() - 1
            });
            current = *child;
        }
        self.trie[current].output.push(pattern);
    }

    fn build_failure_transitions(&mut self) {
        let mut queue = VecDeque::new();

        // Set failure transitions for depth-1 nodes
        for (_, &child) in &self.trie[0].children {
            self.trie[child].failure = Some(0);
            queue.push_back(child);
        }

        // Calculate failure transitions for other nodes
        while let Some(parent) = queue.pop_front() {
            for (&ch, &child) in &self.trie[parent].children {
                queue.push_back(child);
                let mut failure = self.trie[parent].failure.unwrap();

                while failure != 0 && !self.trie[failure].children.contains_key(&ch) {
                    failure = self.trie[failure].failure.unwrap();
                }

                if self.trie[failure].children.contains_key(&ch) {
                    failure = self.trie[failure].children[&ch];
                }

                self.trie[child].failure = Some(failure);
                self.trie[child].output.extend(&self.trie[failure].output);
            }
        }
    }

    fn search(&self, text: &str) -> Vec<&str> {
        let mut results = Vec::new();
        let mut current = 0; // Start from the root node

        for (i, ch) in text.chars().enumerate() {
            while current != 0 && !self.trie[current].children.contains_key(&ch) {
                current = self.trie[current].failure.unwrap();
            }

            if self.trie[current].children.contains_key(&ch) {
                current = self.trie[current].children[&ch];
            }

            for pattern in &self.trie[current].output {
                let start_index = i - pattern.len() + 1;
                results.push(&text[start_index..=i]);
            }
        }

        results
    }
}

fn main() {
    let mut ac = AhoCorasick::new();
    ac.add_pattern("he".to_string());
    ac.add_pattern("she".to_string());
    ac.add_pattern("his".to_string());
    ac.add_pattern("hers".to_string());

    ac.build_failure_transitions();

    let text = "ushers";
    let matches = ac.search(text);

    println!("Text: {}", text);
    println!("Matches: {:?}", matches);
}
NewBornRustacean commented 1 year ago

는 컴파일 에러ㅋㅋㅋㅋㅋㅋㅋㅋㅋ 아 다행이다 아직 개발 더 해먹을 수 있겠다