acrookston / ACRAutoComplete

A text auto-complete library written in Swift for iOS using the trie data structure
MIT License
13 stars 3 forks source link

it is not fast enough for HackerRank challenge #2

Closed elozoya closed 7 years ago

elozoya commented 7 years ago

I am learning about the Trie Data Structure because every day I am trying to solve some challenges on HackerRank website, and I found an interesting problem called: "Tries: Contacts". This is the link: https://www.hackerrank.com/challenges/ctci-contacts

I am still solving this problem using Swift. I tried to use your code, and it gives the right answers, but it is not fast enough. I think it would be interesting for you to make your library faster.

This is the code that I used.

// Enter your code here 

// Library
import Foundation

public protocol Searchable : Hashable {
    func keywords() -> [String]
}

open class AutoComplete<T : Searchable> {

    var nodes : [Character : AutoComplete<T>]?
    var items  : [T]?

    public init() { }

    public func insert(_ object: T) {
        for string in object.keywords() {
            var tokens =  tokenize(string)
            var at = 0
            var max = tokens.count
            insert(&tokens, at: &at, max: &max, object: object)
        }
    }

    private func insert(_ tokens: inout [Character], at: inout Int, max: inout Int, object: T) {

        if at < max {

            let current = tokens[at]
            at += 1

            if nodes == nil {
                nodes = [Character : AutoComplete<T>]()
            }

            if nodes![current] == nil {
                nodes![current] = AutoComplete<T>()
            }

            nodes![current]!.insert(&tokens, at: &at, max: &max, object: object)

        } else {
            if items == nil {
                items = [T]()
            }
            items!.append(object)
        }
    }

    public func insert(set: [T]) {
        for object in set {
            insert(object)
        }
    }

    public func search(_ string: String) -> [T] {
        var mergedResults : Set<T>?

        for word in string.components(separatedBy: " ") {
            var wordResults = Set<T>()
            var tokens = tokenize(word)
            find(&tokens, into: &wordResults)
            if mergedResults == nil {
                mergedResults = wordResults
            } else {
                mergedResults = mergedResults!.intersection(wordResults)
            }
        }

        return mergedResults == nil ? [] : Array(mergedResults!)
    }

    func insertAll(into results: inout Set<T>) {
        if let items = items {
            for t in items {
                results.insert(t)
            }
        }

        guard let nodes = nodes else {
            return
        }

        for (_, child) in nodes {
            child.insertAll(into: &results)
        }
    }

    private func find(_ tokens : inout [Character], into results: inout Set<T>) {
        guard tokens.count > 0 else {
            insertAll(into: &results)
            return
        }

        guard let nodes = nodes else {
            return
        }

        let current = tokens.remove(at: 0)

        nodes[current]?.find(&tokens, into: &results)
    }

    private func tokenize(_ string: String) -> [Character] {
        return Array(string.lowercased().characters)
    }
}

class Word : Searchable {

    var word : String

    init(word: String) {
        self.word = word
    }

    func keywords() -> [String] {
        return [word]
    }
}

extension Word : Hashable {
    var hashValue: Int { return word.hashValue }

    static func == (lhs: Word, rhs: Word) -> Bool {
        return lhs.word == rhs.word
    }
}

// Code
let N = Int(readLine()!)!
let autocomplete = AutoComplete<Word>()

for i in 0..<N {
    let line = readLine()!.characters.split(separator: " ").map{ String($0) }
    let action = line[0]
    let parameter = line[1]
    if action == "add" {
        autocomplete.insert(Word(word: parameter))
    } else if action == "find" {
        print(autocomplete.search(parameter).count)
    }
}
acrookston commented 7 years ago

Cool. That's really interesting. How did you measure speed?

I realize I didn't do a textbook implementation of a Trie structure, I hadn't seen one in Swift before so I was adapting from other languages for the use case I had.

Since then I've found this repo with lots of algorithms and data structures; https://github.com/raywenderlich/swift-algorithm-club/tree/master/Trie Not sure if it's the same solution as you provided but figured I would let you know about it.

I'll take a look a closer look at your stuff when I've got some more time. Thanks.

elozoya commented 7 years ago

I don't know how to discover the measure speed on HackerRank, but it is easy to measure your code using XCTest. I solved that challenge using c++, but I still can't do it using Swift. I have tried different algorithms, but for some reason they are not fast enough.

I saw that repo (https://github.com/raywenderlich/swift-algorithm-club/tree/master/Trie), but that Trie class does not have implemented a function to get the number of words with a common prefix.

My best solution is this one (it only got 19.23 out of 50 points):

// My Trie Implementation
class TrieNode {
    var words:Int
    var prefixes:Int
    var edges:[Character:TrieNode]

    init() {
        words = 0
        prefixes = 0
        edges = [:]
    }
}

func addWord(node: TrieNode, word: String) {
    var n = node
    for c in word.characters {
        n.prefixes += 1
        if n.edges[c] == nil {
            n.edges[c] = TrieNode()
        }
        n = n.edges[c]!
    }
    n.words += 1
}

func countPrefixes(node: TrieNode, prefix:String) -> Int {
    var w = prefix
    if w.isEmpty {
        return node.prefixes + node.words
    }
    let c = w.remove(at: w.startIndex)
    if node.edges[c] == nil {
        return 0
    }
    return countPrefixes(node: node.edges[c]!, prefix: w)
}

// Code
let N = Int(readLine()!)!
let root = TrieNode()

for i in 0..<N {
    let line = readLine()!.characters.split(separator: " ").map{ String($0) }
    let action = line[0]
    var parameter = line[1]
    if action == "add" {
        addWord(node: root, word: parameter)
    } else if action == "find" {
        print(countPrefixes(node: root, prefix: parameter))
    }
}
acrookston commented 7 years ago

I'm going to close this because the purpose of the library is not to be fast at HackerRank. I've tried gutting the library down to the very basics and tried everything but a linked list trie implementation. Maybe it's just the way HR implements Swift that makes it slow? When I run test case #2 locally I got it down to 37seconds with this implementation, which obviously is tailored to solving the problem, not the problem I was solving with the library.

// Library
import Foundation

open class AutoComplete {

    var character : Character?
    var children  : [AutoComplete]?
    var items     : [String]?

    public init(_ char: Character?=nil) {
        character = char
    }

    public func insert(_ string: String) {
        var tokens =  tokenize(string)
        var at = 0
        var max = tokens.count
        insert(&tokens, at: &at, max: &max, object: string)
    }

    private func insert(_ tokens: inout [Character], at: inout Int, max: inout Int, object: String) {

        if at < max {

            let current = tokens[at]
            at += 1

            if children == nil {
                children = [AutoComplete]()
            }

            for child in children! {
                if child.character == current {
                    child.insert(&tokens, at: &at, max: &max, object: object)
                    return
                }
            }

            // Only here if not in children already
            let child = AutoComplete(current)
            child.insert(&tokens, at: &at, max: &max, object: object)
            children!.append(child)

        } else {
            if items == nil {
                items = [String]()
            }
            items!.append(object)
        }
    }

    public func search(_ string: String) -> Int {
        var results = 0
        var tokens = tokenize(string)
        var at = 0
        var max = tokens.count
        find(&tokens, at: &at, max: &max, into: &results)
        return results
    }

    func insertAll(into results: inout Int) {
        if let items = items {
            results += items.count
        }

        guard let children = self.children else {
            return
        }

        for child in children {
            child.insertAll(into: &results)
        }
    }

    private func find(_ tokens: inout [Character], at: inout Int, max: inout Int, into results: inout Int) {
        if at >= max {
            insertAll(into: &results)

        } else {

            guard let children = self.children else {
                return
            }

            let current = tokens[at]
            at += 1

            for child in children {
                if child.character == current {
                    child.find(&tokens, at: &at, max: &max, into: &results)
                    return
                }
            }
        }
    }

    private func tokenize(_ string: String) -> [Character] {
        return Array(string.characters)
    }
}

// Execution
let autocomplete = AutoComplete()

func handleLine(_ line: String) {
    let pair = line.components(separatedBy: " ")
    if pair.first == "add" {
        autocomplete.insert(pair[1])
    } else if pair.first == "find" {
        print(autocomplete.search(pair[1]))
    }
}

while let line = readLine() {
  handleLine(line)
}