RubyLouvre / algorithmbook

没有经过整理的知识才是徒然浪费时间,伤透脑筋!
109 stars 67 forks source link

后缀树 #15

Open RubyLouvre opened 5 years ago

RubyLouvre commented 5 years ago
 var count = 0
    function STrie(suffix){
       this.children = {}
       this.suffix =  suffix
       this.count = count++
    }
    class SuffixTire{
      constructor(str){
        var t = null;
        for(var i  = 0; i < str.length; i++){
          t = this.insert(t, str[i])
        }
        return this.getRoot(t)
      }
      insert(top, c){
        if(top == null){
          top = new STrie()
        }
        var node = top
        var newnode = new STrie()
        while( node && !(c in node.children)){
          newnode.suffix =  node.children[c] = new STrie(node)
          newnode = node.children[c]
          node = node.suffix
        }
        if(node){
          newnode.suffix =  node.children[c] 
        }
        return top.children[c]
      }
      getRoot(node){
        while(node.suffix){
          node = node.suffix
        }
        return node
      }
    }
    var a = new SuffixTire('cacao')
     console.log(a)
RubyLouvre commented 5 years ago
//由后向前构建
         class SuffixNode {
                constructor(s) {
                    this.children = [];
                    this.value = s;
                }
            }

            class SuffixTree {
                constructor() {
                    this.root = new SuffixNode("");
                }
                insert(word, symbol) {
                    var nodes = this.root.children;
                    while (word) {
                        this.addNodeByPrefix(nodes, word);
                        word = word.slice(1);
                    }
                    this.addEndSymbol(nodes, symbol || '')
                }
                addEndSymbol(nodes, symbol){
                  for (var i = 0; i < nodes.length; i++) {
                    var node = nodes[i];
                    if(node.isEnd){
                      node.value += symbol
                    }else if(node.children.length){
                      this.addEndSymbol(node.children, symbol)
                    }
                  }
                }
                addNodeByPrefix(nodes, word) {
                    var add = true;
                    for (var i = 0; i < nodes.length; i++) {
                        var node = nodes[i];
                        if (node.value[0] == word[0] ) {
                            //如果存在共同前缀
                            add = false;
                            var prefix = "";
                            for (var j = 0; j < word.length; j++) {
                                if (node.value[j] != word[j]) {
                                    break;
                                }
                            }
                            prefix = word.slice(0, j);
                            var valueLen = node.value.length  
                            if (prefix === word) {
                                //情况一
                                //word完全是node.value的一个前缀,如ana与anana
                                var cur = new SuffixNode(prefix );
                                console.log("创建节点的value", prefix);
                                console.log(
                                    "修改节点的value",
                                    node.value,
                                    "为",
                                    node.value.slice(j)
                                );
                                nodes[i] = cur; //反客为主
                                node.value = node.value.slice(j);
                                cur.children.push(node);
                             } else if (j === valueLen && word.length > valueLen) {
                                //情况三
                                //node.value比word短,全部相等,  如ab与abcd
                                console.log('修改节点的value', node.value, '为', prefix )
                                node.value = word.slice(0, j) 
                                this.addNodeByPrefix(node.children, word.slice(j))
                            } else {
                                //情况二
                                //node.value比word长, 局部相等, 如abcabxabcd与abxabcd
                                var cur = new SuffixNode(prefix );
                                nodes[i] = cur; //反客为主
                                console.log("创建节点的value", prefix );
                                console.log(
                                    "修改节点的value",
                                    node.value,
                                    "为",
                                    node.value.slice(j)
                                );
                                node.value = node.value.slice(j);
                                cur.children.push(node);
                                console.log(
                                    "添加另一个子节点",
                                    word.slice(j) 
                                );
                                cur.children.push(
                                    new SuffixNode(word.slice(j) )
                                );
                            }
                        }
                    }
                    if (add) {
                        console.log("没有前缀,添加新节点", word);
                        var node = new SuffixNode(word)
                        node.isEnd = true
                        nodes.push(node);
                    }
                }
            }
            var t = new SuffixTree()
            t.insert("abcabxabcd", '#');
            console.log(t)
RubyLouvre commented 5 years ago
 class Node{
         constructor(node){
             this.suffix = node
             this.children = {}
         }
     }
     class STree{

         insert(str){
            this.str = str
            this.len = str.length +1000
            this.root = new Node(null)
            var node = t.root, n = 0;
           for(var i = 0; i < str.length; i++){
            [node, n] = this.update(node, n, i);
            [node, n] = this.canonize(node, n, i) 
           }

         }
         update(node, left, right){
             var c = this.str[right] //当前字符
             var prev = new Node;
             while(true){
                 var [finish, p] = this.branch(node, left, right-1, c);
                 if(finish){
                     break
                 }
                 p.children[c] =  [right, t.len, new Node]
                 prev.suffix = p;
                 prev = p;
                 [node, left] = this.canonize(node.suffix, left, right-1 );
             }
             prev.suffix = node;
             return [node, left]
          }
          branch(node, left, right, ch){
              if(right-left+1 <= 0){
                  if(!node){
                      return [true, this.root]
                  }else {
                      return [ch in node.children, node]
                  }
              }else{
                 // console.log(this.str, left, 'xxx')
                  var [ll, rr, child] = node.children[this.str[left]];
                  var pos = ll + right - left + 1;
                  if(this.str[pos] == ch){
                      return [true, node]
                  }else{
                      var branchNode = new Node;
                      node.children[this.str[ll]] = [ll, pos -1, branchNode];
                      branchNode.children[ this.str[pos] ] = [pos, rr, child];
                      return [false, branchNode]
                  }
              }
          }
          canonize(node, left, right){

            if(!node){
                if(right-left+1 <= 0){
                    return [null, left]
                }else{
                   // console.log(this.root, left+1, right, 'end')
                    return this.canonize(this.root, left+1, right)
                }
            }
            while(left <= right){
               // console.log(this.str, left, 'xxx')
                var [ll, rr, child] = node.children[this.str[left]]
                if(right-left >=  rr - ll){
                    left += rr - ll+1;
                    node = child
                }else{
                    break
                }
            }
            return [node, left]
          }
          findPatten(s){
              var node = this.root;
              while(1){
                 var match = false;
                 for(var i in node.children){
                     var [ll, rr, child] =  node.children[i];
                     var edge = this.str.slice(ll, rr+1);
                     console.log(edge, child)
                     /*
                     if(edge.indexOf(s) === 0){
                     return Math.max( Object.keys(child.children), 1  )
                     }else if(s.indexOf(edge) === 0){
                         match = true
                         node = child;
                         s = s.slice(edge.length)
                     }*/
                 }
                 if(!match){
                     return 0
                 }
              }
              return 0
          }
     }
     var t = new STree;
     t.insert('javaxjava')
     console.log(t)
     console.log(t.findPatten('java'))
RubyLouvre commented 5 years ago

解决复杂的分叉

 var INACTIVE = 'INACTIVE'
      var ACTIVE = 'ACTIVE'
      var FORK = 'FORK'
      class SuffixNode {
        constructor(value) {
          this.value = value
          this.splitLength = 0;
          this.status = INACTIVE;
          this.children = []
        }
      }
      class SuffixTree {
        constructor() {
          this.root = new SuffixNode('');
        }
        insert(word, sign) {
          sign = sign || '#'
          var nodes = this.root.children;
          word += sign;
          for (var i = 0; i < word.length; i++) {
            console.log("即将插入", word[i], '==============')
            this.addNodeByChar(nodes, word[i], 0)
          }
        }

        addNodeByChar(nodes, ch, level) {
          var add = true;
          if (level > 0) {
            add = false
          }
          for (var i = 0; i < nodes.length; i++) {
            var node = nodes[i]
            var isMatch = node.value[node.splitLength] === ch
            //如果没有分叉,可以修改value

            if (node.status !== FORK) {
              console.log(node.value, '后跟新字符', node.value + ch, node.status)
              node.value += ch;

            }
            if (!isMatch) {
              if (node.status == FORK) {
                if (node.splitLength == node.value) {
                  node.splitLength = 0
                } else {
                  if (node.splitLength) {//第二次分叉
                    var value = node.value;
                    var parent = new SuffixNode(value.slice(0, node.splitLength))//变成父节点
                    nodes[i] = parent;
                    parent.status = FORK
                    parent.children.push(node, new SuffixNode(''))

                    node.value = value.slice(node.splitLength)

                    node.splitLength = 0;
                    node = parent;
                  }
                }
              }

              if (node.status == ACTIVE) {//第一次分叉
                var value = node.value;
                var parent = new SuffixNode(value.slice(0, node.splitLength))//变成父节点
                nodes[i] = parent;
                parent.status = FORK
                parent.children.push(node, new SuffixNode(''))

                node.value = value.slice(node.splitLength, -1)
                node.status = INACTIVE;

                node.splitLength = 0;
                node = parent;
              }
            } else { //命中了,就不用在根节点中添加新节点
              add = false;
              node.splitLength++
              if (node.status === INACTIVE) {
                node.status = ACTIVE; //准备分叉
              }
            }
            //如果有孩子,可以继续传递ch
            if (node.children.length) {
              this.addNodeByChar(node.children, ch, level + 1)
            }
          }
          if (add) {
            if (ch == '#') {
              if (level > 0) {
                return
              }
            }
            console.log(`第${level}层添加新节点 ${ch}`)
            nodes.push(new SuffixNode(ch))
          }
        }
        toString() {
          return getChildren(this.root.children)
        }
      }

      function getChildren(childfren) {
        var array = []
        for (var i = 0; i < children; i++) {
          array.push(node.value + getChildren(node.childfren))
        }
        if (array.length) {
          return '[' + array.join('\n') + ']'
        }
        return ''

      }
      var tree = new SuffixTree();
      //tree.insert("banana");
      tree.insert("abcabxa");
      console.log(tree)
RubyLouvre commented 5 years ago

https://www.fogsail.net/2019/03/06/20190306/

https://github.com/maclandrol/SuffixTreeJS