caesar0301 / treelib

An efficient implementation of tree data structure in python 2/3.
http://treelib.readthedocs.io/en/latest/
Other
800 stars 185 forks source link

get error when do Huffman Coding #225

Closed Freakwill closed 2 months ago

Freakwill commented 2 months ago

Get error: t.paste(level, tree) File "/usr/local/lib/python3.10/site-packages/treelib/tree.py", line 690, in paste raise ValueError("Duplicated nodes %s exists." % list(map(text, set_joint))) ValueError: Duplicated nodes ['()'] exists.

I am sure that the identifiers of nodes are unique.

#!/usr/bin/env python

from toolz import *
from treelib import *

import numpy as np

def merge(trees, level=()):
    """merge the trees to one tree by add a root

    Args:
        trees (list): list of trees or nodes
        level (tuple, optional): the prefix for identifier

    Returns:
        Tree
    """

    data = list(concat(tree.data['symbols'] if isinstance(tree, Node) else tree.get_node(tree.root).data['symbols'] for tree in trees))
    freq = sum(tree.data['frequence'] if isinstance(tree, Node) else tree.get_node(tree.root).data['frequence'] for tree in trees)
    t = Tree()
    root = Node(tag='', identifier=level, data={'symbols':data, 'frequence':freq, 'code':''})
    t.add_node(root)
    t.root = level
    for k, tree in enumerate(trees):
        if isinstance(tree, Node):
            tree.identifier = (k,) + tree.identifier
            tree.tag = tree.data['code'] = f'{k}' + tree.data['code']
            t.add_node(tree, parent=level)
        else:
            for n in tree.all_nodes_itr():
                n.identifier = (k,) + n.identifier
                n.tag = n.data['code'] = f'{k}' + n.data['code']
            t.paste(level, tree)
    return t

def huffman_tree(trees, level=()):
    """huffman coding

    Args:
        trees (list): list of trees or nodes
        level (tuple, optional): the prefix for identifier

    Returns:
        Tree
    """
    if len(trees) == 2:
        return merge(trees, level=level)
    else:
        k, l = np.argsort([tree.data['frequence'] for tree in trees])[:2]
        t = merge([trees[k], trees[l]], level=level)
        t = huffman_tree([t]+[trees[i] for i in range(len(trees)) if i !=k and i!=l], level=level)
        t.tag = 'root'
        return t

nodes =[Node(identifier=(), data={'symbols':'X', 'frequence':1, 'code':''}), Node(identifier=(), data={'symbols':['A','C'], 'frequence':3, 'code':''}), Node(identifier=(), data={'symbols':'B', 'frequence':4, 'code':''})]
t = merge([nodes[0],nodes[1]], level=())

# for n in t.all_nodes_itr():
#     print(n)

t = merge([t, nodes[2]])

print(t)

for n in t.all_nodes_itr():
    print(n.data, n.identifier)