amueller / gco_python

Python wrappers for GCO alpha-expansion and alpha-beta-swaps
137 stars 65 forks source link

Segfault in cut_from_graph with specific inputs #12

Open Erotemic opened 8 years ago

Erotemic commented 8 years ago

(I'm not sure if this is the correct repo to submit this issue. There seem to be 3 versions of the rpo out there, and I don't see an original repo for GCO itself)

I've found a specific set of inputs that causes a segfault in pygco.cut_from_graph.

I wrote a script to demonstrate this and attempt to work down the case to a minimal cause. Unfortunately I've been unable to identify the cause.

However, I did find one (unrelated) easy to fix segfault that happens when you specify an edge index that is out of bounds. This is also in the test script.

The script defines a function for each test case, and then runs them in the main part. There are several tests for what I thought might be causing the issue, but those ideas turn out to be wrong.

The final test is the smallest version of the error I could reproduce. In this test, removing any of the edges suppresses the error. Then when running with all edges the segfault occurs.


import numpy as np
import pygco

def original_data():
    # Original data that cause segfault
    unary_cost = np.array([[0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0]], dtype=np.int32)

    pairwise_cost = np.array([[-1,  0],
                              [ 0, -1]], dtype=np.int32)

    edges = np.array([[  0,   5, -18],
                      [  0,   6, -19],
                      [  0,   8, -20],
                      [  0,   3, -18],
                      [  0,   1, -21],
                      [  1,   4, -20],
                      [  1,   6, -17],
                      [  1,   7, -20],
                      [  1,   8, -17],
                      [  1,   3,  -1],
                      [  2,   4,   6],
                      [  2,   7, -14],
                      [  2,   1,   0],
                      [  3,   4, -21],
                      [  3,   6, -18],
                      [  3,   8, -20],
                      [  4,   5,   0],
                      [  4,   1,   0],
                      [  5,   4,  24],
                      [  5,   7, -15],
                      [  5,   8,  -2],
                      [  5,   3, -18],
                      [  6,   5,  24],
                      [  6,   4,  -8],
                      [  6,   0,   0],
                      [  6,   8,  24],
                      [  6,   2,  11],
                      [  7,   3,   0],
                      [  7,   4,  14],
                      [  7,   5,   0],
                      [  7,   6,   0],
                      [  7,   8,  29],
                      [  7,   9,  -4],
                      [  8,   4,  15],
                      [  8,   6,   0],  # REMOVING THIS ROW WILL ALSO REMOVE THE SEGFAULT
                      [  8,   2,  19],
                      [  8,   9,   8],
                      [  9,   4,  -5],
                      [  9,   6,  -6],
                      [  9,   2,   2],
                      [  9,   3,   0]], dtype=np.int32)
    return unary_cost, pairwise_cost, edges

def original_case_segfault():
    print('--- TESTING ORIGINAL CASE ---')
    unary_cost, pairwise_cost, edges = original_data()
    cutkw = {'algorithm': 'expansion', 'n_iter': 5}

    # This will sefault.
    print('About to segfault on the original case.')
    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost,
                                    **cutkw)
    print('labeling = %r' % (labeling,))

def original_case_fix():
    print('--- TESTING FIX ORIGINAL CASE ---')
    unary_cost, pairwise_cost, edges = original_data()
    # Try changing to upper triangular
    edge_utri = edges.copy()
    flag = edge_utri.T[0] > edge_utri.T[1]
    edge_utri[flag, 0:2] = edge_utri[flag].T[0:2][::-1].T
    edge_utri = edge_utri[np.lexsort(edge_utri.T[::-1])]
    edge_utri = np.ascontiguousarray(edge_utri)

    # Removing spurious 0 weighted egdges seems to fix it.
    # BUT IT TURN SOUT THIS IS ONLY A SIDE EFFECT
    from collections import defaultdict  # NOQA
    uv_list = edge_utri.T[0:2].T
    groups = defaultdict(list)
    for idx, uv in enumerate(uv_list):
        val = edge_utri[idx, 2]
        # Ah there were duplicates with zero values!
        if val != 0:
            group = groups[tuple(uv.tolist())]
            # Removing them fixes it.
            group.append(val)
            assert len(group) == 1, 'should only ever add to a group once'
        else:
            print('Removed uv = %r' % (uv,))

    edges_fixed = np.array([[u, v, val_[0]] for (u, v), val_ in groups.items()])
    edges_fixed = edges_fixed[np.lexsort(edges_fixed.T[::-1])].astype(np.int32)
    edges_fixed = np.ascontiguousarray(edges_fixed)

    #print('edge_utri =\n%r' % (edge_utri,))
    #print('edges_fixed =\n%r' % (edges_fixed,))

    labeling = pygco.cut_from_graph(edges_fixed, unary_cost, pairwise_cost)
    print('labeling = %r' % (labeling,))

# VERYIFY MINIMAL TEST CASE
def test_zero_duplicate_idea():
    print('--- TESTING ZERO DUPLICATE IDEA (Not the cause) ---')

    unary_cost = np.array([[0, 0],
                           [0, 0]], dtype=np.int32)

    pairwise_cost = np.array([[-1,  0],
                              [ 0, -1]], dtype=np.int32)

    edges = np.array([[  0,   1, -100]], dtype=np.int32)

    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost)

    print('If my idea that having a duplicate edge with a 0 weight caused'
          ' segfaults was right then this would segfault'
          ' but it does not')

    edges = np.array([[  0,   1, -100],
                      [  1,   0,  0]], dtype=np.int32)

    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost)

    edges = np.array([[  0,   1, 100],
                      [  0,   1,  0]], dtype=np.int32)

    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost)
    print('labeling = %r' % (labeling,))
    print('... This idea is not the problem!')

def test_impossible_case_idea():
    print('--- TESTING IMPOSSIBLE CASE IDEA ---')
    unary_cost = np.array([[0, 0],
                           [0, 0],
                           [0, 0]], dtype=np.int32)

    pairwise_cost = np.array([[-1,  0],
                              [ 0, -1]], dtype=np.int32)

    edges = np.array([[  0,   1, -100],
                      [  0,   2, -100]], dtype=np.int32)
    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost)

    edges = np.array([[  0,   1, -100],
                      [  2,   0, -100],
                      [  0,   2, -100]], dtype=np.int32)
    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost)
    print('labeling = %r' % (labeling,))
    print('... This idea is not the problem!')

def unrelated_segfault_case_edge_idx_out_of_bounds():
    print('--- TESTING (UNRELATED) EDGE IDX OUT OF BOUNDS ---')
    unary_cost = np.array([[0, 0],
                           [0, 0]], dtype=np.int32)

    pairwise_cost = np.array([[-1,  0],
                              [ 0, -1]], dtype=np.int32)

    edges = np.empty((0, 3), dtype=np.int32)
    print('Initially works')
    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost)
    print('labeling = %r' % (labeling,))
    print('Add an out of bounds edge and it segfaults')
    edges = np.array([[1, 3]], dtype=np.int32)
    print('ABOUT TO SEGFAULT!')
    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost)

def minimal_case_noclue_why():
    print('--- TESTING MINIMAL SEGFAULT CASE ---')
    unary_cost = np.array([[0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0],
                           [0, 0]], dtype=np.int32)

    pairwise_cost = np.array([[-1,  0],
                              [ 0, -1]], dtype=np.int32)

    print('removing any one of these edges will stop the segfault')
    edges = np.array([[  0,   6, -19],
                      [  1,   6, -17],
                      [  3,   6, -18],
                      [  6,   5,  24],
                      [  6,   4,  -8],
                      [  6,   0,   0],
                      [  6,   8,  24],
                      [  6,   2,  11],
                      [  7,   6,   0],
                      [  8,   6,   0],
                      [  9,   6,  -6]], dtype=np.int32)

    cutkw = {'algorithm': 'expansion', 'n_iter': 5}

    print('Testing by removing each edge.')
    flags = np.ones(len(edges), dtype=np.int)
    for idx in range(len(edges)):
        flags[idx] = 0
        edges_ = edges.compress(flags, axis=0)
        labeling = pygco.cut_from_graph(edges_, unary_cost, pairwise_cost,
                                        **cutkw)
        print('labeling = %r' % (labeling,))

    # This will sefault.
    print('BUT when all edges are included it will segfault')
    print('About to segfault. I dont know why.')
    labeling = pygco.cut_from_graph(edges, unary_cost, pairwise_cost,
                                    **cutkw)
    print('labeling = %r' % (labeling,))

if __name__ == '__main__':

    if False:
        # Test the original case
        original_case_segfault()
    else:
        print('skipping original segfault case')

    # I found a simple fix to the original case that does not alter it
    original_case_fix()

    # I had ideas as to what cause the segfaults but they were wrong
    test_zero_duplicate_idea()
    test_impossible_case_idea()

    if False:
        # Found a SECOND simple segfault case, but probably unrelated
        unrelated_segfault_case_edge_idx_out_of_bounds()
    else:
        print('skipping second segfault case')

    if 1:
        # This is a minimal version of the original case
        minimal_case_noclue_why()
    else:
        print('skipping minimal error case')
amueller commented 8 years ago

Hey. If this is a bug in the cython code, this is the right repo (what other versions are there?). If it is a bug in gco, it is not, and you should write to the original authors. I don't have a bunch of time to investigate this right now. Have you tried debugging it with gdb?