tskit-dev / tskit

Population-scale genomics
MIT License
147 stars 69 forks source link

Add node_is_sample array #2917

Open jeromekelleher opened 3 months ago

jeromekelleher commented 3 months ago

This just took me way too long to do:

def node_is_sample(ts):
    """
    Return a boolean array marking which nodes of ``ts`` are samples.

    :param ts: Object exposing a ``nodes_flags`` array of per-node integer
        flags.
    :return: Boolean array with one entry per element of ``ts.nodes_flags``;
        an entry is True iff the ``tskit.NODE_IS_SAMPLE`` bit is set in the
        corresponding flags value (any other bits are ignored).
    """
    # The scalar mask broadcasts against the flags array, so there is no need
    # to first materialise a full-length array filled with the mask.
    return np.bitwise_and(ts.nodes_flags, tskit.NODE_IS_SAMPLE) != 0

Tests:


class TestNodeIsSample:
    """Check ``model.node_is_sample`` against each node's ``is_sample()``."""

    def test_simple_example(self):
        # Every entry of the array must agree with the per-node method.
        ts = single_tree_example_ts()
        is_sample = model.node_is_sample(ts)
        for node in ts.nodes():
            assert node.is_sample() == is_sample[node.id]

    @pytest.mark.parametrize("bit", [1, 2, 17, 31])
    def test_sample_and_other_flags(self, bit):
        # OR one additional flag bit into every node's flags, then check that
        # the sample array still agrees with node.is_sample(), i.e. that the
        # extra bit does not disturb sample detection.
        tables = single_tree_example_ts().dump_tables()
        flags = tables.nodes.flags
        tables.nodes.flags = flags | (1 << bit)
        ts = tables.tree_sequence()
        is_sample = model.node_is_sample(ts)
        for node in ts.nodes():
            assert node.is_sample() == is_sample[node.id]
            # Sanity check: the extra bit really did make it into the node.
            assert (node.flags & (1 << bit)) != 0

I'm not sure how it should be named, etc., but we do want this array available in the library in some form. I guess it would be useful to have it take an array, so that it can be applied to the node table too, like:

def is_sample(flags):
    """
    Return a boolean array marking which entries of ``flags`` are samples.

    :param flags: Array of integer node flags.
    :return: Boolean array with one entry per element of ``flags``; an entry
        is True iff the ``tskit.NODE_IS_SAMPLE`` bit is set in the
        corresponding flags value (any other bits are ignored).
    """
    # The scalar mask broadcasts against ``flags``, so there is no need to
    # first materialise a full-length array filled with the mask.
    return np.bitwise_and(flags, tskit.NODE_IS_SAMPLE) != 0