mjumbewu / django-rest-framework-csv

CSV Tools for Django REST Framework
BSD 2-Clause "Simplified" License
364 stars 89 forks source link

Field order is not respected #31

Open aldarund opened 9 years ago

aldarund commented 9 years ago

When I have a serializer with a defined field list like this:

 class Meta:
     # `fields` lists the serializer fields in the order the reporter
     # expects the CSV columns to appear.
     model = ExternalLink
     fields = ('date', 'linked_by', 'domain', 'trust_flow', 'citation_flow', 'backlink_number')

I expected the CSV output to be in the same order, but this doesn't happen.

dbkaplun commented 9 years ago

:+1:

munendrasn commented 9 years ago

@aldarund

# in tablize function
data.header = sorted(headers)
# change above statement to
data.header = headers # this solves half of your problem.

the main reason for order not being preserved is

    def flatten_list(self, l):
        """
        Flatten every element of ``l`` and merge the results into one dict.

        Each element is flattened with ``self.flatten_item`` and then passed
        to ``self.nest_flat_item`` with the element's index (as text) as the
        prefix.
        """
        flat_list = {}
        for index, item in enumerate(l):
            index = text_type(index)
            flat_item = self.flatten_item(item)
            nested_item = self.nest_flat_item(flat_item, index)
            # this line causes improper ordering: flat_list is a plain dict,
            # which the commenter notes is unordered
            flat_list.update(nested_item)
        return flat_list

    def flatten_dict(self, d):
        """
        Flatten every value of ``d`` and merge the results into one dict.

        Each value is flattened with ``self.flatten_item`` and then passed to
        ``self.nest_flat_item`` with its key (as text) as the prefix.
        """
        flat_dict = {}
        for key, item in d.items():
            key = text_type(key)
            flat_item = self.flatten_item(item)
            nested_item = self.nest_flat_item(flat_item, key)
            # this line causes improper ordering: flat_dict is a plain dict,
            # which the commenter notes is unordered
            flat_dict.update(nested_item)
        return flat_dict

Since dictionaries are based on hash maps, they are unordered. If you really want to preserve the same order, refer to #27. Is there a particular reason to maintain the order?

freyley commented 9 years ago

:+1: Ordering fields is rather important to the output format - CSVs with fields randomly ordered are much less usable than CSVs with specified ordering.

dobestan commented 9 years ago

:+1:

mjumbewu commented 8 years ago

I agree that this would be ideal. It may be as simple as removing the sorted call from the tablize method, and using collections.OrderedDict in the flatten_list, flatten_dict, and nest_flat_item methods. There will have to be some testing done though. If anyone has the bandwidth to test that this works (and has sane results in the case of nested serializers as well) then I would be more than happy to pull it in.

pymarco commented 8 years ago

I have solved this with the following subclass of CSVRenderer. It depends on the more_itertools library.

# python
from collections import OrderedDict
from six import text_type

# contrib
from more_itertools import unique_everseen
from rest_framework_csv.renderers import CSVRenderer

class OrderedCsvRenderer(
    CSVRenderer,
):
    """
    CSV renderer that emits columns in the order the fields are first seen.

    The flattening helpers build ``OrderedDict`` instances so key order
    survives flattening, and ``tablize`` derives the header from that order
    instead of sorting it.
    """

    def tablize(self, data, header=None, labels=None):
        """
        Convert a list of data into a table.

        :param data: items to render; flattened via ``self.flatten_data``.
        :param header: optional explicit column list. When given it takes
            precedence over ``data.header`` and over the derived order.
        :param labels: optional mapping of column name -> display label for
            the header row; columns without an entry keep their own name.
        :returns: a list of rows whose first row is the header, or ``[]``
            when ``data`` is empty.
        """
        if not data:
            return []

        # First, flatten the data (i.e., convert it to a list of
        # dictionaries that are each exactly one level deep).  The key for
        # each item designates the name of the column that the item will
        # fall into.
        data = self.flatten_data(data)
        data.header = header or data.header

        # No header supplied anywhere: derive one that preserves field order.
        if not data.header:
            data.header = self._ordered_header(data)

        # Create a row for each dictionary, filling in columns for which the
        # item has no data with None values.
        rows = [
            [item.get(key, None) for key in data.header]
            for item in data
        ]

        # Return your "table", with the headers as the first row.
        if labels:
            return [[labels.get(x, x) for x in data.header]] + rows
        return [data.header] + rows

    def _ordered_header(self, rows):
        """
        Return every distinct column of ``rows`` in first-seen order, keeping
        all columns that share a top-level field next to each other.
        """
        grouped = OrderedDict()
        seen = set()
        for row in rows:
            for column in row:
                if column in seen:
                    continue
                seen.add(column)
                # Split on the same separator nest_flat_item joins with, so a
                # customised level_sep still groups correctly.
                top_level = column.split(self.level_sep)[0]
                grouped.setdefault(top_level, []).append(column)
        # .values() (not the Python 2-only .itervalues()) works on 2 and 3.
        return [column for columns in grouped.values() for column in columns]

    def nest_flat_item(self, flat_item, prefix):
        """
        Given a "flat item" (a dictionary exactly one level deep), nest all of
        the column headers in a namespace designated by prefix.  For example:

         header... | with prefix... | becomes...
        -----------|----------------|----------------
         'lat'     | 'location'     | 'location.lat'
         ''        | '0'            | '0'
         'votes.1' | 'user'         | 'user.votes.1'

        The result is an ``OrderedDict`` so the order of ``flat_item`` is kept.
        """
        nested_item = OrderedDict()
        for header, val in flat_item.items():
            nested_header = self.level_sep.join([prefix, header]) if header else prefix
            nested_item[nested_header] = val
        return nested_item

    def flatten_list(self, l):
        """
        Flatten each element of ``l`` and merge the results, in list order,
        into one ``OrderedDict`` whose headers are prefixed with the index.
        """
        flat_list = OrderedDict()
        for index, item in enumerate(l):
            index = text_type(index)
            flat_item = self.flatten_item(item)
            nested_item = self.nest_flat_item(flat_item, index)
            flat_list.update(nested_item)
        return flat_list

    def flatten_dict(self, d):
        """
        Flatten each value of ``d`` and merge the results, in the iteration
        order of ``d``, into one ``OrderedDict`` whose headers are prefixed
        with the key.
        """
        flat_dict = OrderedDict()
        for key, item in d.items():
            key = text_type(key)
            flat_item = self.flatten_item(item)
            nested_item = self.nest_flat_item(flat_item, key)
            flat_dict.update(nested_item)
        return flat_dict
pymarco commented 8 years ago

For anyone interested, I refactored tablize in the class I pasted previously to generate user-friendly column labels. Pasted below in case it helps. It fits my use case, where fields may be named foo or foo_bar.

def tablize(self, data, header=None, labels=None):
    """
    Convert a list of data into a table with user-friendly column labels.

    :param data: items to render; flattened via ``self.flatten_data``.
    :param header: optional explicit column list. When given it takes
        precedence over ``data.header`` and over the derived order.
    :param labels: optional mapping of column name -> display label. When
        omitted (or empty), labels are generated from the column names:
        ``foo_bar`` -> ``Foo Bar``, ``foo.bar`` -> ``Foo - Bar`` and
        ``foo.0.bar`` -> ``Foo #1 - Bar``.
    :returns: a list of rows whose first row holds the labels, or ``[]``
        when ``data`` is empty.
    """
    if not data:
        return []

    # Same separator the renderer uses to join nested headers.
    sep = self.level_sep

    # First, flatten the data (i.e., convert it to a list of
    # dictionaries that are each exactly one level deep).  The key for
    # each item designates the name of the column that the item will
    # fall into.
    data = self.flatten_data(data)
    data.header = header or data.header

    # No header supplied: collect the distinct columns in first-seen order,
    # keeping columns that share a top-level field next to each other.
    if not data.header:
        grouped = OrderedDict()
        seen = set()
        for item in data:
            for column in item:
                if column in seen:
                    continue
                seen.add(column)
                grouped.setdefault(column.split(sep)[0], []).append(column)
        # .values() (not the Python 2-only .itervalues()) works on 2 and 3.
        data.header = [
            column for columns in grouped.values() for column in columns
        ]

    # Create a row for each dictionary, filling in columns for which the
    # item has no data with None values.
    rows = [
        [item.get(key, None) for key in data.header]
        for item in data
    ]

    # If labels not provided then generate user friendly labels.
    if not labels:
        labels = {}
        for column in data.header:
            parts = column.split(sep)
            titled = [part.replace("_", " ").title() for part in parts]
            label = titled[0]

            if len(parts) == 2:
                label = "{0} - {1}".format(titled[0], titled[1])
            elif len(parts) == 3:
                try:
                    # Middle part is a list index: show it 1-based.
                    position = int(parts[1]) + 1
                except ValueError:
                    # Middle part is a nested key, not an index.
                    label = " - ".join(titled)
                else:
                    label = "{0} #{1} - {2}".format(
                        titled[0], position, titled[2]
                    )

            labels[column] = label

    # Return your "table", with the labels as the first row.
    return [[labels.get(x, x) for x in data.header]] + rows