DocNow / twarc-csv

A plugin for twarc2 for converting tweet JSON into DataFrames and exporting to CSV.
MIT License
31 stars 10 forks source link

Geo Point coordinates not saved #39

Closed: igorbrigadir closed this issue 2 years ago

igorbrigadir commented 2 years ago

For example, for a tweet like 1249702384659554308, the point coordinates in its `geo` field:

      "geo": {
        "coordinates": {
          "type": "Point",
          "coordinates": [
            42.77810097,
            88.01785747
          ]
        }
      }

don't get saved in the CSV.

Full tweet json:

{
  "data": [
    {
      "lang": "pl",
      "entities": {
        "urls": [
          {
            "start": 212,
            "end": 235,
            "url": "https://t.co/MZ9QlqGyfA",
            "expanded_url": "https://www.instagram.com/p/B-7ItQwBJ93/?igshid=187dqx4b5lu2y",
            "display_url": "instagram.com/p/B-7ItQwBJ93/…",
            "status": 200,
            "unwound_url": "https://www.instagram.com/p/B-7ItQwBJ93/?igshid=187dqx4b5lu2y"
          }
        ],
        "hashtags": [
          {
            "start": 24,
            "end": 39,
            "tag": "harrypotterdiy"
          },
          {
            "start": 40,
            "end": 72,
            "tag": "harrypotterandphilosophersstone"
          },
          {
            "start": 73,
            "end": 84,
            "tag": "potterhead"
          },
          {
            "start": 85,
            "end": 97,
            "tag": "harrypotter"
          },
          {
            "start": 98,
            "end": 115,
            "tag": "philosopherstone"
          },
          {
            "start": 116,
            "end": 131,
            "tag": "czasnaczytanie"
          },
          {
            "start": 132,
            "end": 144,
            "tag": "zostańwdomu"
          },
          {
            "start": 145,
            "end": 157,
            "tag": "zostanwdomu"
          },
          {
            "start": 158,
            "end": 164,
            "tag": "magic"
          },
          {
            "start": 165,
            "end": 177,
            "tag": "harrypotter"
          },
          {
            "start": 178,
            "end": 187,
            "tag": "funkopop"
          },
          {
            "start": 188,
            "end": 197,
            "tag": "bookpile"
          },
          {
            "start": 198,
            "end": 210,
            "tag": "bookstagram"
          }
        ]
      },
      "created_at": "2020-04-13T14:14:01.000Z",
      "public_metrics": {
        "retweet_count": 1,
        "reply_count": 0,
        "like_count": 0,
        "quote_count": 0
      },
      "reply_settings": "everyone",
      "text": "Za co lubicie Harry'ego?#harrypotterdiy #harrypotterandphilosophersstone #potterhead #harrypotter #philosopherstone #czasnaczytanie #zostańwdomu #zostanwdomu #magic #harrypotter #funkopop #bookpile #bookstagram… https://t.co/MZ9QlqGyfA",
      "possibly_sensitive": false,
      "geo": {
        "coordinates": {
          "type": "Point",
          "coordinates": [
            42.77810097,
            88.01785747
          ]
        }
      },
      "id": "1249702384659554308",
      "context_annotations": [
        {
          "domain": {
            "id": "66",
            "name": "Interests and Hobbies Category",
            "description": "A grouping of interests and hobbies entities, like Novelty Food or Destinations"
          },
          "entity": {
            "id": "1206704182717104128",
            "name": "Model figures"
          }
        },
        {
          "domain": {
            "id": "130",
            "name": "Multimedia Franchise",
            "description": "Franchises which span multiple forms of media like 'Harry Potter'"
          },
          "entity": {
            "id": "933033311844286464",
            "name": "Harry Potter",
            "description": "This entity includes all conversation about the franchise, as well as any individual installments in the series, if applicable.\t\t\t"
          }
        }
      ],
      "author_id": "2344192110",
      "conversation_id": "1249702384659554308",
      "source": "Instagram"
    }
  ],
  "includes": {
    "users": [
      {
        "name": "Kama",
        "username": "kamanonickname",
        "protected": false,
        "verified": false,
        "public_metrics": {
          "followers_count": 64,
          "following_count": 152,
          "tweet_count": 8743,
          "listed_count": 0
        },
        "created_at": "2014-02-14T22:26:08.000Z",
        "description": "There should be bio but Mróz is busy writing his 666th novel",
        "id": "2344192110",
        "url": "",
        "profile_image_url": "https://pbs.twimg.com/profile_images/1422798418645225472/cRbGyIvp_normal.jpg"
      }
    ]
  },
  "__twarc": {
    "url": "https://api.twitter.com/2/tweets?expansions=author_id%2Cin_reply_to_user_id%2Creferenced_tweets.id%2Creferenced_tweets.id.author_id%2Centities.mentions.username%2Cattachments.poll_ids%2Cattachments.media_keys%2Cgeo.place_id&tweet.fields=attachments%2Cauthor_id%2Ccontext_annotations%2Cconversation_id%2Ccreated_at%2Centities%2Cgeo%2Cid%2Cin_reply_to_user_id%2Clang%2Cpublic_metrics%2Ctext%2Cpossibly_sensitive%2Creferenced_tweets%2Creply_settings%2Csource%2Cwithheld&user.fields=created_at%2Cdescription%2Centities%2Cid%2Clocation%2Cname%2Cpinned_tweet_id%2Cprofile_image_url%2Cprotected%2Cpublic_metrics%2Curl%2Cusername%2Cverified%2Cwithheld&media.fields=alt_text%2Cduration_ms%2Cheight%2Cmedia_key%2Cpreview_image_url%2Ctype%2Curl%2Cwidth%2Cpublic_metrics&poll.fields=duration_minutes%2Cend_datetime%2Cid%2Coptions%2Cvoting_status&place.fields=contained_within%2Ccountry%2Ccountry_code%2Cfull_name%2Cgeo%2Cid%2Cname%2Cplace_type&ids=1249702384659554308",
    "version": "2.8.1",
    "retrieved_at": "2021-12-14T16:17:36+00:00"
  }
}

The CSV is missing the point coordinates.

igorbrigadir commented 2 years ago

My mistake: the handling at https://github.com/DocNow/twarc-csv/blob/main/dataframe_converter.py#L72-L73 already exists, so the point coordinates are picked up. I forgot where this bug report came from and can't reproduce it, so I added a new test.