Open SOBAN50 opened 1 year ago
I don't understand how this error does not occur in GitHub Actions.
I think Instagram has changed its HTML structure. I see now that the data is fetched in the background using `polaris_preload`. I have checked the source code of a package called `instagramy`, which also does the same thing. This package is also unable to fetch the user data.
Next steps (I feel the second option is better, since the URL won't change that frequently): Option 1: find a way to scrape the `polaris_preload` URL. Option 2: use the default profile URL, which usually won't change, to fetch the data directly. Example (using my profile, since personal profiles have less data): URL: https://www.instagram.com/api/v1/users/web_profile_info/?username=fayaz._.shaik Response:
{
"data": {
"user": {
"ai_agent_type": null,
"biography": "",
"bio_links": [],
"fb_profile_biolink": null,
"biography_with_entities": {
"raw_text": "",
"entities": []
},
"blocked_by_viewer": false,
"restricted_by_viewer": null,
"country_block": false,
"eimu_id": "113766196687272",
"external_url": null,
"external_url_linkshimmed": null,
"edge_followed_by": {
"count": 409
},
"fbid": "17841400113543979",
"followed_by_viewer": false,
"edge_follow": {
"count": 805
},
"follows_viewer": false,
"full_name": "Fayaz Shaik",
"group_metadata": null,
"has_ar_effects": false,
"has_clips": true,
"has_guides": false,
"has_channel": false,
"has_blocked_viewer": false,
"highlight_reel_count": 0,
"has_requested_viewer": false,
"hide_like_and_view_counts": false,
"id": "1534659784",
"is_business_account": false,
"is_professional_account": false,
"is_supervision_enabled": false,
"is_guardian_of_viewer": false,
"is_supervised_by_viewer": false,
"is_supervised_user": false,
"is_embeds_disabled": false,
"is_joined_recently": false,
"guardian_id": null,
"business_address_json": null,
"business_contact_method": "UNKNOWN",
"business_email": null,
"business_phone_number": null,
"business_category_name": null,
"overall_category_name": null,
"category_enum": null,
"category_name": null,
"is_private": true,
"is_verified": false,
"is_verified_by_mv4b": false,
"is_regulated_c18": false,
"edge_mutual_followed_by": {
"count": 0,
"edges": []
},
"pinned_channels_list_count": 0,
"profile_pic_url": "https://scontent-sin6-4.cdninstagram.com/v/t51.2885-19/339953276_6244685068926228_2785093265688359005_n.jpg?stp=dst-jpg_s150x150\u0026_nc_ht=scontent-sin6-4.cdninstagram.com\u0026_nc_cat=100\u0026_nc_ohc=3OPu0GWTmAQAX9xxKNm\u0026edm=AOQ1c0wBAAAA\u0026ccb=7-5\u0026oh=00_AfBzpEfq4BsnKsI_pX-gvzgF65YJQ7gX3CJ21gNcK5GM4A\u0026oe=6533C3E8\u0026_nc_sid=8b3546",
"profile_pic_url_hd": "https://scontent-sin6-4.cdninstagram.com/v/t51.2885-19/339953276_6244685068926228_2785093265688359005_n.jpg?stp=dst-jpg_s320x320\u0026_nc_ht=scontent-sin6-4.cdninstagram.com\u0026_nc_cat=100\u0026_nc_ohc=3OPu0GWTmAQAX9xxKNm\u0026edm=AOQ1c0wBAAAA\u0026ccb=7-5\u0026oh=00_AfAea6z3LrIyJn0VTAkNpT8EQI1nkktPS6waw18NNCq_iA\u0026oe=6533C3E8\u0026_nc_sid=8b3546",
"requested_by_viewer": false,
"should_show_category": false,
"should_show_public_contacts": false,
"show_account_transparency_details": true,
"transparency_label": null,
"transparency_product": "STATE_CONTROLLED_MEDIA",
"username": "fayaz._.shaik",
"connected_fb_page": null,
"pronouns": [],
"edge_felix_video_timeline": {
"count": 0,
"page_info": {
"has_next_page": false,
"end_cursor": null
},
"edges": []
},
"edge_owner_to_timeline_media": {
"count": 29,
"page_info": {
"has_next_page": false,
"end_cursor": ""
},
"edges": []
},
"edge_saved_media": {
"count": 0,
"page_info": {
"has_next_page": false,
"end_cursor": null
},
"edges": []
},
"edge_media_collections": {
"count": 0,
"page_info": {
"has_next_page": false,
"end_cursor": null
},
"edges": []
},
"edge_related_profiles": {
"edges": []
}
}
},
"status": "ok"
}
Hey, please assign this to me and add a hacktoberfest tag to it.
Repository commit
ac3bd1032c02ff5c2f6eb16f2bf5a1b24d106d1c
Python version (python --version)
3.12.0
Dependencies version (pip freeze)
beautifulsoup4==4.12.2 certifi==2023.7.22 charset-normalizer==3.3.0 colorama==0.4.6 contourpy==1.1.1 coverage==7.3.2 cycler==0.12.1 fake-useragent==1.3.0 fonttools==4.43.1 idna==3.4 imageio==2.31.5 iniconfig==2.0.0 joblib==1.3.2 kiwisolver==1.4.5 lxml==4.9.3 markdown-it-py==3.0.0 matplotlib==3.8.0 mdurl==0.1.2 mpmath==1.3.0 numpy==1.26.1 oauthlib==3.2.2 opencv-python==4.8.1.78 packaging==23.2 pandas==2.1.1 patsy==0.5.3 Pillow==10.1.0 pluggy==1.3.0 Pygments==2.16.1 pyparsing==3.1.1 pytest==7.4.2 pytest-cov==4.1.0 python-dateutil==2.8.2 pytz==2023.3.post1 requests==2.31.0 requests-oauthlib==1.3.1 rich==13.6.0 scikit-learn==1.3.1 scipy==1.11.3 setuptools==68.0.0 six==1.16.0 soupsieve==2.5 statsmodels==0.14.0 sympy==1.12 threadpoolctl==3.2.0 tweepy==4.14.0 tzdata==2023.3 urllib3==2.0.6 wheel==0.37.1 xgboost==2.0.0
Expected behavior
I expect the instagram_crawler to fetch the Instagram user's information.
Actual behavior
It gives an error while fetching user information. I have tested on both Windows and Ubuntu, and it gives the same error. I have verified that it executes both the `try` and `except` blocks and fails in both of them.