itteco / iframely

oEmbed proxy. Supports over 1800 domains via custom parsers, oEmbed, Twitter Cards and Open Graph
https://iframely.com
Other
1.52k stars 300 forks source link

instagram return image url #273

Closed meecect closed 4 years ago

meecect commented 4 years ago

I'm sure this is a simple configuration issue, but the iframely endpoint seems to return just the link in the html field. Am I supposed to use the instagram sdk to turn that into a player? Or do I have to specify in an option somewhere to give me the 'app' rel instead of the 'image' rel?

I do see more extensive results in one of the links that are returned.

Here is what the debug page returns:

{
    "meta": {
        "site": "Instagram",
        "author": "tomy_toy",
        "author_url": "https://www.instagram.com/tomy_toy",
        "title": "TOMY on Instagram: “Oh, shell-o there! Did you know all our CMM plush toys are 20% off at @bestbuy this week?  Check out the collection via the link in bio.…”",
        "description": "Oh, shell-o there! Did you know all our CMM plush toys are 20% off at @bestbuy this week?\n\nCheck out the collection via the link in bio. \n\n.\n.\n.\n#tomytoys #tomytoy #clubmocchimocchi #cmm #cmmtoys #tomy #parentapproved #kidapproved #family #videogames #kirby #baby #momanddad\n\n📸: @psmarinho",
        "canonical": "https://www.instagram.com/p/CClqRd7gZF_/"
    },
    "links": [
        {
            "html": "<blockquote class="instagram-media" data-instgrm-captioned data-instgrm-permalink="https://www.instagram.com/p/CClqRd7gZF_/?utm_source=ig_embed&amp;utm_campaign=loading" data-instgrm-version="12" style=" background:#FFF; border:0; border-radius:3px; box-shadow:0 0 1px 0 rgba(0,0,0,0.5),0 1px 10px 0 rgba(0,0,0,0.15); margin: 1px; max-width:658px; min-width:326px; padding:0; width:99.375%; width:-webkit-calc(100% - 2px); width:calc(100% - 2px);"><div style="padding:16px;"> <a href="https://www.instagram.com/p/CClqRd7gZF_/?utm_source=ig_embed&amp;utm_campaign=loading" style=" background:#FFFFFF; ... (truncated)",
            "type": "text/html",
            "rel": [
                "app",
                "ssl",
                "html5",
                "inline"
            ],
            "options": {
                "_showcaption": {
                    "label": "Show author's text caption",
                    "value": true
                }
            },
            "media": {
                "min-width": 326,
                "max-width": 660,
                "aspect-ratio": 1.5873015873015872,
                "padding-bottom": 284
            }
        },
        {
            "href": "https://instagram.com/p/CClqRd7gZF_/media/?size=l",
            "type": "image",
            "rel": [
                "image",
                "thumbnail",
                "ssl"
            ],
            "media": {
                "width": 1080,
                "height": 1362
            }
        },
        {
            "href": "https://instagram.com/p/CClqRd7gZF_/media/?size=m",
            "type": "image",
            "rel": [
                "thumbnail",
                "ssl"
            ],
            "media": {
                "width": 320,
                "height": 404
            }
        },
        {
            "href": "https://instagram.com/p/CClqRd7gZF_/media/?size=t",
            "type": "image",
            "rel": [
                "thumbnail",
                "ssl"
            ],
            "media": {
                "width": 150,
                "height": 150
            }
        },
        {
            "href": "https://www.instagram.com/static/images/ico/favicon-192.png/68d99ba29cc8.png",
            "rel": [
                "icon",
                "ssl"
            ],
            "type": "image/png",
            "media": {
                "width": 192,
                "height": 192
            }
        },
        {
            "href": "https://www.instagram.com/static/images/ico/apple-touch-icon-180x180-precomposed.png/c06fdb2357bd.png",
            "rel": [
                "apple-touch-icon-precomposed",
                "icon",
                "ssl"
            ],
            "type": "image/png",
            "media": {
                "width": 180,
                "height": 180
            }
        },
        {
            "href": "https://www.instagram.com/static/images/ico/apple-touch-icon-167x167-precomposed.png/4985e31c9100.png",
            "rel": [
                "apple-touch-icon-precomposed",
                "icon",
                "ssl"
            ],
            "type": "image/png",
            "media": {
                "width": 167,
                "height": 167
            }
        },
        {
            "href": "https://www.instagram.com/static/images/ico/apple-touch-icon-152x152-precomposed.png/68193576ffc5.png",
            "rel": [
                "apple-touch-icon-precomposed",
                "icon",
                "ssl"
            ],
            "type": "image/png",
            "media": {
                "width": 152,
                "height": 152
            }
        },
        {
            "href": "https://www.instagram.com/static/images/ico/apple-touch-icon-120x120-precomposed.png/8a5bd3f267b1.png",
            "rel": [
                "apple-touch-icon-precomposed",
                "icon",
                "ssl"
            ],
            "type": "image/png",
            "media": {
                "width": 120,
                "height": 120
            }
        },
        {
            "href": "https://www.instagram.com/static/images/ico/apple-touch-icon-76x76-precomposed.png/666282be8229.png",
            "rel": [
                "apple-touch-icon-precomposed",
                "icon",
                "ssl"
            ],
            "type": "image/png",
            "media": {
                "width": 76,
                "height": 76
            }
        },
        {
            "href": "https://www.instagram.com/static/images/ico/favicon.svg/fc72dd4bfde8.svg",
            "rel": [
                "mask-icon",
                "icon",
                "ssl"
            ],
            "type": "image/svg"
        },
        {
            "href": "https://www.instagram.com/static/images/ico/favicon.ico/36b3ee2d91ed.ico",
            "rel": [
                "shortcut",
                "icon",
                "ssl"
            ],
            "type": "image/x-icon"
        }
    ],
    "h2": true,
    "rel": [
        "image",
        "thumbnail",
        "ssl"
    ],
    "html": "<img src="https://instagram.com/p/CClqRd7gZF_/media/?size=l">"
}

I have verified that my app gets basically the same results when it processes the URL (I store a cache of the result in my db in addition to the iframely cache).

iparamonau commented 4 years ago

I see in your JSON that Iframely gives you both app and image. It's just that the main html field indeed selects the image rather than the branded app embed.

This looks like you're making API calls with &media=1 query string. Remove it and Iframely will default to app as the selected html

meecect commented 4 years ago

I am just sending this url:

https://www.instagram.com/p/CClqRd7gZF_/

The JSON output above is from the /debug endpoint, so I can see that I am definitely not sending any other URL parameters. It seems like I must have a wrong priority or whitelist setting in my config for it to default to the image rel instead of the media rel, but I'm not familiar enough with the config files to pinpoint what I am doing wrong.

here is my config.js:

(function() {

    // Monkey patch before you require http for the first time.
    // `match` returns an array such as ["v12.", "12"]. Parse the captured digits
    // (group 1), not the array itself: parseInt on the array stringifies it to
    // "v12.,12" and yields NaN, which made the `< 10` check below always false.
    var versionMatch = process.version.match(/v(\d+)\./);
    // NaN when the version string is not in the expected form; the comparison
    // below is then false and no patch is applied.
    var majorVersion = versionMatch ? parseInt(versionMatch[1], 10) : NaN;
    if (majorVersion < 10) {
        // Swap the built-in HTTP parser for the http-parser-js implementation.
        process.binding('http_parser').HTTPParser = require('http-parser-js').HTTPParser;
    }

    var _ = require('underscore');
    var path = require('path');
    var fs = require('fs');

    var version = require('./package.json').version;

    // Built-in defaults. After this literal, the first existing override file
    // (config.<NODE_ENV>.js, else config.local.js) is merged over it with
    // `_.extend`, so any top-level key defined there replaces the value here.
    var config = {

        // baseStaticUrl is derived later as baseAppUrl + relativeStaticUrl
        // unless an override sets baseStaticUrl explicitly.
        baseAppUrl: "",
        port: 8061,
        relativeStaticUrl: "/s",
        use_http2: true,
        DEBUG: false,

        SPDY_AGENT_DEFAULT_PORT: 443,
        WHITELIST_URL: 'https://iframely.com/qa/whitelist.json',
        WHITELIST_URL_RELOAD_PERIOD: 60 * 60 * 1000,  // will reload WL every hour, if no local files are found in /whitelist folder

        WHITELIST_WILDCARD: {},
        WHITELIST_LOG_URL: 'https://iframely.com/whitelist-log',

        // Default cache engine to prevent warning.
        CACHE_ENGINE: 'node-cache',
        // NOTE(review): TTL values presumably are in seconds (config.local.js
        // annotates CACHE_TTL as "In seconds") - i.e. 1 day, 30 days, 7 days. Confirm.
        CACHE_TTL: 24 * 60 * 60,
        API_REQUEST_CACHE_TTL: 30 * 24 * 60 * 60,
        IMAGE_META_CACHE_TTL: 7 *24 * 60 * 60,

        CACHE_TTL_PAGE_TIMEOUT: 10 * 60,
        CACHE_TTL_PAGE_404: 10 * 60,
        CACHE_TTL_PAGE_OTHER_ERROR: 1 * 60,

        // Do not cache response in htmlparser with these status codes.
        TEMP_HTTP_ERROR_CODES: [
            408, 
            418, 
            429
            // 5xx included in logic.
        ],

        // Turned into the HTTP2_RETRY_CODES lookup object (code -> 1) after the
        // override file has been merged.
        HTTP2_RETRY_CODES_LIST: [
            'ECONNRESET',
            'ESOCKETTIMEDOUT'
        ],

        CLUSTER_WORKER_RESTART_ON_PERIOD: 8 * 3600 * 1000, // 8 hours.
        CLUSTER_WORKER_RESTART_ON_MEMORY_USED: 120 * 1024 * 1024, // 120 MB.

        // NOTE(review): presumably milliseconds (config.local.js marks
        // RESPONSE_TIMEOUT as "ms") - confirm for SHUTDOWN_TIMEOUT as well.
        RESPONSE_TIMEOUT: 5 * 1000,

        SHUTDOWN_TIMEOUT: 6 * 1000,

        // Version string read from ./package.json; also used in the default USER_AGENT.
        VERSION: version,

        FB_USER_AGENT: 'facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)',

        ACCEPT_LANGUAGE_SUFFIX: ';q=0.9,en;q=0.7,*;q=0.5',

        SKIP_IFRAMELY_RENDERS: false,
        DEFAULT_ASPECT_RATIO: 16 / 9,
        MAX_VERTICAL_ASPECT_RATIO: 1,
        ASPECT_RATIO_PRECISION: 0.25,

        DEFAULT_OMIT_CSS_WRAPPER_CLASS: 'iframely-responsive',
        DEFAULT_MAXWIDTH_WRAPPER_CLASS: 'iframely-embed',

        // Link `type` constants. config.TYPES is built from these values with
        // Object.values(config.T), so key order here is the order of TYPES.
        T: {
            text_html: "text/html",
            maybe_text_html: "maybe_text_html",            
            javascript: "application/javascript",
            safe_html: "text/x-safe-html",
            image_jpeg: "image/jpeg",
            flash: "application/x-shockwave-flash",
            image: "image",
            image_icon: "image/icon",
            image_png: "image/png",
            image_svg: "image/svg",
            image_gif: "image/gif",
            image_webp: "image/webp",
            video_mp4: "video/mp4",
            video_ogg: "video/ogg",
            video_webm: "video/webm",
            stream_apple_mpegurl: "application/vnd.apple.mpegurl",
            stream_x_mpegurl: "application/x-mpegURL",
            audio_mp3: "audio/mp3",
            audio_mpeg: "audio/mpeg",
            audio_mp4: "audio/mp4"
        },

        PROMO_RELS: [
            "player",
            "image",
            "thumbnail"
        ],

        // NOTE(review): presumably the rel names used as group keys when links
        // are grouped by rel (see GROUP_LINKS in config.local.js) - confirm.
        REL_GROUPS: [
            "promo",
            "app",
            "player",
            "survey",
            "summary",
            "image",
            "reader",
            "thumbnail",
            "logo",
            "icon",
            "file"
        ],

        MEDIA_ATTRS: [
            "width",
            "min-width",
            "max-width",
            "height",
            "min-height",
            "max-height",
            "aspect-ratio",
            "padding-bottom",
            "scrolling"
        ],

        // Link `rel` constants; each key maps to the identical string value.
        R: {
            player: "player",
            thumbnail: "thumbnail",
            image: "image",
            reader: "reader",
            file: "file",
            survey: "survey",
            app: "app",
            summary: "summary",

            iframely: "iframely",
            og: "og",
            twitter: "twitter",
            oembed: "oembed",
            sm4: "sm4",

            icon: "icon",
            logo: "logo",

            inline: "inline",
            ssl: "ssl",
            resizable: "resizable",

            autoplay: "autoplay",
            html5: "html5",
            gifv: "gifv",

            promo: "promo",
            playerjs: "playerjs",

            audio: 'audio',
            slideshow: 'slideshow',
            playlist: 'playlist'
        },

        FEATURES: [ // feature policy: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Feature-Policy#Directives
            'ambient-light-sensor', 'autoplay', 'accelerometer', 'camera', 'display-capture', 'document-domain', 'encrypted-media', 
            'fullscreen', 'geolocation', 'gyroscope', 'magnetometer', 'microphone', 'midi', 'payment', 'picture-in-picture',
            'speaker', 'sync-xhr', 'usb', 'wake-lock', 'vr', 'xr', 'vr / xr'
        ],

        // Option names
        O: {
            // compact & full - deprecated
            compact: "iframely.less",
            full: "iframely.more",
            // use O.more & O.less instead
            more: "iframely.more",
            less: "iframely.less"
        },

        // Option labels:
        L: {
            horizontal: 'Slimmer horizontal player',
            playlist: 'Include playlist',
            hide_artwork: 'Hide artwork',
            theme: 'Theme color',
            light: 'Light',
            dark: 'Dark',
            auto: 'Auto',
            default: 'Default',
            height: 'Adjust height',
            width: 'Adjust width',
            page: 'Active page'
        },

        // Whitelist settings.
        // Keyed by source; each value lists the whitelist rels known for that source.
        REL: {
            "iframely": [
                "reader",
                "app",
                "player",
                "survey",
                "image",
                "summary",
                "thumbnail",
                "logo"
            ],
            "twitter": [
                "player",
                "photo"
            ],
            "og": [
                "video"
            ],
            "sm4": [
                "video"
            ],
            "oembed": [
                "link",
                "rich",
                "video",
                "photo"
            ],
            "html-meta": [
                "video",
                "embedURL"
            ]
        },

        REL_OPTIONS: {
            all: ["ssl"],
            player: ["responsive", "autoplay"],
            video: ["responsive", "autoplay"],
            link: ["reader"],
            rich: ["reader"]
        },

        // whitelist rel to iframely rel.
        REL_MAP: {
            "article": "reader",
            "photo": "image",
            "video": "player"
        },

        // To detect: "html-meta".
        KNOWN_SOURCES: [
            "oembed",
            "og",
            "twitter",
            "iframely",
            "sm4"
        ],

        // Case-insensitive match of a known provider name followed by a literal dot.
        KNOWN_VIDEO_SOURCES: /(youtube|youtu|youtube\-nocookie|vimeo|dailymotion|theplatform|jwplatform|jwplayer|ooyala|cnevids|newsinc|podbean|simplecast|libsyn|wistia|podiant|art19|kaltura|mtvnservices|brightcove|bcove|soundcloud|giphy|viddler|flowplayer|vidible|bandzoogle|podigee|smugmug|facebook|vid|ultimedia|mixcloud|vidyard|youplay)\./i,

        // NOTE(review): these look like the rel orderings used to choose the
        // top-level `html`/`rel` of a response - the first list by default (app
        // first), the second when media is preferred (app last). Confirm against
        // the code that reads them.
        OEMBED_RELS_PRIORITY: ["app", "player", "survey", "image", "reader"],
        OEMBED_RELS_MEDIA_PRIORITY: ["player", "survey", "image", "reader", "app"],

        // Replaced as a whole if the override file defines its own
        // providerOptions (the merge below is `_.extend`, i.e. top-level only).
        providerOptions: {
            "readability": {},
            "twitter.status": {}
        }
    };

    // Candidate override files, both next to this file: an environment-specific
    // "config.<NODE_ENV>.js" (NODE_ENV falls back to "local") and "config.local.js".
    var envName = process.env.NODE_ENV || "local";
    var envOverridePath = path.resolve(__dirname, "config." + envName + ".js");
    var localOverridePath = path.resolve(__dirname, "config.local.js");

    // The environment-specific file wins; otherwise fall back to the local one.
    // `overrides` stays undefined when neither file exists.
    var overrides;
    if (fs.existsSync(envOverridePath)) {
        overrides = require(envOverridePath);
    } else if (fs.existsSync(localOverridePath)) {
        overrides = require(localOverridePath);
    }

    // Copy the override file's top-level keys over the defaults.
    _.extend(config, overrides);

    // Derive the static URL unless one was configured explicitly.
    if (!config.baseStaticUrl) {
        config.baseStaticUrl = config.baseAppUrl + config.relativeStaticUrl;
    }

    // Build a default User-Agent that advertises the app URL (or the project
    // repository when no base URL is configured).
    if (!config.USER_AGENT) {
        // A protocol-relative base URL ("//host") gets an explicit https: scheme.
        var isProtocolRelative = config.baseAppUrl && config.baseAppUrl.match(/^\/\//);
        var agentUrl = isProtocolRelative ? 'https:' + config.baseAppUrl : config.baseAppUrl;

        config.USER_AGENT = "Iframely/" + version + " (+" + (agentUrl || 'https://github.com/itteco/iframely') + ")";
    }

    // Flat list of every link type declared in config.T.
    config.TYPES = Object.values(config.T);

    // Lookup object for retryable error codes: { CODE: 1, ... }.
    var retryLookup = {};
    config.HTTP2_RETRY_CODES = retryLookup;
    config.HTTP2_RETRY_CODES_LIST.forEach(function(code) {
        retryLookup[code] = 1;
    });

    module.exports = config;
})();

and my config.local.js:

// Local override file: exports a plain object whose top-level keys replace the
// defaults declared in config.js (config.js merges it in with `_.extend`).
(function() {
    var config = {

        // Specify a path for custom plugins. Custom plugins will override core plugins.
        // CUSTOM_PLUGINS_PATH: __dirname + '/yourcustom-plugin-folder',

        DEBUG: false,
        RICH_LOG_ENABLED: false,

        // For embeds that require render, baseAppUrl will be used as the host.
        baseAppUrl: "http://p.onetomy.com",
        relativeStaticUrl: "/r",

        // Or just skip built-in renders altogether
        SKIP_IFRAMELY_RENDERS: true,

        // For legacy reasons the response format of Iframely open-source is
        // different by default as it does not group the links array by rel.
        // In order to get the same grouped response as in Cloud API,
        // add `&group=true` to your request to change response per request
        // or set `GROUP_LINKS` in your config to `true` for a global change.
        GROUP_LINKS: true,

        // Number of maximum redirects to follow before aborting the page
        // request with `redirect loop` error.
        MAX_REDIRECTS: 4,

        SKIP_OEMBED_RE_LIST: [
            // /^https?:\/\/yourdomain\.com\//,
        ],

        /*
        // Used to pass parameters to the generate functions when creating HTML elements
        // disableSizeWrapper: Don't wrap element (iframe, video, etc) in a positioned div
        GENERATE_LINK_PARAMS: {
            disableSizeWrapper: true
        },
        */

        port: 8061, //can be overridden by PORT env var
        host: '0.0.0.0',    // Dockers beware. See https://github.com/itteco/iframely/issues/132#issuecomment-242991246
                            //can be overridden by HOST env var

        // Optional SSL cert, if you serve under HTTPS.
        /*
        ssl: {
            key: require('fs').readFileSync(__dirname + '/key.pem'),
            cert: require('fs').readFileSync(__dirname + '/cert.pem'),
            port: 443
        },
        */

        /*
        Supported cache engines:
        - no-cache - no caching will be used.
        - node-cache - good for debug, node memory will be used (https://github.com/tcs-de/nodecache).
        - redis - https://github.com/mranney/node_redis.
        - memcached - https://github.com/3rd-Eden/node-memcached
        */
        CACHE_ENGINE: 'node-cache',
        CACHE_TTL: 0, // In seconds.
        // 0 = 'never expire' for memcached & node-cache to let cache engine decide itself when to evict the record
        // 0 = 'no cache' for redis. Use high enough (e.g. 365*24*60*60*1000) ttl for similar 'never expire' approach instead

        /*
        // Redis cache options.
        REDIS_OPTIONS: {
            host: '127.0.0.1',
            port: 6379
        },
        */

        /*
        // Memcached options. See https://github.com/3rd-Eden/node-memcached#server-locations
        MEMCACHED_OPTIONS: {
            locations: "127.0.0.1:11211"
        }
        */

        /*
        // Access-Control-Allow-Origin list.
        allowedOrigins: [
            "*",
            "http://another_domain.com"
        ],
        */

        /*
        // Uncomment to enable plugin testing framework.
        tests: {
            mongodb: 'mongodb://localhost:27017/iframely-tests',
            single_test_timeout: 10 * 1000,
            plugin_test_period: 2 * 60 * 60 * 1000,
            relaunch_script_period: 5 * 60 * 1000
        },
        */

        // If there's no response from remote server, the timeout will occur after
        RESPONSE_TIMEOUT: 5 * 1000, //ms

        /* From v1.4.0, Iframely supports HTTP/2 by default. Disable it, if you'd rather not.
           Alternatively, you can also disable per origin. See `proxy` option below.
        */
        // DISABLE_HTTP2: true,

        // Customize API calls to oembed endpoints.
        // Each entry pairs endpoint URL patterns (`re`) with extra GET params (`params`).
        ADD_OEMBED_PARAMS: [{
            // Endpoint url regexp array.
            // NOTE(review): this pattern matches the http:// scheme only - confirm
            // it matches the oEmbed endpoint URL that is actually requested.
            re: [/^http:\/\/api\.instagram\.com\/oembed/],
            // Custom get params object.
            params: {
                hidecaption: true
            }
        }, {
            re: [/^https:\/\/www\.facebook\.com\/plugins\/page\/oembed\.json/i],
            params: {
                show_posts: 0,
                show_facepile: 0,
                maxwidth: 600
            }
        }, {
            // match i=user or i=moment or i=timeline to configure these types individually
            // see params spec at https://dev.twitter.com/web/embedded-timelines/oembed
            re: [/^https?:\/\/publish\.twitter\.com\/oembed\?i=user/i],
            params: {
                limit: 1,
                maxwidth: 600
            }
        /*
        }, {
            // Facebook https://developers.facebook.com/docs/plugins/oembed-endpoints
            re: [/^https:\/\/www\.facebook\.com\/plugins\/\w+\/oembed\.json/i],
            params: {
                // Skip script tag and fb-root div.
                omitscript: true
            }
        */
         }],

        /* Configure use of HTTP proxies as needed.
           You don't have to specify all options per regex - just what you need to override
        */
        /*
        PROXY: [{
            re: [/^https?:\/\/www\.domain\.com/],
            proxy_server: 'http://1.2.3.4:8080',
            user_agent: 'CHANGE YOUR AGENT',
            headers: {
                // HTTP headers
                // Overrides previous params if overlapped.
            },
            request_options: {
                // Refer to: https://github.com/request/request
                // Overrides previous params if overlapped.
            },
            cache_ttl: 3600, // in seconds, cache response for 1 hour.
            disable_http2: true
        }],
        */

        // Customize API calls to 3rd parties. At the very least - configure required keys.
        // Because config.js merges this file with `_.extend`, this object replaces
        // the default providerOptions from config.js as a whole.
        providerOptions: {
            locale: "en_US",    // ISO 639-1 two-letter language code, e.g. en_CA or fr_CH.
                                // Will be added as highest priority in accept-language header with each request.
                                // Plus is used in FB, YouTube and perhaps other plugins
            "twitter": {
                "max-width": 550,
                "min-width": 250,
                hide_media: false,
                hide_thread: false,
                omit_script: true,
                // center: false,
                // dnt: true,
                cache_ttl: 100 * 365 * 24 * 3600 // 100 Years.
            },
            readability: {
                enabled: false
                // allowPTagDescription: true  // to enable description fallback to first paragraph
            },
            images: {
                loadSize: false, // if true, will try and load first bytes of all images to get/confirm the sizes
                checkFavicon: false // if true, will verify all favicons
            },
            tumblr: {
                consumer_key: "INSERT YOUR VALUE"
                // media_only: true     // disables status embeds for images and videos - will return plain media
            },
            google: {
                // https://developers.google.com/maps/documentation/embed/guide#api_key
                maps_key: "INSERT YOUR VALUE"
            },

            /*
            // Optional Camo Proxy to wrap all images: https://github.com/atmos/camo
            camoProxy: {
                camo_proxy_key: "INSERT YOUR VALUE",
                camo_proxy_host: "INSERT YOUR VALUE"
                // ssl_only: true // will only proxy non-ssl images
            },
            */

            // List of query parameters to add to YouTube and Vimeo frames
            // Start it with leading "?". Or omit altogether for default values
            // API key is optional, youtube will work without it too.
            // It is probably the same API key you use for Google Maps.
            youtube: {
                // api_key: "INSERT YOUR VALUE",
                get_params: "?rel=0&showinfo=1"     // https://developers.google.com/youtube/player_parameters
            },
            vimeo: {
                get_params: "?byline=0&badge=0"     // https://developer.vimeo.com/player/embedding
            },

            /*
            soundcloud: {
                old_player: true // enables classic player
            },
            giphy: {
                media_only: true // disables branded player for gifs and returns just the image
            }
            */
            /*
            bandcamp: {
                get_params: '/size=large/bgcol=333333/linkcol=ffffff/artwork=small/transparent=true/',
                media: {
                    album: {
                        height: 472,
                        'max-width': 700
                    },
                    track: {
                        height: 120,
                        'max-width': 700
                    }
                }
            }
            */
        },

        // WHITELIST_WILDCARD, if present, will be added to whitelist as record for top level domain: "*"
        // with it, you can define what parsers do when they run across unknown publisher.
        // If absent or empty, all generic media parsers will be disabled except for known domains
        // More about format: https://iframely.com/docs/qa-format

        /*
        WHITELIST_WILDCARD: {
              "twitter": {
                "player": "allow",
                "photo": "deny"
              },
              "oembed": {
                "video": "allow",
                "photo": "allow",
                "rich": "deny",
                "link": "deny"
              },
              "og": {
                "video": ["allow", "ssl", "responsive"]
              },
              "iframely": {
                "survey": "allow",
                "reader": "allow",
                "player": "allow",
                "image": "allow"
              },
              "html-meta": {
                "video": ["allow", "responsive"],
                "promo": "allow"
              }
        }
        */

        // Black-list any of the inappropriate domains. Iframely will return 417
        // At minimum, keep your localhosts blacklisted to avoid SSRF
        // NOTE(review): only 127.0.0.1, localhost and the AWS metadata address are
        // listed; other loopback/private addresses are not covered by these patterns.
        BLACKLIST_DOMAINS_RE: [
            /^https?:\/\/127\.0\.0\.1/i,
            /^https?:\/\/localhost/i,

            // And this is AWS metadata service
            // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-metadata.html
            /^https?:\/\/169\.254\.169\.254/
        ]
    };

    module.exports = config;
})();

Thanks for the help!

nleush commented 4 years ago

@meecect fixed in https://github.com/itteco/iframely/releases/tag/v1.4.5

meecect commented 4 years ago

wow, that was fast. I'll pull and test again in a couple of hours.

meecect commented 4 years ago

works perfectly...thanks again!

iparamonau commented 4 years ago

Thank you for reporting!