andig / videodb

The videoDB media collection software
65 stars 42 forks source link

Engine -> OFDB Scraper - Genre Comedy doesn't get added #109

Closed rocKay82 closed 1 year ago

rocKay82 commented 6 years ago

The genre Comedy doesn't get added by the OFDB scraper engine because of the umlaut ö in "Komödie". This looks like a UTF-8 issue. I made a quick-and-dirty fix in ofdbscraper.php and will try to solve it properly in the future:

    // Genres
    //
    // Lookup table from the genre label found on the scraped page to the
    // genre name appended to $data['genres']. Labels mapped to '' are still
    // matched by isset() below, so an empty string is appended for them.
    //
    // All keys are plain ASCII on purpose: a literal umlaut in a key would
    // only match if this source file happened to be saved in the same
    // encoding as the scraped data. "Komödie" is therefore stored under its
    // transliterated form 'Komoedie' and the scraped label is normalised
    // before the lookup.
    $genres = array(
        'Amateur' => '',
        'Eastern' => '',
        'Experimentalfilm' => '',
        'Mondo' => '',
        'Kampfsport' => 'Sport',
        'Biographie' => 'Biography',
        'Katastrophen' => 'Thriller',
        'Krimi' => 'Crime',
        'Science-Fiction' => 'Sci-Fi',
        'Kinder-/Familienfilm' => 'Family',
        'Dokumentation' => 'Documentary',
        'Action' => 'Action',
        'Drama' => 'Drama',
        'Abenteuer' => 'Adventure',
        'Historienfilm' => 'History',
        'Kurzfilm' => 'Short',
        'Liebe/Romantik' => 'Romance',
        'Heimatfilm' => 'Romance',
        'Grusel' => 'Horror',
        'Horror' => 'Horror',
        'Erotik' => 'Adult',
        'Hardcore' => 'Adult',
        'Sex' => 'Adult',
        'Musikfilm' => 'Musical',
        'Animation' => 'Animation',
        'Fantasy' => 'Fantasy',
        'Trash' => 'Horror',
        'Komoedie' => 'Comedy',
        'Krieg' => 'War',
        'Mystery' => 'Mystery',
        'Thriller' => 'Thriller',
        'Tierfilm' => 'Documentary',
        'Western' => 'Western',
        'TV-Serie' => '',
        'TV-Mini-Serie' => '',
        'Sportfilm' => 'Sport',
        'Splatter' => 'Horror',
        'Manga/Anime' => 'Animation'
    );

    // Byte sequences under which a lowercase o-umlaut may show up in the
    // decoded label, written as escapes so they do not depend on the encoding
    // of this source file (the previous utf8_encode('ö') call did, and
    // utf8_encode() is deprecated as of PHP 8.2):
    //   - "\xC3\x83\xC2\xB6"  UTF-8 that was encoded to UTF-8 a second time
    //   - "\xC3\xB6"          UTF-8
    //   - "\xF6"              ISO-8859-1 (never a valid byte in UTF-8 text)
    // The longest sequence comes first; none is a substring of another.
    $oUmlautVariants = array("\xC3\x83\xC2\xB6", "\xC3\xB6", "\xF6");

    // Isolate the bold section following the "Genre(s):" label, then walk
    // over every link inside it; each link text is one genre label.
    if (preg_match('/>Genre\(s\)\:.*?<b>(.*?)<\/b>/i', $resp['data'], $ary))
    {
        if (preg_match_all('/<a.*?>(.*?)<\/a>/i', $ary[1], $ary2, PREG_SET_ORDER))
        {
            foreach ($ary2 as $row) {
                // Resolve HTML entities (e.g. &ouml;) and drop nested markup.
                $genre = trim(html_entity_decode($row[1]));
                $genre = strip_tags($genre);
                if (!$genre) continue;

                // Transliterate o-umlaut to "oe" so that "Komödie" hits the
                // ASCII key 'Komoedie' regardless of the incoming encoding.
                $genre = str_replace($oUmlautVariants, 'oe', $genre);

                if (isset($genres[$genre])) $data['genres'][] = $genres[$genre];
            }
        }
    }
johanneskonst commented 1 year ago

Thank you for the examples and code, continuing in #106