Closed albinmedoc closed 1 week ago
I got it working with my m3u file. However, it seems that Plex has ended support for .strm files, so I will not continue improving the code. I don't want my work to go to waste, so I'm sharing my parser.py file here.
import os
import re
from enum import Enum
from typing import Optional
class LineType(Enum):
    """Kind of playlist entry announced by the most recent ``#EXTINF`` line."""

    # Entry whose group-title starts with "VOD:".
    Movie = 1
    # Entry whose group-title starts with "Series:".
    Series = 2
    # Anything else; such entries are skipped when their URL line is reached.
    Other = 99
def extract_key_value_pairs(s: str) -> dict[str, str]:
    """Collect every ``key="value"`` attribute found in a line of text.

    Args:
        s: The text to scan, typically an M3U ``#EXTINF`` line.

    Returns:
        A dict mapping each attribute name to its quoted value (which may be
        empty). If a name occurs more than once, the last occurrence wins.
    """
    # A key is one or more runs of word characters joined by hyphens, so names
    # with several hyphens are captured whole instead of being cut down to
    # their last two segments. Values are everything between the double quotes.
    pattern = r'(\w+(?:-\w+)*)="([^"]*)"'
    # With two capture groups, findall yields (key, value) tuples.
    matches: list[tuple[str, str]] = re.findall(pattern, s)
    return dict(matches)
def extract_series_name(episode_title: str) -> Optional[str]:
    """Return the series name that precedes the season marker in a title.

    The name is the shortest leading text followed by one whitespace
    character, an ``S`` and two digits (for example ``"Show S01E02"``
    yields ``"Show"``).

    Args:
        episode_title: Full episode title.

    Returns:
        The series name, or ``None`` when the title has no such marker.
    """
    # re.match anchors at the start of the string, so no explicit "^" is needed.
    match = re.match(r"(.*?)\sS\d{2}", episode_title)
    return match.group(1) if match else None
def extract_season_and_episode(episode_title: str) -> Optional[str]:
    """Return the ``SxxEyy`` season/episode marker found in a title.

    Args:
        episode_title: Full episode title.

    Returns:
        The first marker found (for example ``"S01E02"``), or ``None`` when
        the title contains none.
    """
    # The episode part accepts two or more digits: with exactly two,
    # "S01E100" would be cut down to "S01E10" and clash with episode 10.
    match = re.search(r"(S\d{2}E\d{2,})", episode_title)
    return match.group(1) if match else None
def create_strm_file(directory: str, filename: str, url: str) -> None:
    """Write *url* to ``directory/filename``, creating the directory if needed.

    An existing file with the same name is overwritten. Progress is reported
    on standard output.

    Args:
        directory: Folder that should contain the file. An empty string means
            the current working directory.
        filename: Name of the file to write (for example ``"Title.strm"``).
        url: Text written as the file's entire content.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    print(f"Creating file: {filename} in {directory} with URL: {url}")
    # os.makedirs("") raises, so an empty directory is left alone.
    if directory and not os.path.isdir(directory):
        # exist_ok avoids failing if the folder appears between the check
        # above and this call.
        os.makedirs(directory, exist_ok=True)
        print(f"Created directory: {directory}")
    # Explicit encoding keeps the output independent of the platform locale.
    with open(os.path.join(directory, filename), "w", encoding="utf-8") as f:
        f.write(url)
    print(f"Created file: {filename} in {directory}")
# Driver script: read the playlist and write one .strm file per movie or
# series episode. Each "#EXTINF" line describes the entry whose URL follows
# on a later line, so the entry's kind, title and directory are carried over
# from the "#EXTINF" line to the URL line.

# Open the M3U file and read its whole contents.
with open("/usr/src/app/m3u_file.m3u", "r") as f:
    contents = f.read()

# Split the contents by newline characters
lines = contents.split("\n")

# State remembered from the most recent "#EXTINF" line.
line_type: LineType = LineType.Other
title: str = ""
directory: str = ""

# Iterate through each line
for line in lines:
    print(f"Processing line: {line}")
    if line.startswith("#EXTINF"):
        # Treat the entry as skippable until it is fully understood; this
        # replaces a bare "except:" that hid every error, not just missing
        # attributes or unparsable names.
        line_type = LineType.Other
        attributes = extract_key_value_pairs(line)
        group_title = attributes.get("group-title")
        name = attributes.get("tvg-name")
        if group_title is None or name is None:
            continue
        # Determine the type of content (movie, series) based on the group name
        # NOTE(review): a title containing a path separator ends up inside
        # both the directory and the file name - confirm how that should be
        # handled.
        if group_title.startswith("VOD:"):
            title = name
            directory = os.path.join("Movies", title)
            line_type = LineType.Movie
        elif group_title.startswith("Series:"):
            series_name = extract_series_name(name)
            season_and_episode = extract_season_and_episode(name)
            # Either helper returns None when the name lacks an SxxEyy marker.
            if series_name is not None and season_and_episode is not None:
                directory = os.path.join("Series", series_name)
                title = series_name + " " + season_and_episode
                line_type = LineType.Series
    # Check if the line is a URL
    elif line.startswith("http"):
        if line_type == LineType.Other:
            print("Skipping line")
        else:
            create_strm_file(directory, title + ".strm", line)
I got it working with my m3u file. But it seems like plex has ended support for strm-files, so I will not continue improving the code. I don't want my work to be wasted, so I throw in my parser.py file here.
Hey thanks for the input! Currently rewriting the entire parser.py script to address this issue and more.
You could always switch to Jellyfin; it has no problem playing .strm files.
I got it working with my m3u file. But it seems like plex has ended support for strm-files, so I will not continue improving the code. I don't want my work to be wasted, so I throw in my parser.py file here.
import os
import re
from enum import Enum
from typing import Optional


class LineType(Enum):
    """Kind of playlist entry announced by the most recent ``#EXTINF`` line."""

    Movie = 1
    Series = 2
    Other = 99


def extract_key_value_pairs(s: str) -> dict[str, str]:
    """Return every ``key="value"`` attribute found in *s* as a dict."""
    # Keys may contain any number of hyphen-joined word segments.
    pattern = r'(\w+(?:-\w+)*)="([^"]*)"'
    matches: list[tuple[str, str]] = re.findall(pattern, s)
    return dict(matches)


def extract_series_name(episode_title: str) -> Optional[str]:
    """Return the text before the ``Sxx`` season marker, or ``None`` if absent."""
    match = re.match(r"(.*?)\sS\d{2}", episode_title)
    return match.group(1) if match else None


def extract_season_and_episode(episode_title: str) -> Optional[str]:
    """Return the first ``SxxEyy`` marker in the title, or ``None`` if absent."""
    # Two or more episode digits, so "S01E100" is not cut down to "S01E10".
    match = re.search(r"(S\d{2}E\d{2,})", episode_title)
    return match.group(1) if match else None


def create_strm_file(directory: str, filename: str, url: str) -> None:
    """Write *url* to ``directory/filename``, creating the directory if needed."""
    print(f"Creating file: {filename} in {directory} with URL: {url}")
    # os.makedirs("") raises, so an empty directory is left alone.
    if directory and not os.path.isdir(directory):
        os.makedirs(directory, exist_ok=True)
        print(f"Created directory: {directory}")
    with open(os.path.join(directory, filename), "w", encoding="utf-8") as f:
        f.write(url)
    print(f"Created file: {filename} in {directory}")


# Open the M3U file and read its whole contents.
with open("/usr/src/app/m3u_file.m3u", "r") as f:
    contents = f.read()

# Split the contents by newline characters
lines = contents.split("\n")

# State remembered from the most recent "#EXTINF" line.
line_type: LineType = LineType.Other
title: str = ""
directory: str = ""

# Iterate through each line
for line in lines:
    print(f"Processing line: {line}")
    if line.startswith("#EXTINF"):
        # Skippable until fully understood; replaces a bare "except:".
        line_type = LineType.Other
        attributes = extract_key_value_pairs(line)
        group_title = attributes.get("group-title")
        name = attributes.get("tvg-name")
        if group_title is None or name is None:
            continue
        # Determine the type of content (movie, series) based on the group name
        if group_title.startswith("VOD:"):
            title = name
            directory = os.path.join("Movies", title)
            line_type = LineType.Movie
        elif group_title.startswith("Series:"):
            series_name = extract_series_name(name)
            season_and_episode = extract_season_and_episode(name)
            if series_name is not None and season_and_episode is not None:
                directory = os.path.join("Series", series_name)
                title = series_name + " " + season_and_episode
                line_type = LineType.Series
    # Check if the line is a URL
    elif line.startswith("http"):
        if line_type == LineType.Other:
            print("Skipping line")
        else:
            create_strm_file(directory, title + ".strm", line)
I'd like to thank you for giving me some insight into how to improve the parser script to accommodate more types of .m3u formats. While your method definitely works for your provided example, I found that many other .m3u files are formatted to a slightly different standard. Most do not have all the attributes, such as tvchno="", tvg-id="", and tvg-name=""; rather, they keep most of the relevant information in just the group-title="" value. So I made the parser script build a dictionary of key-value pairs from every attribute, but have it pull its information mainly from the group-title value.
With your provided example line, you could use this compose example to get a proper directory/file structure. Adding the SCRUB_HEADER value to eliminate the text that precedes the movie's title, and the REMOVE_TERMS value to remove the language in brackets, would yield a proper Movie Title (year).strm. You could add more terms to handle television shows as well, depending on how those are formatted.
m3uparser:
  image: xaque87/m3uparser:latest
  environment:
    - PUID=1000 # Default if blank
    - PGID=1000 # Default if blank
    - M3U_URL="m3uURL1.com, m3uURL2.com, etc..."
    - HOURS=12 #update interval, setting this optional, default 12hrs
    - SCRUB_HEADER="Svenska"."
    - REMOVE_TERMS="[SE]"
    - CLEANERS=movies
    - LIVE_TV= # Default is false
    - UNSORTED= # Default is false
  volumes:
    - /path/to/your/media/library:/usr/src/app/VODS
I ran into problems running the code, and I think I understand why. In the current code, tvgroup is assumed to be the first attribute, but this is not always the case. A slightly "kinder" way of retrieving the attribute is needed.
I wrote up a quick function that can be used to support various m3u formats. Hopefully this can help you. I tried to implement it in the codebase, but I still don't fully understand how the current code works.
The function returns a dict representing all attributes.
Originally posted by @albinmedoc in https://github.com/Xaque8787/m3uparser/issues/2#issuecomment-2162842436