dfm / tess-atlas

MIT License
9 stars 8 forks source link

Improve summary page #232

Closed avivajpeyi closed 1 year ago

avivajpeyi commented 2 years ago
avivajpeyi commented 1 year ago

I was working on combining the stats page with the summary page http://catalog.tess-atlas.cloud.edu.au/content/stats.html

Below is a screenshot of what I managed to get working:

Screen Shot 2022-09-19 at 12 06 26 pm

Adding the code below so we don't lose it.

Python code ```python # --- # jupyter: # jupytext: # formats: ipynb,py:light # text_representation: # extension: .py # format_name: light # format_version: '1.5' # jupytext_version: 1.11.5 # kernelspec: # display_name: Python 3 (ipykernel) # language: python # name: python3 # --- # ## STATS PAGE GENERATION # + import pandas as pd import matplotlib.pyplot as plt import glob from tess_atlas.data.exofop import get_tic_database, filter_db_without_lk # from tess_atlas.data.planet_candidate import CLASS_SHORTHAND import pandas as pd import os import re import pandas as pd from tqdm.auto import tqdm import numpy as np CLASS_SHORTHAND = dict( CP="Confirmed Planet", EB="Eclipsing Binary", IS="Instrument Noise", KP="Known Planet", O="O", PC="Planet Candidate", V="Stellar Variability", U="Undecided", ) URL = "http://catalog.tess-atlas.cloud.edu.au/content/toi_notebooks/toi_{}.html" # IMG_URL = "http://catalog.tess-atlas.cloud.edu.au/content/toi_notebooks/toi_{}_files/phase_plot_TOI{}_1.png.png" IMG_URL = "./tess_atlas_catalog/0.2.1.dev64+gc7fa3a0/toi_{}_files/{}" PHASE_IMG = "phase_plot_TOI{}_1_lowres.png" LINK = '''{txt}''' def toi_num(f): toi_str = re.search(r"toi_(.*\d)", f).group() return int(toi_str.split('_')[1]) def do_tois_have_netcdf(notebook_root, all_tois): files = glob.glob(f"{notebook_root}/*/toi_*_files/*.netcdf") tois = [toi_num(f) for f in files] return [True if i in tois else False for i in all_tois] def do_tois_have_phase_plot(notebook_root, all_tois): files = glob.glob(f"{notebook_root}/*/toi_*_files/{PHASE_IMG.format('*')}") tois = [toi_num(f) for f in files] return [True if i in tois else False for i in all_tois] def load_run_stats(notebook_root): fname = glob.glob(f"{notebook_root}/*/run_stats.csv") run_stats = pd.read_csv(fname[0]) cols = ['toi_numbers', 'execution_complete', 'duration'] run_stats.columns = cols run_stats.duration = run_stats.duration.apply(lambda x:round(x/(60*60),2)) # keep only the longest duration for the TOI (the shorter one is 
just generation) run_stats = run_stats.sort_values(by='duration', ascending=False) run_stats['duplicate']= run_stats.duplicated('toi_numbers', keep='first') run_stats = run_stats[run_stats['duplicate']==False] return run_stats[cols] def parse_logs(notebook_root): logs = glob.glob(f"{notebook_root}/log_pe/pe_*.log") toi_nums = [] log_line = [] for l in tqdm(logs, desc="Parsing logs"): toi_nums.append(get_toi_number_from_log(l)) log_line.append(get_log_last_line(l)) df= pd.DataFrame(dict( toi_numbers=toi_nums, log_fn=logs, log_line=log_line )) count_before = len(df) df = df.dropna() df = df.drop_duplicates(subset='toi_numbers', keep="first") count_after = len(df) if count_before != count_after: print(f"{count_before-count_after} log(s) dropped ({count_after} logs remain)") df = df.astype({"toi_numbers":"int32"}) return df def clean_log_line(log): log = log.strip() ansi_escape = re.compile(r'(?:\x1B[@-_]|[\x80-\x9F])[0-?]*[ -/]*[@-~]') log = ansi_escape.sub('', log) return log def get_log_last_line(log_fn): with open(log_fn, 'rb') as f: # open in binary mode to seek from end try: # catch OSError in case of a one line file f.seek(-2, os.SEEK_END) while f.read(1) != b'\n': f.seek(-2, os.SEEK_CUR) except OSError: f.seek(0) last_line = f.readline().decode() return clean_log_line(last_line) def get_toi_number_from_log(log_fn): regex = r'run_toi\((.*\d)\)' with open(log_fn, 'r') as f: f.seek(50, 0) txt = f.read(200) # read the chars from idx 50 - 250 match = re.findall(regex, txt) if match: return (int(match[0])) else: return np.nan def get_classification(short_classes): return [CLASS_SHORTHAND.get(sc, sc) for sc in short_classes] def get_toi_categories(): tic_db = get_tic_database() tic_db = filter_db_without_lk(tic_db, remove=True).copy() # tic_db = tic_db[['TOI int', 'Multiplanet System', 'Single Transit']] tic_db["Normal System"] = (~tic_db["Single Transit"]) & (~tic_db["Multiplanet System"]) tic_db.rename(columns = {'TOI int':'toi_numbers'}, inplace = True) return 
tic_db def load_toi_summary_data(notebook_root): df = pd.read_csv(f"{notebook_root}/tois.csv")['toi_numbers'] run_stats = load_run_stats(notebook_root) log_df = parse_logs(notebook_root) categories = get_toi_categories() df = pd.merge(df, run_stats, how='left',on="toi_numbers") df = pd.merge(df, categories, how='left',on="toi_numbers") df = pd.merge(df, log_df, how='left',on="toi_numbers") df['url'] = [URL.format(i) for i in df.toi_numbers] df['execution_complete'] = df['execution_complete'].fillna(False) df['duration'] = df['duration'].fillna(10) df['phaseplt_present'] = do_tois_have_phase_plot(notebook_root, df.toi_numbers) df['netcdf_present'] = do_tois_have_netcdf(notebook_root, df.toi_numbers) df['STATUS'] = get_status(df) df['TOI'] = create_weburl(df) df['category'] = get_category(df) df['logs'] = format_logs(df) df["Phase Plot"]= get_phase_plot_urls(df) df["Classification"] = get_classification(df['TESS Disposition']) df = df.drop_duplicates(subset='toi_numbers', keep="first") return df def get_status(df): status = [] for index, toi in df.iterrows(): s = "FAIL: no netcdf" # if toi.execution_complete: # s = "PASS" if toi.netcdf_present: s = "PASS" if not toi.phaseplt_present: s = "FAIL: no phaseplot" status.append(s) return status def create_weburl(df): url = [""] * len(df) for index, toi in df.iterrows(): url[index] = LINK.format(url=toi.url, txt=toi.toi_numbers) return url def get_phase_plot_urls(df): html = '''{}''' urls = [""] * len(df) for i, toi in df.iterrows(): t = toi.toi_numbers img = IMG_URL.format(t, PHASE_IMG.format(t)) txt = html.format(img, f"TOI{t} Phaseplot") urls[i] = LINK.format(url=toi.url, txt=txt) return urls def get_category(df): cat = [] for index, toi in df.iterrows(): if toi["Multiplanet System"] and toi["Single Transit"]: cat.append("multi planet - single transit") elif toi["Multiplanet System"] and not toi["Single Transit"]: cat.append("multi planet") elif not toi["Multiplanet System"] and toi["Single Transit"]: cat.append("single 
transit") else: cat.append("normal") return cat def format_logs(df): logs = [] for index, toi in df.iterrows(): l = "" if not toi.STATUS == "PASS": l = f"{toi.log_fn}:{toi.log_line}" logs.append(l) return logs tois = load_toi_summary_data('tess_atlas_catalog') print("done!") # + import matplotlib.pyplot as plt import numpy as np import matplotlib def plot_histogram_with_collection_bin(ax, data, bins, plt_kwargs): clipped_data = np.clip(data, bins[0], bins[-1]) ax.hist(clipped_data, bins=bins, **plt_kwargs) xlabels = bins[1:].astype(str) xlabels[-1] += '+' N_labels = len(xlabels) ax.set_xlim([min(bins), max(bins)]) xticks=ax.get_xticks().tolist() xticks[-1] = f"+{int(xticks[-1])}" ax.set_xticklabels(xticks) return ax def plot_runtimes(df): total_num_tois = len(df) df["time"] = df['duration'] fig, ax = plt.subplots() df_passed = df[df["STATUS"] == "PASS"] df_failed = df[df["STATUS"] != "PASS"] bins = np.linspace(0, 5, 50) histargs = dict(histtype="stepfilled",lw=2) ax = plot_histogram_with_collection_bin( ax, df_passed.time, bins, dict( **histargs, label=f"Passed ({len(df_passed)}/{len(df)})", edgecolor="tab:green", facecolor=(.128, .355, 0 , 0.3), )) ax = plot_histogram_with_collection_bin( ax, df_failed.time, bins, dict( **histargs, label=f"Failed ({len(df_failed)}/{len(df)})", edgecolor="tab:red", facecolor=(.255, .155, 0 , 0.3), )) legend = ax.legend() offset = matplotlib.text.OffsetFrom(legend, (1.0, 0.0)) avg_time, tot_time = np.mean(df_passed.duration), np.sum(df.duration) text = f"Avg Time: {avg_time:.2f} Hrs\nTotal time ~{int(tot_time)} Hrs" ax.annotate( text, xy=(0, 0), size=14, xycoords="figure fraction", xytext=(0, -20), textcoords=offset, horizontalalignment="right", verticalalignment="top", ) ax.set_xlim(left=0) ax.set_xlabel("Time [Hr]") plt.tight_layout() plot_runtimes(tois) plt.savefig("stats.png") # + from jinja2 import Template HTML_CODE = """

Run stats

{{table_html}} """ def generate_html(dataframe: pd.DataFrame): dataframe['duration[Hr]'] = dataframe['duration'] dataframe = dataframe[['TOI','STATUS', 'Classification', "category", "duration[Hr]", "Phase Plot", "logs"]] table_html = dataframe.to_html(table_id="table", index=False) table_html = table_html.replace("dataframe", "table table-striped table-bordered") table_html = table_html.replace("<", "<") table_html = table_html.replace(">", ">") return Template(HTML_CODE).render(table_html=table_html, image_path="stats.png") t = generate_html(tois) with open("stats.html", 'w') as f: f.write(t) print(t[2000:3000]) # - # TOI101 phaseplot # ## MAIN TOI PAGE # + import os import subprocess import shutil from tess_atlas.webbuilder.page_builder import DIR MENU_PAGE = os.path.join(DIR, "template/content/toi_fits.rst") builddir = "fit_page" if os.path.isdir(builddir): shutil.rmtree(builddir) os.makedirs(builddir, exist_ok=False) CONF = """ project = 'TESS Atlas"' copyright = "2022, TESS Atlas community" author = "the TESS Atlas community" html_title = "TESS Atlas" release = "1" extensions = [ "sphinx_togglebutton", "sphinx_tabs.tabs", "sphinx_copybutton", "myst_nb", "jupyter_book", "sphinx_comments", "sphinx.ext.intersphinx", "sphinx_design", "sphinx_book_theme", "sphinx_remove_toctrees", "sphinx_collapse" ] # nbsphinx_execute = 'never' jupyter_execute_notebooks = "off" exclude_patterns = [] html_theme = "sphinx_book_theme" html_theme_options = dict( repository_url="https://github.com/dfm/tess-atlas", use_repository_button=True, use_fullscreen_button=True, use_download_button=True, search_bar_text="Search the Atlas...", show_toc_level=1, collapse_navigation=True, show_prev_next=False, single_page=True ) language = None pygments_style = "sphinx" html_permalinks = True html_sourcelink_suffix = "" numfig = False panels_add_bootstrap_css = False suppress_warnings = ["myst.domains"] html_copy_source = False remove_from_toctrees = ["content/toi_notebooks/toi_*.ipynb"] """ with 
open(f"{builddir}/conf.py", "w") as f: f.write(CONF) CUR_PAGE = f"{builddir}/index.rst" shutil.copyfile(MENU_PAGE, CUR_PAGE) TOI_REGEX = "july12_cat/0.2.1.dev64+gc7fa3a0/toi_*.ipynb" #------------------------------------------------ # #!/usr/bin/env python # -*- coding: utf-8 -*- """Module to build home page for TOIs""" import glob import os from tess_atlas.data.exofop import ( get_toi_numbers_for_different_categories, get_toi_list, ) from jinja2 import Template CATEGORISED_TOIS = get_toi_numbers_for_different_categories() CATEGORISED_TOIS = {k:df['toi_numbers'].tolist() for k,df in CATEGORISED_TOIS.items()} TOI_LINK = Template("`TOI {{toi_int}} `_") IMAGE = Template( """.. figure:: toi_notebooks/{{rel_path}} :target: toi_notebooks/{{toi_fname}}.html """ ) def render_page_template(fname, page_data): with open(fname) as file_: template = Template(file_.read()) return template.render(**page_data) def get_toi_str_from_path(path): return get_toi_fname(path).split("_")[1] def get_toi_fname(path): return os.path.basename(path).split(".")[0] def get_toi_number(path): return int(get_toi_str_from_path(path)) def render_toi_data(path): fname = get_toi_fname(path) toi_int = get_toi_number(path) return TOI_LINK.render(toi_int=toi_int, toi_fname=fname) def sort_files(files): return sorted(files, key=lambda x: get_toi_number(x)) def get_phase_plots(notebook_path, notebook_dir): toi_str = get_toi_str_from_path(notebook_path) phase_regex = os.path.join(notebook_dir, f"toi_{toi_str}_files/phase*.png") phase_plots = glob.glob(phase_regex) if len(phase_plots) > 0: return [p.split(notebook_dir)[1] for p in phase_plots] return [] def render_image_data(notebook_path, notebook_dir): image_paths = get_phase_plots(notebook_path, notebook_dir) toi_fname = get_toi_fname(notebook_path) return [IMAGE.render(rel_path=p, toi_fname=toi_fname) for p in image_paths] def split_notebooks(notebook_files, notebook_dir): with_plots, without_plots = [], [] for notebook_path in notebook_files: if 
len(get_phase_plots(notebook_path, notebook_dir)) > 0: with_plots.append(notebook_path) else: without_plots.append(notebook_path) return with_plots, without_plots def generate_number_data(successful_data, failed_data): numbers = {k: len(v) for k, v in CATEGORISED_TOIS.items()} numbers["total"] = len(get_toi_list()) total_done, total_fail = 0, 0 for type in CATEGORISED_TOIS.keys(): numbers[f"{type}_done"] = len(successful_data[type].keys()) - 1 numbers[f"{type}_fail"] = len(failed_data[type]) total_done += numbers[f"{type}_done"] total_fail += numbers[f"{type}_fail"] numbers.update(dict(done=total_done, fail=total_fail)) return numbers def get_toi_category(notebook_path): toi_number = get_toi_number(notebook_path) for toi_type in ["single", "multi", "norm"]: if toi_number in CATEGORISED_TOIS[toi_type]: return toi_type raise ValueError(f"TOI{toi_number} is uncategorised.") def generate_page_data(notebook_regex): """ required data: - "number" dict with keys { done, fail, single, multi, norm, fail_single, fail_multi, fail_norm, done_single, done_norm, done_multi } - "successful_tois" dict of dict { "normal" {toi_link: toi_phase_plot}, "single" {toi_link: toi_phase_plot}, "multi" {toi_link: toi_phase_plot}, } - "failed_tois" dict of { "normal" [toi_link], "single" [toi_link] "multi" [toi_link] } """ notebook_files = sort_files(glob.glob(notebook_regex)) notebook_dir = os.path.dirname(notebook_regex) success_notebooks, failed_notebooks = split_notebooks( notebook_files, notebook_dir ) num_fail, num_pass = len(failed_notebooks), len(success_notebooks) successful_data = { k: {"TOI": ["Phase Plot"]} for k in CATEGORISED_TOIS.keys() } failed_data = {k: [] for k in CATEGORISED_TOIS.keys()} for notebook_path in success_notebooks: toi_data = render_toi_data(notebook_path) image_data = render_image_data(notebook_path, notebook_dir) toi_type = get_toi_category(notebook_path) successful_data[toi_type][toi_data] = image_data for notebook_path in failed_notebooks: toi_type = 
get_toi_category(notebook_path) failed_data[toi_type].append(render_toi_data(notebook_path)) number = generate_number_data(successful_data, failed_data) number["fail"], number["done"] = num_fail, num_pass return dict( number=number, successful_tois=successful_data, failed_tois=failed_data, ) def make_menu_page(notebook_regex, path_to_menu_page): page_data = generate_page_data(notebook_regex) page_contents = render_page_template(path_to_menu_page, page_data) with open(path_to_menu_page, "w") as f: f.write(page_contents) #------------------------------------------------ def main(): toi_regex = os.path.join(TOI_REGEX) make_menu_page( notebook_regex=toi_regex, path_to_menu_page=CUR_PAGE, ) print("Finished making template, building page") command = f"sphinx-build -b html -j auto {builddir} {builddir}/build -Q" subprocess.run(command, shell=True, check=True) print("DONE") main() # - command = f"sphinx-build -b html -j auto {builddir} {builddir}/build -Q" subprocess.run(command, shell=True, check=True) ```
avivajpeyi commented 1 year ago

See https://github.com/dfm/tess-atlas/blob/2a5da690c0e0f432fc4db165ecca70f1376a09c2/src/tess_atlas/notebook_controllers/controllers/menu_notebook_controller.py#L16