rwood-97 opened 11 months ago
I've played around with this tonight. Something along the lines of the `read_gexf` function format would be nice:
```python
import json
import re

import pandas as pd

LABELS = ["Building", "No building"]


def read_labelstudio(file_path, labels=LABELS):
    with open(file_path, "r") as f:
        data = json.load(f)

    rows = []
    for file in data:
        # Get patch name (drop the hash prefix Label Studio adds on upload)
        patch_name = "-".join(file.get("file_upload").split("-")[1:])

        # Get parent ID (parent map)
        parent_id_detected = None
        pattern = re.search(r"map_(\d+)[^(_|\.)]", patch_name)
        if pattern:
            # group(0) captures e.g. "map_74488689" in full (the last digit
            # satisfies the negated class, stopping the match before "." or "_")
            parent_id_detected = pattern.group(0)

        # Get choices from all annotators
        annotations = file.get("annotations")
        for annotation in annotations:
            # completed_by = "annotationstudio-" + str(
            #     annotation.get("completed_by")
            # )
            all_choices = []
            for result in annotation["result"]:
                choices = result.get("value").get("choices")
                if len(choices) > 1:
                    raise ValueError("More than one choice selected")
                choice = choices[0]
                all_choices.append(choice)
            if len(all_choices) > 1:
                raise ValueError("More than one choice selected")
            label = all_choices[0]
            rows.append(
                {
                    "image_id": patch_name,
                    "parent_id": parent_id_detected,
                    "label": labels.index(label),
                    # "completed_by": completed_by,
                }
            )
    return pd.DataFrame(rows)


data = read_labelstudio("../../../../Downloads/labelstudio-annotations.json")
```
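As a quick sanity check of the filename parsing (the hash prefix and map ID below are made up):

```python
import re

# Made-up upload name: Label Studio prefixes the original filename with a hash
file_upload = "e4f5a6b7-patch-0-0-100-100-map_74488689.png"
patch_name = "-".join(file_upload.split("-")[1:])
print(patch_name)  # patch-0-0-100-100-map_74488689.png

match = re.search(r"map_(\d+)[^(_|\.)]", patch_name)
print(match.group(0))  # map_74488689
```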
1. Run Label Studio in Docker:

   ```
   $ docker run -it -p 8080:8080 -v $(pwd)/mydata:/label-studio/data heartexlabs/label-studio:latest
   ```

2. Open your browser and navigate to http://localhost:8080.
3. Set up an account for your Label Studio instance.
4. Invite more users (if needed): replace 0.0.0.0 with your server's IP address and share the link.
5. Create a New Project.
6. Set up Labels.
7. Start Labeling.
8. Export Labels.

JSON Output:
- `file_upload`: Path to the image. Note that this will be cleaned of any `#` characters (and have a leading hash added to the name, both of which we'll have to deal with in our MapReader conventions...)
- `annotations`: List of annotations for the image.
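For reference, a stripped-down sketch of the export's shape as `read_labelstudio` above expects it (all values here are made up):

```python
# Made-up, minimal shape of a Label Studio JSON export (values illustrative)
example_export = [
    {
        "file_upload": "e4f5a6b7-patch-0-0-100-100-map_74488689.png",
        "annotations": [
            {
                "completed_by": 1,
                "result": [
                    {"value": {"choices": ["Building"]}},
                ],
            },
        ],
    },
]
```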
I've also made a dump-ish feature for the context images... Rough draft:
```python
from itertools import product
from pathlib import Path

import numpy as np
import pandas as pd
from PIL import Image, ImageOps

SURROUNDING = 2
BORDER = 0
PATCH_SIZE = (100, 100)


def get_square(image_path, dim=True, border=False):
    # Load the image
    im = Image.open(image_path)
    # Dim the image
    if dim in ["True", True]:
        im_array = np.array(im)
        im_array = 256 - (256 - im_array) * 0.4  # lighten image
        im = Image.fromarray(im_array.astype(np.uint8))
    # Draw a red border (disabled while BORDER == 0)
    if border in ["True", True] and BORDER:
        w, h = im.size
        im = ImageOps.expand(im, border=2, fill="red")
        im = im.resize((w, h))
    return im


def get_patch_dict(patch_directory):
    # Parse patch metadata out of MapReader-style filenames,
    # e.g. "patch-0-0-100-100-#map_74488689#.png"
    return [
        {
            "relative_path": str(path),
            "parent_map": path.stem.split("#")[1],
            "min_x": int(path.stem.split("-")[1]),
            "min_y": int(path.stem.split("-")[2]),
            "max_x": int(path.stem.split("-")[3]),
            "max_y": int(path.stem.split("-")[4]),
        }
        for path in Path(patch_directory).glob("*.png")
    ]
def get_context_image(
    images: list, patch_size: tuple[int, int], surrounding: int
):
    """Generates a context image.

    Parameters
    ----------
    images : list
        Nested list (rows) of PIL images to paste into the grid.
    patch_size : tuple[int, int]
        Patch size in pixels as tuple of `(width, height)`.
    surrounding : int
        Number of surrounding patches to include in the context image.
    """
    width, height = patch_size
    total_width = (2 * surrounding + 1) * width
    total_height = (2 * surrounding + 1) * height
    context_image = Image.new(
        "RGB", (total_width, total_height), color=(255, 255, 255)
    )
    # Paste each patch row by row, left to right
    y_offset = 0
    for row in images:
        x_offset = 0
        for image in row:
            context_image.paste(image, (x_offset, y_offset))
            x_offset += width
        y_offset += height
    return context_image
def get_all_patches(path):
    return pd.DataFrame(get_patch_dict(path))


def get_empty_square(patch_size: tuple[int, int]):
    """Generates an empty square image.

    Parameters
    ----------
    patch_size : tuple[int, int]
        Patch size in pixels as tuple of `(width, height)`.
    """
    im = Image.new(
        size=patch_size,
        mode="RGB",
        color="white",
    )
    return im


def get_map(df, parent_map):
    return df.query(f"parent_map == '{parent_map}'")
def get_image_list(items, ix):
    # For each grid position: dim every patch except the centre one (ix),
    # and flag the centre patch for a border
    ids = [x.index[0] if len(x.index) == 1 else None for x in items]
    dim_bools = [x != ix for x in ids]
    border_bools = [x == ix for x in ids]
    image_paths = [
        x.at[x.index[0], "relative_path"] if len(x.index) == 1 else None
        for x in items
    ]
    return list(zip(image_paths, dim_bools, border_bools))
def get_items(patch_df, ix):
    # Look up the (2 * SURROUNDING + 1)^2 neighbouring patches by pixel coords
    min_y = patch_df.loc[ix, "min_y"]
    min_x = patch_df.loc[ix, "min_x"]
    width, height = PATCH_SIZE
    deltas = list(range(-SURROUNDING, SURROUNDING + 1))
    y_and_x = list(
        product(
            [min_y + y_delta * height for y_delta in deltas],
            [min_x + x_delta * width for x_delta in deltas],
        )
    )
    queries = [f"min_x == {x} & min_y == {y}" for y, x in y_and_x]
    items = [patch_df.query(query) for query in queries]
    return items
def get_images(image_list):
    # Split the flat list into grid rows; blank squares fill missing patches
    per_row = len(range(-SURROUNDING, SURROUNDING + 1))
    images = [
        [
            (
                get_square(image_path, dim=dim, border=border)
                if image_path
                else get_empty_square(PATCH_SIZE)
            )
            for image_path, dim, border in lst
        ]
        for lst in np.array_split(image_list, per_row)
    ]
    return images
def save_context_image(context_image, patch_df, ix, parent_dir):
    parent_dir = Path(parent_dir)
    parent_dir.mkdir(parents=True, exist_ok=True)
    save_file = parent_dir / Path(patch_df.loc[ix, "relative_path"]).name
    context_image.save(save_file)


def save_context_images(
    patch_df, parent_dir, patch_size, surrounding, select="all"
):
    """
    Saves context images for a given set of patches.

    Parameters
    ----------
    patch_df : pd.DataFrame
        DataFrame containing patch information.
    parent_dir : str
        Directory to save context images to.
    patch_size : tuple[int, int]
        Patch size in pixels as tuple of `(width, height)`.
    surrounding : int
        Number of surrounding patches to include in the context image.
    select : str
        Selection method for patches. Options are:
        - "all": All patches will be selected.
        - "random-n": n random patches will be selected.
    """
    indices = []
    if select == "all":
        indices = patch_df.index
    elif "random" in select:
        n = int(select.split("-")[1])
        indices = patch_df.sample(n).index
    for ix in indices:
        items = get_items(patch_df, ix)
        image_list = get_image_list(items, ix)
        images = get_images(image_list)
        context_image = get_context_image(images, patch_size, surrounding)
        save_context_image(context_image, patch_df, ix, parent_dir)
df = get_all_patches("./ipywidgets-test-patches")
first_map_id = sorted(df["parent_map"].unique())[0]
patch_df = get_map(df, first_map_id)
save_context_images(
    patch_df, "./test-context-images", PATCH_SIZE, SURROUNDING, select="random-10"
)
```
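As a sanity check of the layout arithmetic: with `SURROUNDING = 2` and `PATCH_SIZE = (100, 100)`, each context image is a 5x5 grid of patches, i.e. 500x500 pixels:

```python
grid = 2 * SURROUNDING + 1  # 5 patches per side
assert grid * grid == 25  # 25 tiles per context image
assert (grid * PATCH_SIZE[0], grid * PATCH_SIZE[1]) == (500, 500)
```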
Much reused code from the Annotator here... in desperate need of clean-up, but there's some prototype code in there :)
Also see https://github.com/maps-as-data/MapReader/blob/main/mapreader/classify/datasets.py#L387 for the context classifier (I think the annotator and context classifier are aligned already, but it would be good to keep them aligned).
How this basically works is that you train a model on the context image, not just the patch, in the hope that it learns to classify based on the centre of the image.
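For illustration only, a minimal sketch of what that training setup could look like (this is not the `datasets.py` implementation; the `ContextDataset` class, paths, and transforms here are hypothetical):

```python
# Hypothetical sketch - not MapReader's actual implementation.
# Trains a stock classifier on whole context images, using the labels
# produced by read_labelstudio() above.
from PIL import Image
from torch.utils.data import Dataset
from torchvision import models, transforms


class ContextDataset(Dataset):  # hypothetical helper, for illustration
    def __init__(self, df, image_dir):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.tfm = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()]
        )

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        row = self.df.loc[i]
        im = Image.open(f"{self.image_dir}/{row['image_id']}").convert("RGB")
        return self.tfm(im), int(row["label"])


# The model sees the whole context image; the hope is that it learns to
# weight the centre patch most heavily.
model = models.resnet18(num_classes=len(LABELS))
```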
**Is your feature request related to a problem? Please describe.**
This would enable users to load annotations from alternative annotation tools into the Annotator, and then dump the MapReader annotations into a Label Studio format as output.
See #173 for more discussion.