null test - Githubissues

Processing "OTG AnSat Sine F_F1.wav" should in theory yield 256 segments of 2048 samples each. In fact, e_seg produces an extra segment (257 total) by splitting the first wavecycle into an up half-cycle and a down half-cycle. That is supposed to be dealt with by combining them to make one less segment, i.e. 256, but when we interpolate we get a first sample that is a double cycle, and another sample, perhaps the last, is missing too, leaving 254 samples! So the interpolation must turn 254 samples into 256, which could introduce problems, distortions, maybe even aliasing.

I think the problem is the hacky solution to the first and last wavecycles, and the root cause needs to be addressed instead of being fixed afterwards. So why does it split up that first wavecycle, and where did the missing one go?

here is e_seg:

import os
import numpy as np
import soundfile as sf
import re
from aa_common import (input_with_quit, input_with_defaults, 
    initialize_settings, update_settings, get_tmp_folder, 
    set_all_segment_sizes, get_base, ensure_tmp_folder, 
    get_wavecycle_samples_target, set_wavecycle_samples_target,
    is_rising_zero_crossing, set_wavecycle_samples
)

def run_segment(file_path, settings):
    """
    Split an audio file into wavecycle segments and save each one to the tmp folder.

    A boundary is placed at every index ``i`` (from 1 to the last sample) for
    which ``is_rising_zero_crossing(data, i)`` is true. The samples between the
    previous boundary (initially index 0) and ``i`` form one candidate segment.
    Candidates of 64 samples or fewer are discarded, but the boundary still
    advances, so those samples do not end up in any segment. Samples after the
    last detected crossing are never written.

    Args:
        file_path: Full path of the audio file to segment.
        settings: Accepted for interface compatibility; not read here.

    Returns:
        A list of ``(segment_name, sample_count)`` tuples in file order. An
        entry is recorded before the file is written, so a segment whose write
        failed is still listed.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # Check if the file exists (this includes the full path)
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}. Ensure the prep file is not deleted.")

    # The file's own sample rate is not used; segments are written at a fixed
    # 192 kHz below.
    # NOTE(review): presumably the input is always the 192k prep file - confirm.
    data, _ = sf.read(file_path)

    base = os.path.splitext(os.path.basename(file_path))[0]
    tmp_folder = get_tmp_folder()
    ext = ".wav"

    # Ensure tmp folder is created
    ensure_tmp_folder()

    # A candidate must be strictly longer than this to be saved as a segment.
    min_cycle_samples = 64

    segment_sizes = []  # (segment_name, sample_count) for every recorded segment
    prev_start_index = 0  # Start from the beginning

    # Every index is tested. The previous version skipped each 10000th index
    # (a leftover from a removed progress print), so a crossing landing there
    # was missed and two cycles were merged into one double-length segment.
    for i in range(1, len(data)):
        if not is_rising_zero_crossing(data, i):
            continue

        wave_cycle = data[prev_start_index:i]
        prev_start_index = i  # Advance to the new start index

        if len(wave_cycle) <= min_cycle_samples:
            continue

        # The running count of recorded segments provides the 4-digit index.
        segment_name = f"{base}_seg_{len(segment_sizes):04d}{ext}"
        segment_sizes.append((segment_name, len(wave_cycle)))
        tmp_base_seg_path = os.path.join(tmp_folder, segment_name)

        # Best effort: report a failed write and carry on with the next segment.
        try:
            sf.write(tmp_base_seg_path, wave_cycle, samplerate=192000, format='WAV', subtype='FLOAT')
        except Exception as e:
            print(f"Error saving segment {segment_name}: {e}")

    # NOTE(review): data[prev_start_index:] (everything after the last detected
    # crossing) is dropped here - possibly the "missing" final wavecycle.
    return segment_sizes

def add_wavecycle_segment(segment_name, sample_count):
    """
    Record the sample count of one segment.

    Thin wrapper that forwards both arguments, unchanged, to
    ``set_wavecycle_samples``.
    """
    set_wavecycle_samples(segment_name, sample_count)

def check_and_combine_first_segments(first_segment_path, second_segment_path):
    """
    Concatenate the first two segments (seg_0000 and seg_0001) into one file.

    The samples of the first file followed by those of the second are written
    back to ``second_segment_path``, and the first file is then deleted. No
    check is made that the result is a complete wavecycle; the only
    precondition tested is that both files exist.

    Args:
        first_segment_path: Path of the segment whose samples come first; this
            file is removed on success.
        second_segment_path: Path of the segment whose samples come second;
            this file is overwritten with the combined data.

    Returns:
        True if the segments were combined, False if either file is missing.
    """
    # Ensure both files exist
    if not os.path.exists(first_segment_path) or not os.path.exists(second_segment_path):
        print(f"One or both segments are missing: {first_segment_path}, {second_segment_path}")
        return False

    # Capture the on-disk sample format before the file is overwritten. Without
    # an explicit subtype, sf.write falls back to the format's default (16-bit
    # PCM for WAV), which would quantise a float segment.
    subtype = sf.info(second_segment_path).subtype

    # Read the data from the first two segments; the first file's sample rate
    # is used for the combined file.
    first_data, sr = sf.read(first_segment_path)
    second_data, _ = sf.read(second_segment_path)

    # Concatenate the first and second segments
    combined_data = np.concatenate((first_data, second_data))

    # Save the combined segment as seg_0001 (using the second segment's name)
    combined_segment_path = second_segment_path
    sf.write(combined_segment_path, combined_data, sr, subtype=subtype)

    # Remove the old seg_0000 only after the combined file has been written
    os.remove(first_segment_path)

    return True

def _fold_first_segment_size(segment_sizes, first_name, second_name):
    """
    Fold the sample count recorded for ``first_name`` into ``second_name``.

    ``segment_sizes`` is a list of ``(segment_name, sample_count)`` tuples and
    is modified in place: the ``second_name`` entry gets the sum of both
    counts and the ``first_name`` entry is removed.

    Returns:
        The combined sample count, or None (list untouched) when either name
        is not present in ``segment_sizes``.
    """
    names = [name for name, _ in segment_sizes]
    if first_name not in names or second_name not in names:
        return None

    first_idx = names.index(first_name)
    second_idx = names.index(second_name)
    combined_count = segment_sizes[first_idx][1] + segment_sizes[second_idx][1]

    # Update before deleting so neither index is shifted underneath us.
    segment_sizes[second_idx] = (second_name, combined_count)
    del segment_sizes[first_idx]
    return combined_count


def run(processed_files):
    """
    Segment every processed file, merge the first two segments, and store the sizes.

    For each entry in ``processed_files`` the file of the same basename inside
    the tmp folder's "192k" subfolder is segmented with ``run_segment`` (entries
    whose file is absent are skipped silently), and each resulting segment is
    recorded with ``add_wavecycle_segment``. Afterwards the tmp folder is
    searched for files named ``<base>..._seg_0000.wav`` and
    ``<base>..._seg_0001.wav``; if both are found they are merged with
    ``check_and_combine_first_segments``. On a successful merge the size list
    is reconciled so it no longer names the deleted seg_0000 and reports the
    merged length for seg_0001. The list is finally handed to
    ``set_all_segment_sizes``.

    Args:
        processed_files: Iterable of file paths; only the basename of each is used.
    """
    # Initialize settings with defaults and update them
    settings = initialize_settings()
    settings = update_settings(settings)

    tmp_folder = get_tmp_folder()
    subfolder_192k = os.path.join(tmp_folder, "192k")

    segment_sizes = []

    # Use the files from the 192k subfolder
    for file_path in processed_files:
        full_path = os.path.normpath(os.path.join(subfolder_192k, os.path.basename(file_path)))
        new_segment_sizes = []

        if os.path.exists(full_path):
            new_segment_sizes = run_segment(full_path, settings)

        for segment_name, sample_count in new_segment_sizes:
            add_wavecycle_segment(segment_name, sample_count)

        segment_sizes.extend(new_segment_sizes)

    # Get the base and regex pattern
    base = get_base()

    # Regex pattern to account for extra characters before _seg_0000 and _seg_0001
    segment_pattern = re.compile(rf"{re.escape(base)}.*_seg_000[01]\.wav")

    # List files in tmp_folder and match the ones we care about. If several
    # files match, the last one listed for each suffix wins.
    all_files = os.listdir(tmp_folder)
    first_segment_path = None
    second_segment_path = None

    for filename in all_files:
        if segment_pattern.match(filename):
            if '_seg_0000.wav' in filename:
                first_segment_path = os.path.normpath(os.path.join(tmp_folder, filename))
            elif '_seg_0001.wav' in filename:
                second_segment_path = os.path.normpath(os.path.join(tmp_folder, filename))

    # Check if the files exist after segmentation
    if first_segment_path and second_segment_path and os.path.exists(first_segment_path) and os.path.exists(second_segment_path):
        # Merge the first two segments. Only when the merge actually happened,
        # bring the bookkeeping in line with what is now on disk; previously
        # the stored sizes still described the two pre-merge files.
        if check_and_combine_first_segments(first_segment_path, second_segment_path):
            second_name = os.path.basename(second_segment_path)
            combined_count = _fold_first_segment_size(
                segment_sizes, os.path.basename(first_segment_path), second_name
            )
            if combined_count is not None:
                # Re-record seg_0001 with its merged length.
                # NOTE(review): the earlier per-segment record for seg_0000 is
                # not removed - no helper for that is visible in this file.
                add_wavecycle_segment(second_name, combined_count)
    else:
        print(f"Error: One or both files are missing after segmentation: {first_segment_path}, {second_segment_path}")

    # Store segment_sizes
    set_all_segment_sizes(segment_sizes)

rcrath / wvtbl

null test #50