pdphilip / laravel-elasticsearch

Laravel Elasticsearch: An Elasticsearch implementation of Laravel's Eloquent ORM
https://elasticsearch.pdphilip.com/
MIT License
94 stars 17 forks source link

Some issues with date format and date queries #22

Closed abkrim closed 6 months ago

abkrim commented 7 months ago

Package version

ex: v3.10.0 Elasticsearch: 8.12.1

Describe the bug

After some issues with queries with date times, create a test seeder to see the question.

To Reproduce

Steps to reproduce the behavior:

Create a index for test purposes

<?php

use Illuminate\Database\Migrations\Migration;
use PDPhilip\Elasticsearch\Schema\IndexBlueprint;
use PDPhilip\Elasticsearch\Schema\Schema;

return new class extends Migration
{
    public function up(): void
    {
        Schema::create('elastic_tests', function (IndexBlueprint $table) {
            $table->field('date', 'custom_date', [
                'format' => 'epoch_second||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd',
                'ignore_malformed' => true,
            ]);
            $table->date('default_date');
            $table->field('date', 'default_date_forced', [
                'format' => 'strict_date_optional_time||epoch_millis',
                'ignore_malformed' => true,
            ]);
            $table->text('description');
            $table->date('epoch_date', 'epoch_second');
            $table->keyword('status_code');
        });
    }

    public function down(): void
    {
        Schema::deleteIfExists('elastic_tests');
    }
};

Create Model

<?php

namespace App\Models;

use Illuminate\Database\Eloquent\Factories\HasFactory;
use PDPhilip\Elasticsearch\Eloquent\Model;

class ElasticTest extends Model
{
    use HasFactory;

    protected $connection = 'elasticsearch';

    protected $index = 'elastic_tests';
}

Create factory

<?php

namespace Database\Factories;

use App\Models\ElasticTest;
use Illuminate\Database\Eloquent\Factories\Factory;
use Illuminate\Support\Carbon;

class ElasticTestFactory extends Factory
{
    protected $model = ElasticTest::class;

    public function definition(): array
    {
        $date = Carbon::now();

        $custom_status_arr = ['200', '200', '200', '200', '200', '200', '200', '200', '200', '500'];

        $custom_date = $custom_status_arr[array_rand($custom_status_arr, 1)];

        return [
            'description' => $this->faker->text(),
            'default_date' => $date,
            'default_date_forced' => $date, // 'strict_date_optional_time||epoch_millis
            'epoch_date' => $date->timestamp, // since epoch_second requires a timestamp
            'custom_date' => $date,
            'status_code' => $custom_date, // adjust as you need
            'created_at' => $date,
            'updated_at' => $date,
        ];
    }
}

Create seeder

<?php

namespace Database\Seeders;

use Database\Factories\ElasticTestFactory;
use Illuminate\Database\Seeder;
use Illuminate\Support\Carbon;

class ElasticTestSeeder extends Seeder
{
    public function run(): void
    {
        $startDate = Carbon::now()->startOfDay();

        for ($i = 0; $i < 1440; $i++) {
            // Set the current date time for the factory
            ElasticTestFactory::times(1)->state([
                'default_date' => $startDate,
                'epoch_date' => $startDate->timestamp,
                'custom_date' => $startDate,
                'default_date_forced' => $startDate,
                'created_at' => $startDate,
                'updated_at' => $startDate,
            ])->create();
            // Add 1 minute to the start time
            $startDate->addMinute();
        }

    }
}

Run migration

php artisan migrate

Check index mapping

{
  "elastic_tests": {
    "mappings": {
      "properties": {
        "custom_date": {
          "type": "date",
          "format": "epoch_second||yyyy-MM-dd HH:mm:ss||yyyy-MM-dd",
          "ignore_malformed": true
        },
        "default_date": {
          "type": "date"
        },
        "default_date_forced": {
          "type": "date",
          "ignore_malformed": true
        },
        "description": {
          "type": "text"
        },
        "epoch_date": {
          "type": "date",
          "format": "epoch_second"
        },
        "status_code": {
          "type": "keyword"
        }
      }
    }
  }
}

Run migrations

a db:seed --class=ElasticTestSeeder

Expected behavior

Working pass

> ElasticTest::where('default_date_forced', '>=', Carbon::parse(1712059200))->count();
= 720
> ElasticTest::where('default_date', '>=', Carbon::parse(1712059200))->count();
> 720

default_date and default_date_forced acceptable value timestamp

They both fail. Show all instead of half.

> ElasticTest::where('default_date', '>=', 1712059200)->count();
= 1440

> ElasticTest::where('default_date', '>=', '1712059200')->count();
= 1440

epoch_date fails with timestamp if not casting to string

> ElasticTest::where('epoch_date', '>=', Carbon::parse(1712059200)->timestamp)->count();
= 1440
> ElasticTest::where('epoch_date', '>=', (string) Carbon::parse(1712059200)->timestamp)->count();
= 720
> ElasticTest::where('epoch_date', '>=', (string) 1712059200)->count();
= 720
> ElasticTest::where('epoch_date', '>=', 1712059200)->count();
= 1440

Screenshots: If applicable, add screenshots to help explain your problem.

Component Versions (Paste in the require section from your composer.json file):

  "require": {
    "php": "^8.1",
    "ext-pdo": "*",
    "ext-redis": "*",
    "ext-zlib": "*",
    "archtechx/enums": "^0.3.0",
    "babenkoivan/elastic-scout-driver": "^3.0",
    "babenkoivan/elastic-scout-driver-plus": "^4.2",
    "barryvdh/laravel-dompdf": "^2.0",
    "binarytorch/larecipe": "^2.5",
    "codeat3/blade-google-material-design-icons": "^1.18",
    "codeat3/blade-iconpark": "^1.5",
    "codeat3/blade-jam-icons": "^1.5",
    "dompdf/dompdf": "^2.0",
    "elasticsearch/elasticsearch": "^8.10",
    "filament/filament": "^3.0-stable",
    "geokit/geokit": "^1.3",
    "guzzlehttp/guzzle": "^7.2",
    "laravel-notification-channels/telegram": "^4.0",
    "laravel/framework": "^10.0",
    "laravel/horizon": "^5.17",
    "laravel/pail": "^1.0",
    "laravel/prompts": "^0.1.13",
    "laravel/sanctum": "^3.2",
    "laravel/scout": "^9.8",
    "laravel/tinker": "^2.7",
    "livewire/livewire": "^3.0",
    "lorisleiva/laravel-actions": "^2.4",
    "maatwebsite/excel": "^3.1",
    "matanyadaev/laravel-eloquent-spatial": "^3.1",
    "novadaemon/filament-pretty-json": "^2.0",
    "owen-it/laravel-auditing": "^13.3",
    "pdphilip/elasticsearch": "~3.10",
    "predis/predis": "^2.2",
    "ramsey/uuid": "^4.7",
    "robinvdvleuten/ulid": "^5.0",
    "silber/bouncer": "^1.0",
    "spatie/laravel-backup": "^8.4",
    "spatie/laravel-data": "^2.1",
    "spatie/laravel-health": "^1.27",
    "spatie/laravel-query-builder": "^5.1",
    "spatie/laravel-rate-limited-job-middleware": "^2.2",
    "spatie/laravel-ray": "^1.35",
    "spatie/laravel-settings": "^2.8",
    "spatie/laravel-translatable": "^6.1",
    "spatie/simple-excel": "^3.0",
    "vlucas/phpdotenv": "^5.4"
  },

Additional context:

I don't know if I'm right, but well, I preferred to spend some time, after the last re-examination, being asked by my team about the methodology, since it was necessary:

And certainly after reindexing 100 million docs, I found that I had not done enough testing)

Acknowledgments

pdphilip commented 7 months ago

Have run your tests and these seem to fall into elasticsearch quirks. The package just makes the DSL queries but you will still need to pass in the correctly typed values and make sure you save the correct values.

If set ignore_malformed to true then ES will save whatever value you give it, but you may not be able to query it since it will effectively be unmapped. I would avoid that to ensure you have the correct format.

For timestamps, you'll need to pass it in as a string or in milliseconds if you want to use int, I don't know why, but you can test directly on ES:

Screenshot 2024-04-03 at 13 51 03 Screenshot 2024-04-03 at 13 51 09 Screenshot 2024-04-03 at 13 51 41

Given your mapping - the below queries all work:

$test = [];
$test[] = ElasticTest::where('custom_date', '>=', (string)$ts)->count();
$test[] = ElasticTest::where('custom_date', '>=', $ts * 1000)->count();
$test[] = ElasticTest::where('custom_date', '>=', Carbon::parse($ts)->format('Y-m-d H:i:s'))->count();

$test[] = ElasticTest::where('default_date', '>=', Carbon::parse($ts))->count();
$test[] = ElasticTest::where('default_date', '>=', $ts * 1000)->count();

$test[] = ElasticTest::where('default_date_forced', '>=', Carbon::parse($ts))->count();
$test[] = ElasticTest::where('default_date_forced', '>=', Carbon::parse($ts)->getTimestampMs())->count();
$test[] = ElasticTest::where('default_date_forced', '>=', $ts * 1000)->count();

$test[] = ElasticTest::where('epoch_date', '>=', $ts * 1000)->count();
$test[] = ElasticTest::where('epoch_date', '>=', Carbon::parse($ts)->getTimestampMs())->count();
$test[] = ElasticTest::where('epoch_date', '>=', (string)$ts)->count();

I would just stick to saving date-time directly with Carbon, and query using Cabon. It's consistent, works with timezones and easy to work with.

I can't find the bug within this package - just weird Elasticsearch formatting. Did I miss something?

abkrim commented 7 months ago

Hello Much appreciate your time.

The truth is that the topic is a real headache, especially when it comes to indices with a huge amount of data.

In the next remapping that I am preparing, after consulting the endpoints and the other developers on how they are using JS and the API, I consult them and see that everyone uses timestamp and that it is only the project manager who needs accessibility via eloquent or ISO type formats, I will do what you say in your manual.

Use the default value, since the ignore_malformed is already outdated from two years ago because a method was put in place to clean the logs with malformed data, so there is no data with malformed data.

This change, given the implication and the fact that it is about production, will force me to do many tests, which I hope pass properly.

Cuando termine, añadiré aqui, mis resultados e impresiones

Once again, my greatest thanks and gratitude for such a productive package and your time.

pdphilip commented 7 months ago

Thanks @abkrim, your feedback is great and useful for developing a more robust package.

I'm going to make this an enhancement issue and convert whereDate() to work with carbon input (and validate accordingly), then add a new method whereTimestamp() whereby I can intercept the input and ensure the correct type casting. I'll keep this open until this feature has been published. Feel free to comment further here until then.

pdphilip commented 6 months ago

Hi @abkrim, new releases have been published that include whereTimestamp()

Thanks