kirin-ri / memo

0 stars 0 forks source link

test #26

Open kirin-ri opened 5 months ago

kirin-ri commented 5 months ago

dd

kirin-ri commented 5 months ago
        # 前処理
        # データ読み込み
        # 設備稼働時間
        df = d2s.readData(
            self.spark,
            self.dbutils,
            sfDatabase="ECM_DEV",
            sfSchema="PUBLIC",
            query=f"""
                SELECT
                    EQUIPMENT_ID,
                    OPERATION_START_DATE,
                    OPERATION_END_DATE,
                    OPERATION_TIME
                FROM
                    ECM_RAW_OPERATION_TIME
                WHERE
                    (OPERATION_START_DATE <= '{end_time_str}'
                    AND (OPERATION_END_DATE >= '{start_time_str}'
                    OR OPERATION_END_DATE IS NULL))
                    OR (OPERATION_START_DATE <= '{start_time_str}'
                    AND OPERATION_END_DATE IS NULL)
        """,
        )

        # データ加工
        # タイムスタンプ型に変換
        date_range = [start_date + datetime.timedelta(days=x)
                      for x in range((end_date - start_date).days + 1)]
        for date in date_range:
kirin-ri commented 5 months ago
# start_dateとend_dateの決定
df.createOrReplaceTempView("operation_time")

# start_dateの決定
start_date_query = """
    SELECT MIN(OPERATION_START_DATE) AS start_date
    FROM operation_time
"""
start_date_row = self.spark.sql(start_date_query).collect()[0]
start_date = start_date_row["start_date"]

# end_dateの決定
end_date_query = """
    SELECT MAX(OPERATION_END_DATE) AS end_date
    FROM operation_time
"""
end_date_row = self.spark.sql(end_date_query).collect()[0]
end_date = end_date_row["end_date"]

# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
    end_date = datetime.datetime.now()
kirin-ri commented 5 months ago
# 日付のフォーマットを変換
for record in data:
    record["OPERATION_START_DATE"] = datetime.datetime.strptime(record["OPERATION_START_DATE"], "%Y-%m-%d")
    if record["OPERATION_END_DATE"]:
        record["OPERATION_END_DATE"] = datetime.datetime.strptime(record["OPERATION_END_DATE"], "%Y-%m-%d")
    else:
        record["OPERATION_END_DATE"] = None

# start_dateの決定
start_date = min(record["OPERATION_START_DATE"] for record in data)

# end_dateの決定
end_date = max(record["OPERATION_END_DATE"] for record in data if record["OPERATION_END_DATE"] is not None)

# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
    end_date = datetime.datetime.now()
else:
    end_date = max(end_date, datetime.datetime.now())

# 日付範囲の作成
date_range = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)]

# 結果表示
print("Start Date:", start_date)
print("End Date:", end_date)
print("Date Range:", date_range)
kirin-ri commented 5 months ago
        df = d2s.readData(
            self.spark,
            self.dbutils,
            sfDatabase="ECM_DEV",
            sfSchema="PUBLIC",
            query=f"""
                SELECT
                    EQUIPMENT_ID,
                    OPERATION_START_DATE,
                    OPERATION_END_DATE,
                    OPERATION_TIME
                FROM
                    ECM_RAW_OPERATION_TIME
                WHERE
                    (OPERATION_START_DATE <= '{end_time_str}'
                    AND (OPERATION_END_DATE >= '{start_time_str}'
                    OR OPERATION_END_DATE IS NULL))
                    OR (OPERATION_START_DATE <= '{start_time_str}'
                    AND OPERATION_END_DATE IS NULL)
        """,
        )

        # データ加工
        # タイムスタンプ型に変換
        date_range = [start_date + datetime.timedelta(days=x)
                      for x in range((end_date - start_date).days + 1)]
kirin-ri commented 5 months ago

ValueError: unconverted data remains: T04:12:34Z

kirin-ri commented 5 months ago
# 日付のフォーマットを変換
for record in data:
    record["OPERATION_START_DATE"] = datetime.datetime.strptime(record["OPERATION_START_DATE"], "%Y-%m-%dT%H:%M:%SZ")
    if record["OPERATION_END_DATE"]:
        record["OPERATION_END_DATE"] = datetime.datetime.strptime(record["OPERATION_END_DATE"], "%Y-%m-%dT%H:%M:%SZ")
    else:
        record["OPERATION_END_DATE"] = None

# start_dateの決定
start_date = min(record["OPERATION_START_DATE"] for record in data)

# end_dateの決定
end_date = max(record["OPERATION_END_DATE"] for record in data if record["OPERATION_END_DATE"] is not None)

# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
    end_date = datetime.datetime.now()
else:
    end_date = max(end_date, datetime.datetime.now())

# 日付範囲の作成
date_range = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)]

# 結果表示
print("Start Date:", start_date)
print("End Date:", end_date)
print("Date Range:", date_range)
kirin-ri commented 5 months ago

Row' object does not support item assignment

kirin-ri commented 5 months ago
# OPERATION_START_DATEとOPERATION_END_DATEの最小値と最大値を取得
start_date = df.select(spark_min(col("OPERATION_START_DATE"))).collect()[0][0]
end_date = df.select(spark_max(col("OPERATION_END_DATE"))).collect()[0][0]

# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
    end_date = datetime.datetime.now()
else:
    end_date = max(end_date, datetime.datetime.now())

# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
if end_date is not None:
    end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%SZ")

# 日付範囲の作成
date_range = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)]

# 結果表示
print("Start Date:", start_date)
print("End Date:", end_date)
print("Date Range:", date_range)
kirin-ri commented 5 months ago
# OPERATION_START_DATEの最小値を取得
start_date = df.select(spark_min(col("OPERATION_START_DATE"))).collect()[0][0]

# OPERATION_END_DATEの最大値を取得、NULLがある場合は現在の日付を使用
max_end_date = df.select(spark_max(col("OPERATION_END_DATE"))).collect()[0][0]
null_end_date_count = df.filter(col("OPERATION_END_DATE").isNull()).count()

# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")

if null_end_date_count > 0:
    end_date = datetime.datetime.now()
else:
    end_date = datetime.datetime.strptime(max_end_date, "%Y-%m-%dT%H:%M:%SZ")
    end_date = max(end_date, datetime.datetime.now())
kirin-ri commented 5 months ago
agg_result = df.agg(
    spark_min(col("OPERATION_START_DATE")).alias("min_start_date"),
    spark_max(col("OPERATION_END_DATE")).alias("max_end_date")
).collect()[0]

start_date = agg_result["min_start_date"]
max_end_date = agg_result["max_end_date"]

# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")

# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if max_end_date is None:
    end_date = datetime.datetime.now()
else:
    end_date = datetime.datetime.strptime(max_end_date, "%Y-%m-%dT%H:%M:%SZ")
    end_date = max(end_date, datetime.datetime.now())
kirin-ri commented 5 months ago
# OPERATION_START_DATEの最小値を取得
start_date = df.select(spark_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]

# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))

end_date = max_end_date_df.select(spark_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]

# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S")
kirin-ri commented 5 months ago

日付のフォーマットを変換

start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ") start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)

end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S") end_date = end_date.replace(hour=23, minute=59, second=59, microsecond=999999)

kirin-ri commented 5 months ago
# 基準日算出
if len(fromPeriod) > 0:
    start_date = datetime.datetime.strptime(fromPeriod,
                                            '%Y-%m-%d %H:%M:%S')
    end_date = datetime.datetime.strptime(toPeriod,
                                            '%Y-%m-%d %H:%M:%S')
else:
    today = datetime.datetime.today()
    start_date = datetime.datetime.strptime(today,
                                            '%Y-%m-%d %H:%M:%S')
    end_date = datetime.datetime.strptime(today,
                                            '%Y-%m-%d %H:%M:%S')

start_time = datetime.datetime(start_date.year, start_date.month,
                                start_date.day, 0, 0, 0)
end_time = datetime.datetime(end_date.year, end_date.month,
                                end_date.day, 23, 59, 59)

# タイムスタンプ文字列に変換
start_time_str = start_time.strftime('%Y-%m-%dT%H:%M:%SZ')
end_time_str = end_time.strftime('%Y-%m-%dT%H:%M:%SZ')

# 前処理
# データ読み込み
# 設備稼働時間
df = d2s.readData(
    # self.spark,
    # self.dbutils,
    spark,
    dbutils,
    sfDatabase="ECM_DEV",
    sfSchema="PUBLIC",
    query=f"""
        SELECT
            EQUIPMENT_ID,
            OPERATION_START_DATE,
            OPERATION_END_DATE,
            OPERATION_TIME
        FROM
            ECM_RAW_OPERATION_TIME
        WHERE
            (OPERATION_START_DATE <= '{end_time_str}'
            AND (OPERATION_END_DATE >= '{start_time_str}' OR OPERATION_END_DATE IS NULL))
            OR (OPERATION_START_DATE <= '{start_time_str}' AND OPERATION_END_DATE IS NULL)
    """,
)

display(df)

# データ加工
# OPERATION_START_DATEの最小値を取得
start_date = df.select(_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]

# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))

end_date = max_end_date_df.select(_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]

# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
start_date = start_date.replace(hour=0, minute=0, second=0)
end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S")
end_date = end_date.replace(hour=23, minute=59, second=59)
print("Start Date:", start_date)
print("End Date:", end_date)
kirin-ri commented 5 months ago
# データ加工
# OPERATION_START_DATEの最小値を取得
operation_start_date = df.select(spark_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]

# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))

operation_end_date = max_end_date_df.select(spark_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]

# 日付のフォーマットを変換
operation_start_date = datetime.datetime.strptime(operation_start_date, "%Y-%m-%dT%H:%M:%SZ")
operation_end_date = datetime.datetime.strptime(operation_end_date, "%Y-%m-%dT%H:%M:%S")

# 基準日と比較して最終的な開始日と終了日を設定
final_start_date = min(start_date, operation_start_date)
final_end_date = max(end_date, operation_end_date)

# 開始日と終了日をそれぞれ0時0分0秒と23時59分59秒に設定
final_start_date = final_start_date.replace(hour=0, minute=0, second=0, microsecond=0)
final_end_date = final_end_date.replace(hour=23, minute=59, second=59, microsecond=999999)

# 日付範囲の作成
date_range = [final_start_date + datetime.timedelta(days=x) for x in range((final_end_date - final_start_date).days + 1)]
kirin-ri commented 5 months ago
# OPERATION_START_DATEの最小値を取得
operation_start_date = df.select(spark_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]

# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))

operation_end_date = max_end_date_df.select(spark_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]

# 日付のフォーマットを変換
operation_start_date = datetime.datetime.strptime(operation_start_date, "%Y-%m-%dT%H:%M:%SZ")
operation_end_date = datetime.datetime.strptime(operation_end_date, "%Y-%m-%dT%H:%M:%S")

# 基準日と比較して最終的な開始日と終了日を設定
final_start_date = max(start_date, operation_start_date)
final_end_date = min(end_date, operation_end_date)

# 開始日と終了日をそれぞれ0時0分0秒と23時59分59秒に設定
final_start_date = final_start_date.replace(hour=0, minute=0, second=0, microsecond=0)
final_end_date = final_end_date.replace(hour=23, minute=59, second=59, microsecond=999999)

# 日付範囲の作成
date_range = [final_start_date + datetime.timedelta(days=x) for x in range((final_end_date - final_start_date).days + 1)]
kirin-ri commented 5 months ago

設備A 2024-05-01T04:12:34Z 2024-05-01T05:45:12Z 設備A 2024-05-01T10:00:16Z 2024-05-01T11:30:20Z 設備A 2024-04-30T22:00:10Z 2024-05-01T01:45:32Z 設備A 2024-05-01T22:00:10Z 2024-05-02T01:45:32Z 設備A 2024-04-28T05:12:24Z 2024-04-30T08:25:37Z 設備B 2024-05-01T08:45:00Z 2024-05-01T12:00:00Z 設備B 2024-05-01T04:12:34Z 2024-05-01T05:45:12Z 設備B 2024-04-28T08:45:00Z 2024-04-28T12:00:00Z 設備B 2024-05-02T04:12:34Z 2024-05-02T05:45:12Z 設備C 2024-04-27T08:14:24Z
設備D 2024-05-01T08:14:24Z

kirin-ri commented 5 months ago

設備A 2024-04-28 00:00:00 18.79333333 設備A 2024-04-29 00:00:00 23.99972222 設備A 2024-04-30 00:00:00 10.42416667 設備A 2024-05-01 00:00:00 6.801111111 設備A 2024-05-02 00:00:00 1.758888889 設備B 2024-04-28 00:00:00 3.25 設備B 2024-05-01 00:00:00 4.793888889 設備B 2024-05-02 00:00:00 1.543888889 設備C 2024-04-27 00:00:00 15.76 設備C 2024-04-28 00:00:00 23.99972222 設備C 2024-04-29 00:00:00 23.99972222 設備C 2024-04-30 00:00:00 23.99972222 設備C 2024-05-01 00:00:00 23.99972222 設備C 2024-05-02 00:00:00 23.99972222 設備C 2024-05-03 00:00:00 23.99972222 設備C 2024-05-04 00:00:00 23.99972222 設備C 2024-05-05 00:00:00 23.99972222 設備C 2024-05-06 00:00:00 23.99972222 設備C 2024-05-07 00:00:00 23.99972222 設備C 2024-05-08 00:00:00 23.99972222 設備C 2024-05-09 00:00:00 23.99972222 設備C 2024-05-10 00:00:00 23.99972222 設備C 2024-05-11 00:00:00 23.99972222 設備C 2024-05-12 00:00:00 23.99972222 設備C 2024-05-13 00:00:00 23.99972222 設備C 2024-05-14 00:00:00 23.99972222 設備C 2024-05-15 00:00:00 23.99972222 設備C 2024-05-16 00:00:00 23.99972222 設備C 2024-05-17 00:00:00 23.99972222 設備C 2024-05-18 00:00:00 23.99972222 設備C 2024-05-19 00:00:00 23.99972222 設備C 2024-05-20 00:00:00 23.99972222 設備C 2024-05-21 00:00:00 23.99972222 設備C 2024-05-22 00:00:00 23.99972222 設備C 2024-05-23 00:00:00 23.99972222 設備C 2024-05-24 00:00:00 23.99972222 設備C 2024-05-25 00:00:00 23.99972222 設備C 2024-05-26 00:00:00 23.99972222 設備C 2024-05-27 00:00:00 23.99972222 設備C 2024-05-28 00:00:00 23.99972222 設備C 2024-05-29 00:00:00 23.99972222 設備C 2024-05-30 00:00:00 23.99972222 設備C 2024-05-31 00:00:00 23.99972222 設備C 2024-06-01 00:00:00 23.99972222 設備C 2024-06-02 00:00:00 23.99972222 設備C 2024-06-03 00:00:00 23.99972222 設備C 2024-06-04 00:00:00 23.99972222 設備C 2024-06-05 00:00:00 23.99972222 設備C 2024-06-06 00:00:00 23.99972222 設備C 2024-06-07 00:00:00 23.99972222 設備C 2024-06-08 00:00:00 23.99972222 設備C 2024-06-09 00:00:00 23.99972222 設備C 2024-06-10 00:00:00 23.99972222 設備C 2024-06-11 00:00:00 23.99972222 設備C 2024-06-12 00:00:00 23.99972222 設備C 2024-06-13 00:00:00 23.99972222 設備C 2024-06-14 00:00:00 23.99972222 設備C 2024-06-15 00:00:00 23.99972222 設備C 2024-06-16 00:00:00 23.99972222 設備C 2024-06-17 00:00:00 23.99972222 設備C 2024-06-18 00:00:00 23.99972222 設備C 2024-06-19 00:00:00 23.99972222 設備C 2024-06-20 00:00:00 23.99972222 設備C 2024-06-21 00:00:00 23.99972222 設備C 2024-06-22 00:00:00 23.99972222 設備C 2024-06-23 00:00:00 23.99972222 設備C 2024-06-24 00:00:00 23.99972222 設備C 2024-06-25 00:00:00 0 設備D 2024-05-01 00:00:00 15.76 設備D 2024-05-02 00:00:00 23.99972222 設備D 2024-05-03 00:00:00 23.99972222 設備D 2024-05-04 00:00:00 23.99972222 設備D 2024-05-05 00:00:00 23.99972222 設備D 2024-05-06 00:00:00 23.99972222 設備D 2024-05-07 00:00:00 23.99972222 設備D 2024-05-08 00:00:00 23.99972222 設備D 2024-05-09 00:00:00 23.99972222 設備D 2024-05-10 00:00:00 23.99972222 設備D 2024-05-11 00:00:00 23.99972222 設備D 2024-05-12 00:00:00 23.99972222 設備D 2024-05-13 00:00:00 23.99972222 設備D 2024-05-14 00:00:00 23.99972222 設備D 2024-05-15 00:00:00 23.99972222 設備D 2024-05-16 00:00:00 23.99972222 設備D 2024-05-17 00:00:00 23.99972222 設備D 2024-05-18 00:00:00 23.99972222 設備D 2024-05-19 00:00:00 23.99972222 設備D 2024-05-20 00:00:00 23.99972222 設備D 2024-05-21 00:00:00 23.99972222 設備D 2024-05-22 00:00:00 23.99972222 設備D 2024-05-23 00:00:00 23.99972222 設備D 2024-05-24 00:00:00 23.99972222 設備D 2024-05-25 00:00:00 23.99972222 設備D 2024-05-26 00:00:00 23.99972222 設備D 2024-05-27 00:00:00 23.99972222 設備D 2024-05-28 00:00:00 23.99972222 設備D 2024-05-29 00:00:00 23.99972222 設備D 2024-05-30 00:00:00 23.99972222 設備D 2024-05-31 00:00:00 23.99972222 設備D 2024-06-01 00:00:00 23.99972222 設備D 2024-06-02 00:00:00 23.99972222 設備D 2024-06-03 00:00:00 23.99972222 設備D 2024-06-04 00:00:00 23.99972222 設備D 2024-06-05 00:00:00 23.99972222 設備D 2024-06-06 00:00:00 23.99972222 設備D 2024-06-07 00:00:00 23.99972222 設備D 2024-06-08 00:00:00 23.99972222 設備D 2024-06-09 00:00:00 23.99972222 設備D 2024-06-10 00:00:00 23.99972222 設備D 2024-06-11 00:00:00 23.99972222 設備D 2024-06-12 00:00:00 23.99972222 設備D 2024-06-13 00:00:00 23.99972222 設備D 2024-06-14 00:00:00 23.99972222 設備D 2024-06-15 00:00:00 23.99972222 設備D 2024-06-16 00:00:00 23.99972222 設備D 2024-06-17 00:00:00 23.99972222 設備D 2024-06-18 00:00:00 23.99972222 設備D 2024-06-19 00:00:00 23.99972222 設備D 2024-06-20 00:00:00 23.99972222 設備D 2024-06-21 00:00:00 23.99972222 設備D 2024-06-22 00:00:00 23.99972222 設備D 2024-06-23 00:00:00 23.99972222 設備D 2024-06-24 00:00:00 23.99972222 設備D 2024-06-25 00:00:00 0

kirin-ri commented 4 months ago

: unconverted data remains: 23:59:59

kirin-ri commented 4 months ago

fromPeriod = '2024-04-30 00:00:00' toPeriod = '2025-05-01 23:59:59'

kirin-ri commented 4 months ago
        SELECT
            EQUIPMENT_ID,
            REF_DATE,
            OPERATION_TIME
        FROM
            CONV_RAW_OPERATION_TIME_DAILY
        WHERE
            REF_DATE BETWEEN TO_TIMESTAMP('{refdate}') AND TO_TIMESTAMP('{enddate}')
kirin-ri commented 4 months ago
        operation_start_date = combined_df.select(F.min(
            F.col("OPERATION_START_DATE")).alias(
                "min_start_date")).collect()[0][0]

        operation_end_date = combined_df.select(F.max(
            F.col("OPERATION_END_DATE")).alias(
                "max_end_date")).collect()[0][0]

        if 

        # 日付のフォーマットを変換
        operation_start_date = datetime.datetime.strptime(operation_start_date,
                                                          "%Y-%m-%dT%H:%M:%SZ")
        operation_end_date = datetime.datetime.strptime(operation_end_date,
                                                        "%Y-%m-%dT%H:%M:%SZ")
kirin-ri commented 4 months ago
        # ウィンドウ定義
        window_spec = Window.partitionBy(
            "EQUIPMENT_ID").orderBy("OPERATION_DATE")

        # LEAD関数を使って次のOPERATION_DATEとOPERATION_FLAGを取得
        df_with_lead = df.withColumn(
            "NEXT_OPERATION_DATE",
            F.lead("OPERATION_DATE").over(window_spec)).withColumn(
                "NEXT_OPERATION_FLAG", F.lead(
                    "OPERATION_FLAG").over(window_spec))

        # OPERATION_FLAGが1で、NEXT_OPERATION_FLAGが2のレコードを抽出
        paired_df = df_with_lead.filter((
            F.col("OPERATION_FLAG") == 1) & (
                F.col("NEXT_OPERATION_FLAG") == 2)).select(F.col(
                    "EQUIPMENT_ID"), F.col("OPERATION_DATE").alias(
                        "OPERATION_START_DATE"), F.col(
                            "NEXT_OPERATION_DATE").alias(
                                "OPERATION_END_DATE")
        )

        # リード、ラグ関数で次の行と前の行を取得
        df = df.withColumn("next_date", F.lead(
            "OPERATION_DATE", 1).over(window_spec))
        df = df.withColumn("next_flag", F.lead(
            "OPERATION_FLAG", 1).over(window_spec))
        df = df.withColumn("prev_flag", F.lag(
            "OPERATION_FLAG", 1).over(window_spec))

        # 開始時間と終了時間の設定
        df = df.withColumn("OPERATION_START_DATE", F.when((
            F.col("OPERATION_FLAG") == 2) & (
                F.col("prev_flag").isNull()),
            F.concat(F.col("OPERATION_DATE").substr(0, 10),
                     F.lit("T00:00:00Z")))
            .when(F.col("OPERATION_FLAG") == 1,
                  F.col("OPERATION_DATE")))
        df = df.withColumn("OPERATION_END_DATE",
                           F.when((F.col("OPERATION_FLAG") == 1) & (
                               F.col("next_flag").isNull()),
                               F.concat(F.col("OPERATION_DATE").substr(0, 10),
                                        F.lit("T23:59:59Z"))).when(
                                            F.col("OPERATION_FLAG") == 2,
                                            F.col("OPERATION_DATE")))

        # 必要な列の選択とフィルタリング
        result_df = df.filter((F.col("OPERATION_FLAG") == 1) | (F.col(
            "OPERATION_FLAG") == 2)).select(
                "EQUIPMENT_ID",
                "OPERATION_START_DATE",
                "OPERATION_END_DATE").filter(F.col(
                    "OPERATION_START_DATE").isNotNull() & F.col(
                        "OPERATION_END_DATE").isNotNull())

        # 結果の表示
        combined_df = result_df.union(paired_df)

        operation_start_date = combined_df.select(F.min(
            F.col("OPERATION_START_DATE")).alias(
                "min_start_date")).collect()[0][0]

        operation_end_date = combined_df.select(F.max(
            F.col("OPERATION_END_DATE")).alias(
                "max_end_date")).collect()[0][0]
kirin-ri commented 4 months ago

df_hは以下です。 ORDER_DATE SUPPLIER_NAME ITEM UNIT_PRICE 2024-01-01 A 商品A 393.7421855

df_mは以下です。 PREDICT_REF_DATE ITEM UNIT_PRICE PREDICT_UNIT_PRICE 2024-03-11 商品A 389.2958003 386.7700159

発注先

文字列

SUPPLIER_NAME

商品

文字列

ITEM

実績価格差分比率

数値(Float)

PRICE_VARIANCE_RATIO

予測価格差分比率

数値(Float)

PREDICTPRICE_VARIANCE_RATIO

購買実績基準日

日付

PURCHASE_REF_DATE

市場価格基準日

日付

MARKET_PRICE_REF_DATE

kirin-ri commented 4 months ago
result = df_h.join(df_m, "ITEM").select(
    col("df_h.ORDER_DATE"),
    col("df_h.SUPPLIER_NAME"),
    col("df_h.ITEM"),
    col("df_h.UNIT_PRICE").alias("df_h_UNIT_PRICE"),
    col("df_m.UNIT_PRICE").alias("df_m_UNIT_PRICE"),
    col("df_m.PREDICT_UNIT_PRICE"),
    ((col("df_h.UNIT_PRICE") - col("df_m.UNIT_PRICE")) / col("df_m.UNIT_PRICE") * 100).alias("PRICE_VARIANCE_RATIO"),
    ((col("df_h.UNIT_PRICE") - col("df_m.PREDICT_UNIT_PRICE")) / col("df_m.PREDICT_UNIT_PRICE") * 100).alias("PREDICTPRICE_VARIANCE_RATIO")
)
kirin-ri commented 4 months ago

check:b'{"error_code":"INVALID_PARAMETER_VALUE","message":"Only Latin1 (ASCII) characters are currently supported. Any international characters must be removed or replaced in python parameters"}'

kirin-ri commented 4 months ago

['A', '商品A', '2024-01-01', '2024-03-11']

kirin-ri commented 4 months ago
payload = {
    "job_id": job_id_to_run,
    "python_params": data_params
}
kirin-ri commented 4 months ago

['A', '商品A', '2024-01-01', '2024-03-11']

kirin-ri commented 4 months ago

response = requests.post(run_now_url, headers=headers, json=payload)

kirin-ri commented 4 months ago

response = requests.post(run_now_url, headers=headers, json=payload)``

kirin-ri commented 4 months ago
def escape_non_ascii(data):
    return ''.join([char if ord(char) < 128 else '\\u{:04x}'.format(ord(char)) for char in data])

data_params = ['A', '商品A', '2024-01-01', '2024-03-11']
escaped_data_params = [escape_non_ascii(param) for param in data_params]
kirin-ri commented 4 months ago

UnicodeEncodeError: 'latin-1' codec can't encode characters in position 53-54: Body ('商品') is not valid Latin-1. Use body.encode('utf-8') if you want to send it encoded in UTF-8.

kirin-ri commented 4 months ago

2024-07-10 05:38:56,965 INFO -20240710-19e7dcd0-70fa-4e31-8707-114995b825cc check:b'{"error_code":"INVALID_PARAMETER_VALUE","message":"Only Latin1 (ASCII) characters are currently supported. Any international characters must be removed or replaced in python parameters"}'

kirin-ri commented 4 months ago
(base) q_li@vm-I-DNA-daas-2:~/Desktop/catalog-web-app$ curl -X POST  http://20.18.197.159/api/metricsTest -d "supplier_name=A&item=ITEM-C&order_date=2024-02-19&predict_ref_date=2024-02-26"
<!doctype html>
<html lang=en>
<title>Redirecting...</title>
<h1>Redirecting...</h1>
<p>You should be redirected automatically to the target URL: <a href="/login">/login</a>. If not, click the link.
kirin-ri commented 4 months ago

curl -X POST http://20.18.197.159/api/login -d "username=your_username&password=your_password"

kirin-ri commented 4 months ago

<!doctype html>

405 Method Not Allowed

Method Not Allowed

The method is not allowed for the requested URL.

kirin-ri commented 4 months ago
<!doctype html>
<html lang=en>
<title>405 Method Not Allowed</title>
<h1>Method Not Allowed</h1>
<p>The method is not allowed for the requested URL.</p>
kirin-ri commented 4 months ago
curl -X POST "http://20.18.197.159/api/metricsTest" \
     -H "Authorization: Bearer your_token" \
     -d "supplier_name=A&item=ITEM-C&order_date=2024-02-19&predict_ref_date=2024-02-26"
kirin-ri commented 4 months ago
auth = identity.web.Auth(
    session=session,
    authority=app.config["AUTHORITY"],
    client_id=app.config["CLIENT_ID"],
    client_credential=app.config["CLIENT_SECRET"],
)

# 認証確認用decorator
def token_required(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        token = auth.get_token_for_user(app_config.SCOPE)
        if "error" in token:
            return redirect(url_for("login"))
        return func(*args, **kwargs)
    wrapper.__name__ = func.__name__
    return wrapper

@app.route("/login")
def login():
    redirect_url = auth.log_in(
        scopes=app_config.SCOPE, redirect_uri=url_for("auth_response",
                                                      _external=True)
    )["auth_uri"]
    return redirect(redirect_url)

@app.route(app_config.REDIRECT_PATH)
def auth_response():
    result = auth.complete_log_in(request.args)
    if "error" in result:
        return redirect(url_for("login"))
    return redirect(url_for("index"))

@app.route("/")
def index():
    if not auth.get_user():
        return redirect(url_for("login"))
    return app.send_static_file("index.html")``
kirin-ri commented 4 months ago
@app.route("/api/execMetrics", methods=["POST"])
def execMetricsRouter():
    app.logger.info("execMetrics")
    return execMetrics(request.get_json())
kirin-ri commented 4 months ago

curl -X POST http://localhost:3000/api/execMetrics -d '{"id":"cfai_partsStock_priceVarianceRatio","supplier_name":"A","item":"ITEM-C","order_date":"2024-02-19","predict_ref_date":"2024-02-26"}'

kirin-ri commented 4 months ago
(base) q_li@vm-I-DNA-daas-2:~/Desktop/catalog-web-app$ curl -X POST  http://localhost:3000/api/execMetrics -d '{"id":"cfai_partsStock_
priceVarianceRatio","supplier_name":"A","item":"ITEM-C","order_date":"2024-02-19","predict_ref_date":"2024-02-26"}'
{"error":{"description":"The browser (or proxy) sent a request that this server could not understand.","name":"Bad Request"}}
kirin-ri commented 4 months ago
<!doctype html>
<html lang=en>
<title>Redirecting...</title>
<h1>Redirecting...</h1>
<p>You should be redirected automatically to the target URL: <a href="/login">/login</a>. If not, click the link.
(base) q_li@vm-I-DNA-daas-2:~/Desktop/catalog-web-app$ curl -X POST http://20.18.197.159/login
<!doctype html>
<html lang=en>
<title>405 Method Not Allowed</title>
<h1>Method Not Allowed</h1>
<p>The method is not allowed for the requested URL.</p>
kirin-ri commented 4 months ago
サインイン
申し訳ありませんが、サインイン中に問題が発生しました。

AADSTS50011: The redirect URI 'http://20.18.197.159/getAToken' specified in the request does not match the redirect URIs configured for the application 'd69ba062-cb62-440c-bf7d-4cda629c4e97'. Make sure the redirect URI sent in the request matches one added to your application in the Azure portal. Navigate to https://aka.ms/redirectUriMismatchError to learn more about how to fix this.
kirin-ri commented 4 months ago
auth = identity.web.Auth(
    session=session,
    authority=app.config["AUTHORITY"],
    client_id=app.config["CLIENT_ID"],
    client_credential=app.config["CLIENT_SECRET"],
)

# 認証確認用decorator
def token_required(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        token = auth.get_token_for_user(app_config.SCOPE)
        if "error" in token:
            return redirect(url_for("login"))
        return func(*args, **kwargs)
    wrapper.__name__ = func.__name__
    return wrapper

@app.route("/login")
def login():
    redirect_url = auth.log_in(
        scopes=app_config.SCOPE, redirect_uri=url_for("auth_response",
                                                      _external=True)
    )["auth_uri"]
    return redirect(redirect_url)

@app.route(app_config.REDIRECT_PATH)
def auth_response():
    result = auth.complete_log_in(request.args)
    if "error" in result:
        return redirect(url_for("login"))
    return redirect(url_for("index"))

@app.route("/")
def index():
    if not auth.get_user():
        return redirect(url_for("login"))
    return app.send_static_file("index.html")

@app.route("/api/metrics/<id>", methods=["GET"])
@token_required
def getCatalogMetaDetailByIdRouter(id: str):
    app.logger.info("getCatalogMetaDetailById")
    return getCatalogMetaDetailById(id)
kirin-ri commented 4 months ago
import requests

tenant_id = 'YOUR_TENANT_ID'
client_id = 'YOUR_CLIENT_ID'
client_secret = 'YOUR_CLIENT_SECRET'
scope = ['YOUR_SCOPE']  # 例: ['https://graph.microsoft.com/.default']
grant_type = 'client_credentials'

url = f'https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token'
data = {
    'client_id': client_id,
    'client_secret': client_secret,
    'scope': ' '.join(scope),
    'grant_type': grant_type,
}

response = requests.post(url, data=data)
response_json = response.json()

if 'access_token' in response_json:
    access_token = response_json['access_token']
    print(f'Access Token: {access_token}')
else:
    print(f'Error: {response_json}')