Open kirin-ri opened 5 months ago
# 前処理
# データ読み込み
# 設備稼働時間
df = d2s.readData(
self.spark,
self.dbutils,
sfDatabase="ECM_DEV",
sfSchema="PUBLIC",
query=f"""
SELECT
EQUIPMENT_ID,
OPERATION_START_DATE,
OPERATION_END_DATE,
OPERATION_TIME
FROM
ECM_RAW_OPERATION_TIME
WHERE
(OPERATION_START_DATE <= '{end_time_str}'
AND (OPERATION_END_DATE >= '{start_time_str}'
OR OPERATION_END_DATE IS NULL))
OR (OPERATION_START_DATE <= '{start_time_str}'
AND OPERATION_END_DATE IS NULL)
""",
)
# データ加工
# タイムスタンプ型に変換
date_range = [start_date + datetime.timedelta(days=x)
for x in range((end_date - start_date).days + 1)]
for date in date_range:
# start_dateとend_dateの決定
df.createOrReplaceTempView("operation_time")
# start_dateの決定
start_date_query = """
SELECT MIN(OPERATION_START_DATE) AS start_date
FROM operation_time
"""
start_date_row = self.spark.sql(start_date_query).collect()[0]
start_date = start_date_row["start_date"]
# end_dateの決定
end_date_query = """
SELECT MAX(OPERATION_END_DATE) AS end_date
FROM operation_time
"""
end_date_row = self.spark.sql(end_date_query).collect()[0]
end_date = end_date_row["end_date"]
# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
end_date = datetime.datetime.now()
# 日付のフォーマットを変換
for record in data:
record["OPERATION_START_DATE"] = datetime.datetime.strptime(record["OPERATION_START_DATE"], "%Y-%m-%d")
if record["OPERATION_END_DATE"]:
record["OPERATION_END_DATE"] = datetime.datetime.strptime(record["OPERATION_END_DATE"], "%Y-%m-%d")
else:
record["OPERATION_END_DATE"] = None
# start_dateの決定
start_date = min(record["OPERATION_START_DATE"] for record in data)
# end_dateの決定
end_date = max(record["OPERATION_END_DATE"] for record in data if record["OPERATION_END_DATE"] is not None)
# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
end_date = datetime.datetime.now()
else:
end_date = max(end_date, datetime.datetime.now())
# 日付範囲の作成
date_range = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)]
# 結果表示
print("Start Date:", start_date)
print("End Date:", end_date)
print("Date Range:", date_range)
df = d2s.readData(
self.spark,
self.dbutils,
sfDatabase="ECM_DEV",
sfSchema="PUBLIC",
query=f"""
SELECT
EQUIPMENT_ID,
OPERATION_START_DATE,
OPERATION_END_DATE,
OPERATION_TIME
FROM
ECM_RAW_OPERATION_TIME
WHERE
(OPERATION_START_DATE <= '{end_time_str}'
AND (OPERATION_END_DATE >= '{start_time_str}'
OR OPERATION_END_DATE IS NULL))
OR (OPERATION_START_DATE <= '{start_time_str}'
AND OPERATION_END_DATE IS NULL)
""",
)
# データ加工
# タイムスタンプ型に変換
date_range = [start_date + datetime.timedelta(days=x)
for x in range((end_date - start_date).days + 1)]
ValueError: unconverted data remains: T04:12:34Z
# 日付のフォーマットを変換
for record in data:
record["OPERATION_START_DATE"] = datetime.datetime.strptime(record["OPERATION_START_DATE"], "%Y-%m-%dT%H:%M:%SZ")
if record["OPERATION_END_DATE"]:
record["OPERATION_END_DATE"] = datetime.datetime.strptime(record["OPERATION_END_DATE"], "%Y-%m-%dT%H:%M:%SZ")
else:
record["OPERATION_END_DATE"] = None
# start_dateの決定
start_date = min(record["OPERATION_START_DATE"] for record in data)
# end_dateの決定
end_date = max(record["OPERATION_END_DATE"] for record in data if record["OPERATION_END_DATE"] is not None)
# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
end_date = datetime.datetime.now()
else:
end_date = max(end_date, datetime.datetime.now())
# 日付範囲の作成
date_range = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)]
# 結果表示
print("Start Date:", start_date)
print("End Date:", end_date)
print("Date Range:", date_range)
Row' object does not support item assignment
# OPERATION_START_DATEとOPERATION_END_DATEの最小値と最大値を取得
start_date = df.select(spark_min(col("OPERATION_START_DATE"))).collect()[0][0]
end_date = df.select(spark_max(col("OPERATION_END_DATE"))).collect()[0][0]
# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if end_date is None:
end_date = datetime.datetime.now()
else:
end_date = max(end_date, datetime.datetime.now())
# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
if end_date is not None:
end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%SZ")
# 日付範囲の作成
date_range = [start_date + datetime.timedelta(days=x) for x in range((end_date - start_date).days + 1)]
# 結果表示
print("Start Date:", start_date)
print("End Date:", end_date)
print("Date Range:", date_range)
# OPERATION_START_DATEの最小値を取得
start_date = df.select(spark_min(col("OPERATION_START_DATE"))).collect()[0][0]
# OPERATION_END_DATEの最大値を取得、NULLがある場合は現在の日付を使用
max_end_date = df.select(spark_max(col("OPERATION_END_DATE"))).collect()[0][0]
null_end_date_count = df.filter(col("OPERATION_END_DATE").isNull()).count()
# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
if null_end_date_count > 0:
end_date = datetime.datetime.now()
else:
end_date = datetime.datetime.strptime(max_end_date, "%Y-%m-%dT%H:%M:%SZ")
end_date = max(end_date, datetime.datetime.now())
agg_result = df.agg(
spark_min(col("OPERATION_START_DATE")).alias("min_start_date"),
spark_max(col("OPERATION_END_DATE")).alias("max_end_date")
).collect()[0]
start_date = agg_result["min_start_date"]
max_end_date = agg_result["max_end_date"]
# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
# OPERATION_END_DATEがNULLの場合は今日の日付をend_dateとして設定
if max_end_date is None:
end_date = datetime.datetime.now()
else:
end_date = datetime.datetime.strptime(max_end_date, "%Y-%m-%dT%H:%M:%SZ")
end_date = max(end_date, datetime.datetime.now())
# OPERATION_START_DATEの最小値を取得
start_date = df.select(spark_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]
# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))
end_date = max_end_date_df.select(spark_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]
# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S")
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ") start_date = start_date.replace(hour=0, minute=0, second=0, microsecond=0)
end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S") end_date = end_date.replace(hour=23, minute=59, second=59, microsecond=999999)
# 基準日算出
if len(fromPeriod) > 0:
start_date = datetime.datetime.strptime(fromPeriod,
'%Y-%m-%d %H:%M:%S')
end_date = datetime.datetime.strptime(toPeriod,
'%Y-%m-%d %H:%M:%S')
else:
today = datetime.datetime.today()
start_date = datetime.datetime.strptime(today,
'%Y-%m-%d %H:%M:%S')
end_date = datetime.datetime.strptime(today,
'%Y-%m-%d %H:%M:%S')
start_time = datetime.datetime(start_date.year, start_date.month,
start_date.day, 0, 0, 0)
end_time = datetime.datetime(end_date.year, end_date.month,
end_date.day, 23, 59, 59)
# タイムスタンプ文字列に変換
start_time_str = start_time.strftime('%Y-%m-%dT%H:%M:%SZ')
end_time_str = end_time.strftime('%Y-%m-%dT%H:%M:%SZ')
# 前処理
# データ読み込み
# 設備稼働時間
df = d2s.readData(
# self.spark,
# self.dbutils,
spark,
dbutils,
sfDatabase="ECM_DEV",
sfSchema="PUBLIC",
query=f"""
SELECT
EQUIPMENT_ID,
OPERATION_START_DATE,
OPERATION_END_DATE,
OPERATION_TIME
FROM
ECM_RAW_OPERATION_TIME
WHERE
(OPERATION_START_DATE <= '{end_time_str}'
AND (OPERATION_END_DATE >= '{start_time_str}' OR OPERATION_END_DATE IS NULL))
OR (OPERATION_START_DATE <= '{start_time_str}' AND OPERATION_END_DATE IS NULL)
""",
)
display(df)
# データ加工
# OPERATION_START_DATEの最小値を取得
start_date = df.select(_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]
# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))
end_date = max_end_date_df.select(_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]
# 日付のフォーマットを変換
start_date = datetime.datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ")
start_date = start_date.replace(hour=0, minute=0, second=0)
end_date = datetime.datetime.strptime(end_date, "%Y-%m-%dT%H:%M:%S")
end_date = end_date.replace(hour=23, minute=59, second=59)
print("Start Date:", start_date)
print("End Date:", end_date)
# データ加工
# OPERATION_START_DATEの最小値を取得
operation_start_date = df.select(spark_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]
# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))
operation_end_date = max_end_date_df.select(spark_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]
# 日付のフォーマットを変換
operation_start_date = datetime.datetime.strptime(operation_start_date, "%Y-%m-%dT%H:%M:%SZ")
operation_end_date = datetime.datetime.strptime(operation_end_date, "%Y-%m-%dT%H:%M:%S")
# 基準日と比較して最終的な開始日と終了日を設定
final_start_date = min(start_date, operation_start_date)
final_end_date = max(end_date, operation_end_date)
# 開始日と終了日をそれぞれ0時0分0秒と23時59分59秒に設定
final_start_date = final_start_date.replace(hour=0, minute=0, second=0, microsecond=0)
final_end_date = final_end_date.replace(hour=23, minute=59, second=59, microsecond=999999)
# 日付範囲の作成
date_range = [final_start_date + datetime.timedelta(days=x) for x in range((final_end_date - final_start_date).days + 1)]
# OPERATION_START_DATEの最小値を取得
operation_start_date = df.select(spark_min(col("OPERATION_START_DATE")).alias("min_start_date")).collect()[0][0]
# OPERATION_END_DATEの最大値を取得し、NULLがある場合は現在の日付を設定
current_date_str = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
max_end_date_df = df.withColumn("OPERATION_END_DATE", coalesce(col("OPERATION_END_DATE"), lit(current_date_str)))
operation_end_date = max_end_date_df.select(spark_max(col("OPERATION_END_DATE")).alias("max_end_date")).collect()[0][0]
# 日付のフォーマットを変換
operation_start_date = datetime.datetime.strptime(operation_start_date, "%Y-%m-%dT%H:%M:%SZ")
operation_end_date = datetime.datetime.strptime(operation_end_date, "%Y-%m-%dT%H:%M:%S")
# 基準日と比較して最終的な開始日と終了日を設定
final_start_date = max(start_date, operation_start_date)
final_end_date = min(end_date, operation_end_date)
# 開始日と終了日をそれぞれ0時0分0秒と23時59分59秒に設定
final_start_date = final_start_date.replace(hour=0, minute=0, second=0, microsecond=0)
final_end_date = final_end_date.replace(hour=23, minute=59, second=59, microsecond=999999)
# 日付範囲の作成
date_range = [final_start_date + datetime.timedelta(days=x) for x in range((final_end_date - final_start_date).days + 1)]
設備A 2024-05-01T04:12:34Z 2024-05-01T05:45:12Z
設備A 2024-05-01T10:00:16Z 2024-05-01T11:30:20Z
設備A 2024-04-30T22:00:10Z 2024-05-01T01:45:32Z
設備A 2024-05-01T22:00:10Z 2024-05-02T01:45:32Z
設備A 2024-04-28T05:12:24Z 2024-04-30T08:25:37Z
設備B 2024-05-01T08:45:00Z 2024-05-01T12:00:00Z
設備B 2024-05-01T04:12:34Z 2024-05-01T05:45:12Z
設備B 2024-04-28T08:45:00Z 2024-04-28T12:00:00Z
設備B 2024-05-02T04:12:34Z 2024-05-02T05:45:12Z
設備C 2024-04-27T08:14:24Z
設備D 2024-05-01T08:14:24Z
設備A 2024-04-28 00:00:00 18.79333333 設備A 2024-04-29 00:00:00 23.99972222 設備A 2024-04-30 00:00:00 10.42416667 設備A 2024-05-01 00:00:00 6.801111111 設備A 2024-05-02 00:00:00 1.758888889 設備B 2024-04-28 00:00:00 3.25 設備B 2024-05-01 00:00:00 4.793888889 設備B 2024-05-02 00:00:00 1.543888889 設備C 2024-04-27 00:00:00 15.76 設備C 2024-04-28 00:00:00 23.99972222 設備C 2024-04-29 00:00:00 23.99972222 設備C 2024-04-30 00:00:00 23.99972222 設備C 2024-05-01 00:00:00 23.99972222 設備C 2024-05-02 00:00:00 23.99972222 設備C 2024-05-03 00:00:00 23.99972222 設備C 2024-05-04 00:00:00 23.99972222 設備C 2024-05-05 00:00:00 23.99972222 設備C 2024-05-06 00:00:00 23.99972222 設備C 2024-05-07 00:00:00 23.99972222 設備C 2024-05-08 00:00:00 23.99972222 設備C 2024-05-09 00:00:00 23.99972222 設備C 2024-05-10 00:00:00 23.99972222 設備C 2024-05-11 00:00:00 23.99972222 設備C 2024-05-12 00:00:00 23.99972222 設備C 2024-05-13 00:00:00 23.99972222 設備C 2024-05-14 00:00:00 23.99972222 設備C 2024-05-15 00:00:00 23.99972222 設備C 2024-05-16 00:00:00 23.99972222 設備C 2024-05-17 00:00:00 23.99972222 設備C 2024-05-18 00:00:00 23.99972222 設備C 2024-05-19 00:00:00 23.99972222 設備C 2024-05-20 00:00:00 23.99972222 設備C 2024-05-21 00:00:00 23.99972222 設備C 2024-05-22 00:00:00 23.99972222 設備C 2024-05-23 00:00:00 23.99972222 設備C 2024-05-24 00:00:00 23.99972222 設備C 2024-05-25 00:00:00 23.99972222 設備C 2024-05-26 00:00:00 23.99972222 設備C 2024-05-27 00:00:00 23.99972222 設備C 2024-05-28 00:00:00 23.99972222 設備C 2024-05-29 00:00:00 23.99972222 設備C 2024-05-30 00:00:00 23.99972222 設備C 2024-05-31 00:00:00 23.99972222 設備C 2024-06-01 00:00:00 23.99972222 設備C 2024-06-02 00:00:00 23.99972222 設備C 2024-06-03 00:00:00 23.99972222 設備C 2024-06-04 00:00:00 23.99972222 設備C 2024-06-05 00:00:00 23.99972222 設備C 2024-06-06 00:00:00 23.99972222 設備C 2024-06-07 00:00:00 23.99972222 設備C 2024-06-08 00:00:00 23.99972222 設備C 2024-06-09 00:00:00 23.99972222 設備C 2024-06-10 00:00:00 23.99972222 設備C 2024-06-11 00:00:00 23.99972222 設備C 2024-06-12 00:00:00 23.99972222 設備C 2024-06-13 00:00:00 23.99972222 設備C 2024-06-14 00:00:00 23.99972222 設備C 2024-06-15 00:00:00 23.99972222 設備C 2024-06-16 00:00:00 23.99972222 設備C 2024-06-17 00:00:00 23.99972222 設備C 2024-06-18 00:00:00 23.99972222 設備C 2024-06-19 00:00:00 23.99972222 設備C 2024-06-20 00:00:00 23.99972222 設備C 2024-06-21 00:00:00 23.99972222 設備C 2024-06-22 00:00:00 23.99972222 設備C 2024-06-23 00:00:00 23.99972222 設備C 2024-06-24 00:00:00 23.99972222 設備C 2024-06-25 00:00:00 0 設備D 2024-05-01 00:00:00 15.76 設備D 2024-05-02 00:00:00 23.99972222 設備D 2024-05-03 00:00:00 23.99972222 設備D 2024-05-04 00:00:00 23.99972222 設備D 2024-05-05 00:00:00 23.99972222 設備D 2024-05-06 00:00:00 23.99972222 設備D 2024-05-07 00:00:00 23.99972222 設備D 2024-05-08 00:00:00 23.99972222 設備D 2024-05-09 00:00:00 23.99972222 設備D 2024-05-10 00:00:00 23.99972222 設備D 2024-05-11 00:00:00 23.99972222 設備D 2024-05-12 00:00:00 23.99972222 設備D 2024-05-13 00:00:00 23.99972222 設備D 2024-05-14 00:00:00 23.99972222 設備D 2024-05-15 00:00:00 23.99972222 設備D 2024-05-16 00:00:00 23.99972222 設備D 2024-05-17 00:00:00 23.99972222 設備D 2024-05-18 00:00:00 23.99972222 設備D 2024-05-19 00:00:00 23.99972222 設備D 2024-05-20 00:00:00 23.99972222 設備D 2024-05-21 00:00:00 23.99972222 設備D 2024-05-22 00:00:00 23.99972222 設備D 2024-05-23 00:00:00 23.99972222 設備D 2024-05-24 00:00:00 23.99972222 設備D 2024-05-25 00:00:00 23.99972222 設備D 2024-05-26 00:00:00 23.99972222 設備D 2024-05-27 00:00:00 23.99972222 設備D 2024-05-28 00:00:00 23.99972222 設備D 2024-05-29 00:00:00 23.99972222 設備D 2024-05-30 00:00:00 23.99972222 設備D 2024-05-31 00:00:00 23.99972222 設備D 2024-06-01 00:00:00 23.99972222 設備D 2024-06-02 00:00:00 23.99972222 設備D 2024-06-03 00:00:00 23.99972222 設備D 2024-06-04 00:00:00 23.99972222 設備D 2024-06-05 00:00:00 23.99972222 設備D 2024-06-06 00:00:00 23.99972222 設備D 2024-06-07 00:00:00 23.99972222 設備D 2024-06-08 00:00:00 23.99972222 設備D 2024-06-09 00:00:00 23.99972222 設備D 2024-06-10 00:00:00 23.99972222 設備D 2024-06-11 00:00:00 23.99972222 設備D 2024-06-12 00:00:00 23.99972222 設備D 2024-06-13 00:00:00 23.99972222 設備D 2024-06-14 00:00:00 23.99972222 設備D 2024-06-15 00:00:00 23.99972222 設備D 2024-06-16 00:00:00 23.99972222 設備D 2024-06-17 00:00:00 23.99972222 設備D 2024-06-18 00:00:00 23.99972222 設備D 2024-06-19 00:00:00 23.99972222 設備D 2024-06-20 00:00:00 23.99972222 設備D 2024-06-21 00:00:00 23.99972222 設備D 2024-06-22 00:00:00 23.99972222 設備D 2024-06-23 00:00:00 23.99972222 設備D 2024-06-24 00:00:00 23.99972222 設備D 2024-06-25 00:00:00 0
: unconverted data remains: 23:59:59
fromPeriod = '2024-04-30 00:00:00' toPeriod = '2025-05-01 23:59:59'
SELECT
EQUIPMENT_ID,
REF_DATE,
OPERATION_TIME
FROM
CONV_RAW_OPERATION_TIME_DAILY
WHERE
REF_DATE BETWEEN TO_TIMESTAMP('{refdate}') AND TO_TIMESTAMP('{enddate}')
operation_start_date = combined_df.select(F.min(
F.col("OPERATION_START_DATE")).alias(
"min_start_date")).collect()[0][0]
operation_end_date = combined_df.select(F.max(
F.col("OPERATION_END_DATE")).alias(
"max_end_date")).collect()[0][0]
if
# 日付のフォーマットを変換
operation_start_date = datetime.datetime.strptime(operation_start_date,
"%Y-%m-%dT%H:%M:%SZ")
operation_end_date = datetime.datetime.strptime(operation_end_date,
"%Y-%m-%dT%H:%M:%SZ")
# ウィンドウ定義
window_spec = Window.partitionBy(
"EQUIPMENT_ID").orderBy("OPERATION_DATE")
# LEAD関数を使って次のOPERATION_DATEとOPERATION_FLAGを取得
df_with_lead = df.withColumn(
"NEXT_OPERATION_DATE",
F.lead("OPERATION_DATE").over(window_spec)).withColumn(
"NEXT_OPERATION_FLAG", F.lead(
"OPERATION_FLAG").over(window_spec))
# OPERATION_FLAGが1で、NEXT_OPERATION_FLAGが2のレコードを抽出
paired_df = df_with_lead.filter((
F.col("OPERATION_FLAG") == 1) & (
F.col("NEXT_OPERATION_FLAG") == 2)).select(F.col(
"EQUIPMENT_ID"), F.col("OPERATION_DATE").alias(
"OPERATION_START_DATE"), F.col(
"NEXT_OPERATION_DATE").alias(
"OPERATION_END_DATE")
)
# リード、ラグ関数で次の行と前の行を取得
df = df.withColumn("next_date", F.lead(
"OPERATION_DATE", 1).over(window_spec))
df = df.withColumn("next_flag", F.lead(
"OPERATION_FLAG", 1).over(window_spec))
df = df.withColumn("prev_flag", F.lag(
"OPERATION_FLAG", 1).over(window_spec))
# 開始時間と終了時間の設定
df = df.withColumn("OPERATION_START_DATE", F.when((
F.col("OPERATION_FLAG") == 2) & (
F.col("prev_flag").isNull()),
F.concat(F.col("OPERATION_DATE").substr(0, 10),
F.lit("T00:00:00Z")))
.when(F.col("OPERATION_FLAG") == 1,
F.col("OPERATION_DATE")))
df = df.withColumn("OPERATION_END_DATE",
F.when((F.col("OPERATION_FLAG") == 1) & (
F.col("next_flag").isNull()),
F.concat(F.col("OPERATION_DATE").substr(0, 10),
F.lit("T23:59:59Z"))).when(
F.col("OPERATION_FLAG") == 2,
F.col("OPERATION_DATE")))
# 必要な列の選択とフィルタリング
result_df = df.filter((F.col("OPERATION_FLAG") == 1) | (F.col(
"OPERATION_FLAG") == 2)).select(
"EQUIPMENT_ID",
"OPERATION_START_DATE",
"OPERATION_END_DATE").filter(F.col(
"OPERATION_START_DATE").isNotNull() & F.col(
"OPERATION_END_DATE").isNotNull())
# 結果の表示
combined_df = result_df.union(paired_df)
operation_start_date = combined_df.select(F.min(
F.col("OPERATION_START_DATE")).alias(
"min_start_date")).collect()[0][0]
operation_end_date = combined_df.select(F.max(
F.col("OPERATION_END_DATE")).alias(
"max_end_date")).collect()[0][0]
df_hは以下です。 ORDER_DATE SUPPLIER_NAME ITEM UNIT_PRICE 2024-01-01 A 商品A 393.7421855
df_mは以下です。 PREDICT_REF_DATE ITEM UNIT_PRICE PREDICT_UNIT_PRICE 2024-03-11 商品A 389.2958003 386.7700159
発注先
文字列
SUPPLIER_NAME
商品
文字列
ITEM
実績価格差分比率
数値(Float)
PRICE_VARIANCE_RATIO
予測価格差分比率
数値(Float)
PREDICTPRICE_VARIANCE_RATIO
購買実績基準日
日付
PURCHASE_REF_DATE
市場価格基準日
日付
MARKET_PRICE_REF_DATE
result = df_h.join(df_m, "ITEM").select(
col("df_h.ORDER_DATE"),
col("df_h.SUPPLIER_NAME"),
col("df_h.ITEM"),
col("df_h.UNIT_PRICE").alias("df_h_UNIT_PRICE"),
col("df_m.UNIT_PRICE").alias("df_m_UNIT_PRICE"),
col("df_m.PREDICT_UNIT_PRICE"),
((col("df_h.UNIT_PRICE") - col("df_m.UNIT_PRICE")) / col("df_m.UNIT_PRICE") * 100).alias("PRICE_VARIANCE_RATIO"),
((col("df_h.UNIT_PRICE") - col("df_m.PREDICT_UNIT_PRICE")) / col("df_m.PREDICT_UNIT_PRICE") * 100).alias("PREDICTPRICE_VARIANCE_RATIO")
)
check:b'{"error_code":"INVALID_PARAMETER_VALUE","message":"Only Latin1 (ASCII) characters are currently supported. Any international characters must be removed or replaced in python parameters"}'
['A', '商品A', '2024-01-01', '2024-03-11']
payload = {
"job_id": job_id_to_run,
"python_params": data_params
}
['A', '商品A', '2024-01-01', '2024-03-11']
response = requests.post(run_now_url, headers=headers, json=payload)
response = requests.post(run_now_url, headers=headers, json=payload)``
def escape_non_ascii(data):
return ''.join([char if ord(char) < 128 else '\\u{:04x}'.format(ord(char)) for char in data])
data_params = ['A', '商品A', '2024-01-01', '2024-03-11']
escaped_data_params = [escape_non_ascii(param) for param in data_params]
UnicodeEncodeError: 'latin-1' codec can't encode characters in position 53-54: Body ('商品') is not valid Latin-1. Use body.encode('utf-8') if you want to send it encoded in UTF-8.
2024-07-10 05:38:56,965 INFO -20240710-19e7dcd0-70fa-4e31-8707-114995b825cc check:b'{"error_code":"INVALID_PARAMETER_VALUE","message":"Only Latin1 (ASCII) characters are currently supported. Any international characters must be removed or replaced in python parameters"}'
(base) q_li@vm-I-DNA-daas-2:~/Desktop/catalog-web-app$ curl -X POST http://20.18.197.159/api/metricsTest -d "supplier_name=A&item=ITEM-C&order_date=2024-02-19&predict_ref_date=2024-02-26"
<!doctype html>
<html lang=en>
<title>Redirecting...</title>
<h1>Redirecting...</h1>
<p>You should be redirected automatically to the target URL: <a href="/login">/login</a>. If not, click the link.
curl -X POST http://20.18.197.159/api/login -d "username=your_username&password=your_password"
<!doctype html>
The method is not allowed for the requested URL.
<!doctype html>
<html lang=en>
<title>405 Method Not Allowed</title>
<h1>Method Not Allowed</h1>
<p>The method is not allowed for the requested URL.</p>
curl -X POST "http://20.18.197.159/api/metricsTest" \
-H "Authorization: Bearer your_token" \
-d "supplier_name=A&item=ITEM-C&order_date=2024-02-19&predict_ref_date=2024-02-26"
auth = identity.web.Auth(
session=session,
authority=app.config["AUTHORITY"],
client_id=app.config["CLIENT_ID"],
client_credential=app.config["CLIENT_SECRET"],
)
# 認証確認用decorator
def token_required(func):
@wraps(func)
def wrapper(*args, **kwargs):
token = auth.get_token_for_user(app_config.SCOPE)
if "error" in token:
return redirect(url_for("login"))
return func(*args, **kwargs)
wrapper.__name__ = func.__name__
return wrapper
@app.route("/login")
def login():
redirect_url = auth.log_in(
scopes=app_config.SCOPE, redirect_uri=url_for("auth_response",
_external=True)
)["auth_uri"]
return redirect(redirect_url)
@app.route(app_config.REDIRECT_PATH)
def auth_response():
result = auth.complete_log_in(request.args)
if "error" in result:
return redirect(url_for("login"))
return redirect(url_for("index"))
@app.route("/")
def index():
if not auth.get_user():
return redirect(url_for("login"))
return app.send_static_file("index.html")``
@app.route("/api/execMetrics", methods=["POST"])
def execMetricsRouter():
app.logger.info("execMetrics")
return execMetrics(request.get_json())
curl -X POST http://localhost:3000/api/execMetrics -d '{"id":"cfai_partsStock_priceVarianceRatio","supplier_name":"A","item":"ITEM-C","order_date":"2024-02-19","predict_ref_date":"2024-02-26"}'
(base) q_li@vm-I-DNA-daas-2:~/Desktop/catalog-web-app$ curl -X POST http://localhost:3000/api/execMetrics -d '{"id":"cfai_partsStock_
priceVarianceRatio","supplier_name":"A","item":"ITEM-C","order_date":"2024-02-19","predict_ref_date":"2024-02-26"}'
{"error":{"description":"The browser (or proxy) sent a request that this server could not understand.","name":"Bad Request"}}
<!doctype html>
<html lang=en>
<title>Redirecting...</title>
<h1>Redirecting...</h1>
<p>You should be redirected automatically to the target URL: <a href="/login">/login</a>. If not, click the link.
(base) q_li@vm-I-DNA-daas-2:~/Desktop/catalog-web-app$ curl -X POST http://20.18.197.159/login
<!doctype html>
<html lang=en>
<title>405 Method Not Allowed</title>
<h1>Method Not Allowed</h1>
<p>The method is not allowed for the requested URL.</p>
サインイン
申し訳ありませんが、サインイン中に問題が発生しました。
AADSTS50011: The redirect URI 'http://20.18.197.159/getAToken' specified in the request does not match the redirect URIs configured for the application 'd69ba062-cb62-440c-bf7d-4cda629c4e97'. Make sure the redirect URI sent in the request matches one added to your application in the Azure portal. Navigate to https://aka.ms/redirectUriMismatchError to learn more about how to fix this.
auth = identity.web.Auth(
session=session,
authority=app.config["AUTHORITY"],
client_id=app.config["CLIENT_ID"],
client_credential=app.config["CLIENT_SECRET"],
)
# 認証確認用decorator
def token_required(func):
@wraps(func)
def wrapper(*args, **kwargs):
token = auth.get_token_for_user(app_config.SCOPE)
if "error" in token:
return redirect(url_for("login"))
return func(*args, **kwargs)
wrapper.__name__ = func.__name__
return wrapper
@app.route("/login")
def login():
redirect_url = auth.log_in(
scopes=app_config.SCOPE, redirect_uri=url_for("auth_response",
_external=True)
)["auth_uri"]
return redirect(redirect_url)
@app.route(app_config.REDIRECT_PATH)
def auth_response():
result = auth.complete_log_in(request.args)
if "error" in result:
return redirect(url_for("login"))
return redirect(url_for("index"))
@app.route("/")
def index():
if not auth.get_user():
return redirect(url_for("login"))
return app.send_static_file("index.html")
@app.route("/api/metrics/<id>", methods=["GET"])
@token_required
def getCatalogMetaDetailByIdRouter(id: str):
app.logger.info("getCatalogMetaDetailById")
return getCatalogMetaDetailById(id)
import requests
tenant_id = 'YOUR_TENANT_ID'
client_id = 'YOUR_CLIENT_ID'
client_secret = 'YOUR_CLIENT_SECRET'
scope = ['YOUR_SCOPE'] # 例: ['https://graph.microsoft.com/.default']
grant_type = 'client_credentials'
url = f'https://login.microsoftonline.com/{tenant_id}/oauth2/v2.0/token'
data = {
'client_id': client_id,
'client_secret': client_secret,
'scope': ' '.join(scope),
'grant_type': grant_type,
}
response = requests.post(url, data=data)
response_json = response.json()
if 'access_token' in response_json:
access_token = response_json['access_token']
print(f'Access Token: {access_token}')
else:
print(f'Error: {response_json}')
dd