Deal json and xml data

json convert to csv

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import requests
import json
url = 'api_key'
headers = {
    'accept': 'application/json',
}
response = requests.get(url, headers=headers)
print(response)

# 檢查請求是否成功
if response.status_code == 200:
    json_data = response.json()  # 解析回應為 JSON
    print(json_data)  # 輸出結果
else:
    print(f"請求失敗，狀態碼: {response.status_code}")

# indent json data as formatted_json.json, observe data structure
formatted_json = json.dumps(json_data, indent=4)
with open("formatted_json.json", "w") as file:
    file.write(formatted_json)


import pandas as pd
loc1 = []
loc2 = []
json_data = response.json()
for location in json_data['records']['location']:
    a = [location['station']['StationName'],location['station']['StationID'], location['station']['StationAttribute']]
    loc1.append(a)

for location in json_data['records']['location']:
    for obs_time in location['stationObsTimes']['stationObsTime']:
        a1 = [obs_time['Date'],obs_time['weatherElements']['Precipitation'] ]
        loc2.append(a1)

#check 33 station total data number
total_day = len(loc2)/len(loc1)
total_day = int(total_day)
datatime = [a[0] for a in loc2[0:total_day] ]   

df_location = pd.DataFrame(loc1, columns=['Name', 'ID', 'Attribute']) 
df_date = pd.DataFrame(datatime, columns=['Date']) 

# every station get each day
Precipitation_data = df_location.merge(df_date, how='cross')
Precipitation_data['Precipitation'] = [a[1] for a in loc2[:]]
Precipitation_data.to_csv("雨量資料_2024.csv", encoding='utf-8-sig')

xml convert to csv

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import xmltodict
import pandas as pd
import json

# 讀取並解析 XML 檔案
with open('dy_Report_2023.xml', encoding='utf-8') as xml_file:
    data_dict = xmltodict.parse(xml_file.read())
# 將資料轉換成 DataFrame
df = pd.json_normalize(data_dict)
# 將 DataFrame 轉換回字典格式
json_data = df.to_dict(orient='dict')
# 將字典格式的資料寫入 JSON 檔案
with open('雨量_2023.json', 'w', encoding='utf-8') as outfile:
    json.dump(json_data, outfile, ensure_ascii=False, indent=4)

loc1 = []
loc2 = []
for location in json_data['cwaopendata.resources.resource.data.surfaceObs.location'
][0]:
    a = [location['station']['StationName'],location['station']['StationID'], location['station']['StationAttribute']]
    loc1.append(a)

for location in json_data['cwaopendata.resources.resource.data.surfaceObs.location'
][0]:
    for obs_time in location['stationObsTimes']['stationObsTime']:
        a1 = [obs_time['Date'],obs_time['weatherElements']['Precipitation'] ]
        loc2.append(a1)

#check 34 station total data number is 365
total_day = len(loc2)/len(loc1)
total_day = int(total_day)
datatime = [a[0] for a in loc2[0:total_day] ]   

df_location = pd.DataFrame(loc1, columns=['Name', 'ID', 'Attribute']) 
df_date = pd.DataFrame(datatime, columns=['Date']) 

# every station get each day
Precipitation_data = df_location.merge(df_date, how='cross')
Precipitation_data['Precipitation'] = [a[1] for a in loc2[:]]
Precipitation_data.to_csv("雨量資料_2023.csv", encoding='utf-8-sig')

Reference

中央氣象署開放資料擷取API
氣象資料開放平台