转换NOAA天气数据文件“.dly”为Pandas DataFrame 获取数据：ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily In [1]: import matplotlib.pyplot as plt import pandas as pd import numpy as np import re import ftplib
% matplotlib notebook
In [2]: # download data from FTP
def download_file_from_ftp(FTP_SERVER, FTP_PATH, FILENAME):
    """Download FILENAME from directory FTP_PATH on FTP_SERVER.

    Connects with ``ftplib.FTP``, calls ``login()`` with no arguments,
    changes to FTP_PATH and retrieves FILENAME in binary mode. The file is
    saved locally under the same name (relative to the current working
    directory).

    The data is first written to ``FILENAME + '.part'`` and only moved onto
    FILENAME after the transfer has finished, so a failed transfer neither
    leaves a truncated FILENAME behind nor overwrites an earlier good copy.

    Args:
        FTP_SERVER: Host name of the FTP server.
        FTP_PATH: Remote directory containing the file.
        FILENAME: Name of the remote file; also used as the local file name.

    Raises:
        Any exception raised by ftplib or by the local file operations is
        propagated unchanged after the partial file has been removed.
    """
    import os  # local import: only this function needs it

    partial_name = FILENAME + '.part'
    with ftplib.FTP(FTP_SERVER) as ftp:
        ftp.login()
        ftp.cwd(FTP_PATH)
        try:
            with open(partial_name, 'wb') as f:
                ftp.retrbinary('RETR ' + FILENAME, f.write)
        except BaseException:
            # Drop the incomplete download before re-raising the error.
            if os.path.exists(partial_name):
                os.remove(partial_name)
            raise
        # Publish the file only once it has been received completely.
        os.replace(partial_name, FILENAME)
# In [3]: Look up the station ID
def get_station_ID(station_to_find, filename):
    """Return the ID of the first station whose line contains station_to_find.

    Reads ``filename`` line by line. For the first line that contains
    ``station_to_find`` as a substring, the text before the first space on
    that line is returned.

    Args:
        station_to_find: Substring to search for in each line.
        filename: Path of the text file to scan.

    Returns:
        The leading space-delimited field of the first matching line, or
        None when no line matches.
    """
    # The context manager closes the file even on the early return below.
    with open(filename) as stations_file:
        for line in stations_file:
            if station_to_find in line:
                return line.split(" ", 1)[0]
    return None


# warning, it is slow, download it only once
download_file_from_ftp("ftp.ncdc.noaa.gov", "/pub/data/ghcn/daily", "ghcnd-stations.txt")

station_to_find = "GUANGZHOU"  # USE CAPS
station_ID = get_station_ID(station_to_find, "ghcnd-stations.txt")
# In [4]: Download the weather data
# The file name is the station ID followed by the '.dly' extension.
weather_data_filename = station_ID + '.dly'

# warning, it is slow, download it only once
download_file_from_ftp(
    "ftp.ncdc.noaa.gov",
    "/pub/data/ghcn/daily/all",
    weather_data_filename,
)
将.dly转换为pandas DataFrame In [7]:
# Convert the downloaded .dly file and call head() on the result.
# NOTE(review): convert_dly_to_dataframe is not defined in the cells shown
# here — confirm it is defined before this cell runs.
df = convert_dly_to_dataframe(weather_data_filename)
df.head()
Out[7]:
|  | YEAR | MONTH | ELEMENT | VALUE1 | VALUE2 | VALUE3 | VALUE4 | VALUE5 | VALUE6 | VALUE7 | ... | VALUE22 | VALUE23 | VALUE24 | VALUE25 | VALUE26 | VALUE27 | VALUE28 | VALUE29 | VALUE30 | VALUE31 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1945 | 11 | TAVG | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 107.0 | NaN |
| 1 | 1945 | 12 | TAVG | 123.0 | 136.0 | 152.0 | 144.0 | 146.0 | 189.0 | 219.0 | ... | 179.0 | 146.0 | 128.0 | 107.0 | 104.0 | 112.0 | 122.0 | 127.0 | 129.0 | 156.0 |
| 2 | 1946 | 1 | TAVG | 150.0 | 150.0 | 123.0 | 117.0 | 112.0 | 121.0 | 125.0 | ... | 146.0 | 153.0 | 173.0 | 196.0 | 211.0 | 212.0 | 218.0 | 201.0 | 156.0 | 131.0 |
| 3 | 1946 | 2 | TAVG | 114.0 | 112.0 | 147.0 | 181.0 | 195.0 | 192.0 | 149.0 | ... | 201.0 | 196.0 | 231.0 | 226.0 | 221.0 | 229.0 | 240.0 | NaN | NaN | NaN |
| 4 | 1946 | 3 | TAVG | 237.0 | 162.0 | 142.0 | 133.0 | 183.0 | 187.0 | 160.0 | ... | 183.0 | 192.0 | 205.0 | 216.0 | 223.0 | 238.0 | 207.0 | 195.0 | 233.0 | 228.0 |