# Get some resources
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


%matplotlib notebook


# Load the 2022 data
df2022 = pd.read_csv('https://gml.noaa.gov/aftp/data/meteorology/in-situ/brw/met_brw_insitu_1_obop_hour_2022.txt',
                        delimiter=r"\s+",header=None, 
                        usecols=[0,1,2,3,4,5,6,9],
                        names=['station','year','month','day','hour','winddirection','windspeed','temperature']) 

# Print some information about the dataframe
display(df2022)

# Plotting the temperatures as a function of month
plt.figure()
plt.plot(df2022['month'], df2022['temperature'], 'kx', label='2022')
plt.xlabel('month')
plt.ylabel('Temperature (C)')
plt.title('Hourly temperatures, by month, for year 2022')
plt.grid()
plt.legend()

<matplotlib.legend.Legend at 0x7fc2b767f760>


### BEGIN SOLUTION
df1977 = pd.read_csv('https://gml.noaa.gov/aftp/data/meteorology/in-situ/brw/met_brw_insitu_1_obop_hour_1977.txt',
                        delimiter=r"\s+",header=None, 
                        usecols=[0,1,2,3,4,5,6,9],
                        names=['station','year','month','day','hour','winddirection','windspeed','temperature']) 

# Print some information about the dataframe
display(df1977)

# Plot the temperatures as a function of month
plt.figure()
plt.plot(df1977['month'], df1977['temperature'], 'b+', label='1977')
plt.xlabel('month')
plt.ylabel('Temperature (C)')
plt.title('Hourly temperatures, by month, for year 1977')
plt.legend()
plt.grid()
### END SOLUTION


# Find bad temperatures
badindices = df2022[ df2022['temperature'] == -999.9 ].index
print('I am dropping this many missing data points: ', len(badindices))
df2022.drop(badindices,inplace=True)

# Plot
plt.figure()
plt.plot(df2022['month'],df2022['temperature'], 'kx', label='2022')
plt.xlabel('month')
plt.ylabel('Temperature (C)')
plt.title('Hourly temperatures, by month, for year 2022')
plt.grid()
plt.legend()

I am dropping this many missing data points:  2584

<matplotlib.legend.Legend at 0x7fc2b52d0bb0>


### BEGIN SOLUTION
badindices = df1977[ df1977['temperature'] == -999.9 ].index
print('I am dropping this many missing data points: ', len(badindices))
df1977.drop(badindices,inplace=True)

plt.figure()
plt.plot(df1977['month'], df1977['temperature'], 'b+', label='1977')
plt.xlabel('month')
plt.ylabel('Temperature (C)')
plt.title('Hourly temperatures, by month, for year 1977')
plt.grid()
plt.legend()
### END SOLUTION

I am dropping this many missing data points:  606

<matplotlib.legend.Legend at 0x7fc2b53313c0>


### BEGIN SOLUTION
plt.figure()
plt.plot(df2022['month'], df2022['temperature'], 'kx', label='2022')
plt.plot(df1977['month'], df1977['temperature'], 'b+', label='1977')
plt.xlabel('month')
plt.ylabel('Temperature (C)')
plt.title('Hourly temperatures, by month, for years 1977 and 2022')
plt.grid()
plt.legend()

### END SOLUTION

<matplotlib.legend.Legend at 0x7fc2b52d1a20>


# Specify which month we want to focus on
month_of_interest = 3


# Extract data belonging to the month of interest
df2022_month_of_interest = df2022[df2022['month'] == month_of_interest]
display(df2022_month_of_interest) 

# Open a figure
plt.figure()

# Convert to numpy arrays and plot the day and temperature
plt.plot(df2022_month_of_interest['day'], df2022_month_of_interest['temperature'], 'kx')
plt.title('Hourly temperatures, by day, for month '+str(month_of_interest)+', year 2022')
plt.xlabel('day')
plt.ylabel('Temperature (C)')
plt.grid()


### BEGIN SOLUTION
# Extract data belonging to the month of interest
df1977_month_of_interest = df1977[df1977['month'] == month_of_interest]
display(df1977_month_of_interest) 

# Plotting the data
plt.figure()
plt.plot(df1977_month_of_interest['day'], df1977_month_of_interest['temperature'], 'b+')
plt.title('Hourly temperatures, by day, for month '+str(month_of_interest)+', year 1977')
plt.xlabel('day')
plt.ylabel('Temperature (C)')
plt.grid()
### END SOLUTION


# Get the histogram for the modern dataset
h2022_month_of_interest, e2022_month_of_interest = np.histogram(df2022_month_of_interest['temperature'],density=True)

# Check on some array lengths
print(np.size(h2022_month_of_interest))
print(np.size(e2022_month_of_interest))
print(np.size(e2022_month_of_interest[0:-1]))

# Plot the histogram 
plt.figure()
plt.plot(e2022_month_of_interest[0:-1],h2022_month_of_interest,'k')
plt.title('Hourly temperature probability density for month '+str(month_of_interest)+', year 2022')
plt.xlabel('Temperature (C)')
plt.grid()

10
11
10


### BEGIN SOLUTION

h1977_month_of_interest, e1977_month_of_interest = np.histogram(df1977_month_of_interest['temperature'],density=True)
plt.figure()
plt.plot(e2022_month_of_interest[0:-1],h2022_month_of_interest,'k',label='2022')
plt.plot(e1977_month_of_interest[0:-1],h1977_month_of_interest,'b',label='1977')
plt.title('Hourly temperature probability density for month '+str(month_of_interest))
plt.xlabel('Temperature (C)')
plt.grid()
plt.legend()

### END SOLUTION

<matplotlib.legend.Legend at 0x7fc2b52943d0>


# Modern data: load three years of data as separate dataframes
df2020 = pd.read_csv('https://gml.noaa.gov/aftp/data/meteorology/in-situ/brw/met_brw_insitu_1_obop_hour_2020.txt', 
                        delimiter=r"\s+",header=None, 
                        usecols=[0,1,2,3,5,6,9], 
                        names=['station','year','month','day','winddirection','windspeed','temperature']) 

df2021 = pd.read_csv('https://gml.noaa.gov/aftp/data/meteorology/in-situ/brw/met_brw_insitu_1_obop_hour_2021.txt', 
                        delimiter=r"\s+",header=None, 
                        usecols=[0,1,2,3,5,6,9], 
                        names=['station','year','month','day','winddirection','windspeed','temperature']) 

# Now join them and display the result
df2020s = pd.concat( [df2020, df2021, df2022])
display(df2020s)


### BEGIN SOLUTION
df1978 = pd.read_csv('https://gml.noaa.gov/aftp/data/meteorology/in-situ/brw/met_brw_insitu_1_obop_hour_1978.txt', 
                        delimiter=r"\s+",header=None, 
                        usecols=[0,1,2,3,5,6,9], 
                        names=['station','year','month','day','winddirection','windspeed','temperature']) 

df1979 = pd.read_csv('https://gml.noaa.gov/aftp/data/meteorology/in-situ/brw/met_brw_insitu_1_obop_hour_1979.txt', 
                        delimiter=r"\s+",header=None, 
                        usecols=[0,1,2,3,5,6,9], 
                        names=['station','year','month','day','winddirection','windspeed','temperature']) 

# Now join and display the result
df1970s = pd.concat( [df1977, df1978, df1979])
display(df1970s)
### END SOLUTION


### BEGIN SOLUTION
badindices = df1970s[ df1970s['temperature'] == -999.9 ].index
print('I am dropping this many missing data points: ', len(badindices))
df1970s.drop(badindices,inplace=True)

badindices = df2020s[ df2020s['temperature'] == -999.9 ].index
print('I am dropping this many missing data points: ', len(badindices))
df2020s.drop(badindices,inplace=True)

# # Plot the data 
# plt.figure()
# plt.plot(df2022['month'], df2022['temperature'], 'kx', label='2022')
# plt.plot(df1977['month'], df1977['temperature'], 'b+', label='1977')
# plt.xlabel('month')
# plt.ylabel('Temperature (C)')
# plt.title('Time series for years 1977 and 2022')
# plt.grid()
# plt.legend()

# Plot the histograms
df2020s_month_of_interest = df2020s[df2020s['month'] == month_of_interest]
df1970s_month_of_interest = df1970s[df1970s['month'] == month_of_interest]

h2020s_month_of_interest, e2020s_month_of_interest = np.histogram(df2020s_month_of_interest['temperature'],density=True)
h1970s_month_of_interest, e1970s_month_of_interest = np.histogram(df1970s_month_of_interest['temperature'],density=True)

plt.figure()
plt.plot(e2020s_month_of_interest[0:-1],h2020s_month_of_interest,'k',label='2020s')
plt.plot(e1970s_month_of_interest[0:-1],h1970s_month_of_interest,'b',label='1970s')
plt.title('Hourly temperature probability densities for month '+str(month_of_interest))
plt.xlabel('Temperature (C)')
plt.grid()
plt.legend()

# Not part of the solution, but part of the next answer, about global temperatures 
deltaT_BRW_March = 5
# The NOAA website given below says median values of 0.22 in 1978, and 1.11 in 2022
deltaT_global_March = 1.11 - 0.22
print(deltaT_global_March)
print('Ratio = ', deltaT_BRW_March/deltaT_global_March)

### END SOLUTION

I am dropping this many missing data points:  188
I am dropping this many missing data points:  29

0.8900000000000001
Ratio =  5.617977528089887


### BEGIN SOLUTION

Option = 1
badindices = df1970s[ df1970s['winddirection'] == -999.0 ].index
print('I am dropping this many missing data points: ', len(badindices))
df1970s.drop(badindices,inplace=True)

badindices = df2020s[ df2020s['winddirection'] == -999.0 ].index
print('I am dropping this many missing data points: ', len(badindices))
df2020s.drop(badindices,inplace=True)

# Plot the data 
plt.figure()
plt.plot(df1970s['month'], df1970s['winddirection'], 'b+', label='1970s')
plt.plot(df2020s['month'], df2020s['winddirection'], 'kx', label='2020s')
plt.xlabel('month')
plt.ylabel('Wind direction')
plt.title('Time series for years 1970s and 2020s')
plt.grid()
plt.legend()

# Plot the histograms
df2020s_month_of_interest = df2020s[df2020s['month'] == month_of_interest]
df1970s_month_of_interest = df1970s[df1970s['month'] == month_of_interest]

h2020s_month_of_interest, e2020s_month_of_interest = np.histogram(df2020s_month_of_interest['winddirection'],density=True)
h1970s_month_of_interest, e1970s_month_of_interest = np.histogram(df1970s_month_of_interest['winddirection'],density=True)

plt.figure()
plt.plot(e2020s_month_of_interest[0:-1],h2020s_month_of_interest,'k',label='2020s')
plt.plot(e1970s_month_of_interest[0:-1],h1970s_month_of_interest,'b',label='1970s')
plt.title('Option 1: Wind direction probability densities for month '+str(month_of_interest))
plt.xlabel('Degrees from North')
plt.grid()
plt.legend()

Option = 2
badindices = df1970s[ df1970s['windspeed'] == -99.9 ].index
print('I am dropping this many missing data points: ', len(badindices))
df1970s.drop(badindices,inplace=True)

badindices = df2020s[ df2020s['windspeed'] == -999.9 ].index
print('I am dropping this many missing data points: ', len(badindices))
df2020s.drop(badindices,inplace=True)

# Plot the data 
plt.figure()
plt.plot(df1970s['month'], df1970s['windspeed'], 'b+', label='1970s')
plt.plot(df2020s['month'], df2020s['windspeed'], 'kx', label='2020s')
plt.xlabel('month')
plt.ylabel('windspeed (m/s)')
plt.title('Option 2: wind speed probability densities for month '+str(month_of_interest))
plt.grid()
plt.legend()

# Plot the histograms
df2020s_month_of_interest = df2020s[df2020s['month'] == month_of_interest]
df1970s_month_of_interest = df1970s[df1970s['month'] == month_of_interest]

h2020s_month_of_interest, e2020s_month_of_interest = np.histogram(df2020s_month_of_interest['windspeed'],density=True)
h1970s_month_of_interest, e1970s_month_of_interest = np.histogram(df1970s_month_of_interest['windspeed'],density=True)

plt.figure()
plt.plot(e2020s_month_of_interest[0:-1],h2020s_month_of_interest,'k',label='2020s')
plt.plot(e1970s_month_of_interest[0:-1],h1970s_month_of_interest,'b',label='1970s')
plt.title('Wind speed histograms for month '+str(month_of_interest))
plt.xlabel('Wind speed (m/s)')
plt.grid()
plt.legend()

### END SOLUTION

I am dropping this many missing data points:  358
I am dropping this many missing data points:  73

I am dropping this many missing data points:  0
I am dropping this many missing data points:  0

<matplotlib.legend.Legend at 0x7fc2b5055d20>

	station	year	month	day	hour	winddirection	windspeed	temperature
0	BRW	2022	1	1	0	328	3.5	-26.6
1	BRW	2022	1	1	1	338	3.6	-26.8
2	BRW	2022	1	1	2	345	3.5	-27.3
3	BRW	2022	1	1	3	349	3.8	-26.7
4	BRW	2022	1	1	4	350	3.8	-26.7
...	...	...	...	...	...	...	...	...
8634	BRW	2022	12	31	19	194	1.2	-17.0
8635	BRW	2022	12	31	20	182	1.3	-17.3
8636	BRW	2022	12	31	21	170	1.6	-17.6
8637	BRW	2022	12	31	22	197	2.1	-18.5
8638	BRW	2022	12	31	23	196	1.5	-17.8

	station	year	month	day	hour	winddirection	windspeed	temperature
0	BRW	1977	1	1	0	72	6.3	-20.1
1	BRW	1977	1	1	1	72	6.3	-20.3
2	BRW	1977	1	1	2	70	6.7	-20.5
3	BRW	1977	1	1	3	76	6.7	-20.5
4	BRW	1977	1	1	4	73	6.7	-20.4
...	...	...	...	...	...	...	...	...
8755	BRW	1977	12	31	19	145	3.6	-16.4
8756	BRW	1977	12	31	20	66	1.3	-14.9
8757	BRW	1977	12	31	21	75	2.2	-15.6
8758	BRW	1977	12	31	22	161	6.7	-17.3
8759	BRW	1977	12	31	23	130	5.4	-17.7

	station	year	month	day	hour	winddirection	windspeed	temperature
1417	BRW	1977	3	1	1	350	1.3	-30.0
1418	BRW	1977	3	1	2	300	0.9	-29.4
1419	BRW	1977	3	1	3	35	4.0	-28.8
1420	BRW	1977	3	1	4	7	4.5	-28.9
1421	BRW	1977	3	1	5	353	4.5	-30.2
...	...	...	...	...	...	...	...	...
2132	BRW	1977	3	30	20	60	8.5	-25.2
2133	BRW	1977	3	30	21	58	8.0	-24.1
2134	BRW	1977	3	30	22	53	7.6	-24.3
2135	BRW	1977	3	30	23	50	6.7	-24.4
2159	BRW	1977	3	31	23	-999	-99.9	-22.7

	station	year	month	day	winddirection	windspeed	temperature	hour
0	BRW	2020	1	1	5	7.1	-25.5	NaN
1	BRW	2020	1	1	0	7.0	-25.8	NaN
2	BRW	2020	1	1	3	7.6	-25.3	NaN
3	BRW	2020	1	1	4	7.6	-25.2	NaN
4	BRW	2020	1	1	15	7.5	-25.1	NaN
...	...	...	...	...	...	...	...	...
8634	BRW	2022	12	31	194	1.2	-17.0	19.0
8635	BRW	2022	12	31	182	1.3	-17.3	20.0
8636	BRW	2022	12	31	170	1.6	-17.6	21.0
8637	BRW	2022	12	31	197	2.1	-18.5	22.0
8638	BRW	2022	12	31	196	1.5	-17.8	23.0

	station	year	month	day	hour	winddirection	windspeed	temperature
0	BRW	1977	1	1	0.0	72	6.3	-20.1
1	BRW	1977	1	1	1.0	72	6.3	-20.3
2	BRW	1977	1	1	2.0	70	6.7	-20.5
3	BRW	1977	1	1	3.0	76	6.7	-20.5
4	BRW	1977	1	1	4.0	73	6.7	-20.4
...	...	...	...	...	...	...	...	...
8755	BRW	1979	12	31	NaN	210	3.6	-30.0
8756	BRW	1979	12	31	NaN	210	3.6	-31.5
8757	BRW	1979	12	31	NaN	210	3.6	-31.5
8758	BRW	1979	12	31	NaN	216	3.6	-31.0
8759	BRW	1979	12	31	NaN	204	3.6	-33.5

Neshyba & Deloya Garcia, 2023¶

ClimateStats¶

Overview¶

Learning goals¶

Interpreting metadata¶

BEGIN SOLUTION¶

END SOLUTION¶

Loading (and plotting) time series of hourly temperature data, using Pandas¶

Pause for analysis¶

Quality control¶

Your turn¶

Plotting hourly temperatures on the same graph¶

Pause for analysis¶

BEGIN SOLUTION¶

END SOLUTION¶

Focusing on a month of interest¶

Examining hourly temperatures by day of month¶

Your turn¶

Histograms of hourly temperatures¶

Bins and bin edges in a probability density¶

Your turn¶

Pause for analysis¶

BEGIN SOLUTION¶

END SOLUTION¶

Combining dataframes from multiple years¶

Your turn¶

Comparisons¶

Pause for analysis¶

BEGIN SOLUTION¶

END SOLUTION¶

Extensions¶

Pause for analysis¶

BEGIN SOLUTION¶

END SOLUTION¶

Refresh/save/validate¶

Close/submit/logout¶

	station	year	month	day	hour	winddirection	windspeed	temperature
1416	BRW	2022	3	1	0	61	2.4	-6.7
1417	BRW	2022	3	1	1	59	2.8	-8.3
1418	BRW	2022	3	1	2	65	2.3	-8.9
1419	BRW	2022	3	1	3	78	2.9	-10.5
1420	BRW	2022	3	1	4	65	3.8	-10.7
...	...	...	...	...	...	...	...	...
2155	BRW	2022	3	31	19	80	8.6	-25.5
2156	BRW	2022	3	31	20	80	8.9	-24.8
2157	BRW	2022	3	31	21	79	9.5	-24.2
2158	BRW	2022	3	31	22	80	9.5	-23.4
2159	BRW	2022	3	31	23	78	9.2	-22.9