# Silence every warning category so the notebook output stays uncluttered.
# NOTE: this is a blanket filter - it also hides deprecation and pandas
# warnings, so comment it out while debugging.
import warnings
warnings.simplefilter('ignore')
# Libraries
import pandas as pd

# Read the kingc.csv dataset into a DataFrame and preview the first rows.
# The location is an absolute path on the author's machine; adjust it when
# running elsewhere.
csv_path = r'C:\Users\PC\Desktop\Project excel\kingc.csv'
df = pd.read_csv(csv_path)
df.head()
Locator | WaterYear | WQI | Month | ParameterGroup | lng | lat | MostRecentSample | SiteName | StreamName | WQI_binned | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 311 | 1970 | 70.93 | 13 | AnnualScore | -122.2479 | 47.4655 | False | Green River at Interurban | Green | 2 |
1 | 311 | 1971 | 61.14 | 13 | AnnualScore | -122.2479 | 47.4655 | False | Green River at Interurban | Green | 2 |
2 | 311 | 1972 | 74.90 | 13 | AnnualScore | -122.2479 | 47.4655 | False | Green River at Interurban | Green | 2 |
3 | 311 | 1973 | 75.38 | 13 | AnnualScore | -122.2479 | 47.4655 | False | Green River at Interurban | Green | 2 |
4 | 311 | 1974 | 83.90 | 13 | AnnualScore | -122.2479 | 47.4655 | False | Green River at Interurban | Green | 3 |
# Summarise the frame's structure: number of rows, column names,
# non-null counts per column, dtypes and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 319326 entries, 0 to 319325 Data columns (total 11 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Locator 319326 non-null object 1 WaterYear 319326 non-null int64 2 WQI 319326 non-null float64 3 Month 319326 non-null int64 4 ParameterGroup 319326 non-null object 5 lng 319326 non-null float64 6 lat 319326 non-null float64 7 MostRecentSample 319326 non-null bool 8 SiteName 319326 non-null object 9 StreamName 319326 non-null object 10 WQI_binned 319326 non-null int64 dtypes: bool(1), float64(3), int64(3), object(4) memory usage: 24.7+ MB
# Tally how many rows carry each distinct Locator value
# (value_counts lists the most frequent value first).
item_counts = df.loc[:, 'Locator'].value_counts()
print(item_counts)
3106 6981 B319 6772 470 6593 430 6579 474 6564 ... A670 1441 BSE_1MUDMTNRD 1313 B499 946 A687 648 0484A 321 Name: Locator, Length: 78, dtype: int64
# Tally how many rows fall under each distinct ParameterGroup value
# (value_counts lists the most frequent value first).
item_counts = df.loc[:, 'ParameterGroup'].value_counts()
print(item_counts)
Temperature 28819 FecalBacteria 28717 Sediment 28194 Nutrient 28172 Total.Phosphorus 28150 Total.Suspended.Solids 28059 pH 27562 Turbidity 27109 MonthlyScore 26734 Penalty 26362 Total.Nitrogen 20669 DissolvedOxygen 18287 AnnualScore 2492 Name: ParameterGroup, dtype: int64
# Tally how many rows carry each distinct SiteName value
# (value_counts lists the most frequent value first).
item_counts = df.loc[:, 'SiteName'].value_counts()
print(item_counts)
Green River at Starfire Way 6981 Green River at 212th Way SE 6772 Swamp Creek mouth at Bothell Way NE 6593 Lyon Creek mouth at Bothell Way NE 6579 North Creek mouth at Sammamish River Trail 6564 ... Laughin Jacobs mouth at E Lake Sammamish Pkwy 1441 Boise Creek mouth at SE Mud Mountain Rd 1313 Yarrow Creek near 101st Way NE 946 Zackuse Creek 648 Bear Creek upstream of mouth 321 Name: SiteName, Length: 78, dtype: int64
# Tally how many rows carry each distinct StreamName value
# (value_counts lists the most frequent value first).
item_counts = df.loc[:, 'StreamName'].value_counts()
print(item_counts)
Green 25473 Issaquah 17441 Bear 16244 Pipers 13054 Evans 10825 Juanita 10538 North 9717 Swamp 9362 Little Bear 8967 Cedar 8764 Sammamish 8726 Crisp 8523 Lyon 6579 Newaukum 6554 Thornton 6477 May 6461 Soos 6353 Springbrook/Black River 6340 Jenkins 6275 McAleer 5810 Mill 5806 Coal 5802 Little Soos 5796 Mercer Slough 5792 Cottage Lake Creek 5739 Forbes 5736 Covington 5696 Cochran Springs 5542 Venema 4476 Pine Lake 4433 Longfellow 4286 Idylwood 3722 Lewis 3704 Ebright 3561 George Davis 3536 Tibbetts 3379 Judd 2467 Shinglemill 2292 Mileta 2292 Fisher 2288 Gorsuch 2073 Cherry Creek 2003 Harris Creek 1959 Ames Creek 1959 Patterson Creek 1959 Ravensdale 1881 Rock 1871 Griffin Creek 1862 Snoqualmie - South Fork 1861 Snoqualmie - Middle Fork 1861 Snoqualmie - North Fork 1861 Tolt River 1861 Snoqualmie 1860 Raging River 1859 Skykomish 1850 Tahlequah 1570 Laughing Jacobs 1441 Boise 1313 Yarrow 946 Zackuse 648 Name: StreamName, dtype: int64
# Tally how many rows belong to each WaterYear value. The result is ordered
# by row count (largest first), not chronologically.
item_counts = df.loc[:, 'WaterYear'].value_counts()
print(item_counts)
2021 11624 2022 11608 2019 11561 2018 11455 2017 11361 2016 11285 2015 11218 2020 10192 2007 8776 2008 8717 2014 8699 2005 7954 2006 7896 2004 7601 2003 7552 1999 7452 2013 7340 2001 7305 2002 7249 1998 7076 2000 7007 1997 6742 1996 6413 1995 6263 2023 6225 1994 5944 2012 5706 1993 5535 2009 5236 1988 4961 1992 4943 1989 4896 2011 4860 1987 4782 1982 4774 1981 4770 1990 4559 1985 4534 1980 4520 1984 4508 1991 4438 1983 4205 1986 4158 1979 4016 2010 3766 1977 1803 1978 1442 1976 1010 1973 969 1972 928 1974 789 1975 476 1971 149 1970 78 Name: WaterYear, dtype: int64
# Tally how many rows fall into each WQI_binned value
# (value_counts lists the most frequent value first).
item_counts = df.loc[:, 'WQI_binned'].value_counts()
print(item_counts)
3 222573 2 66488 1 30265 Name: WQI_binned, dtype: int64
# Tally how many rows carry each distinct Month value. The result is ordered
# by row count (largest first), not by month number.
item_counts = df.loc[:, 'Month'].value_counts()
print(item_counts)
3 25511 2 25432 12 24918 13 24820 9 24722 7 24615 10 24595 8 24446 1 24406 6 24356 5 24285 4 23841 11 23379 Name: Month, dtype: int64
# Keep only our column of interest, WQI, as a single-column DataFrame.
# This rebinds `df`, so the other columns are no longer reachable through it.
df = df.loc[:, ['WQI']]
df.head()
WQI | |
---|---|
0 | 70.93 |
1 | 61.14 |
2 | 74.90 |
3 | 75.38 |
4 | 83.90 |