!jupyter nbconvert --to html meteorite_landings.ipynb

[NbConvertApp] Converting notebook meteorite_landings.ipynb to html
[NbConvertApp] Writing 1754523 bytes to meteorite_landings.html


# import packages
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.patches as mpatches
import seaborn as sns
import geopandas as gpd


# import dataset
df = pd.read_csv('meteorite-landings.csv')
df.head()


# inspect data types
#df.dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45716 entries, 0 to 45715
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         45716 non-null  object 
 1   id           45716 non-null  int64  
 2   nametype     45716 non-null  object 
 3   recclass     45716 non-null  object 
 4   mass         45585 non-null  float64
 5   fall         45716 non-null  object 
 6   year         45428 non-null  float64
 7   reclat       38401 non-null  float64
 8   reclong      38401 non-null  float64
 9   GeoLocation  38401 non-null  object 
dtypes: float64(4), int64(1), object(5)
memory usage: 3.5+ MB


# Check missing values
percent_missing = df.isnull().sum() * 100 / len(df)
missing_value_df = pd.DataFrame({'percent_missing': percent_missing})
missing_value_df

# There are quite a few missing geolocations(16%).


# convert year from float to integer
df['year'] = df['year'].fillna(0).astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45716 entries, 0 to 45715
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         45716 non-null  object 
 1   id           45716 non-null  int64  
 2   nametype     45716 non-null  object 
 3   recclass     45716 non-null  object 
 4   mass         45585 non-null  float64
 5   fall         45716 non-null  object 
 6   year         45716 non-null  int32  
 7   reclat       38401 non-null  float64
 8   reclong      38401 non-null  float64
 9   GeoLocation  38401 non-null  object 
dtypes: float64(3), int32(1), int64(1), object(5)
memory usage: 3.3+ MB


# inspect data with describe
df.describe()


# clean data
# filter out data with year < 860 and > 2016 
df2 = df[(df['year'] >= 860) & (df['year'] <= 2016)]
df2


# clean data
# filter out data with 0,0 coordinates
df3 = df2.loc[(df2['reclat'] != 0) & (df2['reclong'] != 0)]


mean_mass= round(df3['mass'].mean(),2)
mean_mass

15442.83


df3['mass']=np.where(np.isnan(df3['mass']),mean_mass,df3['mass'])
df3['mass']

D:\anaconda\envs\geo_env\lib\site-packages\pandas\core\frame.py:3607: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)

0            21.0
1           720.0
2        107000.0
3          1914.0
4           780.0
           ...   
45711       172.0
45712        46.0
45713         3.3
45714      2167.0
45715       200.0
Name: mass, Length: 39015, dtype: float64


df3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 39015 entries, 0 to 45715
Data columns (total 10 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   name         39015 non-null  object 
 1   id           39015 non-null  int64  
 2   nametype     39015 non-null  object 
 3   recclass     39015 non-null  object 
 4   mass         39015 non-null  float64
 5   fall         39015 non-null  object 
 6   year         39015 non-null  int32  
 7   reclat       31813 non-null  float64
 8   reclong      31813 non-null  float64
 9   GeoLocation  31813 non-null  object 
dtypes: float64(3), int32(1), int64(1), object(5)
memory usage: 3.1+ MB


# inspecting properties and distribution of variables

# types of recclass
meteor_types = pd.DataFrame(df3['recclass'].value_counts())
meteor_types.tail(50)


# Plotting the counts of the meteor types we can see that are 8 most common types, 
# after which there are many types with relatively low numbers

plt.rcParams['figure.figsize'] = [16,8,]
meteor_types.head(50).plot.bar()

<AxesSubplot:>


# inspecting properties and distribution of variables

# column 'fall'

df3['fall'].value_counts()

# There are many more 'found' meteorites than 'fell' meteorites

Found    37910
Fell      1105
Name: fall, dtype: int64


sns.violinplot(x = 'year', data = df3)

# the vast majority of observations are between 1900 and 2016, which is to be expected

<AxesSubplot:xlabel='year'>


from geopy.geocoders import Nominatim

df3


# create geodataframe, with geometry column from long, lat columns

gdf = gpd.GeoDataFrame(df3, geometry = gpd.points_from_xy(df3['reclong'], df3['reclat']))
gdf.head()


#check whether the geometry column is the right datatype. It should be a geopandas geoseries

type(gdf.geometry)

geopandas.geoseries.GeoSeries


# import world map geodataframe. The lowres worldmap dataset from Natural Earth can be imported directly from geopandas

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
world.rename(columns = {'name': 'country'}, inplace = True)
world.head()


# plot world map with country borders

plt.rcParams['figure.figsize'] = [16,8]
world.plot(facecolor = 'silver', edgecolor = 'grey');


# Plot layer of meteorites

gdf.plot(marker='*', color='blue', markersize=5);


# There's a weird point that must be a mistake. longitude is larger than 300
# find entry in df 

gdf.loc[(df3['reclong'] > 300)]


# remove entry

gdf = gdf.drop([22946], axis=0)


# Plot meteorites again to check whether the point is gone

gdf.plot(marker='*', color='blue', markersize=5);


# Before combining maps, ensure they share a common CRS (coordinate reference system) so that they will spatially align

# first assign a CRS to the gdf (a common one is WGS84 latitude-longitude coordinate system)

gdf.crs = "EPSG:4326"

# then, align crs to world crs

gdf = gdf.to_crs(world.crs)


# Plotting the world map data and the meteorite points in two layers:

base = world.plot(color='cornsilk', edgecolor='grey')
gdf.plot(ax=base, marker='o', color='blue', markersize=2);


# Create df total meteorites per country
# first, spatial join meteorite gdf and world gdf to assign countries to meteorite coordinates

gdf_countries = gpd.sjoin(world, gdf, how="right", op="contains")
gdf_countries.head()


# check shape

gdf_countries.shape

(39014, 17)


# aggregate total number of meteorites per country

by_country = gdf_countries.groupby('country')[['id']].count()
by_country = by_country.reset_index()
by_country.rename(columns = {'id': 'meteorite_count'}, inplace = True)              
by_country.iloc[40:45]


# make new df with all country data, even if no meteorite data is available (otherwise there will be gaps in the worldmap)

countries_count = pd.merge(world, by_country, on = 'country', how = 'left')
countries_count.head()


# Fill NaN values for meteorite count with zeroes

countries_count.fillna(value = 0, inplace = True)
countries_count.head()


# make choropleth map with number of meteorites per country indicated with colours
# see: https://geopandas.org/mapping.html
# include legend

plt.rcParams['figure.figsize'] = [20,12]
fig, ax = plt.subplots(1, 1)
countries_count.plot(column='meteorite_count', ax=ax, legend=True, cmap='summer_r',legend_kwds={'label': "Found meteorites by country",'orientation': "horizontal"})

<AxesSubplot:>


# plot choropleth map with scheme option with user defined settings

plt.rcParams['figure.figsize'] = [20, 10]
fig, ax = plt.subplots(1, 1)

countries_count.plot(column='meteorite_count', legend=True, ax = ax, cmap='summer_r', scheme='user_defined', classification_kwds = {'bins':[0, 5, 25, 250, 2500, 25000]})

cmap = cm.get_cmap('summer_r')
patch1 = mpatches.Patch(color=cmap(0.0), label = '0')
patch2 = mpatches.Patch(color=cmap(0.2), label = '1 - 5')
patch3 = mpatches.Patch(color=cmap(0.4), label = '5 - 25')
patch4 = mpatches.Patch(color=cmap(0.6), label = '25 - 250')
patch5 = mpatches.Patch(color=cmap(0.8), label = '250 - 2500')
patch6 = mpatches.Patch(color=cmap(1.0), label = '2500 - 25000')

plt.legend(handles = [patch1, patch2, patch3, patch4, patch5, patch6], prop = {'size':12},loc = 0)

plt.show()


# create new dataframe without Antarctica data

countries_count2 = countries_count[countries_count['continent'] != 'Antarctica']


# plot the map

plt.rcParams['figure.figsize'] = [20, 10]
fig, ax = plt.subplots(1, 1)

countries_count2.plot(column='meteorite_count', legend=True, ax = ax, cmap='summer_r', scheme='user_defined', classification_kwds = {'bins':[0, 5, 25, 250, 1000, 5000]})

cmap = cm.get_cmap('summer_r')
patch1 = mpatches.Patch(color=cmap(0.0), label = '0')
patch2 = mpatches.Patch(color=cmap(0.2), label = '1 - 5')
patch3 = mpatches.Patch(color=cmap(0.4), label = '5 - 25')
patch4 = mpatches.Patch(color=cmap(0.6), label = '25 - 250')
patch5 = mpatches.Patch(color=cmap(0.8), label = '250 - 1000')
patch6 = mpatches.Patch(color=cmap(1.0), label = '1000 - 5000')

plt.legend(handles = [patch1, patch2, patch3, patch4, patch5, patch6], prop = {'size':12})

plt.show()


# import high-res worldmap data

world2 = gpd.read_file('./data/ne_50m_admin_0_countries.shp')
world2.plot()

<AxesSubplot:>


# inspect worlddata gdf

world2.head()


# select map shape INDIA

india = world2.loc[world2['SOVEREIGNT'] == 'India']
india


# select meteorite data for india

india_count = gdf_countries.loc[gdf_countries['country'] == 'India']
india_count.head()


# plot two layer map for India

base = india.plot(color='cornsilk', edgecolor='black')
india_count.plot(ax=base, marker='o', color='red', markersize=5);


# counting the top 10 meteorite types in the india data

india_top10 = pd.DataFrame(india_count['recclass'].value_counts().head(10))
india_top10 = india_top10.reset_index()
india_top10


# Select meteorite data for india, top 10 most common meteorites

india_count10 = india_count.loc[india_count['recclass'].isin(india_top10['index'])]
india_count10.head()


# plot them on the map, colourcoded

base = india.plot(color='cornsilk', edgecolor='black')
india_count10.plot(ax=base, marker='o', column='recclass', legend = True, markersize=40);


# there are a lot of points in the same spot, making it difficult to see where the CM2 type is
# to visualize the rare meteorite type better, we select and plot only this type

india_CM2 = india_count.loc[india_count['recclass'] == 'CM2']
india_CM2.head()


# plot CM2 on map

base = india.plot(color='cornsilk', edgecolor='black')
india_CM2.plot(ax=base, marker='o', color = 'blue', markersize=80)

<AxesSubplot:>

	name	id	nametype	recclass	mass	fall	year	reclat	reclong	GeoLocation
0	Aachen	1	Valid	L5	21.0	Fell	1880.0	50.77500	6.08333	(50.775000, 6.083330)
1	Aarhus	2	Valid	H6	720.0	Fell	1951.0	56.18333	10.23333	(56.183330, 10.233330)
2	Abee	6	Valid	EH4	107000.0	Fell	1952.0	54.21667	-113.00000	(54.216670, -113.000000)
3	Acapulco	10	Valid	Acapulcoite	1914.0	Fell	1976.0	16.88333	-99.90000	(16.883330, -99.900000)
4	Achiras	370	Valid	L6	780.0	Fell	1902.0	-33.16667	-64.95000	(-33.166670, -64.950000)

	id	mass	year	reclat	reclong
count	45716.000000	4.558500e+04	45716.000000	38401.000000	38401.000000
mean	26889.735104	1.327808e+04	1979.224495	-39.122580	61.074319
std	16860.683030	5.749889e+05	159.904386	46.378511	80.647298
min	1.000000	0.000000e+00	0.000000	-87.366670	-165.433330
25%	12688.750000	7.200000e+00	1987.000000	-76.714240	0.000000
50%	24261.500000	3.260000e+01	1998.000000	-71.500000	35.666670
75%	40656.750000	2.026000e+02	2003.000000	0.000000	157.166670
max	57458.000000	6.000000e+07	2501.000000	81.166670	354.473330

	name	id	nametype	recclass	mass	fall	year	reclat	reclong	GeoLocation
0	Aachen	1	Valid	L5	21.0	Fell	1880	50.77500	6.08333	(50.775000, 6.083330)
1	Aarhus	2	Valid	H6	720.0	Fell	1951	56.18333	10.23333	(56.183330, 10.233330)
2	Abee	6	Valid	EH4	107000.0	Fell	1952	54.21667	-113.00000	(54.216670, -113.000000)
3	Acapulco	10	Valid	Acapulcoite	1914.0	Fell	1976	16.88333	-99.90000	(16.883330, -99.900000)
4	Achiras	370	Valid	L6	780.0	Fell	1902	-33.16667	-64.95000	(-33.166670, -64.950000)
...	...	...	...	...	...	...	...	...	...	...
45711	Zillah 002	31356	Valid	Eucrite	172.0	Found	1990	29.03700	17.01850	(29.037000, 17.018500)
45712	Zinder	30409	Valid	Pallasite, ungrouped	46.0	Found	1999	13.78333	8.96667	(13.783330, 8.966670)
45713	Zlin	30410	Valid	H4	3.3	Found	1939	49.25000	17.66667	(49.250000, 17.666670)
45714	Zubkovsky	31357	Valid	L6	2167.0	Found	2003	49.78917	41.50460	(49.789170, 41.504600)
45715	Zulu Queen	30414	Valid	L3.7	200.0	Found	1976	33.98333	-115.68333	(33.983330, -115.683330)

	recclass
H/L3.6	1
CO3.7	1
L/LL6-an	1
Iron?	1
H3.8-5	1
R3.8-5	1
L3.9/4	1
H/L3.9	1
L/LL3.10	1
L~3	1
H(L)3-an	1
L-melt breccia	1
CV2	1
L3.2-3.5	1
EL7	1
CH/CBb	1
H/L~4	1
LL3.7-6	1
H3.7/3.8	1
L3.7/3.8	1
L3.5-3.9	1
L3.3-3.5	1
LL3/4	1
L3.0-3.7	1
K	1
Acapulcoite/lodranite	1
L3.10	1
L/LL4/5	1
L3.8-an	1
C5/6-ung	1
H3.8-4	1
H6	1
H3	1
CR1	1
H3.4/3.5	1
Mesosiderite?	1
L3.5-3.7	1
Iron, IIAB-an	1
L3.3-3.7	1
L3.2-3.6	1
L3.3-3.6	1
C2	1
C4/5	1
L3.5-5	1
L/LL(?)3	1
H4(?)	1
Relict iron	1
EL4/5	1
L5-7	1
L/LL	1

	name	id	nametype	recclass	mass	fall	year	reclat	reclong	GeoLocation
0	Aachen	1	Valid	L5	21.0	Fell	1880	50.77500	6.08333	(50.775000, 6.083330)
1	Aarhus	2	Valid	H6	720.0	Fell	1951	56.18333	10.23333	(56.183330, 10.233330)
2	Abee	6	Valid	EH4	107000.0	Fell	1952	54.21667	-113.00000	(54.216670, -113.000000)
3	Acapulco	10	Valid	Acapulcoite	1914.0	Fell	1976	16.88333	-99.90000	(16.883330, -99.900000)
4	Achiras	370	Valid	L6	780.0	Fell	1902	-33.16667	-64.95000	(-33.166670, -64.950000)
...	...	...	...	...	...	...	...	...	...	...
45711	Zillah 002	31356	Valid	Eucrite	172.0	Found	1990	29.03700	17.01850	(29.037000, 17.018500)
45712	Zinder	30409	Valid	Pallasite, ungrouped	46.0	Found	1999	13.78333	8.96667	(13.783330, 8.966670)
45713	Zlin	30410	Valid	H4	3.3	Found	1939	49.25000	17.66667	(49.250000, 17.666670)
45714	Zubkovsky	31357	Valid	L6	2167.0	Found	2003	49.78917	41.50460	(49.789170, 41.504600)
45715	Zulu Queen	30414	Valid	L3.7	200.0	Found	1976	33.98333	-115.68333	(33.983330, -115.683330)

Meteorite landings on Earth¶

1. Introduction¶

Aim of this project¶

About the data¶

2. Import Packages and data¶

3. Inspecting and cleaning the data¶

Types of meteorite¶

4. Plotting the meteorites on a world map¶

Insights from this map¶

5. Creating a choropleth map of meteorite numbers by country¶

Insights from this map¶

Insights from this map¶

Insights from this map¶

6. Zooming in on one country: India¶

Insights from this map¶

Let's look at the distribution of the top 10 meteorites in india.¶

7. Conclusions¶

	pop_est	continent	country	iso_a3	gdp_md_est	geometry
0	920938	Oceania	Fiji	FJI	8374.0	MULTIPOLYGON (((180.00000 -16.06713, 180.00000...
1	53950935	Africa	Tanzania	TZA	150600.0	POLYGON ((33.90371 -0.95000, 34.07262 -1.05982...
2	603253	Africa	W. Sahara	ESH	906.5	POLYGON ((-8.66559 27.65643, -8.66512 27.58948...
3	35623680	North America	Canada	CAN	1674000.0	MULTIPOLYGON (((-122.84000 49.00000, -122.9742...
4	326625791	North America	United States of America	USA	18560000.0	MULTIPOLYGON (((-122.84000 49.00000, -120.0000...

	index_left	pop_est	continent	country	iso_a3	gdp_md_est	name	id	nametype	recclass	mass	fall	year	reclat	reclong	GeoLocation	geometry
0	129.0	11491346.0	Europe	Belgium	BEL	508600.0	Aachen	1	Valid	L5	21.0	Fell	1880	50.77500	6.08333	(50.775000, 6.083330)	POINT (6.08333 50.77500)
1	142.0	5605948.0	Europe	Denmark	DNK	264800.0	Aarhus	2	Valid	H6	720.0	Fell	1951	56.18333	10.23333	(56.183330, 10.233330)	POINT (10.23333 56.18333)
2	3.0	35623680.0	North America	Canada	CAN	1674000.0	Abee	6	Valid	EH4	107000.0	Fell	1952	54.21667	-113.00000	(54.216670, -113.000000)	POINT (-113.00000 54.21667)
3	27.0	124574795.0	North America	Mexico	MEX	2307000.0	Acapulco	10	Valid	Acapulcoite	1914.0	Fell	1976	16.88333	-99.90000	(16.883330, -99.900000)	POINT (-99.90000 16.88333)
4	9.0	44293293.0	South America	Argentina	ARG	879400.0	Achiras	370	Valid	L6	780.0	Fell	1902	-33.16667	-64.95000	(-33.166670, -64.950000)	POINT (-64.95000 -33.16667)

	featurecla	scalerank	LABELRANK	SOVEREIGNT	SOV_A3	LEVEL	TYPE	ADMIN	ADM0_A3	...	NAME_KO	NAME_NL	NAME_PL	NAME_PT	NAME_RU	NAME_SV	NAME_TR	NAME_VI	NAME_ZH	geometry
0	Admin-0 country	1	3	Zimbabwe	ZWE	2	Sovereign country	Zimbabwe	ZWE	...	짐바브웨	Zimbabwe	Zimbabwe	Zimbábue	Зимбабве	Zimbabwe	Zimbabve	Zimbabwe	辛巴威	POLYGON ((31.28789 -22.40205, 31.19727 -22.344...
1	Admin-0 country	1	3	Zambia	ZMB	2	Sovereign country	Zambia	ZMB	...	잠비아	Zambia	Zambia	Zâmbia	Замбия	Zambia	Zambiya	Zambia	赞比亚	POLYGON ((30.39609 -15.64307, 30.25068 -15.643...
2	Admin-0 country	1	3	Yemen	YEM	2	Sovereign country	Yemen	YEM	...	예멘	Jemen	Jemen	Iémen	Йемен	Jemen	Yemen	Yemen	也门	MULTIPOLYGON (((53.08564 16.64839, 52.58145 16...
3	Admin-0 country	3	2	Vietnam	VNM	2	Sovereign country	Vietnam	VNM	...	베트남	Vietnam	Wietnam	Vietname	Вьетнам	Vietnam	Vietnam	Việt Nam	越南	MULTIPOLYGON (((104.06396 10.39082, 104.08301 ...
4	Admin-0 country	5	3	Venezuela	VEN	2	Sovereign country	Venezuela	VEN	...	베네수엘라	Venezuela	Wenezuela	Venezuela	Венесуэла	Venezuela	Venezuela	Venezuela	委內瑞拉	MULTIPOLYGON (((-60.82119 9.13838, -60.94141 9...

	index_left	pop_est	continent	country	iso_a3	gdp_md_est	name	id	nametype	recclass	mass	fall	year	reclat	reclong	GeoLocation	geometry
14	98.0	1.281936e+09	Asia	India	IND	8721000.0	Akbarpur	427	Valid	H4	1800.0	Fell	1838	29.71667	77.95000	(29.716670, 77.950000)	POINT (77.95000 29.71667)
32	98.0	1.281936e+09	Asia	India	IND	8721000.0	Ambapur Nagla	2290	Valid	H5	6400.0	Fell	1895	27.66667	78.25000	(27.666670, 78.250000)	POINT (78.25000 27.66667)
33	98.0	1.281936e+09	Asia	India	IND	8721000.0	Andhara	2294	Valid	Stone-uncl	2700.0	Fell	1880	26.58333	85.56667	(26.583330, 85.566670)	POINT (85.56667 26.58333)
36	98.0	1.281936e+09	Asia	India	IND	8721000.0	Andura	2298	Valid	H6	17900.0	Fell	1939	20.88333	76.86667	(20.883330, 76.866670)	POINT (76.86667 20.88333)
52	98.0	1.281936e+09	Asia	India	IND	8721000.0	Atarra	4883	Valid	L4	1280.0	Fell	1920	25.25417	80.62500	(25.254170, 80.625000)	POINT (80.62500 25.25417)

	index	recclass
0	L6	24
1	H6	20
2	H5	14
3	L5	10
4	LL6	6
5	Stone-uncl	4
6	H4	3
7	Eucrite-mmict	3
8	CM2	3
9	Ureilite	3

	country	meteorite_count
40	Greenland	3
41	Guatemala	1
42	Honduras	1
43	Hungary	7
44	India	132