Skip to content

Commit

Permalink
Refine subdivision process
Browse files Browse the repository at this point in the history
  • Loading branch information
sabas committed Mar 15, 2024
1 parent 1b2c902 commit 3648bfa
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 38 deletions.
47 changes: 21 additions & 26 deletions data/subdivision-codes.csv
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
SUCountry,SUCode,SUName,SUType
AD,02,Canillo,
AD,02,Canillo,Parish
AD,03,Encamp,Parish
AD,04,La Massana,Parish
AD,05,Ordino,Parish
Expand Down Expand Up @@ -172,7 +172,7 @@ AZ,LAN,Lənkəran,Rayon
AZ,LER,Lerik,Rayon
AZ,MAS,Masallı,Rayon
AZ,MI,Mingəçevir,Municipality
AZ,,Naftalan,Municipality
AZ,NA,Naftalan,Municipality
AZ,NEF,Neftçala,Rayon
AZ,NV,Naxçıvan,Municipality
AZ,NX,Naxçıvan,Autonomous republic
Expand Down Expand Up @@ -1043,7 +1043,7 @@ ES,M,Madrid,Province
ES,MA,Málaga,Province
ES,ML,Melilla,Autonomous city in North Africa
ES,MU,Murcia,Province
ES,,Navarra / Nafarroa,Province
ES,NA,Navarra / Nafarroa,Province
ES,O,Asturias,Province
ES,OR,Ourense [Orense],Province
ES,P,Palencia,Province
Expand Down Expand Up @@ -1758,7 +1758,6 @@ IN,HP,Himāchal Pradesh,State
IN,HR,Haryāna,State
IN,JH,Jhārkhand,State
IN,JK,Jammu and Kashmīr,Union territory
IN,JK,Jammu and Kashmīr,Union territory
IN,KA,Karnātaka,State
IN,KL,Kerala,State
IN,LA,Ladākh,Union territory
Expand Down Expand Up @@ -1792,7 +1791,7 @@ IQ,KA,Karbalā',Governorate
IQ,KI,Kirkūk,Governorate
IQ,MA,Maysān,Governorate
IQ,MU,Al Muthanná,Governorate
IQ,,An Najaf,Governorate
IQ,NA,An Najaf,Governorate
IQ,NI,Nīnawá,Governorate
IQ,QA,Al Qādisīyah,Governorate
IQ,SD,Şalāh ad Dīn,Governorate
Expand Down Expand Up @@ -1894,7 +1893,7 @@ IT,MN,Mantova,Province
IT,MO,Modena,Province
IT,MS,Massa-Carrara,Province
IT,MT,Matera,Province
IT,,Napoli,Metropolitan City
IT,NA,Napoli,Metropolitan City
IT,NO,Novara,Province
IT,NU,Nuoro,Province
IT,OR,Oristano,Province
Expand Down Expand Up @@ -2199,7 +2198,7 @@ LB,BH,Baalbek-Hermel,Governorate
LB,BI,El Béqaa,Governorate
LB,JA,Liban‐Sud,Governorate
LB,JL,Mont‐Liban,Governorate
LB,,Nabatîyé,Governorate
LB,NA,Nabatîyé,Governorate
LC,01,Anse la Raye,District
LC,02,Castries,District
LC,03,Choiseul,District
Expand Down Expand Up @@ -2452,7 +2451,6 @@ MA,CAS,Casablanca [Dar el Beïda]*,Prefecture
MA,CHE,Chefchaouene,Province
MA,CHI,Chichaoua,Province
MA,CHT,Chtouka-Ait Baha,Province
MA,CHT,Chtouka-Ait Baha,Province
MA,DRI,Driouch,Province
MA,ERR,Errachidia,Province
MA,ESI,Essaouira,Province
Expand All @@ -2472,8 +2470,6 @@ MA,JDI,El Jadida,Province
MA,JRA,Jerada,Province
MA,KEN,Kénitra,Province
MA,KES,El Kelâa des Sraghna,Province
MA,KES,El Kelâa des Sraghna,Province
MA,KES,Kelaat Sraghna,Province
MA,KES,Kelaat Sraghna,Province
MA,KHE,Khémisset,Province
MA,KHN,Khénifra,Province
Expand Down Expand Up @@ -2644,7 +2640,6 @@ MK,202,Vinica,Municipality
MK,203,Delčevo,Municipality
MK,204,Zrnovci,Municipality
MK,205,Karbinci,Municipality
MK,205,Karbinci,Municipality
MK,206,Kočani,Municipality
MK,207,Makedonska Kamenica,Municipality
MK,208,Pehčevo,Municipality
Expand Down Expand Up @@ -2965,20 +2960,20 @@ MZ,P,Cabo Delgado,Province
MZ,Q,Zambézia,Province
MZ,S,Sofala,Province
MZ,T,Tete,Province
,CA,Zambezi,Region
,ER,Erongo,Region
,HA,Hardap,Region
,KA,//Karas,Region
,KE,Kavango East,Region
,KH,Khomas,Region
,KU,Kunene,Region
,KW,Kavango West,
,OD,Otjozondjupa,Region
,OH,Omaheke,Region
,ON,Oshana,Region
,OS,Omusati,Region
,OT,Oshikoto,Region
,OW,Ohangwena,Region
NA,CA,Zambezi,Region
NA,ER,Erongo,Region
NA,HA,Hardap,Region
NA,KA,//Karas,Region
NA,KE,Kavango East,Region
NA,KH,Khomas,Region
NA,KU,Kunene,Region
NA,KW,Kavango West,Region
NA,OD,Otjozondjupa,Region
NA,OH,Omaheke,Region
NA,ON,Oshana,Region
NA,OS,Omusati,Region
NA,OT,Oshikoto,Region
NA,OW,Ohangwena,Region
NE,1,Agadez,Region
NE,2,Diffa,Region
NE,3,Dosso,Region
Expand Down Expand Up @@ -3012,7 +3007,7 @@ NG,KO,Kogi,State
NG,KT,Katsina,State
NG,KW,Kwara,State
NG,LA,Lagos,State
NG,,Nasarawa,State
NG,NA,Nasarawa,State
NG,NI,Niger,State
NG,OG,Ogun,State
NG,ON,Ondo,State
Expand Down
2 changes: 1 addition & 1 deletion datapackage.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "un-locode",
"title": "UN-LOCODE Codelist",
"version": "2023.2.3",
"version": "2023.2.4",
"licenses": [
{
"name": "ODC-PDDL-1.0",
Expand Down
28 changes: 17 additions & 11 deletions scripts/integrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from titlecase import titlecase
import csv


def process(extracted_files):

# Process CSV files
Expand Down Expand Up @@ -33,31 +34,36 @@ def process(extracted_files):
if current_entry:
writer.writerow(current_entry)

df = pd.read_csv('tmpsub.csv', encoding='cp1252', dtype=str)
df = pd.read_csv('tmpsub.csv', encoding='cp1252',
dtype=str, header=None, na_filter=False)
df.columns = ['SUCountry', 'SUCode', 'SUName', 'SUType']
df_base = pd.read_csv(f"data/subdivision-codes.csv", dtype=str)
print("Columns in df:", df.columns)
print("Columns in df_base:", df_base.columns)
merged_df = pd.merge(df_base, df[['SUCountry', 'SUCode', 'SUType']], on=['SUCountry', 'SUCode'], how='left')
df_base = pd.read_csv(
f"data/subdivision-codes.csv", dtype=str, na_filter=False)
merged_df = pd.merge(df_base, df[['SUCountry', 'SUCode', 'SUType']], on=[
'SUCountry', 'SUCode'], how='left')
merged_df = merged_df.drop_duplicates()
merged_df.to_csv(f"data/subdivision-codes.csv", index=False)
print(f"Processed {file_name}")
continue

unlocode_df = pd.read_csv(file_name, encoding='cp1252', header=None, dtype=str)
unlocode_df = pd.read_csv(
file_name, encoding='cp1252', header=None, dtype=str)
unlocode_df.columns = ['Change', 'Country', 'Location', 'Name', 'NameWoDiacritics', 'Subdivision',
'Function', 'Status', 'Date', 'IATA', 'Coordinates', 'Remarks']
'Function', 'Status', 'Date', 'IATA', 'Coordinates', 'Remarks']

for index, row in unlocode_df.iterrows():
if pd.isna(row['Location']) or row['Location'] == '':
if row['Change'] == '=': #alias row
alias_df.loc[len(alias_df.index)] = row[['Country', 'Name', 'NameWoDiacritics']]
if row['Change'] == '=': # alias row
alias_df.loc[len(alias_df.index)] = row[[
'Country', 'Name', 'NameWoDiacritics']]
continue
print(f"Processed {file_name}")

alias_df.to_csv(f"data/alias.csv", index=False)
print("Processed and saved UNLOCODE files")
return


if __name__ == "__main__":

if len(sys.argv) != 2:
Expand All @@ -79,7 +85,7 @@ def process(extracted_files):
os.remove(file_name)
print(f"Removed {file_name}")
os.remove('tmpsub.csv')
#except Exception as e:
# print(f"Error extracting {zip_path}: {e}")
except Exception as e:
print(f"Error extracting {zip_path}: {e}")
finally:
None

0 comments on commit 3648bfa

Please sign in to comment.