I have a directory structure which is composed of Countries, States or other subdivision, and Cities.
Here's a way to use pathlib objects in a dictionary to containerize the hierarchy
for example:
Also changed the get_dir_contents method to return all contents of directory. Individual entries can be tested for type by adding after line 62:
added qualifier:
-----------------------------------------------------------------------
Another method added Nov 29, 2020 (more polished)
I have one module for each project that lays out all of the directories, URLs and common file locations in a relative structure, using pathlib.
Here's a sample for a geocoding project:
the module is named GeoPaths.py and is imported by just about every other module in the project.
A neat feature of using something like this is that you can run it on its own in a copy of the project to
immediately set up your directory structure (it will create missing directories, but will leave already existing
directories alone):
GeoPaths.py
here's the code that would do that:
MyModule.py
Increment by one for each sublevel, and paths will automatically be adjusted for all source code in that subdirectory.
Here's a way to use pathlib objects in a dictionary to containerize the hierarchy
from pathlib import Path
import os
import inspect


class CountryInfo:
    """Container for a Country / State / City data directory hierarchy.

    ``bpaths`` is a nested dictionary whose leaves are ``pathlib.Path``
    objects, all relative to the directory that holds this source file.
    Creating an instance changes the working directory to that directory
    and creates every directory in ``bpaths`` that does not exist yet.
    """

    def __init__(self):
        # Anchor all relative paths at this file's directory. abspath()
        # guards against dirname(__file__) being '' (script started from
        # its own directory), which os.chdir() rejects.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))
        self.home = Path('.')
        self.home.mkdir(exist_ok=True)

        data = self.home / '..' / 'data'
        country = data / 'Country'
        usa = country / 'USA'
        self.bpaths = {
            'data': data,
            'Country': {
                'country': country,
                'USA': {
                    'usa': usa,
                    'Alaska': usa / 'Alaska',
                    'Alabama': usa / 'Alabama',
                    'Arkansas': usa / 'Arkansas',
                    'American Samoa': usa / 'AmericanSamoa',
                    'Arizona': usa / 'Arizona',
                    # ...
                },
            },
        }
        self.create_all_directories(self.bpaths)

    def get_dir_contents(self, path):
        """Return every entry found in directory *path*.

        Args:
            path: a ``pathlib.Path`` naming the directory to list.

        Returns:
            A list of ``Path`` objects (one per entry, possibly empty), or
            ``None`` when *path* is not a ``Path`` or does not exist.
        """
        if isinstance(path, Path) and path.exists():
            return list(path.iterdir())
        return None

    def create_all_directories(self, path):
        """Recursively create the directories named in a nested dictionary.

        Args:
            path: a dictionary whose values are either ``Path`` objects or
                further dictionaries of the same shape. Values of any
                other type are ignored.

        Paths that already exist as regular files are skipped, so entries
        that point at files do not make ``mkdir`` fail.
        """
        for value in path.values():
            if isinstance(value, dict):
                self.create_all_directories(value)
            elif isinstance(value, Path) and not value.is_file():
                # parents=True: do not rely on parent directories being
                # listed (and therefore created) before their children.
                value.mkdir(parents=True, exist_ok=True)


def testit():
    """Demo: print the contents of the Scottsdale directory under Arizona."""
    bp = CountryInfo()
    Arizona = bp.bpaths['Country']['USA']['Arizona']
    Scottsdale = Arizona / 'Scottsdale'
    files = bp.get_dir_contents(Scottsdale)
    if files is None:
        # get_dir_contents returns None for a missing directory.
        print('Scottsdale directory does not exist')
    elif not files:
        print('Scottsdale directory is empty')
    else:
        for file in files:
            print(f'{file}')


if __name__ == '__main__':
    testit()

# results of example:
Output:../data/Country/USA/Arizona/Scottsdale/coslicense.pdf
../data/Country/USA/Arizona/Scottsdale/ct_BusinessLicences.csv
Since the elements of the dictionary are pathlib objects, they inherit all of the methods of pathlib, for example:
print(f'Scottsdale path: {Scottsdale.resolve()}') will return the absolute path of the Scottsdale directory (I replaced root paths with ...):
Output:Scottsdale path: .../Data-2TB/BusinessLists/data/Country/USA/Arizona/Scottsdale
Update: Added some error checking, and a new method create_all_directories which will create empty directories if they don't already exist. Also changed the get_dir_contents method to return all contents of the directory. Individual entries can be tested for type by adding after line 62:
# Report what kind of filesystem entry each item in `files` is.
for file in files:
    # Test for a symbolic link first: is_dir() and is_file() follow links,
    # so a link to a directory or file would otherwise never be reported
    # as a link.
    if file.is_symlink():
        print(f'{file.name} is a symbolic link')
    elif file.is_dir():
        print(f'{file.name} is a directory')
    elif file.is_file():
        print(f'{file.name} is a regular file')
    # and so on, see pathlib (3.7 is most complete if running that python
    # version) docs for all possibilities

# Update Jul25: 07:36 EST
added qualifier:
and not value.is_file() to line 51, needed to prevent a crash if value is path + file, url, etc.
-----------------------------------------------------------------------
Another method added Nov 29, 2020 (more polished)
I have one module for each project that lays out all of the directories, URLs and common file locations in a relative structure, using pathlib.
Here's a sample for a geocoding project:
the module is named GeoPaths.py and is imported by just about every other module in the project.
A neat feature of using something like this is that you can run it on its own in a copy of the project to
immediately set up your directory structure (it will create missing directories, but will leave already existing
directories alone):
GeoPaths.py
import os
from pathlib import Path


class GeoPaths:
    """Relative directory, URL and common-file layout for the project.

    All paths are ``pathlib.Path`` objects relative to the directory that
    holds this source file; creating an instance changes the working
    directory to that directory.

    Args:
        depth: number of directory levels the calling code sits below the
            directory of this file; one ``'..'`` is prepended per level.
            The sign is ignored.
        create_dirs: when true (the default), create every directory of
            the layout that does not exist yet. Pass ``False`` to build
            the path attributes without creating anything.
    """

    def __init__(self, depth=0, *, create_dirs=True):
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

        self.homepath = Path('.')
        for _ in range(abs(depth)):
            self.homepath = self.homepath / '..'

        rootpath = self.homepath / '..'

        self.docspath = rootpath / 'docs'
        self.testspath = rootpath / 'tests'
        self.datapath = rootpath / 'data'

        self.csvpath = self.datapath / 'csv'
        self.htmlpath = self.datapath / 'html'
        self.jsonpath = self.datapath / 'json'
        self.MasterAddressPath = self.datapath / 'MasterAddressDatabase'
        self.prettypath = self.datapath / 'pretty'
        self.tmppath = self.datapath / 'tmp'

        # Osm data is arranged by state and file type.
        # A rather long directory tree, but laid out here for ease of use
        # in software
        self.osmpath = self.datapath / 'osm'
        self.geofabrik_datapath = self.osmpath / 'GeofabrikAndCensus'

        if create_dirs:
            # Parents are listed before their children.
            for directory in (
                self.docspath,
                self.testspath,
                self.datapath,
                self.csvpath,
                self.htmlpath,
                self.jsonpath,
                self.MasterAddressPath,
                self.prettypath,
                self.tmppath,
                self.osmpath,
                self.geofabrik_datapath,
            ):
                directory.mkdir(parents=True, exist_ok=True)

        # URL's
        # Must be an assignment: a bare `name: 'text'` annotation does not
        # create the attribute.
        self.TigerLineGeoDatabase = 'https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-geodatabase-file.html'
        self.qgis_plugins = 'https://plugins.qgis.org/plugins/?page=1&&'
        self.osmfilelink = 'https://ftp.osuosl.org/pub/openstreetmap/planet/'
        self.geofabrikserver = 'https://download.geofabrik.de/north-america.html'

        # Common files:
        self.geofabrikjson = self.jsonpath / 'GeofabrikLinks.json'


if __name__ == '__main__':
    GeoPaths()

# Before running the script, my directory structure for a new project looks like this:
Output:├── src
│ └── GeoPaths.py
└── venv
...
After running GeoPaths.py, the directory structure looks like this:
$ . ./venv/bin/activate
(venv)$ python src/GeoPaths.py
Output:.
├── data
│ ├── csv
│ ├── html
│ ├── json
│ ├── MasterAddressDatabase
│ ├── osm
│ │ └── GeofabrikAndCensus
│ ├── pretty
│ └── tmp
├── docs
├── src
│ └── GeoPaths.py
├── tests
└── venv
...
Now, assume you have a module named MyModule.py in the src directory, and you want to open a json file named sillyfile.json. Here's the code that would do that:
MyModule.py
from GeoPaths import GeoPaths
import json


class MySillyClass:
    """Round-trip a small dictionary through ``sillyfile.json``.

    The file lives in the json directory supplied by ``GeoPaths``.
    """

    def __init__(self):
        self.gpaths = GeoPaths()
        self.jsonfile = self.gpaths.jsonpath / 'sillyfile.json'

    def create_dict(self):
        """Write the sample scores to the json file, replacing its contents."""
        scores = dict(Cowboys='21', GreenBayPackers='7')
        self.jsonfile.write_text(json.dumps(scores))

    def read_it_back(self):
        """Load the json file and print one ``key: value`` line per entry."""
        loaded = json.loads(self.jsonfile.read_text())
        for team in loaded:
            print(f"{team}: {loaded[team]}")


def main():
    """Write the sample file, then read it back and print it."""
    silly = MySillyClass()
    silly.create_dict()
    silly.read_it_back()


if __name__ == '__main__':
    main()

# Results of running this script:
Output:Cowboys: 21
GreenBayPackers: 7
The depth argument of GeoPaths can be used when code is in a subdirectory of src. Increment by one for each sublevel, and paths will automatically be adjusted for all source code in that subdirectory.