A Neat way to use pathlib

**Larz60+** · (This post was last modified: Nov-29-2020, 08:12 AM by Larz60+.)

I have a directory structure composed of countries, states (or other subdivisions), and cities.
Here's a way to use pathlib objects in a dictionary to containerize the hierarchy:

from pathlib import Path
import os
import inspect


class CountryInfo:
    """Relative directory layout for Country / State data, held as pathlib objects.

    ``bpaths`` is a nested dictionary whose leaves are ``Path`` objects
    relative to the directory containing this file. Instantiating the class
    changes the working directory to that directory and creates every
    directory named in ``bpaths`` that does not exist yet.
    """

    def __init__(self):
        # All paths below are relative, so anchor the process at this file's
        # directory. abspath() covers the case where __file__ is a bare
        # filename and dirname() returns '', which os.chdir() rejects.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

        self.home = Path('.')
        self.home.mkdir(exist_ok=True)

        # Shared prefixes, built once instead of on every dictionary line.
        data = self.home / '..' / 'data'
        country = data / 'Country'
        usa = country / 'USA'

        self.bpaths = {
            'data': data,
            'Country': {
                'country': country,
                'USA': {
                    'usa': usa,
                    'Alaska': usa / 'Alaska',
                    'Alabama': usa / 'Alabama',
                    'Arkansas': usa / 'Arkansas',
                    'American Samoa': usa / 'AmericanSamoa',
                    'Arizona': usa / 'Arizona',
                    # ...
                },
            },
        }
        self.create_all_directories(self.bpaths)

    def get_dir_contents(self, path):
        """Return the entries of directory *path* as a list of Path objects.

        Returns None when *path* is not a ``Path`` or is not an existing
        directory (including when it points at a regular file). An existing
        but empty directory yields an empty list.
        """
        if isinstance(path, Path) and path.is_dir():
            return list(path.iterdir())
        return None

    def create_all_directories(self, path):
        """Recursively create every directory named in the nested dict *path*.

        Dict values are descended into. ``Path`` values are created, together
        with any missing parents, unless they point at an existing regular
        file. Values of any other type (e.g. URL strings) are ignored.
        """
        for value in path.values():
            if isinstance(value, dict):
                self.create_all_directories(value)
            elif isinstance(value, Path) and not value.is_file():
                value.mkdir(parents=True, exist_ok=True)

def testit():
    """Print the contents of the Arizona/Scottsdale directory, if it exists."""
    bp = CountryInfo()

    Arizona = bp.bpaths['Country']['USA']['Arizona']
    Scottsdale = Arizona / 'Scottsdale'

    files = bp.get_dir_contents(Scottsdale)
    if files is None:
        # get_dir_contents returns None when there is no such directory,
        # which is a different situation from an existing, empty one.
        print('Scottsdale directory does not exist')
    elif not files:
        print('Scottsdale directory is empty')
    else:
        for file in files:
            print(f'{file}')


if __name__ == '__main__':
    # Run the demonstration only when executed as a script, not on import.
    testit()

results of example:

Output:../data/Country/USA/Arizona/Scottsdale/coslicense.pdf
../data/Country/USA/Arizona/Scottsdale/ct_BusinessLicences.csv

Since the elements of the dictionary are pathlib objects, they provide all of the methods of pathlib.Path,
for example:

print(f'Scottsdale path: {Scottsdale.resolve()}')

will return the absolute path of the Scottsdale directory (I replaced root paths with ...):

Output:
Scottsdale path: .../Data-2TB/BusinessLists/data/Country/USA/Arizona/Scottsdale

Update: Added some error checking, and a new method, create_all_directories, which will create empty directories if they don't already exist.

Also changed the get_dir_contents method to return all contents of the directory. Individual entries can be tested for type by adding the following after line 62:

        for file in files:
            # is_dir() and is_file() follow symbolic links, so a link has to
            # be tested for first or it is reported as whatever it points to.
            if file.is_symlink():
                print(f'{file.name} is a symbolic link')
            elif file.is_dir():
                print(f'{file.name} is a directory')
            elif file.is_file():
                print(f'{file.name} is a regular file')
            # and so on; see the pathlib docs for all possibilities
            # (3.7 is the most complete if you are running that Python version)

Update Jul25: 07:36 EST
added qualifier:

 and not value.is_file()

to line 51, needed to prevent a crash if value is a path to a file, a URL, etc.

-----------------------------------------------------------------------
Another method added Nov 29, 2020 (more polished)

I have one module for each project that lays out all of the directories, URLs and common file locations in a relative structure, using pathlib.
Here's a sample for a geocoding project:

The module is named GeoPaths.py and is imported by just about every other module in the project.
A neat feature of using something like this is that you can run it on its own in a copy of the project to
immediately set up your directory structure (it will create missing directories, but will leave already existing
directories alone):

GeoPaths.py

import os
from pathlib import Path
 
 
class GeoPaths:
    """Relative directory, URL and common-file layout for the project.

    Instantiating the class changes the working directory to the directory
    containing this file and creates each project directory that does not
    exist yet; directories that already exist are left alone.

    Args:
        depth: Number of additional ``..`` levels to climb from this file's
            directory before locating the project root. The sign is ignored.
    """

    def __init__(self, depth=0):
        # Every path below is relative, so anchor the process at this file.
        os.chdir(os.path.abspath(os.path.dirname(__file__)))

        self.homepath = Path('.')
        for _ in range(abs(depth)):
            self.homepath = self.homepath / '..'

        # The project root is one level above homepath.
        rootpath = self.homepath / '..'

        self.docspath = rootpath / 'docs'
        self.docspath.mkdir(exist_ok=True)

        self.testspath = rootpath / 'tests'
        self.testspath.mkdir(exist_ok=True)

        self.datapath = rootpath / 'data'
        self.datapath.mkdir(exist_ok=True)

        self.csvpath = self.datapath / 'csv'
        self.csvpath.mkdir(exist_ok=True)

        self.htmlpath = self.datapath / 'html'
        self.htmlpath.mkdir(exist_ok=True)

        self.jsonpath = self.datapath / 'json'
        self.jsonpath.mkdir(exist_ok=True)

        self.MasterAddressPath = self.datapath / 'MasterAddressDatabase'
        self.MasterAddressPath.mkdir(exist_ok=True)

        self.prettypath = self.datapath / 'pretty'
        self.prettypath.mkdir(exist_ok=True)

        self.tmppath = self.datapath / 'tmp'
        self.tmppath.mkdir(exist_ok=True)

        # OSM data is arranged by state and file type.
        # A rather long directory tree, but laid out here for ease of use in software.
        self.osmpath = self.datapath / 'osm'
        self.osmpath.mkdir(exist_ok=True)

        self.geofabrik_datapath = self.osmpath / 'GeofabrikAndCensus'
        self.geofabrik_datapath.mkdir(exist_ok=True)

        # URLs
        # Assigned with '=': a ':' here would be a bare annotation and the
        # attribute would never exist on the instance.
        self.TigerLineGeoDatabase = 'https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-geodatabase-file.html'
        self.qgis_plugins = 'https://plugins.qgis.org/plugins/?page=1&&'
        self.osmfilelink = 'https://ftp.osuosl.org/pub/openstreetmap/planet/'
        self.geofabrikserver = 'https://download.geofabrik.de/north-america.html'

        # Common files:
        self.geofabrikjson = self.jsonpath / 'GeofabrikLinks.json'
 
 
if __name__ == '__main__':
    # Running this module directly just instantiates GeoPaths, which
    # creates any missing project directories.
    GeoPaths()

Before running the script, my directory structure for a new project looks like this:

Output:├── src
│   └── GeoPaths.py
└── venv
    ...

After running GeoPaths.py directory structure looks like this:

$ . ./venv/bin/activate
(venv)$ python src/GeoPaths.py

Output:.
├── data
│   ├── csv
│   ├── html
│   ├── json
│   ├── MasterAddressDatabase
│   ├── osm
│   │   └── GeofabrikAndCensus
│   ├── pretty
│   └── tmp
├── docs
├── src
│   └── GeoPaths.py
├── tests
└── venv
    ...

Now, assume you have a module named MyModule.py in the src directory, and you want to write and read a JSON file named sillyfile.json.
Here's the code that would do that:

MyModule.py

from GeoPaths import GeoPaths
import json
 
 
class MySillyClass:
    """Write a small sample dictionary to sillyfile.json and print it back."""

    def __init__(self):
        self.gpaths = GeoPaths()
        self.jsonfile = self.gpaths.jsonpath / 'sillyfile.json'

    def create_dict(self):
        """Serialise the sample scores to ``self.jsonfile`` as JSON."""
        scores = {'Cowboys': '21', 'GreenBayPackers': '7'}
        self.jsonfile.write_text(json.dumps(scores))

    def read_it_back(self):
        """Load ``self.jsonfile`` and print one ``key: value`` line per entry."""
        loaded = json.loads(self.jsonfile.read_text())
        for team, score in loaded.items():
            print(f"{team}: {score}")
 
def main():
    """Write the sample JSON file, then read it back and print its entries."""
    silly = MySillyClass()
    silly.create_dict()
    silly.read_it_back()
 
 
if __name__ == '__main__':
    # Run the write/read demonstration only when executed as a script.
    main()

Results of running this script:

Output:Cowboys: 21
GreenBayPackers: 7

The depth argument of GeoPaths can be used when code is in a subdirectory of src.
Increment it by one for each sublevel, and paths will automatically be adjusted for all source code in that subdirectory.

A Neat way to use pathlib

User Panel Messages

Announcements