User:Klaus/Nearest Geohash Calculator

From Geohashing
< User:Klaus
Revision as of 22:12, 17 June 2015 by imported>Klaus (added scripts)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Usage

This 2 scripts allow you to calculate all geohashes since 2000 and sort them by proximity to given coordinates.

WARNING: Be careful with this script! Although I tried to make it 30W-compliant, I cannot guarantee it will output the correct coordinates, because I didn't test it very thoroughly...

$ ./download.sh 
$ ./near_geohashes.py 48.53 9.06 | sort -n | head
0.27736097407641647 km near, hash(2000-07-01-10393.09), fractions: 0.530534, 0.057562
0.931403603845664 km near, hash(2012-05-08-13035.85), fractions: 0.537845, 0.056814
1.4481599500109752 km near, hash(2000-04-19-10748.97), fractions: 0.542218, 0.055098
1.5380279505319578 km near, hash(2011-10-25-11807.96), fractions: 0.543855, 0.057970
1.5466280272701214 km near, hash(2002-01-07-10261.33), fractions: 0.515947, 0.060932
1.6463321261935906 km near, hash(2014-03-04-16321.71), fractions: 0.544166, 0.055151
2.0142084389512194 km near, hash(2015-05-03-17859.27), fractions: 0.511782, 0.057891
2.0832446179837687 km near, hash(2009-05-20-8502.48), fractions: 0.538606, 0.076697
2.1918247664166084 km near, hash(2011-10-29-12207.34), fractions: 0.549622, 0.063616
2.2048214599537057 km near, hash(2007-06-27-13336.93), fractions: 0.535808, 0.041019

So actually the nearest hash to 48.53,9.06 ever since 2000-01-01 was on 2000-07-01 and only ~277 metres away!

download.sh

This bash script downloads the list of DowJones values from Yahoo:

#!/bin/bash

year=$(date +%Y)
mon=$(date +%m)
dat=$(date +%d)

# download data from yahoo, then:
# remove first line ("Date,Open,High,Low,Close,Volume,Adj Close")
rm -f dowjones.csv
wget -o /dev/null -O - "https://ichart.yahoo.com/table.csv?s=%5EDJI&a=09&b=1&c=1928&d=$mon&e=$dat&f=$year&g=d&ignore=.csv"  | tail -n+2 | cut -d ',' -f 1-2 > dowjones.csv

near_geohashes.py

#!/usr/bin/python
# vim: tabstop=8 expandtab shiftwidth=4 softtabstop=4


from math import radians, cos, sin, asin, sqrt
from sys import argv, exit
from datetime import datetime, date, timedelta
from time import strptime
from bisect import bisect_left
from hashlib import md5


dj = {} # this will contain the date -> dowjones values!
lat,lon = 0,0
latPrefix,lonPrefix = 0,0

# https://stackoverflow.com/questions/4913349/haversine-formula-in-python-bearing-and-distance-between-two-gps-points/4913653#4913653
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    r = 6371 # Radius of earth in kilometers. Use 3956 for miles
    return c * r

def loadDowJones(inputfile):
    global dj
    with open(inputfile) as f:
        lines = [l.strip('\n') for l in f.readlines()]
        for l in lines:
            dat, dowjones = l.split(',')
            # convert the date string to a "date" object
            dat = datetime.strptime(dat, '%Y-%m-%d').date()
            # round the DowJones
            dowjones = round(float(dowjones), 2)
            dj[dat] = dowjones

def daterange(start_date, end_date):
    for n in range(int ((end_date - start_date).days)):
        yield start_date + timedelta(n)

def nearest(fractionLat, fractionLon):
    minimalDistance = float("inf") # infinity is larger than everything else!
    # calculate the minimum distance to all of the 9 adjacent graticules geohashes
    for i in range(-1,2):     # i is in -1 0 1
        for j in range(-1,2): # j is in -1 0 1
            minimalDistance = min(minimalDistance, haversine(lat, lon, latPrefix + fractionLat + i, lonPrefix + fractionLon + j))
    return minimalDistance

def calculateFractions():
    for d in (daterange(date(2000,1,1), date.today())):
        significantDJdate = d
        # if we are east of 30W and the date is after or equal to
        # 2008-05-27 when the 30W rule was introduced
        # -> use the previous day!
        if lon > -30 and d >= date(2008,5,27):
            significantDJdate -= timedelta(days=1)
        # go back, until we have a DJ value!
        while significantDJdate not in dj:
            significantDJdate -= timedelta(days=1)
        s = "%s-%.2f" % (d, dj[significantDJdate])
        md5Output = md5(s.encode('ascii')).digest()
        fractionA = int.from_bytes(md5Output[0:8],  byteorder='big', signed=False) / (256**8)
        fractionB = int.from_bytes(md5Output[8:16], byteorder='big', signed=False) / (256**8)
        distance = nearest(fractionA, fractionB)
        print("%s km near, hash(%s), fractions: %f, %f" % (distance, s, fractionA, fractionB))

if __name__ == "__main__":
    if len(argv) != 3:
        print("usage: %s <lat> <lon>" % argv[0])
        exit(1)
    # print warning...
    #print("WARNING: this script will work only for east-of-W30 locations like Europe!")
    lat, lon = float(argv[1]), float(argv[2])
    latPrefix = int(lat)
    lonPrefix = int(lon)
    loadDowJones("dowjones.csv")
    calculateFractions()