Commit 945bc29f authored by matt24smith's avatar matt24smith
Browse files

data generation for testing, update network graph test, bathymetry and shore distance now passing tests

data generation for testing, update network graph test, bathymetry and shore distance now passing tests
parent 27f67130
Pipeline #4971 failed with stages
in 2 minutes and 21 seconds
......@@ -127,13 +127,14 @@ class DBQuery(UserDict):
cur.execute(
'SELECT * FROM sqlite_master WHERE type="index" and name=?',
[f'idx_{month}_t_x_y'])
[f'idx_{month}_m_t_x_y'])
if len(cur.fetchall()) == 0:
print(f'building dynamic index for month {month}...')
cur.execute(
f'CREATE INDEX IF NOT EXISTS idx_{month}_t_x_y '
f'ON ais_{month}_dynamic (time, longitude, latitude)')
f'CREATE INDEX IF NOT EXISTS idx_{month}_m_t_x_y '
f'ON ais_{month}_dynamic (mmsi, time, longitude, latitude)'
)
aisdatabase.conn.commit()
aisdatabase.conn.close()
......
''' collect vessel transits between zones (nodes), and aggregate various trajectory statistics '''
import os
import re
from multiprocessing import Pool
import pickle
from functools import partial, reduce
......@@ -59,8 +60,9 @@ staticinfo = lambda track: dict(
# collect aggregated statistics on vessel positional data
transitinfo = lambda track, zoneset: dict(
src_zone=f"{int(track['in_zone'][zoneset][0].split('Z')[1]):03}",
rcv_zone=f"{int(track['in_zone'][zoneset][-1].split('Z')[1]):03}",
src_zone=f"{re.sub('[^0-9]', '', track['in_zone'][zoneset][0])}",
#rcv_zone=f"{int(track['in_zone'][zoneset][-1].split('Z')[1]):03}",
rcv_zone=f"{re.sub('[^0-9]', '', track['in_zone'][zoneset][-1])}",
transit_nodes=
f"{track['in_zone'][zoneset][0]}_{track['in_zone'][zoneset][-1]}",
num_datapoints=len(track['time'][zoneset]),
......
......@@ -139,8 +139,9 @@ def TrackGen(
for rows in rowgen:
assert rows.size > 1 or rows != np.array(
None, dtype=object), 'cannot create track vectors from empty set'
if rows is None or (rows.size <= 1):
raise ValueError(
'cannot create vector from zero-size track segment')
if deduplicate_timestamps:
warnings.warn('timestamps deduplication is deprecated')
......@@ -153,10 +154,6 @@ def TrackGen(
rows[:, mmsi_col].astype(float).astype(int)[1:]))
rows = np.delete(rows, dupe_idx, axis=0)
if len(rows) == 0 or (len(rows) == 1 and rows[0] is None):
raise ValueError(
'cannot create vector from zero-size track segment')
tracks_idx = np.append(
np.append([0],
np.nonzero(rows[:, mmsi_col].astype(int)[1:] !=
......
__version__ = "1.0.17"
__version__ = "1.0.18"
......@@ -2,6 +2,12 @@
Changelog
=========
v1.0.18
-------
Added data generation for testing; updated the network graph test; bathymetry and shore distance now pass their tests.
v1.0.17
-------
......
import os
import pickle
from hashlib import sha256
from functools import reduce
from datetime import datetime
import numpy as np
from shapely.geometry import Polygon
from aisdb import zones_dir, rawdata_dir, dbpath
from aisdb.database.sqlfcn_callbacks import boxpoly
from aisdb.gis import shiftcoord, ZoneGeom, ZoneGeomFromTxt
from aisdb.gis import ZoneGeom, ZoneGeomFromTxt
from aisdb.proc_util import glob_files
from aisdb.database.sqlfcn_callbacks import in_timerange
from aisdb.database.dbqry import DBQuery
from aisdb.database.dbconn import DBConn
# from aisdb.database.sqlfcn_callbacks import boxpoly
def arrayhash(matrix, nbytes=2):
    """Short deterministic fingerprint of a sequence of arrays.

    Concatenates all arrays in ``matrix`` with :func:`numpy.append`, hashes the
    raw bytes with SHA-256, and returns the last ``8 * nbytes`` hex digits of
    the digest.

    Args:
        matrix: iterable of numpy arrays (or array-likes accepted by
            ``np.append``) to be flattened and hashed together.
        nbytes: controls the suffix length kept from the hex digest
            (``8 * nbytes`` characters; default 2 -> 16 hex chars).

    Returns:
        str: hex-digit suffix of the SHA-256 digest.
    """
    # PEP 8 (E731): a def is preferred over a lambda bound to a name;
    # logic is byte-for-byte equivalent to the original lambda.
    return sha256(reduce(np.append, matrix).tobytes()).hexdigest()[nbytes * -8:]
def sample_track_pickle():
    """Return cached test rows for the longest available track.

    Loads ``scripts/dfo_project/test_query_october.pickle`` when present;
    otherwise builds an object-dtype row array from the track with the most
    positions, pickles it to that path, and returns it.

    NOTE(review): ``rows`` and ``TrackGen`` are not defined in this function
    or in the visible imports — presumably supplied by an earlier query step;
    confirm before calling this with a cold cache, or it will raise NameError.
    """
    fpath = 'scripts/dfo_project/test_query_october.pickle'
    # fast path: reuse the previously generated fixture if it exists
    if os.path.isfile(fpath):
        with open(fpath, 'rb') as f:
            return pickle.load(f)
    maxlen = 0
    maxmmsi = ''
    # keep one track per MMSI, then scan for the track with the most positions
    tracks = {track['mmsi']: track for track in TrackGen(rows)}
    for track in tracks.values():
        if (m := len(track['lon'])) > maxlen:
            maxlen = m
            maxmmsi = track['mmsi']
        # debug output: position count per vessel
        print(track['mmsi'], m)
    # one row per position; columns: mmsi, time, lon, lat, cog, sog, name, type
    # (scalar fields are broadcast to maxlen so every column has equal length)
    testrows = np.array([
        [tracks[maxmmsi]['mmsi'] for _ in range(maxlen)],
        tracks[maxmmsi]['time'],
        tracks[maxmmsi]['lon'],
        tracks[maxmmsi]['lat'],
        tracks[maxmmsi]['cog'],
        tracks[maxmmsi]['sog'],
        [tracks[maxmmsi]['name'] for _ in range(maxlen)],
        [tracks[maxmmsi]['type'] for _ in range(maxlen)],
    ],
                        dtype=object).T
    # cache the fixture for subsequent runs
    with open(fpath, 'wb') as f:
        pickle.dump(testrows, f)
    return testrows
def sample_dynamictable_insertdata():
    """Insert three fixed test position reports into ``ais_200001_dynamic``.

    Ensures the January-2000 dynamic table and its index exist via
    ``DBQuery.check_idx()``, then inserts three rows for the same vessel and
    position at 20-second intervals. Uses ``INSERT OR IGNORE`` so repeated
    calls are idempotent.

    Returns:
        None. Side effect: rows committed to the database at ``dbpath``.
        NOTE(review): the DBConn is left open, matching prior behavior —
        confirm whether callers rely on the open connection.
    """
    args = DBQuery(
        start=datetime(2000, 1, 1),
        end=datetime(2000, 2, 1),
        callback=in_timerange,
    )
    # creates the month's table/index if missing before inserting
    args.check_idx()
    db = DBConn(dbpath)
    # same vessel (mmsi=1) and position at t, t+20s, t+40s (epoch seconds);
    # parameterized executemany replaces three copy-pasted literal INSERTs
    rows = [
        (1, 946702800 + 20 * i, -60.994833, 47.434647238127695, -1, -1)
        for i in range(3)
    ]
    db.cur.executemany(
        'INSERT OR IGNORE INTO ais_200001_dynamic '
        '(mmsi, time, longitude, latitude, cog, sog) '
        'VALUES (?, ?, ?, ?, ?, ?)', rows)
    db.conn.commit()
def sample_random_polygon(xscale=20, yscale=20):
......
......@@ -8,17 +8,24 @@ from aisdb.webdata.merge_data import (
merge_tracks_hullgeom,
merge_tracks_shoredist,
)
from aisdb.database.sqlfcn_callbacks import in_bbox_time_validmmsi
from aisdb.database.sqlfcn_callbacks import (
in_bbox_time_validmmsi,
in_timerange,
)
from aisdb.gis import Domain
from tests.create_testing_data import sample_gulfstlawrence_zonegeometry
from tests.create_testing_data import (
sample_dynamictable_insertdata,
sample_gulfstlawrence_zonegeometry,
)
def prepare_qry():
sample_dynamictable_insertdata()
z1 = sample_gulfstlawrence_zonegeometry()
domain = Domain('gulf domain', geoms={'z1': z1}, cache=False)
start = datetime(2021, 11, 1)
end = datetime(2021, 11, 2)
start = datetime(2000, 1, 1)
end = datetime(2000, 2, 1)
rowgen = DBQuery(
start=start,
......@@ -27,7 +34,7 @@ def prepare_qry():
xmax=domain.maxX,
ymin=domain.minY,
ymax=domain.maxY,
callback=in_bbox_time_validmmsi,
callback=in_timerange,
).gen_qry()
return rowgen
......
......@@ -2,8 +2,12 @@ from datetime import datetime, timedelta
from functools import partial
# import cProfile
from aisdb import dbpath
from aisdb.database.dbqry import DBQuery
from aisdb.database.sqlfcn_callbacks import in_bbox_time_validmmsi
from aisdb.database.sqlfcn_callbacks import (
in_bbox_time,
in_bbox_time_validmmsi,
)
from aisdb.gis import Domain
from aisdb.track_gen import (
fence_tracks,
......@@ -11,14 +15,18 @@ from aisdb.track_gen import (
TrackGen,
)
from aisdb.network_graph import serialize_network_edge
from tests.create_testing_data import zonegeoms_or_randompoly
from tests.create_testing_data import (
sample_dynamictable_insertdata,
sample_gulfstlawrence_zonegeometry,
)
def test_network_graph_pipeline():
# query configs
start = datetime(2021, 11, 1)
end = datetime(2021, 12, 1)
zonegeoms = zonegeoms_or_randompoly(randomize=True, count=10)
start = datetime(2000, 1, 1)
end = datetime(2000, 2, 1)
#zonegeoms = zonegeoms_or_randompoly(randomize=True, count=10)
zonegeoms = {'z1': sample_gulfstlawrence_zonegeometry()}
domain = Domain(name='test', geoms=zonegeoms, cache=False)
args = DBQuery(
start=start,
......@@ -27,9 +35,12 @@ def test_network_graph_pipeline():
xmax=domain.maxX,
ymin=domain.minY,
ymax=domain.maxY,
callback=in_bbox_time_validmmsi,
callback=in_bbox_time,
)
args.check_idx()
sample_dynamictable_insertdata()
# processing configs
distsplit = partial(
segment_tracks_encode_greatcircledistance,
......@@ -42,11 +53,11 @@ def test_network_graph_pipeline():
serialized = partial(serialize_network_edge, domain=domain)
# query db for points in domain bounding box
rowgen = args.gen_qry()
try:
_test = next(TrackGen(args.gen_qry()))
_test2 = next(geofenced(distsplit(TrackGen(rowgen))))
_test3 = next(serialized(geofenced(distsplit(TrackGen(rowgen)))))
_test2 = next(geofenced(distsplit(TrackGen(args.gen_qry()))))
_test3 = next(
serialized(geofenced(distsplit(TrackGen(args.gen_qry())))))
except ValueError as err:
print('suppressed error due to DBQuery returning empty rows:'
f'\t{err.with_traceback(None)}')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment