#!/usr/bin/env python3.8
# encoding: utf-8
from __future__ import print_function, division
__doc__="""
Concatenate two or more GROT GPlates rotation files.
"""
__author__ = "Christian Heine; mailto:chhei@paleoearthlabs.org"
__license__ = """
grotcat.py -- gptools Python scripts for GPlates
Copyright (C) 2020 Christian Heine, PaleoEarthLabs.org
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
__version__="2.0"
__copyright__ = "Copyright (c) Christian Heine, 2020."
from gptools import *
import argparse
import sys
from collections import defaultdict
# =========================================================================== #
# --- Set up command line options
# =========================================================================== #
__usage__ = "usage: %prog [options] inputfile"
# parser.add_argument("-f", "--file", dest="filename",
# help="write report to FILE", metavar="FILE")
parser = argparse.ArgumentParser( )
# parser = argparse.ArgumentParser( usage=__usage__, version=__version__ )
parser.add_argument( "-v", "-V", "--verbose", action="store_true", dest="VERBOSE", default=False,
help="make lots of noise [false]" )
parser.add_argument( 'grotFiles', type=argparse.FileType('r'), nargs='+' )
# parser.add_argument( "-f", 'outFile', type=argparse.FileType('w'), )
parser.add_argument( "-of", "--outfile", # action="store_true",
metavar="outFileName", dest="outFileName", default="GROT",
help="Provide the neame of the output file (no extension)" )
args = parser.parse_args()
print(args)
# #--- Output format options
# parser.add_argument( "-o", "--outformat", # action="store_true",
# metavar="outputFormat", dest="outformat", default="GROT",
# help="Chose from the following output format options: GROT [default], ROT [PLATES rotationfile format]" )
#
#
# parser.add_argument( "-c", "--concatenate",
# metavar="ConCat", dest="concatenateGrot", default="ConcatenatedRotationFiles",
# help="""Concatenate mode for GROT files. Requires to provide a list of *.grot files
# which should be combined into a single monolithic *.grot mothership file.
#
# This option requires at least 2 input files. wildcards such as *.grot or *.rot can be used.
# """ )
#--- OUTPUT FILE NAMING
# # TODO write routine for automatic output file name generation in different case
# out = concatenatedBasename + ".grot"
# outfile = open(out, 'w')
# err = concatenatedBasename + "_errors.out"
# errorfile = open( err, 'w' )
# plateacros = concatenatedBasename + "_plateacronyms.out"
# acronymfile = open( plateacros, 'w' )
# entryID = 0
#
# for line in rotLines:
# entryID += 1
# inputRawRotDict[ entryID ] = readROT( line )
def main():
"""Run script on command line """
fileCount = 0
rotData = [] #--- list holding dictionaries of headers of MPRS data of input files
headerData = [] #--- List holding dictionaries of headers of input files
refData = [] #--- list holding dictionaries of ref sections of input files
mPIDlist = [] #--- Global list of all moving plate ids
majorCollisionList = []
#--- Loop over all input files
for grotFile in args.grotFiles:
fileCount += 1
print("------------------------------")
print("-- Reading file", grotFile)
headerDict, movPlateRotSeqDict, referenceDict = grot_reader( grotFile )
#--- Generate a list of all available MPRS plate ids from all files.
mPIDlist.append( [ *movPlateRotSeqDict.keys() ] )
rotData.append( movPlateRotSeqDict )
refData.append( referenceDict )
headerData.append( headerDict )
#--- Basic stats:
print( len(headerData) )
# print( mPIDlist )
#--- Make sure that the MPID are all unique
for rotFile in mPIDlist:
if len(set( rotFile )) != len(rotFile):
print("Mismatch between unique MPIDs and MPIDs in rotation file! ")
print( len(set( rotFile )))
print( len(rotFile) )
#--- Processing rotation data
print( "="*80, '\n' )
print(" Processing rotation data\n")
# mpidSet = set() #--- a set of unique plate ids
mergedMPRSDict = {} #--- Final dictionary of the MPRS merged.
# TODO Use logging module for messaging
#--- Generate set of intersecting mPIDs
intersectionPIDs = set( mPIDlist[0] )
for i in mPIDlist[1:]:
# print(i)
intersectionPIDs.intersection_update( i )
print( "Intersecting MPIDs:\n", intersectionPIDs )
print( len(intersectionPIDs) )
print("-"*80)
#--- Generate a set of mPIDs which are not in either file.
nonIntersectingPlateIDs = set( mPIDlist[0] )
# print( "Non-intersecting MPIDs:", nonIntersectingPlateIDList)
for i in mPIDlist[1:]:
nonIntersectingPlateIDs.symmetric_difference_update( i )
print( "Non-intersecting MPIDs:\n", nonIntersectingPlateIDs )
print( len(nonIntersectingPlateIDs) )
print("-"*80)
#--- Assemble global mprs dictionary with plateids which are unique to the files
#--- This does not require any further selection/testing - for now.
for inputMPRSdict in rotData:
for mPID in inputMPRSdict.keys():
if mPID in nonIntersectingPlateIDs:
print("-non intersecting PID:", mPID )
mergedMPRSDict[ mPID ] = inputMPRSdict[ mPID ]
# print("______"*80)
# print( mergedMPRSDict)
# print( len(mergedMPRSDict) )
print("_"*80)
print("Looping over intersection plateid list")
print("_"*80)
#--- Assemble global mprs dictionary with those plateids which are intersecting
count = 0
for mPID in intersectionPIDs:
print("-(-" * 25)
print(" ==> MPID:", mPID)
# if mPID == 101:
# print("+++++")
# print( inputMPRSdict[ mPID ] )
# print("+++++")
# print("refRotDict")
# print(refRotDict[ mPID ] )
# sys.exit(0)
#--- Set up entry in global dictionary to fill subsequent info
mergedMPRSDict[ mPID ] = {}
# if count == 10:
# break
# count += 1
#--- Retrieve header info from first MPRS dict in collection
# print(" -- header and testing")
refRotDict = rotData[0]
stageRotList = [ rotData[0][ mPID ]['StageRots'] ]
for dictElem in ['MPRS:code','MPRS:name', 'PP', 'C']:
print( refRotDict[mPID][ dictElem ] )
mergedMPRSDict[ mPID ][ dictElem ] = refRotDict[ mPID ][ dictElem ]
#--- Make sure the stage rotation key exists in the merged dict
mergedMPRSDict[ mPID ]['StageRots'] = []
#--- Now loop over remaining ones, testing whether the data from header is the same
#--- as in existing one
# print("testing header data against existing")
for inputMPRSdict in rotData[1:]:
for dictElem in ['MPRS:code','MPRS:name', 'PP', 'C']:
if inputMPRSdict[ mPID ][ dictElem] != mergedMPRSDict[ mPID ][ dictElem ]:
print(" --| no match ")
print(inputMPRSdict[ mPID ][ dictElem], mergedMPRSDict[ mPID ][ dictElem ])
#--- For stage rotations, we add all available ones to a nested list for comparison
# for inputMPRSdict in rotData[1:]:
addStageRotData = inputMPRSdict[ mPID ]['StageRots']
stageRotList.append( addStageRotData )
# print("StageRot list:")
# print(stageRotList)
#--- Compare stage rotations
ages = []
# ages = [ e[0] for rf in stageRotList for e in rf]
#--- Store the index of the element with it for later quick access
print(" --|-------------------------------------------")
print(' ==> Evaluating stage rotation age overlap')
for inputMPRSdict in rotData:
mprsAges = []
ageDict = {}
for stageAge in inputMPRSdict[mPID]['StageRots']:
ageIndex = inputMPRSdict[mPID]['StageRots'].index(stageAge)
ageDict[ stageAge[0] ] = ageIndex
# print("This is at position", ageIndex)
# mprsAges.append( stageAge[0] )
# ages.append( mprsAges )
ages.append( ageDict )
# print('mprsages\n', mprsAges)
print(" ==> Agedict: ", ages)
#--- Generate set of intersection stage rotation ages from dictionaries
#--- Difference/non-intersecting stage rotation ages:
#--- In these cases we need to merge data
intersectingStages = set( ages[0] )
symDiffStages = set( ages[0] )
# print(ages)
for i in ages[1:]:
# print(i)
intersectingStages.intersection_update( set(i) )
symDiffStages.symmetric_difference_update( set(i) )
print(' ==> Intersecting stages: ', intersectingStages )
print(' ==> Sym diff stages: ', symDiffStages )
print(" --|------------------------------------------")
#--- Process intersecting and non-intersecting stages to assemble merged file
for stages in stageRotList:
# print(stages)
for stage in stages:
if stage[0] in symDiffStages:
mergedMPRSDict[ mPID ]['StageRots'].append( stage )
intersectStageList = []
for i in intersectingStages:
# print('Merged dict\n', mergedMPRSDict[ mPID ] )
#--- Test whether the entries are exactly the same
#--- Loop over the second and larger entries to find the equivalent rotations
stageRotGroupIndex = 1 #--- temp solution to just provide index to get list
for stage in stageRotList[1:]:
accessIndex = stageRotList.index( stage )
# print("Age", i)
# print("index of age in agedict", ages[0][ i ] )
# print("stage", stage)
# print("Access index", accessIndex)
firstStageRotData = stageRotList[ 0 ][ ages[0][i] ]
stageRotData = stage[ ages[stageRotGroupIndex][ i ] ]
# print("==>")
# print(firstStageRotData)
# print(stageRotData)
stageRotGroupIndex += 1
#--- TODO assemble list/dictionary with content to chose from
#--- allow interactive pick or override?
#--- First check for stage rotation parameters to fit
if firstStageRotData[0:5] == stageRotData[0:5]:
# print("match")
#--- in this case we simply stick with the first one
#-- Check comments
#--- Dict comparison
if len( firstStageRotData[5] ) + len( stageRotData[5] ) == 0:
# print("no stage rot metadata")
newStageMetaData = {}
else:
newStageMetaData = defaultdict(list)
#--- TODO - remove doubling up using a set
for d in (firstStageRotData[5], stageRotData[5]):
for key, value in d.items():
print(key,value)
try:
newStageMetaData[key].append( *value )
except:
newStageMetaData[key] = value
print(" ==> NEW STAGE METADATA:", newStageMetaData)
# firstDictLen = len( firstStageRotData[6] )
# dictLen = len( stageRotData[6] )
mergedStageData = firstStageRotData[0:5]
mergedStageData.append( newStageMetaData )
# mergedStageData = tmpStageData
print(" ==> Merged stage data: ", mergedStageData)
# mergedMPRSDict[ mPID ]['StageRots'].append( mergedStageData )
intersectStageList.append( mergedStageData )
else:
#--- if the XOVER occurs at split time
try:
#--- If there is a perfect match between the rotation pole data we just merge
if firstStageRotData[5]['XOVER']:
intersectStageList.append( firstStageRotData )
intersectStageList.append( stageRotData )
# print(intersectStageList)
#--- If there is differing precision in the XOVER, we chose the one with higher
#--- precision
# else:
#--- If there is no XOVER and there is just difference in precision we chose the higher pre
#--- to proceed
except KeyError:
# else:
print(" ==> OUCH!! ")
print(" Your moving plate rotation sequences contain non-matching rotation poles!")
print(" You will need to correct this manually before you can merge files.\n Bailing out!\n")
print("XOVER? first stg,", firstStageRotData)
print("XOVER?,stg rot ", stageRotData)
# errorDesc = ""
majorCollisionList.append( [mPID, ages[0], firstStageRotData[0:5], stageRotData[0:5]] )
sys.exit(0)
# print(intersectStageList)
#--- Finally, make sure that we insert the intersecting mprs stages at the right time:
#--- Generate list of all stage rotation ages in mprs
print(" --|-----------------------------------------")
print(" ==> Merging intersecting MPRS ")
# for i in intersectingStages:
# print(i)
# for i in symDiffStages:
# print(i)
symDiffStageAges = [ i[0] for i in mergedMPRSDict[ mPID ]['StageRots']]
symDiffStageAgeIdx = [ mergedMPRSDict[ mPID ]['StageRots'].index(i) for i in mergedMPRSDict[ mPID ]['StageRots']]
# print(symDiffStageAges, symDiffStageAgeIdx)
symDiffStageAgeDict = dict( zip(symDiffStageAges, symDiffStageAgeIdx) )
print(symDiffStageAgeDict)
#--- Find index where to insert intersecting stage rotation ages:
#--- https://stackoverflow.com/questions/40688545/inserting-value-into-a-list-of-int-just-before-the-rightmost-item-greater-than-t
#--- TODO IMplement as function
# rightmost = 0
# print("INSERTING INTERSECTING AGES")
# print(mergedMPRSDict[ mPID ]['StageRots'])
finalStageRotList = []
for intSecAge in intersectStageList:
print("Intersecting entry:", intSecAge)
for i, val in enumerate( mergedMPRSDict[ mPID ]['StageRots'] ):
# print()
print(' ==> Enumerating', i, val[0])
if val[0] > intSecAge[0]:
# print("insert here:", val,i,intSecAge[0])
mergedMPRSDict[ mPID ]['StageRots'].insert(i, intSecAge)
break
# rightmost = i
#======================================================================#
#================== OUTPUT
#======================================================================#
print("="*80)
print("== GENERATING OUTPUT ")
print("="*80)
#--- Generate a new list of PIDs which is sorted by cardinality for the PLateIDs > 100
#--- because the Hotspots/abs ref should always come first
# print(mPIDlist)
# print(len(mPIDlist))
newMovPIDList = [ *mergedMPRSDict ]
print(newMovPIDList)
print(len(newMovPIDList))
absRefPIDs = sorted( i for i in newMovPIDList if i < 100 )
cardinalPIDList0 = [ i for i in newMovPIDList if i >= 100 ]
cardinalPIDList1 = list( map(str, cardinalPIDList0) )
# if VERBOSE:
# print(cardinalPIDList1)
cardinalPIDList1.sort()
newPIDList = absRefPIDs + list( map(int, cardinalPIDList1) )
# if VERBOSE:
print("=" * 80)
print("= PlateID list in shortlex: ")
print(newPIDList)
# theOutFile = "MergedFiles.grot"
outfile = open( args.outFileName + '.grot', 'w')
print( headerData )
for headKey,headValue in headerData[0].items():
print(f'{headKey}"{headValue}"')
outfile.write( f'{headKey}"{headValue}"\n' )
#--- Put a divider in to visually separate header from body data
outfile.write("#" + "-"*79 + "\n")
# for headerData[1]
# f'{PlateID1:>8}{Age:11.5f}{Lat:10.5f}{Lon:11.5f}{Angle:11.5f}{PlateID2:>8}'
# outfile.write( headerData[1] )
OUTFILEFORMAT="GROT"
#--- Write out into new rotation file
for PlateID1 in newPIDList:
print("========================================================")
print( "===> PlateID1: ", PlateID1 )
if PlateID1 == 999:
break
plateIdDict = mergedMPRSDict
stageList = plateIdDict[ PlateID1 ]
print(plateIdDict[ PlateID1 ])
try:
# print("try- mplatecode", mprsDict[ PlateID1 ])
mPlateCode = mergedMPRSDict[ PlateID1 ][ "MPRS:code" ]
except KeyError:
# print("except- mplatecode", plateIdDict[ PlateID1 ][ 0 ][ 5 ])
mPlateCode = mergedMPRSDict[ PlateID1 ][ 0 ][ 5 ][ 2 ]
try:
mPlateName = mergedMPRSDict[ PlateID1 ][ "MPRS:name" ]
except KeyError:
mPlateName = 'FIXME'
#-- FIXME: we're lacking a FPID in here.
try:
fPlateCode = mergedMPRSDict[ PlateID1 ][ "FPID:code" ]
except KeyError:
fPlateCode = mergedMPRSDict[ PlateID1 ]['StageRots'][ 0 ][ 4 ]
movPlateRotSequence = '> @MPRS:pid"{0}" @MPRS:code"{1}" @MPRS:name"{2}"\n> @PP"{3}" @C"Sequence generated by grotcat.py"\n'.format(
PlateID1, mPlateCode, mPlateName, mergedMPRSDict[ PlateID1 ][ "PP" ]) #, datetime.now().isoformat()
outfile.write( movPlateRotSequence )
# print( movPlateRotSequence )
#--- Loop over individual, sorted stage rotations for a single plateID
print(" Loop over individual sorted stage rots ")
for stage in mergedMPRSDict[ PlateID1 ]['StageRots'] : #
# if VERBOSE:
# print( " Stage rotation age:", stage[0] )
# print( " Stage:", stage)
# print( " ", "-"*60)
#--- Stage rotation attributes including comment dictionary
# FIXME: This all needs to go back into a class - example: http://zetcode.com/python/fstring/
Age = stage[ 0 ]
Lat = stage[ 1 ]
Lon = stage[ 2 ]
Angle = stage[ 3 ]
PlateID2 = stage[ 4 ]
stageRotationCommentDict = stage[ 5 ] # dictionary of comment metadata
# rotationSource = stage[ 6 ]
# rotationInactive = stage[ 7 ]
#--- Formatting
# pid1out = str( PlateID1 ).ljust( 8 ) # TODO: adjust for max permissible plateID Length in GPlates
# ageout = str( "%.5f" % Age ).rjust( 10 )
# latout = str( "%.5f" % Lat ).rjust( 9 )
# lonout = str( "%.5f" % Lon ).rjust( 10 )
# angleout = str( "%.5f" % Angle ).rjust( 10 )
# pid2out = str( PlateID2 ).rjust( 8 )
formattedCommentData = format_grot_mprs_comment( stageRotationCommentDict )
formattedStageRotationData = f'{PlateID1:>8}{Age:11.5f}{Lat:10.5f}{Lon:11.5f}{Angle:11.5f}{PlateID2:>8}'
# print( formattedCommentData )
# print( formattedStageRotationData )
#--- Formatting the output
# if rotationInactive: # if true do not write line out - in PaleoGIS they use double negation: Inactive:true
#
# if OUTFILEFORMAT == "GROT":
#
# formattedStageRotation = "#" + " ".join( [formattedStageRotationData, formattedCommentData] ) + "\n"
#
# else: # if false write commented line
# print( " Skipping line" )
# formattedStageRotation = "999 0.0 0.0 0.0 0.0 999 # " + " ".join( [formattedStageRotationData, formattedCommentData] ) + "\n"
#
# else:
# if OUTFILEFORMAT == "GROT":
formattedStageRotation = " ".join( [formattedStageRotationData, formattedCommentData] ) + "\n"
#
# else:
# print(" Using PLATES4 syntax")
# formattedStageRotation = " ! ".join( [formattedStageRotationData, formattedCommentData] ) + "\n"
# if VERBOSE:
# print( formattedStageRotation )
#---
#--- Testing for zero-rotations at time > 0 Ma
if float( Age ) > 0.0 and Lat == 0.0 and Lat == 0.0 and Angle == 0.0:
if VERBOSE:
print("Stage sequence %s | Zero rotation at %s Ma" % (stage, Age) )
zeroRotations.append( "Zero rotation at %.2f Ma | " % float(Age) + formattedStageRotation )
# if VERBOSE:
# print( formattedStageRotation )
#--- Skip those future prediction rotation poles and finally write full
#--- stage pole + comments to file
if not float( Age ) < 0.0 :
outfile.write( formattedStageRotation )
else:
futureRotations.append( "Future rotation skipped | " + formattedStageRotation )
#--- ADD BIBLIOGRAPHY SECTION
for biblioDict in refData:
outfile.write( create_grot_bibinfo_section( biblioDict ) )
outfile.close()
#--- PLATAE ID AND STAGE OVERLAP LOG FILE
#--- this file collects info to debug the merging.
if len(majorCollisionList) > 0 :
print("="*80)
print("MAJOR ISSUES")
with open( args.outFileName + '_issues.txt', 'w') as f:
f.write("# These PIDS encountered collisions during merge in their stage rotation part\n")
f.write("# mPID, \n")
for i in majorCollisionList:
# [mPID, ages[0], firstStageRotData[0:5], stageRotData[0:5]]
f.write( f"{i}\n" )
# print()
# issuefile.close()
print("DONE!")
pass
if __name__ == "__main__":
main()
# EOF