File gptools.py from the latest check-in


#!/usr/bin/env python3.9
from __future__ import print_function, division

__doc__ = """
gptools.py

Functions for reformatting rotation files for GPlates and PaleoGIS.

See the rotconv, rotcat scripts for usage or details. The readme also contains
valuable info.

Required:
    The Python DBF module: https://pypi.org/project/dbf/
"""

# =========================================================================== #
# --- Import modules
# =========================================================================== #

import sys
import os
import os.path
import re
from decimal import *
from collections import OrderedDict
import glob
from pprint import pprint
from datetime import datetime
from dataclasses import dataclass

try:
    import dbf

except ImportError:
    # Only a failed import should trigger the install hint; any other error
    # raised while importing dbf is a real problem and must stay visible.
    print( "Error importing dbf module" )
    print( "Please install this module as it is required for reading dbase rotation files" )
    print( "Try pip install dbf")
    # NOTE(review): exit status 0 is kept unchanged for backward compatibility;
    # consider a non-zero status so calling scripts can detect the failure.
    sys.exit(0)


# --- class stagedump
class stagedump:
    """Holder class intended for moving plate rotation extraction methods.

    At present it only keeps the constructor argument on the instance.
    """

    def __init__(self, arg):
        super().__init__()
        # Stored untouched; no method in this class reads it yet.
        self.arg = arg

class AutoVivification(dict):
    """Dictionary that creates nested dictionaries on first access.

    Mirrors Perl's autovivification: reading a missing key stores and returns
    a new, empty instance of the same class, so ``d['a']['b'] = 1`` works
    without creating ``d['a']`` beforehand.
    """

    def __getitem__(self, item):
        if item not in self:
            # First access to this key: store an empty container of our own
            # type so arbitrarily deep chains keep auto-creating levels.
            self[item] = type(self)()
        return dict.__getitem__(self, item)

#--- Keywords used in GROT syntax, these can be mixed case in the input file
# grotAtributesDict = {
#         "DOI": "Digital object identifier",      # seems reasonably consistently used.
#         "ABSAGE": "absolute age" ,               # sometimes embedded in phrase
#         "AU": "Author",                          # can occur multiple times, but often has & as marker instead. Builds AU database in header.
#         "COMMENT": "Comment",                          # free form comment
#         "REF": "Reference",                      # This is the bibtex equivalent of a citation key
#         "TIME": "Modification timestamp",           # should be either isoD (YYYY-MM-DD) or
#         "CHRONID": "Magnetic polarity chron ID", # Should be in the list of chrons -> also needs to be generated.
#         "XO_YS": "Cross over",                   # I guess there's also a few things which can be done with this.
#         "XO_YF": "Cross over",                   # I guess there's also a few things which can be done with this.
#         "XO_SF": "Cross over",
#         "XOVER": "Cross over",                     # Cross over in mprs
#         "GTS": "Geological Time Scale"
#     }
#TODO: make sure that the names used in the grotAttr dict are consistent - can this be turned into a template which is later filled in the crunching?

# Lookup of well-known plate ids -> [acronym, descriptive name].
majorPlateIDsDict = {
    0:   ["ABS", "Absolute reference"],
    1:   ["AHS", "Atlantic hotspots"],
    2:   ["PPH1", "Present day Pacific hotspots"],
    3:   ["PPH2", "Present day Pacific hotspots"],
    4:   ["TPWC", "True Polar Wander correction"],
    8:   ["RHS", "Reunion hotspots"],
    9:   ["KHS", "Kerguelen hotspots"],
    # The key 9 used to be listed twice, so this entry silently replaced the
    # Kerguelen one above. 10 is the free slot in the 8..12 run.
    # NOTE(review): confirm 10 is the intended plate id for Tristan.
    10:  ["THS", "Tristan hotspot"],
    11:  ["SHH", "Santa Helena hotspot"],
    12:  ["GMH", "Great Meteor hotspot"],
    28:  ["IHS", "Iceland hotspot"],
    101: ["NAM", "North America"],
    201: ["SAM", "South America"],
    301: ["EUR", "Europe"],
    401: ["SIB", "Siberia"],
    501: ["IND", "India"],
    601: ["SCC", "South China Craton"],
    701: ["AFR", "Africa"],
    714: ["NWA", "NW Africa"],
    715: ["NUB", "Nubian Africa"],
    801: ["AUS", "Australia"],
    802: ["ANT", "Antarctica"],
    901: ["PAC", "Pacific"],
}

def makeBibSection( refList, doiList ):
    """Placeholder for the bibliography writer; currently does nothing.

    The planned output is a simple bibliography section at the end of the
    GROT file with one line per reference in the form:

    # @REF @DOI

    Both arguments are ignored for now and None is returned.
    """
    # TODO
    # While processing the DOIs of the project a reference database could be
    # built in a bib file or bibjson format. This means fusing the reference
    # key (@REF) with the DOI (@DOI), e.g. via
    # curl -LH "Accept: text/bibliography; style=bibtex" http://dx.doi.org/10.1038/nrd842
    # and then: return bibDict
    return None

def makeAuthorSection( authorList  ):
    """Placeholder for the author list writer; currently does nothing.

    The planned output is a simple list of authors to be added to the header
    of the file. The argument is ignored for now and None is returned.
    """
    # Sketch of the intended implementation (not active):
    # authorBlock = ''
    # for author in authorList:
    #     authorBlock + author
    #
    # return authorBlock
    return None

# class grot( object ):
#     """docstring for grotH"""
#     def __init__(self, arg):
#         super(grotH, self).__init__()
#         self.arg = arg
#   

def grot_reader( reader ):
    """Ingest a GROT file and return its header, MPRS and reference data.

    The three sections are read in file order by read_header, read_mprs and
    read_references. Each helper consumes lines until its section ends, so a
    single call per section is sufficient.

    Args:
        reader: Object with a ``readline()`` method that returns '' at end of
            input (e.g. an open text file).

    Returns:
        Tuple ``(headerDict, mprsDict, referenceDict)``. A section that is
        absent from the input yields an empty dictionary.
    """
    # TODO: compare the dictionaries of several files and merge them. For each
    # plate id compare the MPRS header and the sequences line by line; the key
    # is the age as PID1 is the same. For identical ages check whether
    # lon, lat, angle and fixed plate id agree.

    # Start with empty results so that empty or truncated input returns
    # cleanly instead of failing on names that were never assigned.
    headerDict = {}
    mprsDict = {}
    referenceDict = {}

    line = reader.readline()
    print(line)

    if line:
        #--- Header: read_header stops at the first '>' line or at end of
        #--- input. Looping until a '>' line shows up would never terminate
        #--- for a file that has no MPRS section.
        line, headerDict = read_header( reader, line )
        if line.startswith(">"):
            print("-"* 80)

        #--- Read MPRS data and retrieve dictionary
        if line and not line.startswith("# @BIBINFO:references"):
            line, mprsDict = read_mprs(reader, line)

        #--- Read bibliographic section
        if line:
            line, referenceDict = read_references(reader, line)

    print("="*80)
    return headerDict, mprsDict, referenceDict

def read_header(reader, line):
    """Process the GROT file header.

    Header lines have the form ``KEY"value"``. Reading stops at the first line
    that starts with '>' (the beginning of the MPRS section) or at end of
    input.

    Args:
        reader: Object with a ``readline()`` method returning '' at end of
            input.
        line: The first header line, expected to be the
            ``@GPLATESROTATIONFILE:version"..."`` line.

    Returns:
        Tuple ``(line, headerDict)``: the line that ended the header ('' at
        end of input) and a dict mapping header keys to their unquoted values.
        Non-blank lines without a quoted value are stored under their own text
        with the value "FIXME".
    """
    versionKey = '@GPLATESROTATIONFILE:version'
    headerDict = {}

    #--- This line is special as the value is wrapped in quotation marks.
    #--- The key has to be removed as a prefix: str.strip() with the key as
    #--- argument removes a *set of characters* and would also eat letters of
    #--- the version value itself.
    versionValue = line.strip()
    if versionValue.startswith( versionKey ):
        versionValue = versionValue[ len( versionKey ): ]
    headerDict[ versionKey ] = versionValue.strip().strip('"')

    #--- Continue reading
    line = reader.readline()

    #-- The first line encountered with a > sign is MPRS
    while line and not line.startswith(">"):
        entry = line.strip()
        if entry:
            k, quote, v = entry.partition('"')
            if quote:
                print(k,v)
                headerDict[ k ] = v.strip('"').strip()
            else:
                # No quoted value on this line: flag the line itself rather
                # than overwriting the value of the previously read key.
                headerDict[ entry ] = "FIXME"

        line = reader.readline()
    return line, headerDict

def read_mprs(reader, line):
    """Read the MPRS section of a GROT file into a nested dictionary.

    Example of the section as it appears in a file:

    > @MPRS:pid 2 @MPRS:code PHS @MPRS:name Pacific hotspot
    > @PP PHS-PAC @C Sequence generated by rotconv.py
       2   0.00000 90.00000   0.00000   0.00000     901 @C Pacific hotspot-Pacific
       2   0.78000 49.30000 -49.50000  -1.02000     901 @C PHS-PAC @REF Wessel_++_2008 @ABSAGE

    The parsing itself is delegated to read_mprsData, which fills a dictionary
    keyed by the moving plate id. Reading ends at the
    "# @BIBINFO:references" line or at end of input.

    Returns a tuple (line, mprsDict) where line is the line on which reading
    stopped.
    """
    bibMarker = "# @BIBINFO:references"

    #--- Global dictionary with all MPRS data
    sequences = {} # FIXME - try and turn this in to defaultdict.
    print("Processing MPRS section")

    while True:
        if not line or line.startswith( bibMarker ):
            break
        sequences, line = read_mprsData( reader, line, sequences )

    return line, sequences

def _parse_mprs_header_line( line, mprsBlock ):
    """Store the @KEY"value" attributes of one '>' header line in mprsBlock."""
    for elem in line.lstrip(">").split("@")[1:]:
        elem = elem.strip()
        k, quote, v = elem.partition('"')
        if not quote:
            raise ValueError( 'Malformed MPRS header attribute (expected KEY"value"): %r' % elem )
        mprsBlock[ k ] = v.strip('"')


def _parse_rotation_metadata( elements ):
    """Turn the '@'-separated metadata chunks of a rotation line into a dict.

    ``KEY"value"`` chunks become ``{KEY: [value]}``; chunks that do not split
    into exactly key and value (single word decorators such as 'ABSAGE') are
    stored as ``{chunk: True}``.
    """
    metadata = {}
    for elem in elements:
        try:
            k, v = elem.split('"')[:-1]
        except ValueError:
            metadata[ elem ] = True
        else:
            metadata[ k ] = [v]
    return metadata


def read_mprsData( reader, line, mprsDict ):
    """Process moving plate rotation sequences (header lines and stage rotations).

    Lines starting with '>' are MPRS header lines carrying ``@KEY"value"``
    attributes; consecutive header lines form one block which is stored in
    mprsDict under the integer 'MPRS:pid'. The rotation lines that follow are
    appended to that block's 'StageRots' list as
    ``[age, poleLat, poleLon, poleAngle, fPID, commentDict]``.

    Recognised metadata tags in GROT syntax include C, AU, D, TIMESTAMP,
    CHRONID, XOVER, REF, DOI, FITRECON, ABSAGE and GTS; they are stored as
    found and not validated here.

    Blank lines, lines starting with '#' and lines whose first token is 999
    are skipped.

    Args:
        reader: Object with a ``readline()`` method returning '' at end of
            input.
        line: The current, not yet processed line.
        mprsDict: Dictionary to fill; it is modified in place.

    Returns:
        Tuple ``(mprsDict, line)`` where line is either '' (end of input) or
        the "# @BIBINFO:references" line.

    Raises:
        ValueError: A header attribute is not of the form KEY"value".
        KeyError: A header block has no 'MPRS:pid' attribute.
        SystemExit: The moving plate id of a rotation line differs from the
            one of the preceding header (or no header was read yet).
    """
    # FIXME This needs to be merged with the rotation sequence reader as it is really the same code.
    endMarker = "# @BIBINFO:references"
    headerMovPID = None  # stays None until the first MPRS header was read

    while line and not line.startswith( endMarker ):

        #--- Collect consecutive header lines into one block
        mprsBlock = None
        while line.startswith(">"):
            if mprsBlock is None:
                mprsBlock = {}
            _parse_mprs_header_line( line, mprsBlock )
            line = reader.readline()

        if mprsBlock is not None:
            headerMovPID = int( mprsBlock.pop('MPRS:pid') )
            mprsBlock['StageRots'] = []
            mprsDict[ headerMovPID ] = mprsBlock

            # The header may be directly followed by the end of the section;
            # re-check the loop condition instead of consuming that line.
            if not line or line.startswith( endMarker ):
                continue

        stripped = line.strip()

        # FIXME: make sure commented rotations get fed to disabled MPRS.
        if not stripped:
            # Blank lines carry no data.
            pass

        elif stripped.startswith("#") or re.match( r'999\b', stripped ):
            print("       COMMENT OR DISABLED SEQUENCE")

        else:
            #--- Add stages to MPR sequence and into global dictionary
            #--- This assumes that the input is properly formatted GROT syntax
            rotData = stripped.split('@')
            if len( rotData ) > 1:
                print(" --       CL:" , rotData)

            stageMovPID, age, poleLat, poleLon, poleAngle, fPID = rotation_assign_types( *rotData[0].split() )

            #--- Every line gets its own dictionary so that metadata never
            #--- leaks from one rotation to the next.
            commentDict = _parse_rotation_metadata( x.strip() for x in rotData[1:] )

            #--- Test whether mPIDs match
            if headerMovPID != stageMovPID:
                print("Aborting moving Plate IDs don't match in MPRS")
                sys.exit(1)

            mprsDict[stageMovPID]['StageRots'].append( [age, poleLat, poleLon, poleAngle, fPID, commentDict ] )

        line = reader.readline()

    return mprsDict, line

def read_references(reader, line):
    """Read the reference section of a GROT file and return a dictionary.

    Reference lines have the form ``# @REF <key> @DOI <doi>``. Blank lines,
    divider lines (``#---``), the ``# @BIBINFO`` marker line and the column
    header ``# @REF @DOI`` are skipped. Any other line is reported on stdout
    and ignored.

    Args:
        reader: Object with a ``readline()`` method returning '' at end of
            input.
        line: The current, not yet processed line.

    Returns:
        Tuple ``(line, referenceDict)``; line is '' because the section is
        read until end of input, referenceDict maps reference key to DOI.
    """
    print("-"*80)
    print("Processing reference section")

    referenceDict = {}

    while line:
        # Drop the comment marker. The tags are matched as prefixes because
        # str.strip("# @REF") removes a *set of characters* and would mangle
        # keys that begin with R, E or F.
        entry = line.strip().lstrip("#").strip()

        if not entry or entry.startswith("-") or entry.startswith("@BIBINFO"):
            pass

        elif entry.startswith("@REF") and "@DOI" in entry:
            refKey, _, doi = entry[ len("@REF"): ].partition("@DOI")
            refKey = refKey.strip()
            if refKey:
                referenceDict[ refKey ] = doi.strip()
            # An empty key is the column header line "# @REF @DOI".

        else:
            print("-!! Problem reading reference data:")
            print("   ", line)

        line = reader.readline()
    return line, referenceDict

# @dataclass
# class Stagerot:
#     """docstring for Stagerot. Currently copied from RotOuput class"""
#     mpid: int
#     fpid: int
#     age: float
#     lat: float
#     lon: float
#     angle: float
#     comment: str = 'FIXME'
#     author: str = 'FIXME'
#     timestamp: str = 'FIXME'
#     ref: str = ''
#     doi: str = ''
#     src: str = ''
#     active: str = ''
#     absage: str = ''
#     crossover: str = ''
#     magchron: str = ''
#     fitflag: str = ''
#     gts: str = ''

# class RotOutput( object ):
#     """A new output class for the rotation file containing 7 different
#     parameters sourced from the original input rotation file"""
#
#     def __init__( self, pid1, age, lat, lon, angle, pid2, comment, author, \
#         timestamp, reference, doi, src, active, absage, crossover, magchron, fitflag, gts):
#         # print("COMDIC", cmtDict)
#         # super( RotOutput, self ).__init__()
#         # self.OUTFILEFORMAT = OUTFILEFORMAT # FIXME
#         self.pid1 = pid1
#         self.pid2 = pid2
#         self.age = age
#         self.lat = lat
#         self.lon = lon
#         self.angle = angle
#         self.comment = comment
#         self.author = author
#         self.timestamp = timestamp
#         self.ref = reference
#         self.doi = doi
#         self.src = src
#         self.active = active
#         self.absage = absage
#         self.crossover = crossover
#         self.magchron = magchron
#         self.fitflag = fitflag
#         self.gts = gts
#
#     def __str__( self ):
#         """Function to format rotation data and comments into coherent line output
#         Formatting of rotation file uses the following standard sequence
#
#         PID1   Age         Lat       Lon       Angle     PID2   ! Comment
#         999999 -0000.0000  -00.0000  -000.0000 -000.0000 999999 !
#
#         12345678|0123456789|012345678901234567890123456789012345
#                 |          |
#         The format allows for 6-digit PIDs, negative rot angles with up to 4
#         decimal digits."""
#
#         pid1out  = str( self.pid1           ).ljust( 8 )     # TODO: adjust for max permissible plateID Length in GPlates
#         ageout   = str( "%.5f" % self.age   ).rjust( 10 )
#         latout   = str( "%.5f" % self.lat   ).rjust( 9 )
#         lonout   = str( "%.5f" % self.lon   ).rjust( 10 )
#         angleout = str( "%.5f" % self.angle ).rjust( 10 )
#         pid2out  = str( self.pid2           ).rjust( 8 )
#
#         out = [ pid1out, ageout, latout, lonout, angleout, pid2out ] # TODO: change to format syntax
#
#         print(out)
#
#         if self.ref:
#             out.append( '@REF %s' % self.ref )
#         if self.doi:
#             out.append( '@DOI %s' % self.doi )
#         if self.comment:
#             out.append( '@C %s' % self.comment )
#         if self.author:
#             out.append( '@AU %s' % self.author )
#         if self.timestamp:
#             out.append( '@DATE %s' % self.timestamp )
#         if self.src and not self.src == '':
#             out.append('@C:model %s ' % self.src )
#         if self.absage:
#             out.append( '@ABSAGE' )
#         if self.crossover:
#             out.append('@XOVER')
#         if self.fitflag:
#             out.append('@FITRECON')
#         if self.magchron:
#             out.append('@CHRONID %s' % self.magchron)
#         if self.gts:
#             out.append('@GTS %s' % self.gts)

            # commentOut = ( "! %s | %s | %s " % (self.comment, self.author, self.modDate, self.ref) )
            # commentOut = ( "! %s | %s | %s " % (self.comment, self.author, self.modDate, self.ref) )

        # print(out)
        # return out
        # activeout = self.active
            
    # def format(oFF, out):
    #     """Cast rotation line data into format-specific string"""
    #
    #     if oFF == "GROT":
    #         if not active: # if true do not write line out
    #         #--- in PaleoGIS they use double negation: Inactive:true
    #             outLine =  " ".join( out ) + "\n"
    #
    #         else: # if false write commented line
    #             print( " Skipping line" )
    #             outLine = "999 0.0 0.0 0.0 0.0 999 # " + " ".join( out ) + "\n"
    #             # return "# " + " ".join( out ) + "\n"
    #     else:
    #
    #         print("  Converting to PLATES4 syntax")
    #         outLine =  " ".join(out[:6]) + " ! " + " ".join( out[6:] ) + "\n"
    #
        # return outLine


# def writePlateAcronymFile( plateDict, plateAcroFile ):
#     """
#     Report a mapping of plate acronyms, plateIDs and plate names with old and young ages as csv.
#     """
#     # TODO use tables for this representation
#     # plateAcroFile = open( plateAcroFile ,'w' )
#     plateAcroFile.write("# Dictionary of PlateIDs, acronyms and names\n")
#     plateAcroFile.write("# Generated by rotconv.py - %s\n" % str( datetime.now().isoformat() ) )
#     plateAcroFile.write("# PlateID | Acronym | Name\n")
#     plateAcroFile.write("#---------+---------+---------------------------------------------------\n")
#     for plate in plateDict:
#         # print( plateDict[plate] )
#         plateAcroFile.write("{:<10}| {:<8}| {:<50}\n".format( plate, plateDict[ plate ]['MPRS:code'], plateDict[ plate ]['MPRS:name'] ) )
#     # plateAcroFile.close()
#     pass

def generatePlateIdDict( rotData, uPIDs, mPIDD, VERBOSE):
    """Generate the plate id dictionaries from parsed rotation data.

    Args:
        rotData: Mapping of a record key to a rotation record. A record is
            indexed by position: PlateID1, Age, Lat, Lon, Angle, PlateID2,
            comment and, optionally, source (7) and inactive flag (8).
        uPIDs: Not used by this function; kept for interface compatibility.
        mPIDD: Mapping of plate id to [acronym, name]; used to replace
            'FIXME' placeholders in the MPRS metadata.
        VERBOSE: If true, print additional diagnostics.

    Returns:
        Tuple ``(pidDict, pidMprsDict, refDict)``:
        pidDict maps the moving plate id to a list of
        ``[Age, Lat, Lon, Angle, PlateID2, commentDict, source, inactive]``;
        pidMprsDict maps the moving plate id to its 'MPRS:code', 'FPID:code'
        and 'MPRS:name'; refDict maps reference keys to DOIs ('FIXME' when
        the DOI is missing).

    Raises:
        IndexError, KeyError, ValueError: A record lacks one of the six
            mandatory fields (re-raised after printing a hint).
    """
    pidDict = {}
    pidMprsDict = {} # PlateID1 based dictionary for moving plate rot sequence
                     # header metadata.
    refDict = {}

    print( "\n--- Assembling rotation dictionary " )

    if VERBOSE:
        print("\n --- List of plate ids in this rotation file: ")
        print("="*80)
        print()
        print(rotData)
        print()

    PlateID1 = None

    #--- Process each line of the rotation file
    for theLine, rotLine in enumerate( rotData, start=1 ):

        print("-"*78)
        record = rotData[ rotLine ]

        #--- These are the mandatory arguments which should be present
        #--- for each stage.
        try:
            PlateID1, Age, Lat, Lon, Angle, PlateID2 = [ record[ i ] for i in range( 6 ) ]

        except (IndexError, KeyError, ValueError):
            print("\n Your rotation file doesn't seem to have the right syntax" )
            print(" I'm missing crucial data here. Please check your input data." )
            print( " Bailing out..." )
            # Continuing would silently reuse the values of the previous record.
            raise

        if VERBOSE:
            print("> RECORD ", theLine )
            print("> PLATEID1:", PlateID1, "PLATEID2:", PlateID2)
            print("> RotLine ",  record )

        #--- Process the comment part of the rotation sequences
        # NOTE(review): crunchComment is defined outside the visible part of
        # this module; it is assumed to return a dict-like object - confirm.
        try:
            commentDict = crunchComment( record[ 6 ], Age, VERBOSE )

        except (KeyError, IndexError):
            # TODO Only write out the data we have; the FIXME handling should
            #      be done in postprocessing using the missing elements.
            print("KEYERROR", record)
            # Start from an empty dict so that the metadata of the previous
            # record is not attributed to this one.
            commentDict = {}

        else:
            if VERBOSE:
                print("COMMENT DICT RETURNED: ", commentDict)

        #----------------------------------------------------------------------
        #--- Generate bibliography data structure
        if "REF" in commentDict:
            refKey = commentDict["REF"][0]

            try:
                refDict[ refKey ] = commentDict['DOI'][0]
            except (KeyError, IndexError, TypeError):
                refDict[ refKey ] = 'FIXME'
        else:
            print("> Did not find any bibliographic information to process" )

        if Age == 0 or PlateID1 not in pidMprsDict:

            #--- Make sure all three MPRS attributes exist
            for tag in ( 'MPRS:code', 'MPRS:name', 'FPID:code' ):
                try:
                    commentDict[ tag ]
                except KeyError:
                    commentDict[ tag ] = 'FIXME'

            if commentDict['MPRS:code'] == 'FIXME' and PlateID1 in mPIDD:
                print("replacing FIXME with", mPIDD[ PlateID1 ][0])
                commentDict['MPRS:code'] = mPIDD[ PlateID1 ][0]

            if commentDict['MPRS:name'] == 'FIXME' and PlateID1 in mPIDD:
                print("replacing FIXME with", mPIDD[ PlateID1 ][1])
                # The looked-up name has to be written back; assigning it to a
                # local variable left the 'FIXME' placeholder in place.
                commentDict['MPRS:name'] = mPIDD[ PlateID1 ][1]

            if commentDict['FPID:code'] == 'FIXME' and PlateID2 in mPIDD:
                print("replacing FIXME with", mPIDD[ PlateID2 ][0])
                commentDict['FPID:code'] = mPIDD[ PlateID2 ][0]

            #--- Add info to moving plate dictionary
            pidMprsDict[ PlateID1 ] = {
                "MPRS:code": commentDict['MPRS:code'],
                "FPID:code": commentDict['FPID:code'],
                "MPRS:name": commentDict['MPRS:name'],
            }

        try:
            rotationSource = record[ 7 ]
        except (IndexError, KeyError):
            # NOTE(review): PLATEMODEL is not defined in the visible part of
            # this module - confirm it exists as a module level name.
            rotationSource = PLATEMODEL # if line is empty, take command line input

        try:
            rotationInactive = record[ 8 ]
        except (IndexError, KeyError):
            rotationInactive = True

        if PlateID1 not in pidDict:
            print("PLATEID IS NONE or not yet existing")
            pidDict[ PlateID1 ] = []

        pidDict[ PlateID1 ].append( [ Age, Lat, Lon, Angle, PlateID2, commentDict, rotationSource, rotationInactive ] )

    #--- Check whether there is a rotation at zero ma.
    # NOTE(review): only the plate of the last processed record is checked -
    # confirm whether every plate should be checked.
    # pidDict is empty when rotData had no records; nothing to check then.
    if pidDict and not pidDict[ PlateID1 ][ 0 ][ 0 ] == 0:
        print("NO ROTATION AT 0 MA! - INSERTING: This is the first entry:", pidDict[ PlateID1 ][ 0 ])
        pidDict[ PlateID1 ].insert(0, [Decimal('0.0')] + pidDict[ PlateID1 ][ 0 ][1:])
        print(pidDict[ PlateID1 ])
        print("^^^^^^^^^________________")

    print(refDict)
    return pidDict, pidMprsDict, refDict  #, reconAgesSet

          #--- Issue here is that in PaleoGIS there might be multiple stageList with the same age but
          #--- inactive/active differences. This cannot be accounted for in the current
          #--- way of processing as stageList are used as unique IDs.
#         #--- Make a new list to hold the new data
#         xoverStagelist = []
          #--- Go through the mprs and make sure that the individual rotations are sorted by age
          #--- and then plateid. We start with a fixed plateid at 0 or whatever the youngest
          #--- rotation sequence is. As we move to older stage rotations we check if the plate id
          #--- changes and then.
#         stageData = stageList.pop( 0 )
#         fPid = stageList[0][4]
#         stageAge = stageList[0][0]
#         print( 'FIXED:', fPid, stageAge )
#         xoverStagelist.append( stageData )
#
#         while stageData:
#             stageData = stageList.pop( 0 )
#             print( 'FIXED:', fPid)
#
#             if fPid == stageData[0][4]:
#                 xoverStagelist.append( stageData )
#             else:
#
#                 read stage - pid and age
#                 read next - pid diff age diff

#                 the current
#
#             try:
#                 nextStageData = stageList.pop( 0 )
#
#             if stageList[0][4] != fPID:
#
#                 if plateid is different to previous one, we likely have a crossover. then we need to check if the next rotation has the same age
#                 and plateID to the current one
#
#             else:
#                 xoverStagelist.append( stage )
#
#         # for stage in stageList:
#         #     fPID =
#         #     print(stage)
#
        # print( "Previous:", prevPlateID1, "Current:", PlateID1 )
        # print(commentDict)
        #
        # [ commentAutor, commentDateTime, movPlateAcronym, fixedPlate, commentString ]
        # commentDictValues = [  commentDict['AU'], commentDict['TIME'], commentDict['MPRS:code'], commentDict['FPID:code'], commentDict['COMMENT'] ]
 
# class rotationProc(object):
#     """ class for processing of rotation data"""
    # def __init__(self, arg):
    #     super(rotationProc, self).__init__()
    #     self.arg = arg
#  
# mprs.head mprs.stages
# line.plat, line.plon line.comment.

def rotation_assign_types( strPID1, strAge, strPlat, strPlon, strAngle, strPID2 ):
    """Convert the six text fields of a stage rotation into typed values.

    Plate ids become int; age, pole latitude, pole longitude and angle become
    Decimal. The result is the tuple
    (PID1, age, latitude, longitude, angle, PID2).
    """
    # The age is converted first, then both plate ids, then the pole values.
    age = Decimal( strAge )
    movingPlate, fixedPlate = int( strPID1 ), int( strPID2 )
    poleLat, poleLon, poleAngle = [ Decimal( text ) for text in ( strPlat, strPlon, strAngle ) ]

    return movingPlate, age, poleLat, poleLon, poleAngle, fixedPlate

def readROT( line ):
    """Parse one line of a rotation file into typed fields.

    The line is split on whitespace into the six mandatory fields (PlateID1,
    Age, Lat, Lon, Angle, PlateID2); everything after them is taken as one
    comment, whether separated by '!' or not. The comment is optional.

    Args:
        line: A single line of the rotation file.

    Returns:
        Tuple ``(PlateID1, Age, Lat, Lon, Angle, PlateID2, LineComment,
        rotationSource, rotationInactive)``. rotationSource is always the
        empty string; rotationInactive is True when the line starts with '#'
        or '999 '.

    Raises:
        ValueError: The line has fewer than six fields.
        Whatever rotation_assign_types raises for fields it cannot convert.
    """
    rotationInactive = line.startswith( ("#", "999 ") )

    #--- This splits the line into the required 6 fields,
    #--- comments (whether separated by ! or not) are taken as one
    fields = line.strip().split( None, 6 )
    if len( fields ) == 6:
        # A rotation without a comment is valid; use an empty comment rather
        # than failing to unpack.
        fields.append( "" )

    ( PlateID1_str, Age_str, Lat_str, Lon_str, Angle_str, PlateID2_str, LineComment ) = fields

    PlateID1, Age, Lat, Lon, Angle, PlateID2 = rotation_assign_types( PlateID1_str, Age_str, Lat_str, Lon_str, Angle_str, PlateID2_str )

    #-- Get rid of multiple exclamation marks in front of the comment section.
    #-- Also, PaleoGIS export has extra columns for 'Inactive' and 'source' appended to the comment.
    if LineComment.startswith( "!" ):
        LineComment = re.sub(r'^[\s|!]+', '', LineComment).replace("\r", '')

    #--- Faking these parameters:
    #--- TODO: think about the inputfile parameter, for now we set this to an empty string.
    rotationSource = ""

    # TODO: header info (PlateID1 == 999 and PlateID2 == 999) could be written
    # out without further processing.
    return PlateID1, Age, Lat, Lon, Angle, PlateID2, LineComment, rotationSource, rotationInactive

#--- Read paleogis dbf file

def readDBF( dbfFieldNameList, dbfRecord, ErrorFile ):
    """Convert one record of a PaleoGIS dbf rotation table into rotation fields.

    Columns are located by name through ``dbfFieldNameList``. In PaleoGIS a
    rotation sequence is active if the flag is "0". As inactive and active
    rotations are mixed, there can be multiple rotation poles for the same age.

    Args:
        dbfFieldNameList: List of column names; position ``i`` names the
            value ``dbfRecord[i]``. Must contain 'plateid', 'age', 'lat',
            'lon', 'angle', 'ref_plate' and 'comment'. 'inactive' and
            'reference' (or 'source') are optional.
        dbfRecord: The record, indexable by column position.
        ErrorFile: Writable file-like object that receives problem reports.

    Returns:
        The tuple ``(PlateID1, Age, Lat, Lon, Angle, PlateID2, LineComment,
        rotationSource, rotationInactive)``. Plate IDs are ``int``; age,
        latitude, longitude and angle are ``Decimal``.

    Raises:
        ValueError: If one of the required columns is missing from
            ``dbfFieldNameList``.
    """
    #--- Sets the precision of the current decimal context (kept from the
    #--- original). Per the decimal documentation this affects arithmetic,
    #--- not the Decimal() constructor calls below.
    getcontext().prec = 6

    def fieldValue( fieldName ):
        """Return the record value stored in the column called fieldName."""
        return dbfRecord[ dbfFieldNameList.index( fieldName ) ]

    #--- Required fields
    PlateID1 = int( fieldValue( 'plateid' ) )
    Age = Decimal( fieldValue( 'age' ) )
    Lat = Decimal( fieldValue( 'lat' ) )
    Lon = Decimal( fieldValue( 'lon' ) )
    Angle = Decimal( fieldValue( 'angle' ) )

    try:
        PlateID2 = int( fieldValue( 'ref_plate' ) )
    except ValueError:
        #--- Keep the same type (int) as in the regular case.
        PlateID2 = 0
        ErrorFile.write( "%s - Missing PLATEID2, replaced with 0\n" % PlateID1 )

    try:
        LineComment = str( fieldValue( 'comment' ) )
        #--- Remove leading blanks, '!' (and '|') from the comment.
        LineComment = re.sub( r'^[\s|!]+', '', LineComment )
    except UnicodeDecodeError:
        LineComment = "%s " % UnicodeDecodeError
        ErrorFile.write( "Cannot read comment\n" )

    #--- OPTIONAL FIELDS FOLLOW
    #--- The problem here is the non-standard nature of the PaleoGIS files.
    #--- The on/off switch from PaleoGIS is passed on unchanged: 1 is inactive
    #--- (True), 0 is active (False).
    try:
        rotationInactive = fieldValue( 'inactive' )
    except Exception:
        print( "Cannot read rotation inactive" )
        rotationInactive = False

    #--- The source is read from 'reference' or, failing that, from 'source'.
    #--- If neither can be read, the source stays empty.
    rotationSource = ''
    for sourceFieldName in ( 'reference', 'source' ):
        try:
            rotationSource = str( fieldValue( sourceFieldName ) ).strip()
        except Exception:
            continue
        break

    return PlateID1, Age, Lat, Lon, Angle, PlateID2, LineComment, rotationSource, rotationInactive

def writeDBF( dbfFieldNameList, dbfRecord, ErrorFile ):
    """Placeholder for writing a DBF file from an existing rot or grot file.

    Nothing is written yet: the function only sets the precision of the
    current decimal context to 6 and returns None. The arguments are not used.

    Planned behaviour (from the original notes): commented-out sequences are
    to be disabled through the 'inactive' column, with the column layout

        ID plateid age lat lon angle inactive ref_plate comment

    This does not work so far because an ID field cannot be written when
    importing into Arc; arcPy may be an alternative.
    """
    #--- dbf field types and the Python data types they map to:
    #---   Character       unicode
    #---   Date            datetime.date
    #---   Logical         boolean
    #---   Memo            unicode (same as character)
    #---   Numeric         if N(x, 0) int; if N(x, 1+) float
    #---
    #--- Example table definition for the dbf module:
    #---   dbf.Table('temptable', 'name C(30); age N(3,0); birth D')
    decimalContext = getcontext()
    decimalContext.prec = 6
    return None

# class commentProcessing(object):
#     """class holding subroutines for comment processing of rotation files"""
    # def __init__(self, arg):
        # super(commentProcessing, self).__init__()
        # self.arg = arg

def crunchComment( comment, rotage, VERBOSE ):
    """Extract metadata attributes from the comment of one rotation line.

    Three kinds of comments are distinguished:

    1. GROT syntax: the comment contains '@'. It is split on '@' and every
       piece is checked for a known tag (REF, DOI, AU, C, CHRONID,
       T/DATE/TIME/TIMESTAMP, XOVER/XO_*, ABSAGE, FITRECON, GTS), a leading
       plate pair such as "NAM-AFR", or a free-form citation.
    2. PaleoGIS comments starting with "Pole created with ", "Pole created
       in ", "Plate created ", "Pole modified by " or "Pole modeled by ";
       author and time stamp are extracted when they have the form
       "by First.Last at D/M/Y H:M:S AM|PM".
    3. Everything else: the text is searched for a plate pair (optionally
       followed by the plate names), a single plate acronym and cross-over
       keywords.

    The text left over is finally searched for a chron name and a citation.

    Args:
        comment: The comment string of the rotation line.
        rotage: Not used by this function.
        VERBOSE: If true, additional progress messages are printed.

    Returns:
        dict of the attributes found. The keys 'AU', 'TIMESTAMP', 'COMMENT',
        'GTS' (default '') and 'MPRS:name', 'MPRS:code', 'FPID:code'
        (default 'FIXME') are always present. Depending on the comment the
        dict may also hold 'REF' and 'DOI' (lists), 'AU' (list), 'CHRONID',
        'CHRON', 'XOVER', 'XOVER_TY', 'ABSAGE', 'FITRECON', 'FPID:name' and
        'C' (unclassified text of a GROT comment).
    """

    print("> Processing comment data:\n  ",  comment )

    #--- Treat tabs as ordinary blanks, so the blank-based splitting below
    #--- also works for tab separated comments.
    if isinstance( comment, str ):
        comment = comment.replace( "\t", " " )

    dictOfGrotAttribs = {}

    #--- Useful regexp's which will be utilised later
    #--- These are largely for GROT syntax metadata.
    auRegExp = re.compile(r'AU\b|AU\s', flags=re.I)
    cRegexp = re.compile(r'C\b', flags=re.I)
    chronRegexp = re.compile(r'CHRONID\b', flags=re.I)
    timestpRegexp = re.compile(r"T\b|DATE\b|TIME\b|TIMESTAMP\b", flags=re.I)
    gtsRegExp = re.compile(r'GTS\b|GTS\s', flags=re.I)
    doiRegexp = re.compile( r'10.\d{4,9}/[-._;()/:A-Z0-9]+' , flags = re.I )
    #--- Should match "LastName et al. ... 2002" or "Name1 & Name2 (2002)"
    citeRegexp = re.compile( r'\w+\s(et)\s(al.)\s.+\(?[0-9]{2,}\)?|\w+\s&\s\w+.*\(?[0-9]{2,}\)?', flags = re.I )
    #--- Plate pair at the start of a string, e.g. NAM-AFR or 101-201
    platePairRegexp = re.compile( r"([A-Z0-9]{2,}|[0-9]{3})-([A-Z0-9]{2,}|[0-9]{3})\b" )

    def prependToList( key, values ):
        """Put new values in front of the list stored under key (no duplicates)."""
        if not values:
            return
        existing = dictOfGrotAttribs.get( key, [] )
        fresh = []
        for value in values:
            if value not in existing and value not in fresh:
                fresh.append( value )
        dictOfGrotAttribs[ key ] = fresh + existing

    def tagValue( tagRegexp, text ):
        """Return the text following the tag at the start of text."""
        #--- Only split once: the tag pattern may occur again inside the value.
        return tagRegexp.split( text, maxsplit=1 )[1]

    #--- Main comment processing follows.
    #--- First, scan for GROT syntax in comment string. In the case we find
    #--- an '@' we assume that there's at least a few attribute:value pairs
    #--- in there.
    if "@" in comment:

        commentList = comment.split('@')
        print("> GROT processing:",  commentList )

        #--- Loop over the list of comments, split by '@'. The pieces are
        #--- taken from the end of the list. For tags holding a single value
        #--- the occurrence closest to the start of the comment therefore
        #--- wins; REF and DOI entries are collected in the order of the text.
        while commentList:

            comment = commentList.pop( ).strip()

            if VERBOSE:
                print( "   > Current comment data: ", comment )

            if comment.startswith( "REF" ):
                print("    - REF: ", comment)

                #--- Remove the tag itself (not a set of characters, as
                #--- lstrip would) and the quotes around the value.
                comment = comment[ len( "REF" ): ].strip().strip('"').strip()
                print("    stripped: ", comment)

                #--- Several reference keys can be separated by ';'
                refList = [ theRef.strip() for theRef in comment.split(";") if theRef.strip() ]
                prependToList( 'REF', refList )

            #--- If string starts with DOI tag in GROT case
            elif comment.startswith( "DOI" ):
                print("     - DOI string detected")
                #--- Match strict DOI the hard way - no additions
                prependToList( 'DOI', re.findall( doiRegexp, comment ) )

            elif re.match( auRegExp, comment ):
                rawAuthors = tagValue( auRegExp, comment ).strip().strip('"').strip(',').strip()
                #--- Authors are separated by '&'; surrounding blanks are dropped
                authors = [ author.strip() for author in rawAuthors.split('&') if author.strip() ]
                if authors:
                    dictOfGrotAttribs['AU'] = authors

            elif re.match( cRegexp, comment ):
                #--- Drop the tag and the opening quote only, so a comment
                #--- text starting with 'C' stays intact.
                dictOfGrotAttribs['COMMENT'] = re.sub( r'^C[\s"]*', '', comment, count=1, flags=re.I ).rstrip('"').strip()

            elif re.match( chronRegexp, comment ):
                chronID = tagValue( chronRegexp, comment ).strip().strip('"').strip()
                print("    - CHRON:", chronID )
                dictOfGrotAttribs['CHRONID'] = chronID

            elif re.match( timestpRegexp, comment ):
                timeParts = timestpRegexp.split( comment, maxsplit=1 )
                print("    - TIME --", comment, timeParts )
                dictOfGrotAttribs["TIMESTAMP"] = timeParts[1].strip().strip('"')

            elif re.match(r"XOVER\b|XO|XO_[A-Z]+\b", comment, flags=re.I):

                #--- pyGPlates crossover sync produces a few options -
                #--- https://www.gplates.org/docs/pygplates/generated/pygplates.synchronise_crossovers.html?highlight=crossover#pygplates.synchronise_crossovers
                #--- @xo_ig : CrossoverType.ignore
                #--- @xo_ys : CrossoverType.synch_old_crossover_and_stages
                #--- @xo_yf : CrossoverType.synch_old_crossover_only
                #--- @xo_os : CrossoverType.synch_young_crossover_and_stages
                #--- @xo_of : CrossoverType.synch_young_crossover_only
                #--- Anything else is stored as "un".
                dictOfGrotAttribs["XOVER"] = True

                for xoverType in ( "ig", "ys", "yf", "os", "of" ):
                    if re.match( "XO_" + xoverType.upper() + r"+\b", comment, flags=re.I ):
                        dictOfGrotAttribs["XOVER_TY"] = xoverType
                        break
                else:
                    dictOfGrotAttribs["XOVER_TY"] = "un"

            elif re.match(r'ABSAGE\b', comment, flags=re.I):
                dictOfGrotAttribs["ABSAGE"] = True

            elif re.match(r'FITRECON\b', comment, flags=re.I):
                dictOfGrotAttribs["FITRECON"] = True

            elif re.match( gtsRegExp, comment):
                dictOfGrotAttribs["GTS"] = tagValue( gtsRegExp, comment ).strip().strip('"')

            #--- CHECK FOR PLATE PAIR
            elif re.match( platePairRegexp, comment ):

                print("    - PLATE PAIR DETECTED: ", comment)
                comment = comment.strip().strip(',').strip('"')

                print("    > Comment: ", comment)

                #--- The stripping above only touches the end of the string,
                #--- so the pair still matches. Taking the acronyms from the
                #--- match cannot fail, unlike splitting on ' ' and '-'.
                pairMatch = re.match( platePairRegexp, comment )
                movPlateAcronym, fixPlateAcronym = pairMatch.groups()
                trailingComment = comment[ pairMatch.end(): ].strip()

                if not trailingComment:
                    #--- Strict match - only the plate pair
                    if VERBOSE:
                        print("    - Strict match:", movPlateAcronym, fixPlateAcronym)
                else:
                    if VERBOSE:
                        print("    - PP + extra: ", pairMatch.group(0), trailingComment)
                        print("   - Splitting trailing comment string")
                    try:
                        #--- Expect "moving plate name-fixed plate name"
                        movPlateName, fixPlateName = trailingComment.split('-')
                    except ValueError:
                        #--- Not two names: keep the text as comment, unless
                        #--- an explicit @C comment was already found.
                        dictOfGrotAttribs.setdefault( 'COMMENT', trailingComment )
                    else:
                        dictOfGrotAttribs['MPRS:name'] = movPlateName.strip()
                        dictOfGrotAttribs['FPID:name'] = fixPlateName.strip()

                dictOfGrotAttribs['MPRS:code'] = movPlateAcronym.strip()
                dictOfGrotAttribs['FPID:code'] = fixPlateAcronym.strip()

            #--- Process comments outside of @REF tags
            elif re.search( citeRegexp, comment ):

                m = re.search( citeRegexp, comment )
                print("et al", m)
                prependToList( 'REF', [ comment[ m.start():m.end() ] ] )
                print( comment )

            else:
                print("    THIS IS LEFT -->", comment )
                dictOfGrotAttribs['C'] = comment.strip()

    # TODO: reference section at end of file: there is no need to have @ref and @doi in one line, in fact this is confusing when using multiple references.

    #--------------------------------------------
    #--- PROCESS COMMENTS WITH NO "@" sign

    #--- If the file has been written by PaleoGIS we find comments like these;
    #--- "Pole created with PaleoGIS by First.Last at DD/MM/YYYY HH:MM:SS [A/P]M"
    #--- or "Pole modeled by First.Last at DD/MM/YYYY"
    #--- TODO: add regexp for achieving match for full string  'Pole created with [PaleoGIS|MSAccess] by .+ at'
    #--- TODO: if somewhere in the sequence a plate pair is found, make sure to keep that info and pass it on.
    elif comment.startswith( ( "Pole created with ",
                               "Pole created in ",
                               "Plate created ",
                               "Pole modified by ",
                               "Pole modeled by " ) ):

        print("-> processing PaleoGIS comment")
        try:
            #--- Both unpackings raise ValueError if the comment does not
            #--- have the expected form. The comment itself is only changed
            #--- once both have succeeded, so nothing is lost on failure.
            ( commentHead, createInfo ) = comment.split(" by ")
            junk, commentAuthor, commentDateTime, commentTail = re.split(r'(\w+\.\w+)\sat\s(\d{1,}\/\d{1,}\/\d{1,}\s\d{1,2}:\d{1,2}:\d{1,2}\s[A|P]M)', createInfo)
        except ValueError:
            dictOfGrotAttribs['COMMENT'] = comment.strip().strip('"').strip()
            print("Cannot determine date or author. Keep comment as is:", comment)
        else:
            print(junk, commentAuthor, commentDateTime, commentTail)
            dictOfGrotAttribs['AU'] = [ commentAuthor ]
            #--- Stored as plain string, as for the @TIMESTAMP tag above
            dictOfGrotAttribs['TIMESTAMP'] = commentDateTime
            comment = ( commentHead + commentTail ).strip()
            dictOfGrotAttribs['COMMENT'] = comment

    #--- Process the remaining comment string
    else:
        if VERBOSE:
            print( "> Processing non-GROT syntax:", comment)

        #--- We only encounter a plate pair entry - strict match
        strictPair = platePairRegexp.fullmatch( comment.strip() )
        if strictPair:
            movPlateAcronym, fixPlateAcronym = strictPair.groups()
            if VERBOSE:
                print( "Comment:", comment, "M plate:", movPlateAcronym, "FPlate:", fixPlateAcronym )
            dictOfGrotAttribs['MPRS:code'] = movPlateAcronym
            dictOfGrotAttribs['FPID:code'] = fixPlateAcronym
        elif VERBOSE:
            print("    - No plate pair with acronyms")

        #--- Plate pair followed by more text. The unpacking only works if the
        #--- pattern occurs exactly once; text in front of the pair is dropped.
        try:
            ( movPlateAcronym, fixPlateAcronym, remainder ) = re.split(r"([A-Z]{2,}|[A-Z0-9]{3,})-([A-Z]{2,}|[A-Z0-9]{3})\b", comment )[1:]
        except ValueError:
            print("    - No luck, will try simpler things")
        else:
            comment = remainder
            print( "    - Trying more complete ", comment )
            dictOfGrotAttribs['MPRS:code'] = movPlateAcronym
            dictOfGrotAttribs['FPID:code'] = fixPlateAcronym

            #--- The rest is expected to hold the two plate names
            try:
                ( movPlateName, fixPlateName ) = re.split(r'fixed\sto|\s-\s|-', comment, flags=re.I)
            except ValueError:
                print("    - Cannot determine plate names")
            else:
                dictOfGrotAttribs['MPRS:name'] = movPlateName.strip()
                dictOfGrotAttribs['FPID:name'] = fixPlateName.strip()

        #--- Case where there is only one plate acronym -
        #--- Some of the PaleoMap rot files have PPP PlateName.
        #--- Only tried if no plate pair was found, so that a moving plate
        #--- code found above is not overwritten by some later upper-case word.
        if 'MPRS:code' not in dictOfGrotAttribs:
            try:
                _, movPlateAcronym, remainder = re.split(r'([A-Z]{3,})\b', comment.strip(), maxsplit=1 )
            except ValueError:
                print("    - Struggling with this one:", comment)
            else:
                comment = remainder
                dictOfGrotAttribs['MPRS:code'] = movPlateAcronym.strip()

        #---- Simple tests - trying to find keywords in comment string
        XOs = [ "cross-over", 'crossover', 'cross over' ]
        if any( x in comment for x in XOs):
            dictOfGrotAttribs["XOVER"] = True

    #--- Test for chron information which is not encoded in @CHRONID tags
    chronMatch = re.search(r'(AN\s)?[c|m]([1-9])+?[n|r|y|o]{1,}[0-9nyor]+?\b', comment, flags=re.I)
    if chronMatch:
        dictOfGrotAttribs['CHRON'] = chronMatch[0]
    else:
        print("    - No chron match found using simple regexp outside GROT syntax")

    #--- Test for stray references in the comment, using regexp.
    #--- Check for references not encoded with @REF
    m = re.search( citeRegexp, comment )
    if m:
        citation = comment[ m.start():m.end() ]
        #--- The same text may already have been picked up in the GROT loop
        if citation not in dictOfGrotAttribs.setdefault( 'REF', [] ):
            dictOfGrotAttribs['REF'].append( citation )
        comment = comment[:m.start()] + comment[m.end():]

    #--- As last resort, everything is just whacked into the comment string -
    #--- but a comment which was extracted above is not overwritten.
    elif 'COMMENT' not in dictOfGrotAttribs:
        dictOfGrotAttribs['COMMENT'] = comment.strip().strip('"').strip()

    #--- This ends the comment string processing.
    #--- In the case the key values are not filled, populate them with
    #--- defaults. Note that not all info might be populated - say in a
    #--- comment line of a MPRS there might not be an @AU string.
    if VERBOSE:
        print("    > Dict of comment attribs:\n     ", dictOfGrotAttribs)

    for key, default in ( ( 'AU', '' ),
                          ( 'TIMESTAMP', '' ),
                          ( 'MPRS:name', 'FIXME' ),
                          ( 'MPRS:code', 'FIXME' ),
                          ( 'FPID:code', 'FIXME' ),
                          ( 'COMMENT', '' ),
                          ( 'GTS', '' ) ):
        dictOfGrotAttribs.setdefault( key, default )

    print("Dict of comment attribs:", dictOfGrotAttribs )

    return dictOfGrotAttribs

def format_mprs_header( outFF, movPlateID, movPlateCode, movPlateName, fixedPlate ):
    """Build the header of a moving plate rotation sequence (MPRS).

    For outFF == "GROT" the header consists of two lines starting with '>',
    for example:

        > @MPRS:pid"101" @MPRS:code"NAM" @MPRS:name"North America"
        > @PP"NAM-NWA" @C"Sequence generated by rotconv.py"

    For every other output format (including "PGIS") a single
    "999 0.0 0.0 0.0 0.0 999 ! ..." line holding the same attributes is
    produced. The returned string ends with a newline.
    """
    # FIXME: Move into new MPRS output class

    grotTemplate = '> @MPRS:pid"{0}" @MPRS:code"{1}" @MPRS:name"{2}"\n> @PP"{1}-{3}" @C"Sequence generated by rotconv.py"\n'
    rotTemplate = '999 0.0 0.0 0.0 0.0 999 ! @MPRS:pid"{0}"  @MPRS:code"{1}" @MPRS:name"{2}" @PP"{1}-{3}" @C"Generated by rotconv.py"\n'

    #--- "PGIS" and all unknown formats share the 999-line layout
    headerTemplate = grotTemplate if outFF == "GROT" else rotTemplate

    return headerTemplate.format( movPlateID, movPlateCode, movPlateName, fixedPlate )

def format_grot_mprs_comment( commentDictionary ):
    """Return the metadata of a stage rotation as a string in GROT syntax.

    Args:
        commentDictionary: Dictionary with the stage rotation metadata.
            The keys 'COMMENT', 'AU', 'TIMESTAMP', 'DOI', 'REF', 'ABSAGE',
            'XOVER', 'XOVER_TY', 'CHRONID', 'FITRECON' and 'GTS' are
            evaluated; all of them are optional. Text entries may be given
            as a string or as a list/tuple of strings.

    Returns:
        The attributes which are set, joined by single blanks in the order
        @C, @AU, @TIMESTAMP, @DOI or @REF, @ABSAGE, @XO_<type>, @CHRONID,
        @FIT, @GTS. An empty string if nothing is set.
    """

    def textOf( key, separator ):
        """Return the entry for key as one string, '' if missing or empty."""
        try:
            value = commentDictionary[ key ]
        except KeyError:
            return ''
        if not value:
            return ''
        if isinstance( value, str ):
            #--- A plain string must not be joined character by character
            return value
        if isinstance( value, ( list, tuple ) ):
            return separator.join( str( item ) for item in value )
        return str( value )

    def flagOf( key ):
        """Return the entry for key, False if it is missing."""
        try:
            return commentDictionary[ key ]
        except KeyError:
            return False

    commentData = [] # TODO: change to format syntax

    #--- Process comment dictionary
    comment = textOf( 'COMMENT', ' ' )
    if comment:
        commentData.append( '@C"%s"' % comment )

    #--- Several authors are separated by '|'
    author = textOf( 'AU', '|' )
    if author:
        commentData.append( '@AU"%s"' % author )

    timestamp = textOf( 'TIMESTAMP', ' ' )
    if timestamp:
        commentData.append( '@TIMESTAMP"%s"' % timestamp )

    doi = textOf( 'DOI', ' ' )
    refkey = textOf( 'REF', '|' )

    #--- If both a reference key and a DOI are given, only the reference key
    #--- is written. NOTE(review): presumably because the DOI is listed with
    #--- the reference in the bibliography section - confirm.
    if doi and not refkey:
        commentData.append( '@DOI"%s"' % doi )
    if refkey:
        commentData.append( '@REF"%s"' % refkey )

    if flagOf( 'ABSAGE' ):
        commentData.append( '@ABSAGE' )

    #--- Cross-over: a cross-over without type is written as XO_UN
    if flagOf( 'XOVER' ):
        xoverty = flagOf( 'XOVER_TY' )
        if xoverty:
            commentData.append( '@XO' + '_' + xoverty )
        else:
            commentData.append( '@XO' + '_' + 'UN' )

    chronid = textOf( 'CHRONID', '|' )
    if chronid:
        commentData.append( '@CHRONID"%s"' % chronid )

    if flagOf( 'FITRECON' ):
        commentData.append( '@FIT' )

    gts = textOf( 'GTS', ' ' )
    if gts:
        commentData.append( '@GTS"%s"' % gts )

    if not commentData:
        print("   - No comment data")
        return ''

    return " ".join( commentData )

def create_grot_bibinfo_section( bibliographyDictionary ):
    """Return the bibliography section for the end of the output file.

    The section starts with a ruler line and the "@BIBINFO:references" tag,
    followed by one '#'-prefixed line per entry of bibliographyDictionary.
    The dictionary key is written as @REF and its value as @DOI. Values are
    prefixed with https://doi.org/ unless the value is the placeholder
    'FIXME'.
    """
    doiLineTemplate = '# @REF {:<35} @DOI https://doi.org/{:<35}\n'
    fixmeLineTemplate = '# @REF {:<35} @DOI {:<35}\n'

    sectionLines = [ "#" + "-"*79 + "\n",
                     "# @BIBINFO:references \n" ]

    for refKey in bibliographyDictionary:
        doi = bibliographyDictionary[ refKey ]
        lineTemplate = doiLineTemplate if doi != 'FIXME' else fixmeLineTemplate
        sectionLines.append( lineTemplate.format( refKey, doi ) )

    return "".join( sectionLines )

class outputReporting( ):
    """Helpers producing the generated parts of an output rotation file.

    The methods do not use any instance data. They are static methods, so
    they can be called on the class as well as on an instance.
    """
    def __init__(self, arg):
        super().__init__()
        #--- Only stored; not read by any method of this class
        self.arg = arg

    @staticmethod
    def makeHeader( inFileName, OUTFF ):
        """Generate the file header sequence.

        Args:
            inFileName: Currently ignored - the title is always
                "GPlates rotation file".
                NOTE(review): the disabled "if not inFileName" check suggests
                the name was meant to be used as title - confirm.
            OUTFF: Output file format. "GROT" gives the plain GROT header
                with creator, creation/modification time and temporal
                coverage filled in. Every other value gives the fixed
                header made of "999 0.0 0.0 0.0 0.0 999 !..." lines.

        Returns:
            The header as a string, ending with a newline.
        """

        #--- TODO CLI input here - choice for defaults or let user specify on cli.
        inFileName = "GPlates rotation file"
        creator = "Created by rotconv.py"
        creatorEmail = ""
        creatorURL = ""
        creatorAffil = ""
        rightsLic = ""
        rightsURL = ""
        coverTemp  = "0-600 Ma"

        #--- Take the time once, so that both dates are identical
        timeStamp = datetime.now().isoformat()
        dateCr = timeStamp
        dateMod = timeStamp

        if OUTFF == "GROT":
            header = """@GPLATESROTATIONFILE:version"1.0"
@GPML:namespace"http://www.earthbyte.org/Resources/GPGIM/public/"
@DC:namespace"http://purl.org/dc/elements/1.1/"
@DC:title"{0}"
@DC:creator:name"{1}"
@DC:creator:email"{2}"
@DC:creator:url"{3}"
@DC:creator:affiliation"{4}"
@DC:rights:license"{5}"
@DC:rights:url"{6}"
@DC:date:created"{7}"
@DC:date:modified"{8}"
@DC:coverage:temporal"{9}"
@DC:comment"File generated by rotconv.py"
#==============================================================================
""".format( inFileName, creator, creatorEmail, creatorURL, creatorAffil, rightsLic, rightsURL, dateCr, dateMod, coverTemp )

        else:
            #--- This header has no placeholders: it is written as is.
            header = """999 0.0 0.0 0.0 0.0 999 !@GPLATESROTATIONFILE:version"1.0"
999 0.0 0.0 0.0 0.0 999 !@GPML:namespace"http://www.earthbyte.org/Resources/GPGIM/public/"
999 0.0 0.0 0.0 0.0 999 !@DC:namespace"http://purl.org/dc/elements/1.1/"
999 0.0 0.0 0.0 0.0 999 !@DC:title" GPlates rotation file"
999 0.0 0.0 0.0 0.0 999 !@DC:creator:name""
999 0.0 0.0 0.0 0.0 999 !@DC:creator:email""
999 0.0 0.0 0.0 0.0 999 !@DC:creator:url"http://www.gplates.org"
999 0.0 0.0 0.0 0.0 999 !@DC:creator:affiliation""
999 0.0 0.0 0.0 0.0 999 !@DC:rights:license""
999 0.0 0.0 0.0 0.0 999 !@DC:rights:url""
999 0.0 0.0 0.0 0.0 999 !@DC:date:created""
999 0.0 0.0 0.0 0.0 999 !@DC:date:modified""
999 0.0 0.0 0.0 0.0 999 !@DC:coverage:temporal""
999 0.0 0.0 0.0 0.0 999 !@DC:comment"File generated by rotconv.py"
999 0.0 0.0 0.0 0.0 999 !=======================================================
"""

        return header

    @staticmethod
    def plateStats( ):
        """
        Placeholder - not implemented yet, returns None.

        Intended to return statistics for a global rotation file:
        number of plates, number of MPRSs, number of crossovers.
        """
        pass

#--- Keywords used in GROT syntax, these can be mixed case in the input file
# grotAtributesDict = {
#         "DOI": "Digital object identifier",      # seems reasonably consistently used.
#         "ABSAGE": "absolute age" ,               # sometimes embedded in phrase
#         "AU": "Author",                          # can occur multiple times, but often has & as marker instead. Builds AU database in header.
#         "COMMENT": "Comment",                          # free form comment
#         "REF": "Reference",                      # This is the bibtex equivalent of a citation key
#         "TIME": "Modification timestamp",           # should be either isoD (YYYY-MM-DD) or
#         "CHRONID": "Magnetic polarity chron ID", # Should be in the list of chrons -> also needs to be generated.
#         "XO_YS": "Cross over",                   # I guess there's also a few things which can be done with this.
#         "XO_YF": "Cross over",                   # I guess there's also a few things which can be done with this.
#         "XO_SF": "Cross over",
#         "XOVER": "Cross over",                     # Cross over in mprs
#         "GTS": "Geological Time Scale"
#     }
#TODO: make sure that the names used in the grotAttr dict are consistent - can this be turned into a template which is later filled in the crunching?

def detect_xover( stageRotList, mPID ):
    """Detect cross overs in a sequence of stage rotations and mark them.

    Iterates over adjacent pairs of stageRotList in sequence. A cross over is
    detected where the next rotation has a different fPID (element 4) but the
    same age (element 0) as the current one. In case of a cross over not yet
    marked, the metadata dictionary (element 5) of the current rotation is
    corrected in place: "XOVER" is set to True and "XOVER_TY" to ''.

    Only the first rotation of such a pair is marked, i.e. the @XO_ tag is
    only added to the younger end of the cross over. A metadata dictionary
    which already holds a false "XOVER" value is left untouched.

    Args:
        stageRotList: List of stage rotations, modified in place.
        mPID: Not used by this function at present.

    Returns:
        None.
    """

    #--- Pair every rotation with its successor. The last rotation has no
    #--- successor and is hence never compared, and duplicate rows are matched
    #--- by position rather than by value.
    for currentRot, nextRot in zip( stageRotList, stageRotList[1:] ):

        currentFPID = currentRot[4]
        currentAge = currentRot[0]
        nextFPID = nextRot[4]
        nextAge = nextRot[0]

        if not ( currentFPID != nextFPID and currentAge == nextAge ):
            continue

        print("Detected xover")
        print(currentRot)
        print(currentFPID, nextFPID, currentAge, nextAge)

        metadata = currentRot[5]

        try:
            #--- Check whether this is already captured in metadata
            alreadyCaptured = metadata["XOVER"]
        except KeyError:
            #--- modify stagerotlist accordingly
            metadata["XOVER"] = True
            metadata["XOVER_TY"] = ''
            print( currentRot, nextRot )
        else:
            if alreadyCaptured:
                print("XOVER already captured in metdata")
                metadata["XOVER"] = True
                metadata["XOVER_TY"] = ''

    #--- The final rotation is skipped silently apart from this notice.
    if stageRotList:
        print("Last rot")


def paleogis_adjust_crossover_age( theAge ):
    """Adjust stage rotation cross overs for import into PaleoGIS.

    Args:
        theAge: Age of the younger end of the cross over; must support
            subtraction of a Decimal (e.g. a Decimal or an int).

    Returns:
        theAge reduced by exactly 0.001 (Myr).
    """

    #--- Output option PaleoGIS/PaleoPRO requires adjusting the crossovers
    #--- These applications cannot deal with stage rotations having the same ages
    #--- in a precise way. The most robust method is to slightly alter the
    #--- younger crossover age by 0.001 Myrs (1kyr)

    print("> !! Cross over detected and output format is PGIS - younger end will be adjusted by 0.001 Myr")

    #--- TODO: currently we only test for xover in dictionary, assuming that if it is there, it is true.
    #--- this behaviour might change in the future.

    #--- The offset is built from a string: Decimal(0.001) would convert the
    #--- binary float exactly and leak its representation error into the age
    #--- (e.g. 19.99899999999999999997918332 instead of 19.999).
    adjustedAge = theAge - Decimal("0.001")
    print( "> !! Initial xover age: ", theAge, "now adjusted to: ", adjustedAge )

    return adjustedAge

# EOF