#!/usr/bin/python
"""fromIcal.py -- interpret iCalendar data as RDF

USAGE:
  python fromIcal.py [options] foo.ics > foo.rdf

  options:
    --base uri
    --noprotocol   Supress SEQUENCE and DTSTAMP
    --noalarm      Supress VALARMs
    --notimezone   omit VTIMEZONE components
    --x            include X- properties
    --help         print this text and exit

REFERENCES

  Internet Calendaring and Scheduling Core Object Specification
                              (iCalendar)
  November 1998
  http://www.ietf.org/rfc/rfc2445.txt

  http://www.w3.org/2002/12/cal/rfc2445
  http://www.w3.org/2002/12/cal/rfc2445.html

NOTE: We don't claim to implement the whole spec, nor to even
have read all of it. We're taking a data-driven, test-driven approach
to RFC2445 coverage/conformance. We start with a .ics
file that we understand (because it came from a tool that
acts as we expect in response to it or some such) and
we implement the parts of the spec necessary to grok the data in that file.
As we work on more test files, we cover (and carefully read)
more parts of the spec.

  Building an RDF model: A quick look at iCalendar
  http://www.w3.org/2000/01/foo
  TimBL 2000/10/02

  Python Style Guide
  Author: Guido van Rossum
  http://www.python.org/doc/essays/styleguide.html

TODO
 - RDF API in place of SAX?
 - rename fromIcal.py?

LICENSE

RDF Calendar Workspace: http://www.w3.org/2002/12/cal/

Copyright 2002-2003 World Wide Web Consortium, (Massachusetts
Institute of Technology, European Research Consortium for
Informatics and Mathematics, Keio University). All Rights
Reserved. This work is distributed under the W3C(R) Software License
  http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.
"""

__version__ = "$Id: fromIcal.py,v 2.37 2023/12/31 16:23:15 timbl Exp $"

from warnings import warn
import codecs, quopri

import XMLWriter
from icslex import unbreak, parseLine, unesc, recurlex

# Counter behind serial(); bumped once per generated suffix.
nextid = 0


def serial():
    """Return a fresh suffix of the form "_<n>".

    We have to add unique IDs to UIDs, as they are repeated for
    instances of a recurring thing.
    """
    global nextid
    nextid += 1
    return "_" + str(nextid)


def main():
    """Command-line entry point.

    Consumes the leading options from sys.argv (see the module
    docstring), then converts the file named by the first remaining
    argument, writing RDF/XML to standard output.

    Exits with a message on standard error when --base lacks its value
    or when no input file is named.
    """
    import sys

    sx = XMLWriter.T(sys.stdout)
    # sx = XMLWriter.T(codecs.getwriter('utf-8')(sys.stdout))

    base = None
    suppressed = ['X-']

    while len(sys.argv) > 1:
        opt = sys.argv[1]
        if opt == '--base':
            if len(sys.argv) < 3:
                sys.exit("fromIcal.py: --base requires a uri argument")
            base = sys.argv[2]
            del sys.argv[1:3]
        elif opt == '--noprotocol':
            suppressed = suppressed + ['SEQUENCE', 'DTSTAMP']
            del sys.argv[1:2]
        elif opt == '--x':
            # Stop suppressing X- properties.  The option must be
            # consumed here: leaving it in sys.argv made this loop spin,
            # deleting suppressed[0] until the list ran out.
            suppressed = [s for s in suppressed if s != 'X-']
            del sys.argv[1:2]
        elif opt == '--noalarm':
            suppressed = suppressed + ['Valarm']
            del sys.argv[1:2]
        elif opt == '--notimezone':
            suppressed = suppressed + ['Vtimezone']
            del sys.argv[1:2]
        elif opt == '--help':
            print(__doc__)
            return
        else:
            break

    if len(sys.argv) < 2:
        sys.exit("usage: python fromIcal.py [options] foo.ics > foo.rdf")

    # Close the input once the conversion is done.
    with codecs.open(sys.argv[1], 'r', 'utf-8') as fp:
        interpret(sx, fp, base, suppressed)


class Namespace:
    """A namespace URI plus the local names that may be read as attributes.

    Namespace(uri, names).foo yields uri + 'foo' when 'foo' is one of
    names; sym() does the same concatenation for any local name.
    """

    def __init__(self, nsURI, names=()):
        self._n = nsURI
        self._names = names

    def bindAttr(self, pfx, attrs):
        """Add the xmlns declaration for this namespace to attrs.

        An empty pfx declares the default namespace.
        """
        if pfx:
            attrs['xmlns:%s' % pfx] = self._n
        else:
            attrs['xmlns'] = self._n

    def __getattr__(self, lname):
        # Read _names through __dict__: if the instance has not been
        # initialised, self._names would re-enter __getattr__ and
        # recurse without bound.
        if lname in self.__dict__.get('_names', ()):
            return self._n + lname
        raise AttributeError(lname)

    def sym(self, lname):
        """Return the full URI for lname, declared or not."""
        return self._n + lname


RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
iCalendar = Namespace('http://www.w3.org/2002/12/cal/icaltzd#',
                      ('dateTime', ))
XMLSchema = Namespace('http://www.w3.org/2001/XMLSchema#',
                      ('dateTime', 'date', 'double',
                       'integer', 'duration'))


def interpret(sx, fp, base=None, suppressed=()):
    """Read iCalendar text from fp and write it to sx as one rdf:RDF element.

    sx         -- XML writer; startElement/endElement are called on it
    fp         -- source of iCalendar lines, handed to unbreak()
    base       -- optional value for xml:base on the root element
    suppressed -- names to leave out, passed through to doComponents

    Raises SyntaxError when the input is empty or the value of its first
    content line is not VCALENDAR.
    """
    lines = unbreak(fp)
    try:
        first = next(lines)
    except StopIteration:
        # Report empty input the same way as any other malformed start.
        raise SyntaxError('Expected CALENDAR but found no data')
    n, p, v = parseLine(first)
    if v != 'VCALENDAR':
        raise SyntaxError('Expected CALENDAR but found: %s' % (v))

    calendars = []
    findComponents(lines, v, calendars)

    attrs = {}
    RDF.bindAttr('rdf', attrs)
    iCalendar.bindAttr('', attrs)
    if base:
        attrs['xml:base'] = base

    sx.startElement('rdf:RDF', attrs)
    doComponents(sx, calendars, iCalendarDefs, suppressed=suppressed)
    sx.endElement('rdf:RDF')


#########
#
# Property Declarations:
#   ICALTOKEN -> ('rdfname', 'DEFAULT-TYPE', minCardinality, maxCardinality)
#
# In theory, these could be derived from the rfc2445-formal schema, but
# they're copied by hand, so far. Reading from rfc2445-formal rather
# than from rfc2445.txt ensures that we have the relevant information
# formalized in machine-readable form, but also provides careful
# review where a mechanized translation might mask some bugs.
#
# The default types are keyed in on-demand as we encounter
# the properties in test data. Those that we haven't tested
# produce a RuntimeError, telling us to add the default type
# (and a test case for it!).
#
# Mappings to rdfname and DEFAULT-TYPE could be flattened,
# but mappings to cardinalities depend on the containing component.
# Cardinalities aren't used yet. (oops; forgot YouArentGonnaNeedIt)
#
#
# We represent symbolic values as strings, i.e.
#   :transp "OPAQUE";
# where the calendar test suite currently uses URIs, i.e.
#   :transp :opaque;
# Relevant code is marked with @@symbol.
#
# See also "How should I implement controlled vocabularies?"
# in http://esw.w3.org/topic/PropertiesForNaming # as of 2003-04-18 16:10:57 # tzprop = { "DTSTART": ('dtstart', 'DATE-TIME', 1, 1), "TZOFFSETTO": ('tzoffsetto', 'TEXT', 1, 1), "TZOFFSETFROM": ('tzoffsetfrom', 'TEXT', 1, 1), "COMMENT": ('comment', 'TEXT', 0, None), "RDATE": ('rdate', 'DATE-TIME', 0, None), "RRULE": ('rrule', 'RECUR', 0, None), 'EXDATE': ('exdate', 'DATE-TIME', None, None), 'RECURRENCE-ID': ('recurrenceId', 'DATE-TIME', None, None), "TZNAME": ('tzname', 'TEXT', 0, None), } ValarmDefs = ('Valarm', {"ACTION": ('action', 'TEXT', 0, None), #@@symbol "ATTACH": ('attach', 'URI', 0, None), "ATTENDEE": ('attendee', 'CAL-ADDRESS', 0, None), "DESCRIPTION": ('description', 'TEXT', 0, 1), "DURATION": ('duration', 'DURATION', 0, None), "REPEAT": ('repeat', 0, None), "SUMMARY": ('summary', "TEXT", 0, None), "TRIGGER": ('trigger', "DURATION", 0, None), }, {}) iCalendarDefs = {'VCALENDAR': ('Vcalendar', {'CALSCALE': ('calscale', 'TEXT', 0, 1), 'METHOD': ('method', 'TEXT', 0, 1), 'VERSION': ('version', 'TEXT', 1, 1), 'PRODID': ('prodid', 'TEXT', 1, 1) }, {'VTIMEZONE': ('Vtimezone', {"TZID": ('tzid', 'TEXT', 1, 1), #hmm... fragid? 
"LAST-MODIFIED": ('lastModified', "DATE-TIME", 0, 1), "TZURL": ('tzurl', 'URI', 0, 1), }, {"STANDARD": ('standard', tzprop, {}), "DAYLIGHT": ('daylight', tzprop, {}) } ), 'VEVENT': ('Vevent', {"ATTACH": ('attach', 'URI', 0, None), "CATEGORIES": ('categories', "TEXT", 0, None), #@@list "SUMMARY": ('summary', "TEXT", 0, None), "DTEND": ('dtend', 'DATE-TIME', 0, None), "DTSTART": ('dtstart', 'DATE-TIME', 0, None), "DURATION": ('duration', 'DURATION', 0, None), "TRANSP": ('transp', 'TEXT', 0, None), #@@symbol "ATTENDEE": ('attendee', 'CAL-ADDRESS', 0, None), "CONTACT": ('contact', 0, None), "ORGANIZER": ('organizer', "CAL-ADDRESS", 0, None), "RELATED-TO": ('relatedTo', 'RELATIONSHIP', 0, None), # notes on rfc2445#sec4.8.4.6 Uniform Resource Locator # # This is very muddled modelling; url makes sense as # a value type, but not as a property name. It's a grab-bag # for concepts like foaf:homePage, dc:related (which # is another grab bag) etc. "URL": ('url', 'URI', 0, None), "UID": ('uid', "TEXT", 0, None), "EXRULE": ('exrule', 0, None), "CLASS": ('class', 'TEXT', 0, None), #@@symbol "RDATE": ('rdate', 'DATE-TIME', 0, None), "RRULE": ('rrule', 'RECUR', 0, None), 'EXDATE': ('exdate', 'DATE-TIME', None, None), 'RECURRENCE-ID': ('recurrenceId', 'DATE-TIME', None, None), "TRIGGER": ('trigger', "DURATION", 0, None), "CREATED": ('created', "DATE-TIME", 0, None), "DTSTAMP": ('dtstamp', 'DATE-TIME', 1, 1), "LAST-MODIFIED": ('lastModified', "DATE-TIME", 0, 1), "SEQUENCE": ('sequence', "INTEGER", 0, None), "REQUEST-STATUS": ('requestStatus', 0, None), "COMMENT": ('comment', 'TEXT', 0, None), "DESCRIPTION": ('description', "TEXT", 0, 1), 'GEO': ('geo', ('FLOAT',), None, None), "LOCATION": ('location', 'TEXT', 0, None), 'PRIORITY': ('priority', 'INTEGER', None, None), "RESOURCES": ('resources', 0, None), "STATUS": ('status', 'TEXT', 0, 1), }, {"VALARM": ValarmDefs } ), 'VTODO': ('Vtodo', {'ATTACH': ('attach', 'URI', None, None), 'ATTENDEE': ('attendee', 'CAL-ADDRESS', None, None), 
'CATEGORIES' : ('categories', 'TEXT', None, None), 'CLASS': ('class', 'TEXT', None, None), 'COMMENT': ('comment', 'TEXT', None, None), 'COMPLETED': ('completed', 'DATE-TIME', None, None), 'CONTACT': ('contact', 'TEXT', None, None), 'CREATED': ('created', 'DATE-TIME', None, None), 'DESCRIPTION': ('description', 'TEXT', None, None), 'DTSTAMP': ('dtstamp', 'DATE-TIME', None, None), 'DTSTART': ('dtstart', 'DATE-TIME', None, None), 'DUE': ('due', 'DATE-TIME', None, None), 'DURATION': ('duration', 'DURATION', None, None), 'EXRULE': ('exrule', 'RECUR', None, None), 'GEO': ('geo', '@@', None, None), 'LAST-MODIFIED': ('lastModified', 'DATE-TIME', None, None), 'LOCATION': ('location', 'TEXT', None, None), 'ORGANIZER': ('organizer', 'CAL-ADDRESS', None, None), 'PERCENT-COMPLETE': ('percentComplete', 'INTEGER', None, None), 'PRIORITY': ('priority', 'INTEGER', None, None), 'RDATE': ('rdate', 'DATE-TIME', None, None), 'RELATED-TO': ('relatedTo', 'TEXT', None, None), 'REQUEST-STATUS': ('requestStatus', 'TEXT', None, None), 'RESOURCES': ('resources', 'TEXT', None, None), 'RRULE': ('rrule', 'RECUR', None, None), 'EXDATE': ('exdate', 'DATE-TIME', None, None), 'RECURRENCE-ID': ('recurrenceId', 'DATE-TIME', None, None), 'SEQUENCE': ('sequence', 'INTEGER', None, None), 'STATUS': ('status', 'TEXT', None, None), 'SUMMARY': ('summary', 'TEXT', None, None), 'TRIGGER': ('trigger', 'DURATION', None, None), 'UID': ('uid', 'TEXT', None, None), 'URL': ('url', 'URI', None, None)}, {"VALARM": ValarmDefs }, ) #@@others } ) } def doComponents(sx, components, compDecls, stripe=None, suppressed =[]): """interpret components stripe says whether we need a element or a parseType="Resource" attribute to fix up the striping. or None, in which case we need to declare X- namespaces raises KeyError for unknown component name. 
@@test this """ for name, props, subs in components: elt, propDecls, subDecls = compDecls[name] if elt in suppressed: continue attrs = {} if stripe == 'component': sx.startElement('component', {}) elif stripe == 'Resource': attrs['rdf:parseType'] = stripe else: bindX(attrs, props, components) try: i = lookup(props, 'UID') if not('#' in i): i = "#" + i attrs['rdf:about'] = i + serial() except KeyError: try: tzid = lookup(props, 'TZID') attrs['rdf:about'] = timeZoneNameSpace(tzid) + 'tz' except KeyError: pass sx.startElement(elt, attrs) doProperties(sx, '', props, propDecls, suppressed = suppressed) if elt == 'Vtimezone': doComponents(sx, subs, subDecls, 'Resource', suppressed = suppressed) else: doComponents(sx, subs, subDecls, 'component', suppressed = suppressed) sx.endElement(elt) if stripe == 'component': sx.endElement('component') def bindX(attrs, props, components): """ bind x: namespace per prodid hmm... use vendorid as prefix? """ try: prodid = lookup(props, 'PRODID') #@@ call unesc to parse \, except KeyError: warn("RFC2445 requires a prodid. 
none found") return def findVendorids(results, components): for name, props, subs in components: for n, p, v in props: if n[:2] == "X-": vendorid, lname = n[2:].split('-', 1) results.append(vendorid.lower()) findVendorids(results, subs) vendorids = [] findVendorids(vendorids, components) puri = prodURI(prodid) for vid in vendorids: attrs['xmlns:x-' + vid] = puri def lookup(props, k): for n, p, v in props: if n == k: return v raise KeyError(k) def prodURI(prodid): r"""turn prodid into a URI cf discussion starting with x-properties and namespaces posted by DanC at 2003-02-26 17:17 (+) http://rdfig.xmlhack.com/2003/02/26/2003-02-26.html#1046279854.884486 and continuing thru RDF calendar agenda item C: prodid support to ical2rdf.pl posted by libby at 2003-07-09 14:59 (+) http://rdfig.xmlhack.com/2003/07/09/2003-07-09.html#1057762764.179078 >>> prodURI("-//Apple Computer\, Inc//iCal 1.0//EN") 'http://www.w3.org/2002/12/cal/prod/Apple_Comp_e68fdb4cdcb3a2e8#' """ # was 628d9d8459c556fa from hashlib import sha256 # sha = hashlib.sha256 if prodid[:3] == "-//": prodid = prodid[3:] prodid = prodid.replace(' ', '_').replace("//", "_") digest = sha256(prodid.lower().encode('utf-8')).hexdigest() return 'http://www.w3.org/2002/12/cal/prod/' + \ prodid[:10] + '_' + digest[:16] + '#' def doProperties(sx, pfx, props, schema, suppressed =[]): """write each property as XML raises KeyError for properties that are neither X- properties nor in the schema """ for n, params, val in props: if n in suppressed: continue if n[:2] == "X-": if 'X-' in suppressed: continue vendorid, lname = n[2:].split('-', 1) id = 'x-' + vendorid.lower() + ':' + camelCase(lname) vtype, minc, maxc = 'TEXT', 0, None else: try: try: id, vtype, minc, maxc = schema[n] # @@ except ValueError: raise RuntimeError("missing default type: %s -> %s" %\ (n, schema[n])) except KeyError: continue #@@ raise RuntimeError("Bad default type: %s" %\ (n)) elt = '%s%s' % (pfx, id) for pn, pv in params: if pn == 'VALUE': vtype = 
pv.upper() if vtype == 'TEXT': doText(sx, elt, params, val) elif vtype == 'INTEGER': doInteger(sx, elt, params, val) elif vtype == 'DURATION': doDuration(sx, elt, params, val) elif vtype == 'DATE-TIME': doDateTime(sx, elt, params, val) elif vtype == 'DATE': doDate(sx, elt, params, val) elif vtype == 'RECUR': doRecur(sx, elt, params, val) elif vtype == 'CAL-ADDRESS': doCalAddress(sx, elt, params, val) elif vtype == 'URI': doURI(sx, elt, params, val) elif vtype == ('FLOAT',): doListOfFLOAT(sx, elt, params, val) else: warn("@@value type %s not implemented (%s: %s)" % (vtype, n, val)) def doInteger(sx, elt, params, val): sx.startElement(elt, {'rdf:datatype': XMLSchema.integer}) sx.characters(val, 0, len(val)) for pn, pv in params: if pn=='VALUE': pass else: raise ValueError("unexpected integer param %s=%s" % (pn, pv)) sx.endElement(elt) def doURI(sx, elt, params, val): sx.startElement(elt, {'rdf:resource': val}) for pn, pv in params: if pn=='VALUE': pass elif pn=='FMTTYPE': pass # @@ codeme else: raise ValueError("unexpected URI param %s=%s" % (pn, pv)) sx.endElement(elt) def doText(sx, elt, params, val): attrs = {} val = unesc(val) # @@ or only if not QUOTED-PRINTABLE ? for pn, pv in params: if pn=='VALUE': pass elif (pn, pv) == ('ENCODING', 'QUOTED-PRINTABLE'): val = quopri.decodestring(val) elif pn == "LANGUAGE": attrs[ 'xml:lang'] = pv else: warn("unexpected text param %s=%s on elt '%s'" % (pn, pv, elt)) sx.startElement(elt, attrs) sx.characters(val, 0, len(val)) sx.endElement(elt) # Hmm... we can't use dt:dateTime nor dt:duration as the property # here, because datatype properties are inverse functional, but # iCalendar DATE-TIME values have other properties, i.e. tzid. # # In a way, it's a good thing anyway, since using datatype properties # would take us out of OWL DL. def doDateTime(sx, elt, params, val): val = datePunc(val) tzid = None for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? 
elif pn == 'TZID': tzid = pv elif pn.startswith('X-'): pass else: raise ValueError("unexpected DT param %s=%s" % (pn, pv)) if val.endswith('Z'): sx.startElement(elt, {'rdf:datatype': XMLSchema.dateTime}) sx.characters(val, 0, len(val)) sx.endElement(elt) elif tzid: sx.startElement(elt, {'rdf:datatype': timeZoneNameSpace(tzid)+'tz'}) sx.characters(val, 0, len(val)) sx.endElement(elt) else: sx.startElement(elt, {'rdf:datatype': iCalendar.dateTime}) sx.characters(val, 0, len(val)) sx.endElement(elt) OlsonPfxs=('/softwarestudio.org/Olson_20011030_5/', '/softwarestudio.org/Olson_20010831_3/', '/softwarestudio.org/Olson_20011030_4/', '/softwarestudio.org/Olson_20020614_6/', '/softwarestudio.org/Olson_20011030_2/') TzdPfx='http://www.w3.org/2002/12/cal/tzd/' def timeZoneNameSpace(tzid): """map tzid into URI space rooted at TzdPfx rfc2445#sec4.8.3.1 says: "The presence of the SOLIDUS character (US-ASCII decimal 47) as a prefix, indicates that this TZID represents an unique ID in a globally defined time zone registry (when such registry is defined)." >>> timeZoneNameSpace('/softwarestudio.org/Olson_20011030_5/Europe/London') 'http://www.w3.org/2002/12/cal/tzd/Europe/London#' If an unknown registry is uses... >>> timeZoneNameSpace('/foo') Traceback (most recent call last): raise ValueError, "unknown global tzid:" + tzid ValueError: unknown global tzid:/foo We do some namespace squatting: in theory, the name 'Europe/London' could be used as a local reference for Chicago time. But in practice, this works: >>> timeZoneNameSpace('Europe/London') 'http://www.w3.org/2002/12/cal/tzd/Europe/London#' A bit more squatting: >>> timeZoneNameSpace('US/Eastern') 'http://www.w3.org/2002/12/cal/tzd/America/New_York#' @@raises RuntimeError for unrecognized local refs. 
""" if tzid.startswith('/'): for pfx in OlsonPfxs: if tzid.startswith(pfx): tzns = TzdPfx + tzid[len(pfx):] + '#' break else: raise ValueError("unknown global tzid:" + tzid) else: tzid = {'US/Eastern': 'America/New_York', 'US/Central': 'America/Chicago', # mountain? denver? 'US/Pacific': 'America/Los_Angeles', 'Canada/Eastern':'America/New_York', 'Canada/Mountain': 'America/Calgary', 'Brazil/East': 'America/Rio_de_Janiero', }.get(tzid, tzid) for area in ('Africa', 'Antarctica', 'Asia', 'Australia', 'Europe', 'Pacific', 'America', 'Arctic', 'Atlantic', 'Indian'): if tzid.startswith(area + '/'): tzns = TzdPfx + tzid + '#' break else: if tzid.startswith('GMT'): ## @@ check match GMT(+|-)[0-9]*4 tzns = TzdPfx + tzid + '#' elif tzid == 'Etc/UTC': tzns = TzdPfx + tzid + '#' else: raise RuntimeError("unsupported local timezone: " + tzid) return tzns def doDate(sx, elt, params, val): sx.startElement(elt, {'rdf:datatype': XMLSchema.date}) val = "%s-%s-%s" % (val[:4], val[4:6], val[6:8]) sx.characters(val, 0, len(val)) for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? else: raise ValueError("unexpected Date param %s=%s" % (pn, pv)) sx.endElement(elt) def doCalAddress(sx, elt, params, val): sx.startElement(elt, {'rdf:parseType': "Resource"}) val = val.replace("MAILTO:", 'mailto:') # rfc2445#sec4.3.3 weirdness # hmm... use or mention of the address? sx.startElement('calAddress', {'rdf:resource': val}) sx.endElement('calAddress') for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? 
elif pn == 'CN': sx.startElement('cn', {}) #@@ add to ical schema sx.characters(pv, 0, len(pv)) sx.endElement('cn') elif pn == 'DIR': sx.startElement('dir', {'rdf:resource': pv}) sx.endElement('cn') elif pn in ('CUTYPE', 'ROLE', 'RSVP', 'PARTSTAT', 'LANGUAGE'): pn = pn.lower() # lower is sufficient to camelCase pv = pv.upper() # @@symbol sx.startElement(pn, {}) sx.characters(pv, 0, len(pv)) sx.endElement(pn) elif pn == 'X-UID': pn = pn.lower() # @@@@ should be namespaced from the client software mfr sx.startElement(pn, {}) sx.characters(pv, 0, len(pv)) sx.endElement(pn) elif pn == 'SENT-BY': pass # @@ codeme elif pn == 'EMAIL': pass # @@ codeme elif pn == 'SCHEDULE-AGENT': pass # @@ codeme elif pn == 'SCHEDULE-FORCE-SEND': pass # @@ codeme elif pn.startswith('X-'): pass else: raise ValueError("unexpected address param %s=%s" % (pn, pv)) sx.endElement(elt) def doRecur(sx, elt, params, val): sx.startElement(elt, {'rdf:parseType': "Resource"}) for n, v in recurlex(val, downcase=False).items(): sx.startElement(n, type(v) is type(1) and \ {'rdf:datatype': XMLSchema.integer} or {}) sx.characters(str(v), 0, len(str(v))) sx.endElement(n) for pn, pv in params: if pn == 'VALUE': pass # delete this in doParams? else: raise ValueError("unexpected recur param %s=%s" % (pn, pv)) sx.endElement(elt) def doDuration(sx, elt, params, val): """duration is an odd beast in iCalendar. There is a duration property as well as a duration value type. We'll use cal:duration for the property. The DURATION value type is actually more than just a XMLSchema.duration; it also has a RELATED parameter. 