#!/usr/bin/python
"""fromIcal.py -- interpret iCalendar data as RDF
USAGE:
python fromIcal.py [options] foo.ics > foo.rdf
options:
--base uri
--noprotocol Supress SEQUENCE and DTSTAMP
--noalarm Supress VALARMs
--x include X- properties
REFERENCES
Internet Calendaring and Scheduling Core
Object Specification (iCalendar)
November 1998
http://www.ietf.org/rfc/rfc2445.txt
http://www.w3.org/2002/12/cal/rfc2445
http://www.w3.org/2002/12/cal/rfc2445.html
NOTE: We don't claim to implement the whole spec, nor to even have
read all of it. We're taking a data-driven, test-driven approach
to RFC2445 coverage/conformance. We start with a .ics file that we
understand (because it came from a tool that acts as we expect in
response to it or some such) and we implement the parts of the
spec necessary to grok the data in that file. As we work on more
test files, we cover (and carefully read) more parts of the spec.
Building an RDF model: A quick look at iCalendar
http://www.w3.org/2000/01/foo
TimBL 2000/10/02
Python Style Guide
Author: Guido van Rossum
http://www.python.org/doc/essays/styleguide.html
TODO
- RDF API in place of SAX?
- rename fromIcal.py?
LICENSE
RDF Calendar Workspace: http://www.w3.org/2002/12/cal/
Copyright 2002-2003 World Wide Web Consortium, (Massachusetts
Institute of Technology, European Research Consortium for
Informatics and Mathematics, Keio University). All Rights
Reserved. This work is distributed under the W3C(R) Software License
http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
in the hope that it will be useful, but WITHOUT ANY WARRANTY;
without even the implied warranty of MERCHANTABILITY or FITNESS FOR
A PARTICULAR PURPOSE.
"""
__version__ = "$Id: fromIcal.py,v 2.37 2023/12/31 16:23:15 timbl Exp $"
from warnings import warn
import codecs, quopri
import XMLWriter
from icslex import unbreak, parseLine, unesc, recurlex
nextid = 0
def serial(): # we have to add unique IDs to UDIs as they are repeated for instances of a recurring thing
global nextid
nextid += 1
return "_" +str(nextid)
def main():
import sys
sx = XMLWriter.T(sys.stdout)
# sx = XMLWriter.T(codecs.getwriter('utf-8')(sys.stdout))
base = None
suppressed = ['X-']
while len(sys.argv) > 1:
if sys.argv[1] == '--base':
base = sys.argv[2]
del sys.argv[1:3]
elif sys.argv[1] == '--noprotocol':
suppressed = suppressed + [ 'SEQUENCE', 'DTSTAMP']
del sys.argv[1:2]
elif sys.argv[1] == '--x':
del suppressed[0]
elif sys.argv[1] == '--noalarm':
suppressed = suppressed + [ 'Valarm']
del sys.argv[1:2]
elif sys.argv[1] == '--notimezone':
suppressed = suppressed + [ 'Vtimezone']
del sys.argv[1:2]
elif sys.argv[1] == '--help':
print(__doc__)
return
else:
break
interpret(sx, codecs.open(sys.argv[1], 'r', 'utf-8'), base, suppressed)
class Namespace:
def __init__(self, nsURI, names=()):
self._n = nsURI
self._names = names
def bindAttr(self, pfx, attrs):
if pfx:
attrs['xmlns:%s' % pfx] = self._n
else:
attrs['xmlns'] = self._n
def __getattr__(self, lname):
if lname in self._names:
return self._n+lname
else:
raise AttributeError(lname)
def sym(self, lname):
return self._n + lname
RDF = Namespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
iCalendar = Namespace('http://www.w3.org/2002/12/cal/icaltzd#',
('dateTime', ))
XMLSchema = Namespace('http://www.w3.org/2001/XMLSchema#',
('dateTime', 'date', 'double', 'integer', 'duration'))
def interpret(sx, fp, base=None, suppressed =[]):
lines = unbreak(fp)
n, p, v = parseLine(next(lines))
if v != 'VCALENDAR':
raise SyntaxError('Expected CALENDAR but found: %s' % (v))
calendars = []
findComponents(lines, v, calendars)
attrs = {}
RDF.bindAttr('rdf', attrs)
iCalendar.bindAttr('', attrs)
if base:
attrs['xml:base'] = base
sx.startElement('rdf:RDF', attrs)
doComponents(sx, calendars, iCalendarDefs, suppressed = suppressed)
sx.endElement('rdf:RDF')
#########
#
# Property Declarations:
# ICALTOKEN -> ('rdfname', 'DEFAULT-TYPE', minCardinality, maxCardinality)
#
# In theory, these could be derived from the rfc2445-formal schema, but
# they're copied by hand, so far. Reading from rfc2445-formal rather
# than from rfc2445.txt ensures that we have the relevant information
# formalized in machine-readable form, but also provides careful
# review where a mechanized translation might mask some bugs.
#
# The default types are keyed in on-demand as we encounter
# the properties in test data. Those that we haven't tested
# produce a RuntimeError, telling us to add the default type
# (and a test case for it!).
#
# Mappings to rdfname and DEFAULT-TYPE could be flattened,
# but mappings to cardinalities depend on the containing component.
# Cardinalities aren't used yet. (oops; forgot YouArentGonnaNeedIt)
#
#
# We represent symbolic values as strings, i.e.
# :transp "OPAQUE";
# where the calendar test suite currently uses URIs, i.e.
# :transp :opaque;
# Relevant code is marked with @@symbol.
#
# See also "How should I implement controlled vocabularies?"
# in http://esw.w3.org/topic/PropertiesForNaming
# as of 2003-04-18 16:10:57
#
tzprop = {
"DTSTART": ('dtstart', 'DATE-TIME', 1, 1),
"TZOFFSETTO": ('tzoffsetto', 'TEXT', 1, 1),
"TZOFFSETFROM": ('tzoffsetfrom', 'TEXT', 1, 1),
"COMMENT": ('comment', 'TEXT', 0, None),
"RDATE": ('rdate', 'DATE-TIME', 0, None),
"RRULE": ('rrule', 'RECUR', 0, None),
'EXDATE': ('exdate', 'DATE-TIME', None, None),
'RECURRENCE-ID': ('recurrenceId', 'DATE-TIME', None, None),
"TZNAME": ('tzname', 'TEXT', 0, None),
}
ValarmDefs = ('Valarm',
{"ACTION": ('action', 'TEXT', 0, None), #@@symbol
"ATTACH": ('attach', 'URI', 0, None),
"ATTENDEE": ('attendee', 'CAL-ADDRESS', 0, None),
"DESCRIPTION": ('description', 'TEXT', 0, 1),
"DURATION": ('duration', 'DURATION', 0, None),
"REPEAT": ('repeat', 0, None),
"SUMMARY": ('summary', "TEXT", 0, None),
"TRIGGER": ('trigger', "DURATION", 0, None),
},
{})
iCalendarDefs = {'VCALENDAR':
('Vcalendar',
{'CALSCALE': ('calscale', 'TEXT', 0, 1),
'METHOD': ('method', 'TEXT', 0, 1),
'VERSION': ('version', 'TEXT', 1, 1),
'PRODID': ('prodid', 'TEXT', 1, 1)
},
{'VTIMEZONE':
('Vtimezone',
{"TZID": ('tzid', 'TEXT', 1, 1), #hmm... fragid?
"LAST-MODIFIED": ('lastModified', "DATE-TIME", 0, 1),
"TZURL": ('tzurl', 'URI', 0, 1),
},
{"STANDARD": ('standard', tzprop, {}),
"DAYLIGHT": ('daylight', tzprop, {})
}
),
'VEVENT':
('Vevent',
{"ATTACH": ('attach', 'URI', 0, None),
"CATEGORIES": ('categories', "TEXT", 0, None), #@@list
"SUMMARY": ('summary', "TEXT", 0, None),
"DTEND": ('dtend', 'DATE-TIME', 0, None),
"DTSTART": ('dtstart', 'DATE-TIME', 0, None),
"DURATION": ('duration', 'DURATION', 0, None),
"TRANSP": ('transp', 'TEXT', 0, None), #@@symbol
"ATTENDEE": ('attendee', 'CAL-ADDRESS', 0, None),
"CONTACT": ('contact', 0, None),
"ORGANIZER": ('organizer', "CAL-ADDRESS", 0, None),
"RELATED-TO": ('relatedTo', 'RELATIONSHIP', 0, None),
# notes on rfc2445#sec4.8.4.6 Uniform Resource Locator
#
# This is very muddled modelling; url makes sense as
# a value type, but not as a property name. It's a grab-bag
# for concepts like foaf:homePage, dc:related (which
# is another grab bag) etc.
"URL": ('url', 'URI', 0, None),
"UID": ('uid', "TEXT", 0, None),
"EXRULE": ('exrule', 0, None),
"CLASS": ('class', 'TEXT', 0, None), #@@symbol
"RDATE": ('rdate', 'DATE-TIME', 0, None),
"RRULE": ('rrule', 'RECUR', 0, None),
'EXDATE': ('exdate', 'DATE-TIME', None, None),
'RECURRENCE-ID': ('recurrenceId', 'DATE-TIME', None, None),
"TRIGGER": ('trigger', "DURATION", 0, None),
"CREATED": ('created', "DATE-TIME", 0, None),
"DTSTAMP": ('dtstamp', 'DATE-TIME', 1, 1),
"LAST-MODIFIED": ('lastModified', "DATE-TIME", 0, 1),
"SEQUENCE": ('sequence', "INTEGER", 0, None),
"REQUEST-STATUS": ('requestStatus', 0, None),
"COMMENT": ('comment', 'TEXT', 0, None),
"DESCRIPTION": ('description', "TEXT", 0, 1),
'GEO': ('geo', ('FLOAT',), None, None),
"LOCATION": ('location', 'TEXT', 0, None),
'PRIORITY': ('priority', 'INTEGER', None, None),
"RESOURCES": ('resources', 0, None),
"STATUS": ('status', 'TEXT', 0, 1),
},
{"VALARM": ValarmDefs }
),
'VTODO':
('Vtodo',
{'ATTACH': ('attach', 'URI', None, None),
'ATTENDEE': ('attendee', 'CAL-ADDRESS', None, None),
'CATEGORIES' : ('categories', 'TEXT', None, None),
'CLASS': ('class', 'TEXT', None, None),
'COMMENT': ('comment', 'TEXT', None, None),
'COMPLETED': ('completed', 'DATE-TIME', None, None),
'CONTACT': ('contact', 'TEXT', None, None),
'CREATED': ('created', 'DATE-TIME', None, None),
'DESCRIPTION': ('description', 'TEXT', None, None),
'DTSTAMP': ('dtstamp', 'DATE-TIME', None, None),
'DTSTART': ('dtstart', 'DATE-TIME', None, None),
'DUE': ('due', 'DATE-TIME', None, None),
'DURATION': ('duration', 'DURATION', None, None),
'EXRULE': ('exrule', 'RECUR', None, None),
'GEO': ('geo', '@@', None, None),
'LAST-MODIFIED': ('lastModified', 'DATE-TIME', None, None),
'LOCATION': ('location', 'TEXT', None, None),
'ORGANIZER': ('organizer', 'CAL-ADDRESS', None, None),
'PERCENT-COMPLETE': ('percentComplete', 'INTEGER', None, None),
'PRIORITY': ('priority', 'INTEGER', None, None),
'RDATE': ('rdate', 'DATE-TIME', None, None),
'RELATED-TO': ('relatedTo', 'TEXT', None, None),
'REQUEST-STATUS': ('requestStatus', 'TEXT', None, None),
'RESOURCES': ('resources', 'TEXT', None, None),
'RRULE': ('rrule', 'RECUR', None, None),
'EXDATE': ('exdate', 'DATE-TIME', None, None),
'RECURRENCE-ID': ('recurrenceId', 'DATE-TIME', None, None),
'SEQUENCE': ('sequence', 'INTEGER', None, None),
'STATUS': ('status', 'TEXT', None, None),
'SUMMARY': ('summary', 'TEXT', None, None),
'TRIGGER': ('trigger', 'DURATION', None, None),
'UID': ('uid', 'TEXT', None, None),
'URL': ('url', 'URI', None, None)},
{"VALARM": ValarmDefs },
)
#@@others
}
)
}
def doComponents(sx, components, compDecls, stripe=None, suppressed =[]):
"""interpret components
stripe says whether we need a element
or a parseType="Resource" attribute to fix up the striping.
or None, in which case we need to declare X- namespaces
raises KeyError for unknown component name. @@test this
"""
for name, props, subs in components:
elt, propDecls, subDecls = compDecls[name]
if elt in suppressed:
continue
attrs = {}
if stripe == 'component':
sx.startElement('component', {})
elif stripe == 'Resource':
attrs['rdf:parseType'] = stripe
else:
bindX(attrs, props, components)
try:
i = lookup(props, 'UID')
if not('#' in i): i = "#" + i
attrs['rdf:about'] = i + serial()
except KeyError:
try:
tzid = lookup(props, 'TZID')
attrs['rdf:about'] = timeZoneNameSpace(tzid) + 'tz'
except KeyError:
pass
sx.startElement(elt, attrs)
doProperties(sx, '', props, propDecls, suppressed = suppressed)
if elt == 'Vtimezone':
doComponents(sx, subs, subDecls, 'Resource', suppressed = suppressed)
else:
doComponents(sx, subs, subDecls, 'component', suppressed = suppressed)
sx.endElement(elt)
if stripe == 'component':
sx.endElement('component')
def bindX(attrs, props, components):
""" bind x: namespace per prodid
hmm... use vendorid as prefix?
"""
try:
prodid = lookup(props, 'PRODID') #@@ call unesc to parse \,
except KeyError:
warn("RFC2445 requires a prodid. none found")
return
def findVendorids(results, components):
for name, props, subs in components:
for n, p, v in props:
if n[:2] == "X-":
vendorid, lname = n[2:].split('-', 1)
results.append(vendorid.lower())
findVendorids(results, subs)
vendorids = []
findVendorids(vendorids, components)
puri = prodURI(prodid)
for vid in vendorids:
attrs['xmlns:x-' + vid] = puri
def lookup(props, k):
for n, p, v in props:
if n == k:
return v
raise KeyError(k)
def prodURI(prodid):
r"""turn prodid into a URI
cf discussion starting with
x-properties and namespaces
posted by DanC at 2003-02-26 17:17 (+)
http://rdfig.xmlhack.com/2003/02/26/2003-02-26.html#1046279854.884486
and continuing thru
RDF calendar agenda item C: prodid support to ical2rdf.pl
posted by libby at 2003-07-09 14:59 (+)
http://rdfig.xmlhack.com/2003/07/09/2003-07-09.html#1057762764.179078
>>> prodURI("-//Apple Computer\, Inc//iCal 1.0//EN")
'http://www.w3.org/2002/12/cal/prod/Apple_Comp_e68fdb4cdcb3a2e8#'
"""
# was 628d9d8459c556fa
from hashlib import sha256
# sha = hashlib.sha256
if prodid[:3] == "-//": prodid = prodid[3:]
prodid = prodid.replace(' ', '_').replace("//", "_")
digest = sha256(prodid.lower().encode('utf-8')).hexdigest()
return 'http://www.w3.org/2002/12/cal/prod/' + \
prodid[:10] + '_' + digest[:16] + '#'
def doProperties(sx, pfx, props, schema, suppressed =[]):
"""write each property as XML
raises KeyError for properties that are neither X-
properties nor in the schema
"""
for n, params, val in props:
if n in suppressed:
continue
if n[:2] == "X-":
if 'X-' in suppressed: continue
vendorid, lname = n[2:].split('-', 1)
id = 'x-' + vendorid.lower() + ':' + camelCase(lname)
vtype, minc, maxc = 'TEXT', 0, None
else:
try:
try:
id, vtype, minc, maxc = schema[n] # @@
except ValueError:
raise RuntimeError("missing default type: %s -> %s" %\
(n, schema[n]))
except KeyError:
continue #@@
raise RuntimeError("Bad default type: %s" %\
(n))
elt = '%s%s' % (pfx, id)
for pn, pv in params:
if pn == 'VALUE':
vtype = pv.upper()
if vtype == 'TEXT':
doText(sx, elt, params, val)
elif vtype == 'INTEGER':
doInteger(sx, elt, params, val)
elif vtype == 'DURATION':
doDuration(sx, elt, params, val)
elif vtype == 'DATE-TIME':
doDateTime(sx, elt, params, val)
elif vtype == 'DATE':
doDate(sx, elt, params, val)
elif vtype == 'RECUR':
doRecur(sx, elt, params, val)
elif vtype == 'CAL-ADDRESS':
doCalAddress(sx, elt, params, val)
elif vtype == 'URI':
doURI(sx, elt, params, val)
elif vtype == ('FLOAT',):
doListOfFLOAT(sx, elt, params, val)
else:
warn("@@value type %s not implemented (%s: %s)" % (vtype, n, val))
def doInteger(sx, elt, params, val):
sx.startElement(elt, {'rdf:datatype': XMLSchema.integer})
sx.characters(val, 0, len(val))
for pn, pv in params:
if pn=='VALUE': pass
else:
raise ValueError("unexpected integer param %s=%s" % (pn, pv))
sx.endElement(elt)
def doURI(sx, elt, params, val):
sx.startElement(elt, {'rdf:resource': val})
for pn, pv in params:
if pn=='VALUE':
pass
elif pn=='FMTTYPE': pass # @@ codeme
else:
raise ValueError("unexpected URI param %s=%s" % (pn, pv))
sx.endElement(elt)
def doText(sx, elt, params, val):
attrs = {}
val = unesc(val) # @@ or only if not QUOTED-PRINTABLE ?
for pn, pv in params:
if pn=='VALUE': pass
elif (pn, pv) == ('ENCODING', 'QUOTED-PRINTABLE'):
val = quopri.decodestring(val)
elif pn == "LANGUAGE":
attrs[ 'xml:lang'] = pv
else:
warn("unexpected text param %s=%s on elt '%s'" % (pn, pv, elt))
sx.startElement(elt, attrs)
sx.characters(val, 0, len(val))
sx.endElement(elt)
# Hmm... we can't use dt:dateTime nor dt:duration as the property
# here, because datatype properties are inverse functional, but
# iCalendar DATE-TIME values have other properties, i.e. tzid.
#
# In a way, it's a good thing anyway, since using datatype properties
# would take us out of OWL DL.
def doDateTime(sx, elt, params, val):
val = datePunc(val)
tzid = None
for pn, pv in params:
if pn == 'VALUE': pass # delete this in doParams?
elif pn == 'TZID':
tzid = pv
elif pn.startswith('X-'):
pass
else:
raise ValueError("unexpected DT param %s=%s" % (pn, pv))
if val.endswith('Z'):
sx.startElement(elt, {'rdf:datatype': XMLSchema.dateTime})
sx.characters(val, 0, len(val))
sx.endElement(elt)
elif tzid:
sx.startElement(elt, {'rdf:datatype': timeZoneNameSpace(tzid)+'tz'})
sx.characters(val, 0, len(val))
sx.endElement(elt)
else:
sx.startElement(elt, {'rdf:datatype': iCalendar.dateTime})
sx.characters(val, 0, len(val))
sx.endElement(elt)
OlsonPfxs=('/softwarestudio.org/Olson_20011030_5/',
'/softwarestudio.org/Olson_20010831_3/',
'/softwarestudio.org/Olson_20011030_4/',
'/softwarestudio.org/Olson_20020614_6/',
'/softwarestudio.org/Olson_20011030_2/')
TzdPfx='http://www.w3.org/2002/12/cal/tzd/'
def timeZoneNameSpace(tzid):
"""map tzid into URI space rooted at TzdPfx
rfc2445#sec4.8.3.1 says:
"The presence of the SOLIDUS character (US-ASCII
decimal 47) as a prefix, indicates that this TZID represents an
unique ID in a globally defined time zone registry (when such
registry is defined)."
>>> timeZoneNameSpace('/softwarestudio.org/Olson_20011030_5/Europe/London')
'http://www.w3.org/2002/12/cal/tzd/Europe/London#'
If an unknown registry is uses...
>>> timeZoneNameSpace('/foo')
Traceback (most recent call last):
raise ValueError, "unknown global tzid:" + tzid
ValueError: unknown global tzid:/foo
We do some namespace squatting: in theory,
the name 'Europe/London' could be used as a local reference
for Chicago time. But in practice, this works:
>>> timeZoneNameSpace('Europe/London')
'http://www.w3.org/2002/12/cal/tzd/Europe/London#'
A bit more squatting:
>>> timeZoneNameSpace('US/Eastern')
'http://www.w3.org/2002/12/cal/tzd/America/New_York#'
@@raises RuntimeError for unrecognized local refs.
"""
if tzid.startswith('/'):
for pfx in OlsonPfxs:
if tzid.startswith(pfx):
tzns = TzdPfx + tzid[len(pfx):] + '#'
break
else:
raise ValueError("unknown global tzid:" + tzid)
else:
tzid = {'US/Eastern': 'America/New_York',
'US/Central': 'America/Chicago',
# mountain? denver?
'US/Pacific': 'America/Los_Angeles',
'Canada/Eastern':'America/New_York',
'Canada/Mountain': 'America/Calgary',
'Brazil/East': 'America/Rio_de_Janiero',
}.get(tzid, tzid)
for area in ('Africa', 'Antarctica', 'Asia', 'Australia',
'Europe', 'Pacific', 'America', 'Arctic', 'Atlantic',
'Indian'):
if tzid.startswith(area + '/'):
tzns = TzdPfx + tzid + '#'
break
else:
if tzid.startswith('GMT'):
## @@ check match GMT(+|-)[0-9]*4
tzns = TzdPfx + tzid + '#'
elif tzid == 'Etc/UTC':
tzns = TzdPfx + tzid + '#'
else:
raise RuntimeError("unsupported local timezone: " + tzid)
return tzns
def doDate(sx, elt, params, val):
sx.startElement(elt, {'rdf:datatype': XMLSchema.date})
val = "%s-%s-%s" % (val[:4], val[4:6], val[6:8])
sx.characters(val, 0, len(val))
for pn, pv in params:
if pn == 'VALUE': pass # delete this in doParams?
else:
raise ValueError("unexpected Date param %s=%s" % (pn, pv))
sx.endElement(elt)
def doCalAddress(sx, elt, params, val):
sx.startElement(elt, {'rdf:parseType': "Resource"})
val = val.replace("MAILTO:", 'mailto:') # rfc2445#sec4.3.3 weirdness
# hmm... use or mention of the address?
sx.startElement('calAddress', {'rdf:resource': val})
sx.endElement('calAddress')
for pn, pv in params:
if pn == 'VALUE': pass # delete this in doParams?
elif pn == 'CN':
sx.startElement('cn', {}) #@@ add to ical schema
sx.characters(pv, 0, len(pv))
sx.endElement('cn')
elif pn == 'DIR':
sx.startElement('dir', {'rdf:resource': pv})
sx.endElement('cn')
elif pn in ('CUTYPE', 'ROLE', 'RSVP', 'PARTSTAT', 'LANGUAGE'):
pn = pn.lower() # lower is sufficient to camelCase
pv = pv.upper() # @@symbol
sx.startElement(pn, {})
sx.characters(pv, 0, len(pv))
sx.endElement(pn)
elif pn == 'X-UID':
pn = pn.lower() # @@@@ should be namespaced from the client software mfr
sx.startElement(pn, {})
sx.characters(pv, 0, len(pv))
sx.endElement(pn)
elif pn == 'SENT-BY':
pass # @@ codeme
elif pn == 'EMAIL':
pass # @@ codeme
elif pn == 'SCHEDULE-AGENT':
pass # @@ codeme
elif pn == 'SCHEDULE-FORCE-SEND':
pass # @@ codeme
elif pn.startswith('X-'):
pass
else:
raise ValueError("unexpected address param %s=%s" % (pn, pv))
sx.endElement(elt)
def doRecur(sx, elt, params, val):
sx.startElement(elt, {'rdf:parseType': "Resource"})
for n, v in recurlex(val, downcase=False).items():
sx.startElement(n, type(v) is type(1) and \
{'rdf:datatype': XMLSchema.integer} or {})
sx.characters(str(v), 0, len(str(v)))
sx.endElement(n)
for pn, pv in params:
if pn == 'VALUE': pass # delete this in doParams?
else:
raise ValueError("unexpected recur param %s=%s" % (pn, pv))
sx.endElement(elt)
def doDuration(sx, elt, params, val):
"""duration is an odd beast in iCalendar. There is a duration
property as well as a duration value type. We'll use cal:duration
for the property.
The DURATION value type is actually more than just a
XMLSchema.duration; it also has a RELATED parameter.
So for
TRIGGER;VALUE=DURATION;RELATED=START:-PT15M
we'll write
{ ?E cal:trigger [ rdf:value "-PT15M"^^xsdt:duration;
cal:related "START"] }
"""
sx.startElement(elt, {'rdf:parseType': "Resource"})
sx.startElement('rdf:value', {'rdf:datatype': XMLSchema.duration})
sx.characters(val, 0, len(val))
sx.endElement('duration')
for pn, pv in params:
if pn == 'VALUE': pass # delete this in doParams?
elif pn.startswith('X-'):
pass
elif pn == 'RELATED':
pv = pv.upper() # rfc2445#sec2 names
sx.startElement('related', {})
sx.characters(pv, 0, len(pv))
sx.endElement('related')
else:
raise ValueError("unexpected duration param %s=%s" % (pn, pv))
sx.endElement(elt)
# We considered generalizing this to lists of
# other sorts of values but declined on the
# YouArentGonnaNeedIt principle.
def doListOfFLOAT(sx, elt, params, val):
x, y = val.split(';')
for pn, pv in params:
if pn=='VALUE': pass
else:
raise ValueError("unexpected float param %s=%s" % (pn, pv))
#ugh... can't use parseType="Collection" with literal values
#http://www.w3.org/2000/03/rdf-tracking/#rdfxml-literals-in-collections
sx.startElement(elt, {'rdf:parseType': "Resource"})
sx.startElement('rdf:first', {'rdf:datatype': XMLSchema.double})
sx.characters(x, 0, len(x))
sx.endElement('rdf:first')
sx.startElement('rdf:rest', {'rdf:parseType': "Resource"})
sx.startElement('rdf:first', {'rdf:datatype': XMLSchema.double})
sx.characters(y, 0, len(y))
sx.endElement('rdf:first')
sx.startElement('rdf:rest',
{'rdf:resource':
'http://www.w3.org/1999/02/22-rdf-syntax-ns#nil'})
sx.endElement('rdf:rest')
sx.endElement('rdf:rest')
sx.endElement(elt)
def datePunc(val):
"""convert RFC2445 date to ISO/W3C/XML date form
>>> datePunc('19701025T020000')
'1970-10-25T02:00:00'
>>> datePunc('20020630T230353Z')
'2002-06-30T23:03:53Z'
"""
return "%s-%s-%sT%s:%s:%s%s" % (val[:4], val[4:6], val[6:8],
val[9:11], val[11:13], val[13:15],
val[15:])
def camelCase(n, initialCap=0):
"""
>>> camelCase("VEVENT", 1)
'Vevent'
>>> camelCase("LAST-MODIFIED")
'lastModified'
>>> camelCase("LOCATION")
'location'
"""
words = [w.lower() for w in n.split('-')]
def ucfirst(w):
return w[0].upper() + w[1:]
if initialCap:
return ''.join(map(ucfirst, words))
else:
return words[0] + ''.join(map(ucfirst, words[1:]))
def findComponents(lines, container, components=[]):
"""return a list of (name, props, subcomponents) and remaining lines
# @@TODO: make this a generator
"""
props = []
subs = []
while 1:
try:
n, p, v = parseLine(next(lines), downcase=False)
except StopIteration:
break
#print "finding...", n, p, v
#print >>sys.stderr, "began", container, n, p, v
if n == 'END':
# @@hmm... found an extra space after END:DAYLIGHT
# in test/20030410querymtg.ics
# where did that come from? allow it, or fix test data?
v = v.rstrip().upper()
if v != container:
raise ValueError('expected "%s" but found "%s"' % \
(container, v))
components.append((container, props, subs))
return
elif n == 'BEGIN':
findComponents(lines, v, subs)
else:
props.append((n, p, v))
def _test():
import sys
from pprint import pprint
import doctest, fromIcal
doctest.testmod(fromIcal)
lines = unbreak(open(sys.argv[1]))
n, p, v = parseLine(next(lines))
c = []
findComponents(lines, v, c)
pprint(c)
#unittest.main()
if __name__ == '__main__':
import sys
if sys.argv[1] == '--test':
del sys.argv[1]
_test()
else:
main()
# $Log: fromIcal.py,v $
# Revision 2.37 2023/12/31 16:23:15 timbl
# chek for escaping of semicolons
#
# Revision 2.36 2023/12/30 16:19:46 timbl
# remove extra UTF-8
#
# Revision 2.35 2023/12/29 17:19:09 timbl
# get rid of tabs
#
# Revision 2.34 2012/08/29 17:58:44 timbl
# Seems XSLT file defined thge NS as being icalSpec not ical
#
# Revision 2.33 2007/02/20 14:58:45 timbl
# explain what we found if not CALENDAR
#
# Revision 2.32 2006/04/21 16:46:46 connolly
# demote unexpected param on text to warning, to deal with x2v's charset hack
#
# Revision 2.31 2006/04/11 20:29:00 connolly
# finished factoring out icslex stuff: unbreak, parseLine
# findComponents is now more straightforwardly recursive
#
# Revision 2.30 2006/04/09 06:02:39 connolly
# changeset: 7:5f8c551b2de38fb115789dfe7cbca0288a978f61
# tag: tip
# user: Dan Connolly
# date: Sun Apr 9 01:01:32 2006 -0500
# files: icslex.py
# description:
# add bymonthday to recurlex
#
#
# changeset: 6:32c567b22753c64f71c8de298adb87bad91ef567
# user: Dan Connolly
# date: Sun Apr 9 00:54:59 2006 -0500
# files: icsxml.py
# description:
# use utf-8 to read files; kludge a couple more fields that the template assumes
#
#
# changeset: 5:12370cd5ad97cd5cea04e7ed4d5f6b55c0ac39ff
# user: Dan Connolly
# date: Sun Apr 9 00:54:13 2006 -0500
# files: icslex.py
# description:
# make interval explict; use utf-8 to read files
#
#
# changeset: 4:0f319182ea4d6ee8a8b7f2ef042683323b75658d
# user: Dan Connolly
# date: Sun Apr 9 00:37:07 2006 -0500
# files: icsxml.py
# description:
# works in one case, with a couple kludges
#
#
# changeset: 3:3e542292c8040d0dab310748ef07ffbce0a15b4a
# user: Dan Connolly
# date: Sun Apr 9 00:36:43 2006 -0500
# files: icslex.py
# description:
# date, recur lex details
#
#
# changeset: 2:c2881393d0156b9263d760e98953ece6ba7591a6
# user: Dan Connolly
# date: Sun Apr 9 00:01:33 2006 -0500
# files: icslex.py
# description:
# - parsing collections of properties as a dict/JSON object works
# - names are downcased by default
# - formatted docs per rst/epydoc
#
#
# changeset: 1:ecc1ad118fc61abb55e9634d15921483134f3328
# user: Dan Connolly
# date: Sat Apr 8 22:06:28 2006 -0500
# files: icslex.py
# description:
# unbreak works
#
#
# changeset: 0:ec6eb270779b1ae046b9dd04be92034375392722
# user: Dan Connolly
# date: Sat Apr 8 21:50:45 2006 -0500
# files: icslex.py
# description:
# parseLine tests pass
#
# Revision 2.29 2005/11/09 23:10:48 connolly
# - changed the way duration values are modelled
# The iCalendar DURATION value type is actually more than just a
# XMLSchema.duration; it also has a RELATED parameter.
# So for
# TRIGGER;VALUE=DURATION;RELATED=START:-PT15M
# we'll write
# { ?E cal:trigger [ rdf:value "-PT15M"^^xsdt:duration;
# cal:related "START"] }
#
# - fixed test data to have rdf:datatype on integer
# values, to match the schema (which matches the RFC)
#
# - fixed schema to show DATE-TIME properties q+, ...)
# as DatatypeProperties
# (there are little/no tests for PERIOD; beware)
#
# - scraped more details about property parameters (e.g. partstat, cn,
# cutype, ...) and rrule parts (freq, interval, ...) from the RFC so
# that they show up as links in the hypertext version and as RDF
# properties in the schema. likewise timezone components (standard,
# daylight)
# - side effect: added some whitespace in rfc2445.html
#
# - demoted x- properties
# - removed x- properties from .rdf versions of test data
# this allows the round-trip tests to pass
# - fromIcal.py doesn't output them unless you give the --x option
#
# - added Makefile support for consistency checking with pellet
#
# - demoted blank line diagnostic in fromIcal.py to a comment
#
# - silenced some left-over debug diagnostics in slurpIcalSpec.py
#
# - fixed test/test-created.rdf; added it to fromIcalTest.py list
#
# Revision 2.28 2005/09/08 00:43:49 connolly
# avoid double hashes in ID
#
# Revision 2.27 2005/04/22 14:16:56 connolly
# fix problems found when converting all the timezone files
# in evolution-data-server_1.0.4-1_i386.deb:
# - handle RDATE
# - handle multiple OlsonPfxs
#
# Revision 2.26 2005/04/04 21:17:14 connolly
# fix initialization of iCalendar namespace
#
# Revision 2.25 2005/03/30 15:35:21 connolly
# new namespace for timezones-as-datatypes design: icaltzd
#
# Revision 2.24 2005/02/26 03:20:47 connolly
# fromIcal.py
# - revert the uid: trick; back to uids as fragids
# - timezones as datatypes in dates, dateTimes
# - Valarm supported in Vtodo as well as Vevent
# (@@need test smaller than MozMulipleVcalendars.ics)
# - re-indented Vtodo decls while I was at it
# - case-fold END:xyz
#
# fromIcalTest.py
# - base in http space
# - new tag-bug case
#
# test/*.rdf
# - base in http space
# - timezones as datatypes
#
# test/cal-regression.n3
# - moved tests that don't use X- first
# - got rid of initRDF
#
# test/cal-retest.py
# - replace ical2rdf.pl with fromIcal.py
# - base in http space
#
# test/cal-spec-examples.n3 new
#
# test/graphCompare.n3 oops; extra debug crud
#
# Revision 2.23 2005/02/10 21:39:00 timbl
# COUNT, LANGUAGE, X-UID, QUOTED-PRINTABLE under DanC's supervision
#
# Revision 2.22 2005/02/02 21:54:45 timbl
# added --noalarm option - kindofa hack - take 2
#
# Revision 2.21 2005/02/02 21:51:46 timbl
# added --noalarm option - kindofa hack
#
# Revision 2.20 2005/02/02 21:39:20 timbl
# sync
#
# Revision 2.19 2005/02/01 15:29:43 timbl
# hack to CREATED to add default type DATE-TIME.
#
# Revision 2.18 2005/02/01 15:26:53 timbl
# pre hack to CREATED default type.
#
# Revision 2.17 2005/01/28 04:07:49 timbl
# Event URIs now absolute. Added --noprotocol and --help options
#
# Revision 2.16 2004/09/30 14:16:01 connolly
# parseLine was buggy in the case of ; in values
#
# Revision 2.15 2004/04/14 21:31:26 connolly
# added --base support so we can test with fragids
#
# Revision 2.14 2004/04/14 21:12:13 connolly
#
# - revamped doDateTime: use datatypes for dateTime values
# - added __getattr__ to Namespace class
# - make well-known tzids into URIs in 2002/12/cal space
# - make UID into fragid
# - make local tzid into fragid
#
# Revision 2.13 2004/04/08 14:09:11 connolly
# priority on VEVENT fixed
#
# Revision 2.12 2004/04/07 18:27:17 connolly
# use real datatypes for list of floats, i.e. geo
#
# Revision 2.11 2004/04/07 18:10:22 connolly
# convert list of float, as in GEO
#
# Revision 2.10 2004/03/25 04:00:59 connolly
# allow recurrenceId wherever rrule can go
# handle WKST in recur values
#
# Revision 2.9 2004/03/25 03:45:09 connolly
# handle UNTIL in rrule
# added EXDATE to compDecls wherever RRULE occurs
#
# Revision 2.7 2004/03/23 14:59:28 connolly
# allow missing PRODID
#
# Revision 2.6 2004/03/10 21:59:31 connolly
# calendar schema is now generated from the RFC
#
# Revision 2.5 2004/02/29 14:52:11 connolly
# todo support in fromIcal; value type label in schema
#
# Revision 2.4 2004/02/12 07:17:05 connolly
# - handle URI value type
# - a few more default value type declarations
#
# Revision 2.3 2004/02/12 06:30:48 connolly
# - doText unescapes text values per rfc2445#sec4.3.11
# - LAST-MODIFIED applies to VEVENT (fixed typo in RFC)
# - default type added for COMMENT
# - disabled UID->fragid conversion cuz it interferes with graph comparison
# - handle DIR parameter on CAL-ADDRESS value type
#
# Revision 2.2 2004/02/11 22:04:10 connolly
# slightly nicer XML writer
#
# Revision 2.1 2004/02/11 16:40:23 connolly
# finish renaming icalWebize.py to fromIcal.py
#
# Revision 2.0 2004/02/11 16:37:48 connolly
# copied from icalWebize.py
#
# Revision 1.11 2004/02/09 23:09:11 connolly
# - more refined X- property handling
# - generate fragids for uid, TZID
# - fixed utf-8 reading (thx MK for move test data!)
# - noted RDF API, name change TODOs per discussion with timbl
# - allow blank lines, space after names, (warn about possible confict with RFC)
# - pass over VALUE parameter in doText
#
# Revision 1.10 2004/02/08 19:14:39 connolly
# handles all of cal01.ics, though not quite the same way
# that ical2rdf.pl does
# - elaborated comment about declarations and rfc2445-formal
# - elaborated @@symbol comment
# - fixed tzoffsetfrom decl bug
# - handle X- properties
# - handle DATE value type
# - changed calAddress to use, rather than mention, ala ical2rdf.pl
# - handle (and test) Z on dateTimes
# - more uniform handling of case insensitivity (still not tested)
#
# Revision 1.9 2004/02/08 05:17:56 connolly
# working except X- props
# symbols are still text; not (yet?) interpreted as URIs
# treating Valarm as a class; leading toward changing it this way
#
# Revision 1.8 2004/02/08 04:28:43 connolly
# removed i: prefix
# enough property declarations with value types for cal01.ics
# added elements and parseType to fix striping
# factored out text property code
#
# Revision 1.7 2004/02/08 00:10:00 connolly
# data-driven processing of components
#
# Revision 1.6 2004/02/07 06:28:35 connolly
# note a bug with unescaping TEXT values
#
# Revision 1.5 2004/02/07 03:40:17 connolly
# note intention to use generators
# rearrange module docstring
#
# Revision 1.4 2004/02/06 07:41:41 connolly
# syntax bug in one of the "not implemented" warnings
#
# Revision 1.3 2004/02/06 07:13:24 connolly
# started support for VEVENT
# - TODO: reformat dates
# - non-text properties; e.g. organizer
# moved main() before Namespace class
#
# Revision 1.2 2004/02/06 06:56:38 connolly
# lines are arranged into components
# calendar object and its properties are written as RDF
#
# Revision 1.1 2004/02/06 05:22:11 connolly
# parses lines into name, params, value
#