Skip to content

Commit

Permalink
import pinyin4j and sparta-xml source code
Browse files Browse the repository at this point in the history
  • Loading branch information
belerweb committed Apr 22, 2013
1 parent 0d43721 commit 83da65e
Show file tree
Hide file tree
Showing 72 changed files with 37,663 additions and 0 deletions.
9 changes: 9 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,15 @@
<maven.compiler.target>${maven.compiler.source}</maven.compiler.target>
</properties>

<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
Expand Down
151 changes: 151 additions & 0 deletions src/main/java/com/hp/hpl/sparta/BuildDocument.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package com.hp.hpl.sparta;

/** This class returns the DOM Document created by parsing XML.
<blockquote><small> Copyright (C) 2002 Hewlett-Packard Company.
This file is part of Sparta, an XML Parser, DOM, and XPath library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2.1 of
the License, or (at your option) any later version. This library
is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.</small></blockquote>
@see <a href="doc-files/LGPL.txt">GNU Lesser General Public License</a>
@version $Date: 2003/05/12 19:49:29 $ $Revision: 1.3 $
@author Sergio Marti
*/

class BuildDocument implements DocumentSource, ParseHandler {

  /**
   * Log chosen at construction: the supplied log, or DEFAULT_LOG when none
   * was supplied. Not read by any active code in this class.
   */
  private final ParseLog log_;

  /** The document that the parse events are assembled into. */
  private final Document doc_ = new Document();

  /** Element most recently started and not yet ended; null when there is none. */
  private Element currentElement_;

  /** Source handed to {@link #setParseSource}; null until that call is made. */
  private ParseSource parseSource_;

  /** Creates a builder that uses DEFAULT_LOG. */
  public BuildDocument() {
    this(null);
  }

  /**
   * Creates a builder.
   * @param log log to keep, or null to fall back to DEFAULT_LOG
   */
  public BuildDocument(ParseLog log) {
    if (log == null) {
      log_ = DEFAULT_LOG;
    } else {
      log_ = log;
    }
  }

  /**
   * Records the parse source and copies its string form into the
   * document's system id.
   * @param ps the source; it is dereferenced, so it must not be null
   */
  public void setParseSource(ParseSource ps) {
    parseSource_ = ps;
    String sourceDescription = ps.toString();
    doc_.setSystemId(sourceDescription);
  }

  /** @return the source set by {@link #setParseSource}, or null if none was set */
  public ParseSource getParseSource() {
    return parseSource_;
  }

  /**
   * @return "BuildDoc: " followed by the parse source's string form,
   *         or null when no parse source has been set
   */
  public String toString() {
    if (parseSource_ == null) {
      return null;
    }
    return "BuildDoc: " + parseSource_.toString();
  }

  /** @return the parse source's system id, or null when no parse source has been set */
  public String getSystemId() {
    if (parseSource_ == null) {
      return null;
    }
    return parseSource_.getSystemId();
  }

  /** @return the parse source's line number, or -1 when no parse source has been set */
  public int getLineNumber() {
    if (parseSource_ == null) {
      return -1;
    }
    return parseSource_.getLineNumber();
  }

  /** The parsed document. */
  public Document getDocument() {
    return doc_;
  }

  /** Does nothing. */
  public void startDocument() {
    // nothing to prepare
  }

  /** Does nothing. */
  public void endDocument() {
    // nothing to finish
  }

  /**
   * Attaches the element to the tree and makes it the current element.
   * With no current element it becomes the document element; otherwise it
   * is appended as a child of the current element.
   */
  public void startElement(Element element) {
    if (currentElement_ != null) {
      currentElement_.appendChild(element);
    } else {
      doc_.setDocumentElement(element);
    }
    currentElement_ = element;
  }

  /**
   * Moves the current element up to its parent. The argument is not
   * inspected; a current element must exist when this is called.
   */
  public void endElement(Element element) {
    Element closing = currentElement_;
    currentElement_ = (Element) closing.getParentNode();
  }

  /**
   * Adds character data to the current element. When the element's last
   * child is already a Text node the data is appended to it; otherwise a
   * new Text node is created and added.
   */
  public void characters(char[] buf, int offset, int len) {
    Element target = currentElement_;
    if (!(target.getLastChild() instanceof Text)) {
      String data = new String(buf, offset, len);
      target.appendChildNoChecking(new Text(data));
      return;
    }
    Text trailing = (Text) target.getLastChild();
    trailing.appendData(buf, offset, len);
  }
}

// $Log: BuildDocument.java,v $
// Revision 1.3 2003/05/12 19:49:29 eobrain
// Remove unused method.
//
// Revision 1.2 2002/10/30 16:40:27 eobrain
// appendChild no longer throws DOMException
//
// Revision 1.1.1.1 2002/08/19 05:04:02 eobrain
// import from HP Labs internal CVS
//
// Revision 1.6 2002/08/18 04:30:54 eob
// Make class package-private so as not to clutter up the javadoc.
//
// Revision 1.5 2002/08/17 19:04:36 eob
// Add copyright and other formatting and commenting in preparation for
// release to SourceForge.
//
// Revision 1.4 2002/08/15 22:51:37 eob
// Sparta node constructors no longer needs document
//
// Revision 1.3 2002/08/05 20:04:31 sermarti
//
// Revision 1.2 2002/08/01 23:29:17 sermarti
// Much faster Sparta parsing.
// Has debug features enabled by default. Currently toggled
// in ParseCharStream.java and recompiled.
//
// Revision 1.1 2002/07/25 21:10:15 sermarti
// Adding files that mysteriously weren't added from Sparta before.
96 changes: 96 additions & 0 deletions src/main/java/com/hp/hpl/sparta/CharCircBuffer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package com.hp.hpl.sparta;

/**
* Circular character buffer used to store parsing history for debug
* purposes.
<blockquote><small> Copyright (C) 2002 Hewlett-Packard Company.
This file is part of Sparta, an XML Parser, DOM, and XPath library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2.1 of
the License, or (at your option) any later version. This library
is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.</small></blockquote>
@see <a href="doc-files/LGPL.txt">GNU Lesser General Public License</a>
@version $Date: 2003/07/17 21:36:29 $ $Revision: 1.2 $
@author Sergio Marti
*/


class CharCircBuffer {

  /**
   * Creates a buffer that remembers the last <code>n</code> entries.
   * @param n capacity in entries; with a capacity of zero every add is ignored
   */
  CharCircBuffer(int n) {
    buf_ = new int[n];
    isInt_ = new boolean[n];
  }

  /** Resumes recording of added entries. */
  void enable() {
    enabled_ = true;
  }

  /** Suspends recording; entries added while disabled are dropped. */
  void disable() {
    enabled_ = false;
  }

  /**
   * Records an integer, which {@link #toString} renders in decimal.
   * Any int value is accepted, including negative ones.
   */
  void addInt(int i) {
    addRaw(i, true);
  }

  /** Records a single character. */
  void addChar(char ch) {
    addRaw(ch, false);
  }

  /**
   * Stores one entry at the write position, overwriting the oldest entry
   * once the buffer is full.
   * @param v the char code or the integer value
   * @param isInt true when <code>v</code> is an integer rather than a char
   */
  private void addRaw(int v, boolean isInt) {
    // A zero-capacity buffer has nowhere to store anything, and the modulo
    // below would divide by zero.
    if (!enabled_ || buf_.length == 0) {
      return;
    }
    buf_[next_] = v;
    isInt_[next_] = isInt;
    next_ = (next_ + 1) % buf_.length;
    // Saturate at capacity so the count can never overflow, no matter how
    // many entries pass through the buffer.
    if (count_ < buf_.length) {
      ++count_;
    }
  }

  /** Records every character of <code>s</code> in order. */
  void addString(String s) {
    int slen = s.length();
    for (int i = 0; i < slen; ++i) {
      addChar(s.charAt(i));
    }
  }

  /**
   * Returns the retained entries, oldest first: characters as themselves
   * and integers in decimal.
   */
  public String toString() {
    StringBuffer result = new StringBuffer(11 * buf_.length / 10);
    // Until the buffer has wrapped the oldest entry is at index 0;
    // afterwards it is the slot that will be overwritten next.
    int oldest = (count_ < buf_.length) ? 0 : next_;
    for (int k = 0; k < count_; ++k) {
      int idx = (oldest + k) % buf_.length;
      if (isInt_[idx]) {
        result.append(buf_[idx]);
      } else {
        result.append((char) buf_[idx]);
      }
    }
    return result.toString();
  }

  private final int[] buf_; // char codes or integer values, told apart by isInt_
  private final boolean[] isInt_; // isInt_[k] is true when buf_[k] holds an integer
  private int next_ = 0; // index of the next slot to write
  private int count_ = 0; // number of valid entries, at most buf_.length
  private boolean enabled_ = true;
}


// $Log: CharCircBuffer.java,v $
// Revision 1.2 2003/07/17 21:36:29 eobrain
// Use integer arithmetic instead of floating-point arithmetic which is
// not supported in the J2ME we were using on a Nokia phone.
//
// Revision 1.1.1.1 2002/08/19 05:04:02 eobrain
// import from HP Labs internal CVS
//
// Revision 1.2 2002/08/18 04:31:45 eob
// Add copyright and other formatting and commenting in preparation for
// release to SourceForge.
//
// Revision 1.1 2002/08/01 23:29:17 sermarti
// Much faster Sparta parsing.
// Has debug features enabled by default. Currently toggled
// in ParseCharStream.java and recompiled.
53 changes: 53 additions & 0 deletions src/main/java/com/hp/hpl/sparta/DOMException.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
package com.hp.hpl.sparta;

/**
* Thrown when there is a problem constructing the DOM.
<blockquote><small> Copyright (C) 2002 Hewlett-Packard Company.
This file is part of Sparta, an XML Parser, DOM, and XPath library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2.1 of
the License, or (at your option) any later version. This library
is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.</small></blockquote>
@see <a href="doc-files/LGPL.txt">GNU Lesser General Public License</a>
@version $Date: 2002/08/19 05:04:01 $ $Revision: 1.1.1.1 $
@see org.w3c.dom.DOMException
@author Eamonn O'Brien-Strain
*/
public class DOMException extends Exception {

  // Error codes defined by this class. The remaining codes are deliberately
  // left undefined here: INDEX_SIZE_ERR (1), WRONG_DOCUMENT_ERR (4),
  // INVALID_CHARACTER_ERR (5), NO_DATA_ALLOWED_ERR (6),
  // NO_MODIFICATION_ALLOWED_ERR (7), NOT_SUPPORTED_ERR (9) and
  // INUSE_ATTRIBUTE_ERR (10).
  public static final short DOMSTRING_SIZE_ERR = 2;
  public static final short HIERARCHY_REQUEST_ERR = 3;
  public static final short NOT_FOUND_ERR = 8;

  /** The error code passed to the constructor. */
  public short code;

  /**
   * Creates an exception carrying an error code and a detail message.
   * @param code the error code to store in {@link #code}
   * @param message the detail message handed to {@link Exception}
   */
  public DOMException(short code, String message) {
    super(message);
    this.code = code;
  }
}

// $Log: DOMException.java,v $
// Revision 1.1.1.1 2002/08/19 05:04:01 eobrain
// import from HP Labs internal CVS
//
// Revision 1.3 2002/08/18 04:32:20 eob
// Add copyright and other formatting and commenting in preparation for
// release to SourceForge.
//
// Revision 1.2 2002/05/23 21:02:12 eob
// Add DOMSTRING_SIZE_ERR code.
//
// Revision 1.1 2002/01/04 18:28:11 eob
// initial
58 changes: 58 additions & 0 deletions src/main/java/com/hp/hpl/sparta/DefaultParseHandler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
package com.hp.hpl.sparta;

/** This class provides a default ParseHandler that does nothing.
Users should subclass it and override the necessary methods.
<blockquote><small> Copyright (C) 2002 Hewlett-Packard Company.
This file is part of Sparta, an XML Parser, DOM, and XPath library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation; either version 2.1 of
the License, or (at your option) any later version. This library
is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.</small></blockquote>
@see <a href="doc-files/LGPL.txt">GNU Lesser General Public License</a>
@version $Date: 2002/08/19 05:04:01 $ $Revision: 1.1.1.1 $
@author Sergio Marti
*/
public class DefaultParseHandler implements ParseHandler {

  /** Source most recently passed to {@link #setParseSource}; null until then. */
  private ParseSource parseSource_;

  /** Remembers the given parse source so that it can be retrieved later. */
  public void setParseSource(ParseSource ps) {
    this.parseSource_ = ps;
  }

  /** @return the source given to {@link #setParseSource}, or null if none was given */
  public ParseSource getParseSource() {
    return this.parseSource_;
  }

  /** Does nothing. */
  public void startDocument() throws ParseException {
    // intentionally empty
  }

  /** Does nothing. */
  public void endDocument() throws ParseException {
    // intentionally empty
  }

  /** Does nothing; the element is ignored. */
  public void startElement(Element element) throws ParseException {
    // intentionally empty
  }

  /** Does nothing; the element is ignored. */
  public void endElement(Element element) throws ParseException {
    // intentionally empty
  }

  /** Does nothing; the character data is ignored. */
  public void characters(char[] buf, int off, int len) throws ParseException {
    // intentionally empty
  }
}

// $Log: DefaultParseHandler.java,v $
// Revision 1.1.1.1 2002/08/19 05:04:01 eobrain
// import from HP Labs internal CVS
//
// Revision 1.3 2002/08/18 04:32:40 eob
// Add copyright and other formatting and commenting in preparation for
// release to SourceForge.
//
// Revision 1.2 2002/08/17 00:54:14 sermarti
//
// Revision 1.1 2002/08/01 23:29:17 sermarti
// Much faster Sparta parsing.
// Has debug features enabled by default. Currently toggled
// in ParseCharStream.java and recompiled.
Loading

0 comments on commit 83da65e

Please sign in to comment.