📄 unparser.java
字号:
/*
* (c) Copyright 2000, 2001, 2002, 2002, 2003, 2004, 2005, 2006, 2007 Hewlett-Packard Development Company, LP
* All rights reserved.
* [See end of file]
* $Id: Unparser.java,v 1.42 2007/01/11 11:09:39 jeremy_carroll Exp $
*/
package com.hp.hpl.jena.xmloutput.impl;
/*
* @author Jeremy Carroll
*
* To-do wish list:
* - Easy efficiency gains in listSubjects() and modelListSubjects() by
* removing those subjects that we have already considered.
* - Set the default language during the first pass.
*
*
* Notes on ID and BagID: Our preferences are as follows: for a Stating with an
* explicit local ID we avoid explicitly constructing the reification, and try
* to use rule 6.12 with an idAttr. If the Stating is anonymous or non-local
* then we construct the reification explicitly.
*
*
* Notes: The following rules are not supported by the current Jena RDF parser:
* 6.8
*
*
* [6.1] RDF ::= ['<rdf:RDF>'] obj* ['</rdf:RDF>'] [6.2] obj ::= description |
* container [6.3] description ::= '<rdf:Description' idAboutAttr? bagIdAttr?
* propAttr* '/>' | '<rdf:Description' idAboutAttr? bagIdAttr? propAttr* '>'
* propertyElt* '</rdf:Description>' | typedNode [6.4] container ::= sequence |
* bag | alternative [6.5] idAboutAttr ::= idAttr | aboutAttr | aboutEachAttr
* [6.6] idAttr ::= ' ID="' IDsymbol '"' [6.7] aboutAttr ::= ' about="'
* URI-reference '"' [6.8] aboutEachAttr ::= ' aboutEach="' URI-reference '"' | '
* aboutEachPrefix="' string '"' [6.9] bagIdAttr ::= ' bagID="' IDsymbol '"'
* [6.10] propAttr ::= typeAttr | propName '="' string '"' (with embedded quotes
* escaped) [6.11] typeAttr ::= ' type="' URI-reference '"' [6.12] propertyElt
* ::= '<' propName idAttr? '>' value '</' propName '>' | '<' propName
* idAttr? parseLiteral '>' literal '</' propName '>' | '<' propName idAttr?
* parseResource '>' propertyElt* '</' propName '>' | '<' propName idRefAttr?
* bagIdAttr? propAttr* '/>'
*
* [daml.1 - 6.12 cont.] | '<' propName idAttr? parseDamlCollection '>' obj* '</'
* propName '>' [daml.2] parseDamlCollection ::= ' parseType="daml:collection"'
*
* [6.13] typedNode ::= '<' typeName idAboutAttr? bagIdAttr? propAttr* '/>' | '<'
* typeName idAboutAttr? bagIdAttr? propAttr* '>' propertyElt* '</' typeName
* '>' [6.14] propName ::= Qname [6.15] typeName ::= Qname [6.16] idRefAttr ::=
* idAttr | resourceAttr [6.17] value ::= obj | string [6.18] resourceAttr ::= '
* resource="' URI-reference '"' [6.19] Qname ::= [ NSprefix ':' ] name [6.20]
* URI-reference ::= string, interpreted per [URI] [6.21] IDsymbol ::= (any
* legal XML name symbol) [6.22] name ::= (any legal XML name symbol) [6.23]
* NSprefix ::= (any legal XML namespace prefix) [6.24] string ::= (any XML
* text, with "<", ">", and "&" escaped) [6.25] sequence ::= '<rdf:Seq'
* idAttr? '>' member* '</rdf:Seq>' | '<rdf:Seq' idAttr? memberAttr* '/>'
* [6.26] bag ::= '<rdf:Bag' idAttr? '>' member* '</rdf:Bag>' | '<rdf:Bag'
* idAttr? memberAttr* '/>' [6.27] alternative ::= '<rdf:Alt' idAttr? '>'
* member+ '</rdf:Alt>' | '<rdf:Alt' idAttr? memberAttr? '/>' [6.28] member
* ::= referencedItem | inlineItem [6.29] referencedItem ::= '<rdf:li'
* resourceAttr '/>' [6.30] inlineItem ::= '<rdf:li' '>' value </rdf:li>' | '<rdf:li'
* parseLiteral '>' literal </rdf:li>' | '<rdf:li' parseResource '>'
* propertyElt* </rdf:li>' [6.31] memberAttr ::= ' rdf:_n="' string '"' (where n
* is an integer) [6.32] parseLiteral ::= ' parseType="Literal"' [6.33]
* parseResource ::= ' parseType="Resource"' [6.34] literal ::= (any well-formed
* XML)
*
*/
import java.io.PrintWriter;
import java.util.*;
import org.apache.commons.logging.*;
import org.apache.xerces.util.XMLChar;
import com.hp.hpl.jena.iri.IRI;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.rdf.model.impl.*;
import com.hp.hpl.jena.shared.*;
import com.hp.hpl.jena.util.iterator.*;
import com.hp.hpl.jena.vocabulary.*;
/**
* An Unparser will output a model in the abbreviated syntax.
*
* @version Release='$Name: $' Revision='$Revision: 1.42 $' Date='$Date:
* 2005/07/13 15:33:51 $'
*
*/
class Unparser {
// Property named "li" in the namespace returned by RDF.getURI().
private static Property LI = new PropertyImpl(RDF.getURI(), "li");

// Property named "Description" in the namespace returned by RDF.getURI().
private static Property DESCRIPTION =
        new PropertyImpl(RDF.getURI(), "Description");

// Commons-logging log for this class.
protected static Log logger = LogFactory.getLog(Unparser.class);
/**
 * Creates an Unparser for the specified model. The localName is the URI
 * (typically a URL) intended for the output file. No trailing "#" should be
 * used. This will control the use of <code>ID</code>, <code>about</code> or
 * <code>resource</code> on various rules.
 *
 * Construction makes two passes over the model:
 * <ol>
 * <li>every statement is visited and, for each object that is a Resource,
 * increaseObjectCount() is called;</li>
 * <li>every subject typed rdf:Statement that also has rdf:subject,
 * rdf:predicate and rdf:object is recorded in the res2statement and
 * statement2res maps, together with the statement it reifies.</li>
 * </ol>
 * Each iterator is closed in a finally block once its pass is over.
 *
 * @param parent
 *            The Abbreviated writer this unparser works for.
 * @param localName
 *            The intended URI of the output file. No trailing "#". May be
 *            null or empty.
 * @param m
 *            The model.
 * @param w
 *            The output.
 */
Unparser(Abbreviated parent, String localName, Model m, PrintWriter w) {
    setLocalName(localName);
    prettyWriter = parent;
    out = w;
    model = m;
    addTypeNameSpaces();
    objectTable = new HashMap();

    // Pass 1: count the occurrences of each resource in object position.
    StmtIterator ss = m.listStatements();
    try {
        while (ss.hasNext()) {
            Statement s = ss.nextStatement();
            RDFNode rn = s.getObject();
            if (rn instanceof Resource) {
                increaseObjectCount((Resource) rn);
            }
        }
    } finally {
        ss.close();
    }

    // Pass 2: index the reified statements in both directions.
    res2statement = new HashMap();
    statement2res = new HashMap();
    ClosableIterator reified = new MapFilterIterator(new MapFilter() {
        // Keep only rdf:Statement resources that carry the full
        // subject/predicate/object triple of a reification.
        public Object accept(Object o) {
            Resource r = (Resource) o;
            return (r.hasProperty(RDF.subject)
                    && r.hasProperty(RDF.object) && r
                    .hasProperty(RDF.predicate)) ? r : null;
        }
    }, model.listSubjectsWithProperty(RDF.type, RDF.Statement));
    try {
        while (reified.hasNext()) {
            Resource r = (Resource) reified.next();
            try {
                /*
                 * This block of code assumes that really we are dealing
                 * with a reification. We may, on the contrary, be dealing
                 * with a random collection of triples that do not make
                 * sense.
                 */
                Statement subj = r.getRequiredProperty(RDF.subject);
                Statement pred = r.getRequiredProperty(RDF.predicate);
                Statement obj = r.getRequiredProperty(RDF.object);
                RDFNode nobj = obj.getObject();
                Resource rsubj = (Resource) subj.getObject();
                Resource rpred = (Resource) pred.getObject();
                Property ppred = model.createProperty(rpred.getURI());
                Statement statement = model.createStatement(rsubj, ppred,
                        nobj);
                res2statement.put(r, statement);
                statement2res.put(statement, r);
            } catch (Exception ignored) {
                // Deliberate best effort: if the triples do not form a
                // sensible reification (for example a literal where a
                // resource is needed) this resource is simply not indexed.
            }
        }
    } finally {
        // Close the iterator that this pass actually opened. The original
        // code closed the already-closed statement iterator here and so
        // never released this one.
        reified.close();
    }
}
/**
 * Sets the localName field from the given URI.
 *
 * A null or empty uri yields the empty string. Otherwise the uri is turned
 * into an IRI through BaseXMLWriter.factory, the empty reference "" is
 * created relative to it, and the string form of the result is stored.
 * NOTE(review): presumably this normalises the URI to its base form;
 * confirm against the IRI API.
 *
 * Note: must work with uri being null.
 *
 * An earlier version translated MalformedURIException into
 * BadURIException here; that handling was already commented out.
 */
private void setLocalName(String uri) {
    if (uri != null && uri.length() > 0) {
        IRI base = BaseXMLWriter.factory.create(uri);
        localName = base.create("").toString();
        return;
    }
    localName = "";
}
/**
 * Writes the model: the owning Abbreviated writer first works out its
 * namespaces, then the document is produced by wRDF().
 *
 * Should be called exactly once for each Unparser. Calling it a second time
 * will have undesired results.
 */
void write() {
    this.prettyWriter.workOutNamespaces();
    this.wRDF();
}
/**
 * Set a list of types of objects that will be expanded at the top-level of
 * the file. The array itself is kept in pleasingTypes and its elements are
 * also collected into pleasingTypeSet.
 *
 * @param types
 *            An array of rdf:Class'es.
 */
void setTopLevelTypes(Resource types[]) {
    this.pleasingTypes = types;
    Set typesAsSet = new HashSet(Arrays.asList(types));
    this.pleasingTypeSet = typesAsSet;
}
// Value written as the xml:base attribute by wRDF(); when null, wRDF()
// writes no xml:base attribute.
private String xmlBase;

/**
 * Sets the value to be written as xml:base.
 *
 * @param b
 *            The xml:base value, or null for none.
 */
void setXMLBase(String b) {
    this.xmlBase = b;
}
/*
* THE MORE INTERESTING MEMBER VARIABLES. Note there are others scattered
* throughout the file, but those are only used by one or two methods.
*/
final private static String rdfns = RDF.type.getNameSpace();
final private static Integer one = new Integer(1);
private String localName;
private Map objectTable; // This is a map from Resource to Integer
// which indicates how many times each resource
// occurs as an object of a triple.
private Model model;
private PrintWriter out;
private Set doing = new HashSet(); // Some of the resources that
// are currently being written.
private Set doneSet = new HashSet(); // The triples that have been
// output.
private Set haveReified = new HashSet(); // Those local resources that
// are
// the id's of a reification, used to ensure that anonymous
// resources are made non-anonymous when reified in certain ways.
private Resource pleasingTypes[] = null;
private Set pleasingTypeSet = new HashSet();
final private Abbreviated prettyWriter;
private boolean avoidExplicitReification = true;
// We set this to false as we start giving up on elegance.
// Reification stuff.
Map res2statement;
Map statement2res;
/*
* The top-down recursive descent unparser. The methods whose names start with
* "w" each refer to one of the rules of the grammar, which they implement.
* Boolean-valued rules first check whether they are applicable and return
* false if not. Otherwise they create appropriate output (using recursive
* descent) and return true. Note that all necessary checks are made before any
* output or any recursive descent. The void w- methods just implement the
* rule, which typically does not involve any choice.
*/
/*
* [6.1] RDF ::= ['<rdf:RDF>'] obj* ['</rdf:RDF>']
*
* Writes the start tag of the RDF element (with namespace definitions and,
* when xmlBase is set, an xml:base attribute), then all top-level objects
* via wObjStar(), then the matching end tag.
*/
private void wRDF() {
tab();
print("<");
print(prettyWriter.rdfEl("RDF"));
indentPlus();
// Namespace definitions are written inside the start tag.
printNameSpaceDefn();
if (xmlBase != null) {
// An explicit xml:base replaces the local name given at construction
// before any object is written.
setLocalName(xmlBase);
tab();
print("xml:base=" + quote(xmlBase));
}
print(">");
// obj*
wObjStar();
indentMinus();
tab();
print("</");
print(prettyWriter.rdfEl("RDF"));
print(">");
// One more tab() after the end tag.
tab();
}
/**
 * For top level use only: writes every subject returned by listSubjects()
 * with wObj(), then closes all resource iterators.
 */
private void wObjStar() {
    for (Iterator subjects = listSubjects(); subjects.hasNext();) {
        Resource subject = (Resource) subjects.next();
        // Bumping the object count forces the resource not to be
        // anonymous unless it is never an object. See isGenuineAnon().
        increaseObjectCount(subject);
        wObj(subject, true);
    }
    closeAllResIterators();
}
/*
 * [6.12] propertyElt ::=
 *   (1) '<' propName idAttr? '>' value '</' propName '>'
 * | (2) '<' propName idAttr? parseLiteral '>' literal '</' propName '>'
 * | (3) '<' propName idAttr? parseResource '>' propertyElt* '</' propName '>'
 * | (4) '<' propName idRefAttr? bagIdAttr? propAttr* '/>'
 * [daml.1 - 6.12 cont.]
 * |     '<' propName idAttr? parseDamlCollection '>' obj* '</' propName '>'
 * [daml.2] parseDamlCollection ::= ' parseType="daml:collection"'
 *
 * For daml collections we prefer the special syntax. Otherwise we prefer
 * choice 4 where possible, except when the statement is reified and the
 * object is not anonymous, in which case one of the others (e.g. choice 1)
 * is used. For embedded XML choice 2 is obligatory. For untyped, anonymous
 * resource valued items choice 3 is used. Choice 1 is the fall back.
 *
 * The alternatives are tried in the order below; the first one that
 * reports success ends the search and its result is returned.
 */
private boolean wPropertyElt(WType wt, Property prop, Statement s,
        RDFNode val) {
    if (wPropertyEltCompact(wt, prop, s, val)) { // choice 4
        return true;
    }
    if (wPropertyEltDamlCollection(wt, prop, s, val)) { // choice daml.1
        return true;
    }
    if (wPropertyEltLiteral(wt, prop, s, val)) { // choice 2
        return true;
    }
    if (wPropertyEltResource(wt, prop, s, val)) { // choice 3
        return true;
    }
    if (wPropertyEltDatatype(wt, prop, s, val)) {
        return true;
    }
    return wPropertyEltValue(wt, prop, s, val); // choice 1
}
/*
 * [6.12.4] propertyElt ::= '<' propName idRefAttr? bagIdAttr? propAttr*
 * '/>'
 *
 * Applies only when the value is a Resource that either has all its
 * properties expressible as attributes or is already being written.
 * Returns false, writing nothing, when the rule does not apply.
 */
private boolean wPropertyEltCompact(WType wt, Property prop, Statement s,
        RDFNode val) {
    // Applicability checks: nothing is written until all of them pass.
    if (!(val instanceof Resource)) {
        return false;
    }
    Resource target = (Resource) val;
    if (!allPropsAreAttr(target) && !doing.contains(target)) {
        return false;
    }
    // '<' propName '/>' is 6.12.1 rather than 6.12.4
    // and it becomes an empty string value.
    // Whether this is a mistake or not is debatable.
    // We avoid the construction.
    if (!hasProperties(target) && isGenuineAnon(target)) {
        return false;
    }

    // Output.
    done(s);
    tab();
    print("<");
    wt.wTypeStart(prop);
    indentPlus();
    wIdRefAttrOpt(s, target);
    if (doing.contains(target)) {
        // The target is already being written, so its properties are not
        // repeated here. A genuinely anonymous target is reported as an
        // error.
        if (isGenuineAnon(target)) {
            error("Genuine anon resource in cycle?");
        }
    } else {
        wPropAttrAll(target);
    }
    indentMinus();
    print("/>");
    return true;
}
/*
 * [6.12.2] propertyElt ::= '<' propName idAttr? parseLiteral '>' literal '</'
 * propName '>'
 *
 * Applies only when the writer's sParseTypeLiteralPropertyElt flag is not
 * set and the value is a Literal that reports itself as well-formed XML.
 * The lexical form is printed as is, with no entity substitution.
 */
private boolean wPropertyEltLiteral(WType wt, Property prop, Statement s,
        RDFNode r) {
    if (prettyWriter.sParseTypeLiteralPropertyElt) {
        return false;
    }
    if (!(r instanceof Literal)) {
        return false;
    }
    Literal xmlLiteral = (Literal) r;
    if (!xmlLiteral.isWellFormedXML()) {
        return false;
    }

    // Output.
    done(s);
    tab();
    print("<");
    wt.wTypeStart(prop);
    wIdAttrReified(s);
    maybeNewline();
    wParseLiteral();
    maybeNewline();
    print(">");
    print(xmlLiteral.getLexicalForm());
    print("</");
    wt.wTypeEnd(prop);
    print(">");
    return true;
}
/*
 * Property element for a typed literal: applies only when the value is a
 * Literal with a non-null datatype URI. The datatype is written through
 * wDatatype() and the lexical form is printed after entity substitution
 * for element content.
 */
private boolean wPropertyEltDatatype(WType wt, Property prop, Statement s,
        RDFNode r) {
    if (!(r instanceof Literal)) {
        return false;
    }
    Literal typed = (Literal) r;
    if (typed.getDatatypeURI() == null) {
        return false;
    }

    // Output.
    done(s);
    tab();
    print("<");
    wt.wTypeStart(prop);
    wIdAttrReified(s);
    maybeNewline();
    wDatatype(typed.getDatatypeURI());
    maybeNewline();
    print(">");
    String escaped = Util.substituteEntitiesInElementContent(typed
            .getLexicalForm());
    print(escaped);
    print("</");
    wt.wTypeEnd(prop);
    print(">");
    return true;
}
/*
* [6.12.3] propertyElt ::= '<' propName idAttr? parseResource '>'
* propertyElt* '</' propName '>'
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -