⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 jdbcdocumentcollection.java

📁 MG4J (Managing Gigabytes for Java) is a free full-text search engine for large document collections
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	public Document document( final int index ) throws IOException {		final MutableString title = new MutableString();		return factory.getDocument( stream( index, title ), metadata( index, title ) );	}		/** Returns the document associated with a given database identifier.	 * 	 * @param id a database identifier.	 * @return the associated document.	 */	public int id2doc( final int id ) {		return id2doc.get( id );	}		/** Returns the database identifier associated with a given document.	 * 	 * @param doc a document index.	 * @return the associated database identifier.	 */	public int doc2id( final int doc ) {		ensureDocumentIndex( doc );		return doc2id[ doc ];	}	/** Creates metadata with the given title; if the title is not available, it is fetched from the database.	 * 	 * @param index a document index.	 * @param title a suggested title, or <code>null</code>.	 * @return the metadata for the document <code>index</code>.	 */	protected Reference2ObjectMap<Enum<?>,Object> metadata( final int index, CharSequence title ) {		final Reference2ObjectArrayMap<Enum<?>,Object> metadata = new Reference2ObjectArrayMap<Enum<?>,Object>( 2 );		if ( title == null ) {			try {				Connection connection = DriverManager.getConnection( dbUri );				Statement s = connection.createStatement();				ResultSet rs = s.executeQuery( buildQuery( idSpec + "=" + doc2id[ index ] ) );				if ( ! rs.next() ) throw new IllegalStateException( "Id " + doc2id[ index ] + " is no longer in the database" );				title = rs.getString( 2 );			}			catch ( SQLException e ) {				throw new RuntimeException( e );			}		}		metadata.put( MetadataKeys.TITLE, title );		metadata.put( MetadataKeys.URI, Integer.toString( doc2id[ index ] ) );		return metadata;	}	public Reference2ObjectMap<Enum<?>,Object> metadata( final int index ) {		ensureDocumentIndex( index );		return metadata( index, null );	}			public InputStream stream( final int index ) throws IOException {		return stream( index, null );	}	private InputStream getStreamFromResultSet( final ResultSet rs, final MutableString title ) throws SQLException {		final InputStream[] a = new InputStream[ rs.getMetaData().getColumnCount() - 2 ]; // -2 for id and title		for( int i = 0; i < a.length; i++ ) {			a[ i ] = rs.getBinaryStream( i + 3 );			if ( a[ i ] ==  null ) a[ i ] = NullInputStream.getInstance();		}		if ( title != null ) title.replace( rs.getString( 2 ) );		return MultipleInputStream.getStream( a );	}		private InputStream stream( final int index, final MutableString title ) throws IOException {		ensureDocumentIndex( index );		try {			Connection connection = DriverManager.getConnection( dbUri );			Statement s = connection.createStatement();			ResultSet rs = s.executeQuery( buildQuery( idSpec + "=" + doc2id[ index ] ) );			if ( ! rs.next() ) throw new IllegalStateException( "Id " + doc2id[ index ] + " is no longer in the database" );			return getStreamFromResultSet( rs, title );		}		catch ( SQLException e ) {			throw new IOException( e.toString() );		}	}	/** An iterator over the whole collection that performs a single DBMS transaction. */		// ALERT: this is actually VERY inefficient, as metadata() makes a query.	protected class JdbcDocumentIterator extends AbstractDocumentIterator {		private final Connection connection;		private final ResultSet rs;		private final MutableString title = new MutableString();		private int index = 0;		private JdbcDocumentIterator() throws SQLException {			connection = DriverManager.getConnection( dbUri );			Statement s = connection.createStatement();			rs = s.executeQuery( buildQuery( null ) );		}		public Document nextDocument() throws IOException {			try {				if ( ! rs.next() ) return null;				while( rs.getInt( 1 ) < doc2id[ index ] ) rs.next();				if ( rs.getInt( 1 ) > doc2id[ index ] ) throw new IllegalStateException( "Row with id " + doc2id[ index ] + " is missing" );				return factory.getDocument( getStreamFromResultSet( rs, title ), metadata( index++, title ) );			}			catch ( SQLException e ) {				throw new IOException( e.toString() );			}		}		public void close() throws IOException {			super.close();			try {				rs.close();			}			catch ( SQLException e ) {				throw new IOException( e.toString() );			}		}	}		public DocumentIterator iterator() throws IOException {		try {			return new JdbcDocumentIterator();		}		catch ( SQLException e ) {			throw new IOException( e.toString() );		}	}	private void readObject( final ObjectInputStream s ) throws IOException, ClassNotFoundException {		s.defaultReadObject();		initDriver();	}		public static void main( final String[] arg ) throws JSAPException, InvocationTargetException, NoSuchMethodException, IllegalAccessException, IOException, SQLException, ClassNotFoundException, InstantiationException {		SimpleJSAP jsap = new SimpleJSAP( JdbcDocumentCollection.class.getName(), "Saves a serialised document collection based on a set of database rows. The first column of the query is used as an integer id, and the second column for titles. Each remaining column is used to build a segmented input stream, which is passed to a ComposedDocumentFactory made of the specified factories.",				new Parameter[] {					new FlaggedOption( "property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file" ).setAllowMultipleDeclarations( true ),					new FlaggedOption( "jdbcDriver", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'd', "driver", "The JDBC driver. You can omit it if it is already loaded." ),					new FlaggedOption( "factory", MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), JSAP.NOT_REQUIRED, 'f', "factory", "One document factory for each indexed field." ).setAllowMultipleDeclarations( true ),					new FlaggedOption( "fieldName", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'n', "field-name", "One field name for each field in the composed document factory. If all specified factories have just one field, the name of the SQL column will be used as a default field name." ).setAllowMultipleDeclarations( true ),					new UnflaggedOption( "collection", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialised collection." ),					new UnflaggedOption( "dburi", JSAP.STRING_PARSER, JSAP.REQUIRED, "The JDBC URI defining the database." ),					new UnflaggedOption( "select", JSAP.STRING_PARSER, JSAP.REQUIRED, "A SQL query generating the collection, except for the WHERE part." ),					new FlaggedOption( "idSpec", JSAP.STRING_PARSER, "id", JSAP.NOT_REQUIRED, 'i', "id-spec", "An optional, more precise specification for the id field (the first column)." ),					new FlaggedOption( "where", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'w', "where", "The the WHERE part (without the WHERE keyword) of the SQL query generating the collection." )								}		);				JSAPResult jsapResult = jsap.parse( arg );		if ( jsap.messagePrinted() ) return;		// We run the query to get meta-information about the columns.		@SuppressWarnings("unused") Class<?> jdbcDriver = Class.forName( jsapResult.getString( "jdbcDriver" ) );		Connection connection = DriverManager.getConnection( jsapResult.getString( "dburi" ) );		Statement s = connection.createStatement();		ResultSet rs = s.executeQuery( "SELECT " + jsapResult.getString( "select" ) );		ResultSetMetaData metaData = rs.getMetaData();				String[] column = new String[ metaData.getColumnCount() - 2 ];		for( int i = 3; i <= metaData.getColumnCount(); i++ ) column[ i - 3 ] = metaData.getColumnName( i );		rs.close();				final DocumentFactory[] factory = new DocumentFactory[ column.length ];		final Class<?>[] factoryClass = jsapResult.getClassArray( "factory" );		final String[] property = jsapResult.getStringArray( "property" );		for( int i = 0; i < factory.length; i++ ) { 			factory[ i ] = PropertyBasedDocumentFactory.getInstance( factoryClass[ Math.min( i, factoryClass.length - 1 ) ], property );			if ( factory[ i ].numberOfFields() > 1 && ! jsapResult.userSpecified( "fieldName" ) ) throw new IllegalArgumentException( "For factories with more than one field you must specify the name of each field of the composed factory" );		}		if ( jsapResult.userSpecified(  "fieldName" ) ) column = jsapResult.getStringArray( "fieldName" );				BinIO.storeObject( new JdbcDocumentCollection( 								jsapResult.getString( "dburi" ),								jsapResult.getString( "jdbcDriver" ), 								jsapResult.getString( "select" ),								jsapResult.getString( "idSpec" ),								jsapResult.getString( "where" ),								CompositeDocumentFactory.getFactory( factory, column )							), jsapResult.getString( "collection" ) );	}}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -