⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 avaliaprecisionrecall.java

📁 Calculate TFIDF using MySQL and Lucene on Cystic Fibrosis
💻 JAVA
字号:
package br.ufrj.cos.bri.controller;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.InputMismatchException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

import javax.persistence.EntityManager;
import javax.persistence.EntityManagerFactory;
import javax.persistence.EntityTransaction;
import javax.persistence.Persistence;

import org.apache.commons.collections.map.HashedMap;
import org.apache.lucene.queryParser.ParseException;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;

import br.ufrj.cos.bri.model.Author;
import br.ufrj.cos.bri.model.Avaliador;
import br.ufrj.cos.bri.model.Citations;
import br.ufrj.cos.bri.model.Documento;
import br.ufrj.cos.bri.model.ItemScore;
import br.ufrj.cos.bri.model.MajorSubtopic;
import br.ufrj.cos.bri.model.MinorSubtopic;
import br.ufrj.cos.bri.model.Pergunta;
import br.ufrj.cos.bri.model.Referencia;


public class AvaliaPrecisionRecall {
	// Names of the data XML files; the constructor fills this with cf74.xml through cf79.xml,
	// and carregaDadosXML opens and parses each entry in order.
	private String[] arquivosDados;
	// Name of the query XML file, assigned in the constructor.
	// NOTE(review): no use of this field is visible in this part of the file.
	private String arquivoQuery;
	
	/**
	 * Parses one data XML file and persists a {@link Documento} for every child element of its root.
	 *
	 * <p>For each record the simple text fields (PAPERNUM, TITLE, ABSTRACT or EXTRACT, MEDLINENUM,
	 * RECORDNUM, SOURCE) and the optional nested lists (AUTHORS, MAJORSUBJ, MINORSUBJ, REFERENCES,
	 * CITATIONS) are copied into the model object. In parallel, a textual {@code <RECORD>} rendering
	 * of the same data is rebuilt and stored through {@code setTextoCompleto}.
	 *
	 * <p>This method neither begins nor commits a transaction; it only calls {@code em.persist}.
	 *
	 * <p>NOTE(review): element text and attribute values are appended to the rebuilt record without
	 * any XML escaping, so values containing {@code <}, {@code &} or {@code '} yield text that is not
	 * well-formed XML. Confirm whether consumers of {@code textoCompleto} parse it as XML.
	 *
	 * @param file the XML file to read
	 * @param em   entity manager that receives each parsed document
	 * @return the number of records persisted from this file
	 * @throws JDOMException if the file cannot be built into a JDOM document
	 * @throws IOException   if the file cannot be read
	 */
	private int realizaParserDados(File file, EntityManager em) throws JDOMException, IOException {
		// The 'true' argument asks JDOM for a validating builder.
		SAXBuilder sb = new SAXBuilder(true);
		Document d = sb.build(file);
		Element root = d.getRootElement();

		int acumulador = 0;
		for (Object child : root.getChildren()) {
			Element e = (Element) child;

			StringBuilder fullText = new StringBuilder();
			fullText.append("<RECORD>");

			Documento documentModel = new Documento();

			String paperNum = e.getChildText("PAPERNUM");
			fullText.append("<PAPERNUM>").append(paperNum).append("</PAPERNUM>");
			documentModel.setNumeroPaper(paperNum);

			String title = e.getChildText("TITLE");
			fullText.append("<TITLE>").append(title).append("</TITLE>");
			documentModel.setTitulo(title);

			// A record carries either an ABSTRACT or, failing that, an EXTRACT; both feed the same field.
			String conteudoAbstractExtract = e.getChildText("ABSTRACT");
			if (conteudoAbstractExtract != null) {
				fullText.append("<ABSTRACT>").append(conteudoAbstractExtract).append("</ABSTRACT>");
			} else {
				// If EXTRACT is absent as well, the value stays null and is rendered as the text "null".
				conteudoAbstractExtract = e.getChildText("EXTRACT");
				fullText.append("<EXTRACT>").append(conteudoAbstractExtract).append("</EXTRACT>");
			}
			documentModel.setTextoAbstract(conteudoAbstractExtract);
			documentModel.setTituloAbstract(title + " " + conteudoAbstractExtract);

			String medLineNum = e.getChildText("MEDLINENUM");
			fullText.append("<MEDLINENUM>").append(medLineNum).append("</MEDLINENUM>");

			String recordNum = e.getChildText("RECORDNUM");
			fullText.append("<RECORDNUM>").append(recordNum).append("</RECORDNUM>");

			documentModel.setNumeroMedline(medLineNum);
			documentModel.setNumeroRegistro(recordNum);

			String source = e.getChildText("SOURCE");
			fullText.append("<SOURCE>").append(source).append("</SOURCE>");
			documentModel.setFonte(source);

			Element elementAuthors = e.getChild("AUTHORS");
			if (elementAuthors != null) {
				fullText.append("<AUTHORS>");
				// Keyed by author name: a repeated name overwrites the earlier map entry,
				// while the rebuilt text still lists every occurrence.
				Map<String, Author> autores = new HashedMap();
				for (Object authorNode : elementAuthors.getChildren()) {
					String nameAuthor = ((Element) authorNode).getText();
					Author author = new Author();
					author.setName(nameAuthor);
					autores.put(nameAuthor, author);
					fullText.append("<AUTHOR>").append(nameAuthor).append("</AUTHOR>");
				}
				fullText.append("</AUTHORS>");
				documentModel.setAutores(autores);
			}

			Element elementMajorSubj = e.getChild("MAJORSUBJ");
			if (elementMajorSubj != null) {
				fullText.append("<MAJORSUBJ>");
				List<MajorSubtopic> listMajorSubj = new ArrayList<MajorSubtopic>();
				for (Object topicNode : elementMajorSubj.getChildren()) {
					String majorSubj = ((Element) topicNode).getText();
					MajorSubtopic topic = new MajorSubtopic();
					topic.setTopic(majorSubj);
					listMajorSubj.add(topic);
					fullText.append("<TOPIC>").append(majorSubj).append("</TOPIC>");
				}
				fullText.append("</MAJORSUBJ>");
				documentModel.setMajorSubtopic(listMajorSubj);
			}

			Element elementMinorSubj = e.getChild("MINORSUBJ");
			if (elementMinorSubj != null) {
				fullText.append("<MINORSUBJ>");
				List<MinorSubtopic> listMinorSubj = new ArrayList<MinorSubtopic>();
				for (Object topicNode : elementMinorSubj.getChildren()) {
					String minorSubj = ((Element) topicNode).getText();
					MinorSubtopic topic = new MinorSubtopic();
					topic.setTopic(minorSubj);
					listMinorSubj.add(topic);
					fullText.append("<TOPIC>").append(minorSubj).append("</TOPIC>");
				}
				fullText.append("</MINORSUBJ>");
				documentModel.setMinorSubtopic(listMinorSubj);
			}

			Element elementReferences = e.getChild("REFERENCES");
			if (elementReferences != null) {
				fullText.append("<REFERENCES>");
				List<Referencia> listRefences = new ArrayList<Referencia>();
				for (Object citeNode : elementReferences.getChildren()) {
					Element a = (Element) citeNode;

					// getAttribute(...) is dereferenced directly, so a CITE lacking any of these
					// attributes fails here with a NullPointerException.
					String num = a.getAttribute("num").getValue();
					String author = a.getAttribute("author").getValue();
					String publication = a.getAttribute("publication").getValue();
					String d1 = a.getAttribute("d1").getValue();
					String d2 = a.getAttribute("d2").getValue();
					String d3 = a.getAttribute("d3").getValue();

					appendCiteTag(fullText, num, author, publication, d1, d2, d3);

					Referencia cite = new Referencia();
					cite.setAutor(author);
					cite.setD1(d1);
					cite.setD2(d2);
					cite.setD3(d3);
					cite.setNumero(num);
					cite.setPublicacao(publication);
					listRefences.add(cite);
				}
				fullText.append("</REFERENCES>");
				documentModel.setReferences(listRefences);
			}

			Element elementCitations = e.getChild("CITATIONS");
			if (elementCitations != null) {
				fullText.append("<CITATIONS>");
				List<Citations> listCitations = new ArrayList<Citations>();
				for (Object citeNode : elementCitations.getChildren()) {
					Element a = (Element) citeNode;

					String num = a.getAttribute("num").getValue();
					String author = a.getAttribute("author").getValue();
					String publication = a.getAttribute("publication").getValue();
					String d1 = a.getAttribute("d1").getValue();
					String d2 = a.getAttribute("d2").getValue();
					String d3 = a.getAttribute("d3").getValue();

					appendCiteTag(fullText, num, author, publication, d1, d2, d3);

					Citations cite = new Citations();
					cite.setAuthor(author);
					cite.setD1(d1);
					cite.setD2(d2);
					cite.setD3(d3);
					// Unlike Referencia, Citations stores the number as an int;
					// a non-numeric value raises NumberFormatException.
					cite.setNum(Integer.parseInt(num));
					cite.setPublication(publication);
					listCitations.add(cite);
				}
				fullText.append("</CITATIONS>");
				documentModel.setCitations(listCitations);
			}

			fullText.append("</RECORD>");
			documentModel.setTextoCompleto(fullText.toString());

			em.persist(documentModel);
			acumulador++;
		}
		return acumulador;
	}

	/**
	 * Appends one self-closing {@code <CITE .../>} tag with the six citation attributes,
	 * each single-quoted and separated from the next by one space.
	 */
	private static void appendCiteTag(StringBuilder target, String num, String author,
			String publication, String d1, String d2, String d3) {
		target.append("<CITE ");
		target.append("num='").append(num).append("' ");
		target.append("author='").append(author).append("' ");
		target.append("publication='").append(publication).append("' ");
		target.append("d1='").append(d1).append("' ");
		target.append("d2='").append(d2).append("' ");
		target.append("d3='").append(d3).append("'/>");
	}
	
	/**
	 * Creates the loader with its fixed input file names: the six data files
	 * {@code cf74.xml} through {@code cf79.xml} and the query file {@code cfquery-corrigido.xml}.
	 * The names carry no directory, so they are relative paths.
	 */
	public AvaliaPrecisionRecall() {
		arquivosDados = new String[] {"cf74.xml", "cf75.xml", "cf76.xml", "cf77.xml", "cf78.xml", "cf79.xml"};
		// Assign the literal directly; copying it through new String(...) adds nothing.
		arquivoQuery = "cfquery-corrigido.xml";
	}
	
	public boolean carregaDadosXML(EntityManager em) {
		try {
			EntityTransaction tx = em.getTransaction();
			int numeroTotalArquivos = 0;
			tx.begin();
			for (int i = 0; i < arquivosDados.length; i++) {
				File file = new File(arquivosDados[i]);
				int numeroArquivos = realizaParserDados(file, em);
				System.out.println("O arquivo: " + file.getName() + " cont閙 " + numeroArquivos + " registros em seu conte鷇o");
				numeroTotalArquivos += numeroArquivos; 
			}
			tx.commit();
			System.out.println("O total de registros persistidos 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -