Java读取Excel:Java读取Word、Excel、PowerPoint、PDF文件的方法及代码
来源: 发布时间:星期四, 2009年2月12日 浏览:318次 评论:0
OFFICE文档使用POIControl控件 ![]() ![]() ![]() ![]() ![]() ![]() ![]() WORD: import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.poi.hwpf.extractor.WordExtractor; import java.io.File; import java.io.InputStream; import java.io.FileInputStream; import com.search.code.Index; public Document getDocument(Index index, String url, String title, InputStream is) throws DocCenterException { String bodyText = null; try { WordExtractor ex = ![]() ![]() bodyText = ex.getText ![]() ![]() index.AddIndex(url, title, bodyText); } }catch (DocCenterException e) { throw ![]() }catch(Exception e){ e.pr ![]() ![]() } } ![]() } Excel: import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.hssf.usermodel.HSSFSheet; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFCell; import java.io.File; import java.io.InputStream; import java.io.FileInputStream; import com.search.code.Index; public Document getDocument(Index index, String url, String title, InputStream is) throws DocCenterException { StringBuffer content = ![]() ![]() try{ HSSFWorkbook workbook = ![]() ![]() for ( ![]() ![]() ![]() ![]() HSSFSheet aSheet = workbook.getSheetAt(numSheets);//获得 ![]() for ( ![]() ![]() ![]() ![]() HSSFRow aRow = aSheet.getRow(rowNumOfSheet); //获得 ![]() for ( ![]() ![]() ![]() ![]() HSSFCell aCell = aRow.getCell(cellNumOfRow);//获得列值 content.append(aCell.getStringCellValue ![]() } } } } } } ![]() index.AddIndex(url, title, content.toString ![]() } }catch (DocCenterException e) { throw ![]() }catch(Exception e) { ![]() ![]() ![]() } ![]() } PowerPo ![]() import java.io.InputStream; import org.apache.lucene.document.Document; import org.apache.poi.hslf.HSLFSlideShow; import org.apache.poi.hslf.model.TextRun; import org.apache.poi.hslf.model.Slide; import 
org.apache.poi.hslf.usermodel.SlideShow; public Document getDocument(Index index, String url, String title, InputStream is) throws DocCenterException { StringBuffer content = ![]() try{ SlideShow ss = ![]() ![]() ![]() ![]() Slide ![]() ![]() ![]() for( ![]() ![]() TextRun ![]() ![]() ![]() ![]() for( ![]() ![]() content.append(t[j].getText ![]() } content.append(slides[i].getTitle ![]() } index.AddIndex(url, title, content.toString ![]() }catch(Exception ex){ ![]() ![]() ![]() } ![]() } PDF: import java.io.InputStream; import java.io.IOException; import org.apache.lucene.document.Document; import org.pdfbox.cos.COSDocument; import org.pdfbox.pdfparser.PDFParser; import org.pdfbox.pdmodel.PDDocument; import org.pdfbox.pdmodel.PDDocumentInformation; import org.pdfbox.util.PDFTextStripper; import com.search.code.Index; public Document getDocument(Index index, String url, String title, InputStream is)throws DocCenterException { COSDocument cosDoc = null; try { cosDoc = parseDocument(is); } catch (IOException e) { closeCOSDocument(cosDoc); throw ![]() } ![]() ![]() ![]() closeCOSDocument(cosDoc); throw ![]() ![]() } String docText = null; try { PDFTextStripper stripper = ![]() ![]() docText = stripper.getText( ![]() } catch (IOException e) { closeCOSDocument(cosDoc); throw ![]() } PDDocument pdDoc = null; try { pdDoc = ![]() PDDocumentInformation docInfo = pdDoc.getDocumentInformation ![]() ![]() ![]() ![]() title = docInfo.getTitle ![]() } } catch (Exception e) { closeCOSDocument(cosDoc); closePDDocument(pdDoc); ![]() ![]() ![]() ![]() } finally { closeCOSDocument(cosDoc); closePDDocument(pdDoc); } ![]() } private ![]() PDFParser parser = ![]() parser.parse ![]() ![]() ![]() } private void closeCOSDocument(COSDocument cosDoc) { ![]() try { cosDoc.close ![]() } catch (IOException e) { } } } private void closePDDocument(PDDocument pdDoc) { ![]() try { pdDoc.close ![]() } catch (IOException e) { } } } 0
相关文章 | 读者评论 | 发表评论