View Javadoc
1 //============================================================================== 2 // file : XMLDocumentHandlerDOM.java 3 // project: Lucene Search System 4 // 5 // last change: date: $Date: 2003/09/09 03:11:52 $ 6 // by: $Author: bitiboy $ 7 // revision: $Revision: 1.1 $ 8 //------------------------------------------------------------------------------ 9 // copyright: GNU GPL Software License (see class documentation) 10 //============================================================================== 11 12 package com.justhis.lucene.xml; 13 14 import org.apache.lucene.document.Field; 15 16 /* 17 * $Id: XMLDocumentHandlerDOM.java,v 1.1 2003/09/09 03:11:52 bitiboy Exp $ 18 * 19 * Copyright 2003 Acai Software All Rights Reserved. 20 * 21 * This file LuceneException.java is part of the Lucene Search System. 22 23 * The Lucene Search System is free software; you can redistribute it and/or modify 24 * it under the terms of the GNU General Public License as published by 25 * the Free Software Foundation; either version 2 of the License, or 26 * (at your option) any later version. 27 28 * Lucene Search System is distributed in the hope that it will be useful, 29 * but WITHOUT ANY WARRANTY; without even the implied warranty of 30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 31 * GNU General Public License for more details. 32 33 * You should have received a copy of the GNU General Public License 34 * along with the Lucene Search System; if not, write to the Free Software 35 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 36 37 * http://www.justhis.com http://ejb.cn 38 * CONTACT: email = webmaster@justhis.com superaxis@sohu.com 39 */ 40 import org.w3c.dom.*; 41 import org.w3c.dom.Node; 42 43 import java.io.File; 44 45 import javax.xml.parsers.*; 46 47 48 /*** 49 * ????DOM????XML?????????????? ??????XML????????????????Jdom???????????? 50 * 51 * @author <a href="http://blog.ejb.cn">acai</a> 52 * @version $Revision: 1.1 $ 53 */ 54 public class XMLDocumentHandlerDOM { 55 //~ Methods ---------------------------------------------------------------- 56 57 /*** 58 * ????????lucene?????? ????????????lucene???????? 59 * 60 * @param f 61 * 62 * @return TODO 63 */ 64 public org.apache.lucene.document.Document createXMLDocument(File f) { 65 org.apache.lucene.document.Document document = new org.apache.lucene.document.Document(); 66 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 67 68 try { 69 DocumentBuilder df = dbf.newDocumentBuilder(); 70 org.w3c.dom.Document d = df.parse(f); 71 Node root = d.getDocumentElement(); 72 traverseTree(root, document); 73 } catch (Exception e) { 74 System.out.println("error: " + e); 75 e.printStackTrace(); 76 } 77 78 return document; 79 } 80 81 /*** 82 * ??XML????????????????????????lucenen???????? 83 * 84 * @param node 85 * @param document 86 */ 87 private static void traverseTree(Node node, 88 org.apache.lucene.document.Document document 89 ) { 90 NodeList nl = node.getChildNodes(); 91 92 if (nl.getLength() == 0) { 93 if (node.getNodeType() == Node.TEXT_NODE) { 94 Node parentNode = node.getParentNode(); 95 96 if (parentNode.getNodeType() == Node.ELEMENT_NODE) { 97 String parentNodeName = parentNode.getNodeName(); 98 99 // String nodeValue = node.getNodeValue(); 100 // if (parentNodeName.equals("name")) 101 // { 102 Node siblingNode = node.getNextSibling(); 103 104 if (siblingNode != null) { 105 if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE 106 ) { 107 document.add(Field.Text("name", 108 siblingNode.getNodeValue() 109 ) 110 ); 111 } 112 } 113 114 // } 115 // else if (parentNodeName.equals("profession")) 116 // { 117 // Node siblingNode = node.getNextSibling(); 118 // if (siblingNode != null) 119 // { 120 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) 121 // { 122 // document.add(Field.Text([arentNodeName, siblingNode.getNodeValue())); 123 // } 124 // } 125 // } 126 // else if (parentNodeName == "addressLine1") 127 // { 128 // Node siblingNode = node.getNextSibling(); 129 // if(siblingNode != null) 130 // { 131 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) 132 // { 133 // document.add(Field.Text("addressLine1", siblingNode.getNodeValue())); 134 // } 135 // } 136 // } 137 // else if (parentNodeName.equals("addressLine2")) 138 // { 139 // Node siblingNode = node.getNextSibling(); 140 // if (siblingNode != null) 141 // { 142 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) 143 // { 144 // document.add(Field.Text("addressLine2", siblingNode.getNodeValue())); 145 // } 146 // } 147 // } 148 // if (parentNodeName.equals("city")) 149 // { 150 // Node siblingNode = node.getNextSibling(); 151 // if (siblingNode != null) 152 // { 153 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) 154 // { 155 // document.add(Field.Text("city", siblingNode.getNodeValue())); 156 // } 157 // } 158 // } 159 // else if (parentNodeName.equals("zip")) 160 // { 161 // Node siblingNode = node.getNextSibling(); 162 // if (siblingNode != null) 163 // { 164 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) 165 // { 166 // document.add(Field.Text("zip", siblingNode.getNodeValue())); 167 // } 168 // } 169 // } 170 // else if (parentNodeName.equals("state")) 171 // { 172 // Node siblingNode = node.getNextSibling(); 173 // if (siblingNode != null) 174 // { 175 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) 176 // { 177 // document.add(Field.Text("state", siblingNode.getNodeValue())); 178 // } 179 // } 180 // } 181 // else if (parentNodeName.equals("country")) 182 // { 183 // Node siblingNode = node.getNextSibling(); 184 // if (siblingNode != null) 185 // { 186 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE) 187 // { 188 // document.add(Field.Text("country", siblingNode.getNodeValue())); 189 // } 190 // } 191 // } 192 } 193 } 194 } else { 195 for (int i = 0; i < nl.getLength(); i++) { 196 traverseTree(nl.item(i), document); 197 } 198 } 199 } 200 } 201 /* 202 * $Log: XMLDocumentHandlerDOM.java,v $ 203 * Revision 1.1 2003/09/09 03:11:52 bitiboy 204 * *** empty log message *** 205 * 206 * Revision 1.1 2003/09/09 00:54:45 bitiboy 207 * *** empty log message *** 208 * 209 * Revision 1.1 2003/09/07 08:23:50 superaxis 210 * *** empty log message *** 211 * 212 * 213 */

This page was automatically generated by Maven