1 //==============================================================================
2 // file : XMLDocumentHandlerDOM.java
3 // project: Lucene Search System
4 //
5 // last change: date: $Date: 2003/09/09 03:11:52 $
6 // by: $Author: bitiboy $
7 // revision: $Revision: 1.1 $
8 //------------------------------------------------------------------------------
9 // copyright: GNU GPL Software License (see class documentation)
10 //==============================================================================
11
12 package com.justhis.lucene.xml;
13
14 import org.apache.lucene.document.Field;
15
16 /*
17 * $Id: XMLDocumentHandlerDOM.java,v 1.1 2003/09/09 03:11:52 bitiboy Exp $
18 *
19 * Copyright 2003 Acai Software All Rights Reserved.
20 *
 * This file XMLDocumentHandlerDOM.java is part of the Lucene Search System.
22
23 * The Lucene Search System is free software; you can redistribute it and/or modify
24 * it under the terms of the GNU General Public License as published by
25 * the Free Software Foundation; either version 2 of the License, or
26 * (at your option) any later version.
27
28 * Lucene Search System is distributed in the hope that it will be useful,
29 * but WITHOUT ANY WARRANTY; without even the implied warranty of
30 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 * GNU General Public License for more details.
32
33 * You should have received a copy of the GNU General Public License
34 * along with the Lucene Search System; if not, write to the Free Software
35 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
36
37 * http://www.justhis.com http://ejb.cn
38 * CONTACT: email = webmaster@justhis.com superaxis@sohu.com
39 */
40 import org.w3c.dom.*;
41 import org.w3c.dom.Node;
42
43 import java.io.File;
44
45 import javax.xml.parsers.*;
46
47
48 /***
49 * ????DOM????XML?????????????? ??????XML????????????????Jdom????????????
50 *
51 * @author <a href="http://blog.ejb.cn">acai</a>
52 * @version $Revision: 1.1 $
53 */
54 public class XMLDocumentHandlerDOM {
55 //~ Methods ----------------------------------------------------------------
56
57 /***
58 * ????????lucene?????? ????????????lucene????????
59 *
60 * @param f
61 *
62 * @return TODO
63 */
64 public org.apache.lucene.document.Document createXMLDocument(File f) {
65 org.apache.lucene.document.Document document = new org.apache.lucene.document.Document();
66 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
67
68 try {
69 DocumentBuilder df = dbf.newDocumentBuilder();
70 org.w3c.dom.Document d = df.parse(f);
71 Node root = d.getDocumentElement();
72 traverseTree(root, document);
73 } catch (Exception e) {
74 System.out.println("error: " + e);
75 e.printStackTrace();
76 }
77
78 return document;
79 }
80
81 /***
82 * ??XML????????????????????????lucenen????????
83 *
84 * @param node
85 * @param document
86 */
87 private static void traverseTree(Node node,
88 org.apache.lucene.document.Document document
89 ) {
90 NodeList nl = node.getChildNodes();
91
92 if (nl.getLength() == 0) {
93 if (node.getNodeType() == Node.TEXT_NODE) {
94 Node parentNode = node.getParentNode();
95
96 if (parentNode.getNodeType() == Node.ELEMENT_NODE) {
97 String parentNodeName = parentNode.getNodeName();
98
99 // String nodeValue = node.getNodeValue();
100 // if (parentNodeName.equals("name"))
101 // {
102 Node siblingNode = node.getNextSibling();
103
104 if (siblingNode != null) {
105 if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE
106 ) {
107 document.add(Field.Text("name",
108 siblingNode.getNodeValue()
109 )
110 );
111 }
112 }
113
114 // }
115 // else if (parentNodeName.equals("profession"))
116 // {
117 // Node siblingNode = node.getNextSibling();
118 // if (siblingNode != null)
119 // {
120 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
121 // {
122 // document.add(Field.Text([arentNodeName, siblingNode.getNodeValue()));
123 // }
124 // }
125 // }
126 // else if (parentNodeName == "addressLine1")
127 // {
128 // Node siblingNode = node.getNextSibling();
129 // if(siblingNode != null)
130 // {
131 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
132 // {
133 // document.add(Field.Text("addressLine1", siblingNode.getNodeValue()));
134 // }
135 // }
136 // }
137 // else if (parentNodeName.equals("addressLine2"))
138 // {
139 // Node siblingNode = node.getNextSibling();
140 // if (siblingNode != null)
141 // {
142 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
143 // {
144 // document.add(Field.Text("addressLine2", siblingNode.getNodeValue()));
145 // }
146 // }
147 // }
148 // if (parentNodeName.equals("city"))
149 // {
150 // Node siblingNode = node.getNextSibling();
151 // if (siblingNode != null)
152 // {
153 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
154 // {
155 // document.add(Field.Text("city", siblingNode.getNodeValue()));
156 // }
157 // }
158 // }
159 // else if (parentNodeName.equals("zip"))
160 // {
161 // Node siblingNode = node.getNextSibling();
162 // if (siblingNode != null)
163 // {
164 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
165 // {
166 // document.add(Field.Text("zip", siblingNode.getNodeValue()));
167 // }
168 // }
169 // }
170 // else if (parentNodeName.equals("state"))
171 // {
172 // Node siblingNode = node.getNextSibling();
173 // if (siblingNode != null)
174 // {
175 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
176 // {
177 // document.add(Field.Text("state", siblingNode.getNodeValue()));
178 // }
179 // }
180 // }
181 // else if (parentNodeName.equals("country"))
182 // {
183 // Node siblingNode = node.getNextSibling();
184 // if (siblingNode != null)
185 // {
186 // if (siblingNode.getNodeType() == Node.CDATA_SECTION_NODE)
187 // {
188 // document.add(Field.Text("country", siblingNode.getNodeValue()));
189 // }
190 // }
191 // }
192 }
193 }
194 } else {
195 for (int i = 0; i < nl.getLength(); i++) {
196 traverseTree(nl.item(i), document);
197 }
198 }
199 }
200 }
201 /*
202 * $Log: XMLDocumentHandlerDOM.java,v $
203 * Revision 1.1 2003/09/09 03:11:52 bitiboy
204 * *** empty log message ***
205 *
206 * Revision 1.1 2003/09/09 00:54:45 bitiboy
207 * *** empty log message ***
208 *
209 * Revision 1.1 2003/09/07 08:23:50 superaxis
210 * *** empty log message ***
211 *
212 *
213 */
This page was automatically generated by Maven