00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "khtmlreader.h"
00020
00021 #include "khtmlreader.moc"
00022 #include <kdebug.h>
00023 #include <dom/dom_text.h>
00024 #include <dom/dom2_views.h>
00025 #include <dom/dom_doc.h>
00026 #include <qcolor.h>
00027 #include <dom/dom_element.h>
00028 #include <dom/html_table.h>
00029 #include <khtmlview.h>
00030 #include <qwidget.h>
00031 #include <kapplication.h>
00032 #include <dom/html_misc.h>
00033 #include <qregexp.h>
00034
00035 KHTMLReader::KHTMLReader(KWDWriter *w){
00036 _html=new KHTMLPart();
00037 _writer=w;
00038 _it_worked=false;
00039 }
00040
00041
00042 void qt_enter_modal( QWidget *widget );
00043 void qt_leave_modal( QWidget *widget );
00044
00045
00046 bool KHTMLReader::filter(KURL url) {
00047 kdDebug(30503) << "KHTMLReader::filter" << endl;
00048 QObject::connect(_html,SIGNAL(completed()),this,SLOT(completed()));
00049
00050 _state.clear();
00051 _list_depth=0;
00052
00053 _html->view()->resize(600,530);
00054 _html->setAutoloadImages(false);
00055 _html->setJScriptEnabled(false);
00056 _html->setPluginsEnabled(false);
00057 _html->setJavaEnabled(false);
00058 _html->setMetaRefreshEnabled(false);
00059 if (_html->openURL(url) == false) {
00060 kdWarning(30503) << "openURL returned false" << endl;
00061 return false;
00062 }
00063
00064
00065 QWidget dummy(0,0,WType_Dialog | WShowModal);
00066 qt_enter_modal(&dummy);
00067 qApp->enter_loop();
00068 qt_leave_modal(&dummy);
00069 return _it_worked;
00070 }
00071
00072 HTMLReader_state *KHTMLReader::state() {
00073 if (_state.count() == 0) {
00074 HTMLReader_state *s=new HTMLReader_state;
00075 s->frameset=_writer->mainFrameset();
00076 s->paragraph = _writer->addParagraph(s->frameset);
00077 s->format=_writer->currentFormat(s->paragraph,true);
00078 s->layout=_writer->currentLayout(s->paragraph);
00079 s->in_pre_mode = false;
00080 _state.push(s);
00081 }
00082 return _state.top();
00083 }
00084
00085 HTMLReader_state *KHTMLReader::pushNewState() {
00086 HTMLReader_state *s=new HTMLReader_state;
00087 s->frameset=state()->frameset;
00088 s->paragraph=state()->paragraph;
00089 s->format=state()->format;
00090 s->layout=state()->layout;
00091 s->in_pre_mode=state()->in_pre_mode;
00092 _writer->cleanUpParagraph(s->paragraph);
00093 _state.push(s);
00094 return s;
00095 }
00096
00097
00098 void KHTMLReader::popState() {
00099 kdDebug(30503) << "Entering popState" << endl;
00100
00101 HTMLReader_state *s=_state.pop();
00102
00111 if (s->frameset == state()->frameset)
00112 {
00113 state()->paragraph=s->paragraph;
00114 if ((state()->layout != s->layout)) {
00115 if (_writer->getText(state()->paragraph).length()!=0) startNewLayout(false,state()->layout);
00116 }
00117 state()->format=_writer->startFormat(state()->paragraph, state()->format);
00118 }
00119 delete(s);
00120 }
00121
00122 void KHTMLReader::startNewLayout(bool startNewFormat) {
00123 QDomElement layout;
00124 startNewLayout(startNewFormat,layout);
00125 }
00126
00127 void KHTMLReader::startNewLayout(bool startNewFormat, QDomElement layout) {
00128 kdDebug() << "entering startNewLayout" << endl;
00129 startNewParagraph(startNewFormat,true);
00130 state()->layout=_writer->setLayout(state()->paragraph,layout);
00131 }
00132
00133
00134 void KHTMLReader::completed() {
00135 kdDebug(30503) << "KHTMLReader::completed" << endl;
00136 qApp->exit_loop();
00137 DOM::Document doc=_html->document();
00138 DOM::NodeList list=doc.getElementsByTagName("body");
00139 DOM::Node docbody=list.item(0);
00140
00141 if (docbody.isNull()) {
00142 kdWarning(30503) << "no <BODY>, giving up" << endl;
00143 _it_worked=false;
00144 return;
00145 }
00146
00147
00148 parseNode(docbody);
00149
00150 list = doc.getElementsByTagName("head");
00151 DOM::Node dochead=list.item(0);
00152 if (!dochead.isNull())
00153 parse_head(dochead);
00154 else
00155 kdWarning(30503) << "WARNING: no html <HEAD> section" << endl;
00156
00157 _writer->cleanUpParagraph(state()->paragraph);
00158 _it_worked=_writer->writeDoc();
00159 }
00160
00161
00162 void KHTMLReader::parseNode(DOM::Node node) {
00163 kdDebug(30503) << "Entering parseNode" << endl;
00164
00165 DOM::Text t=node;
00166 if (!t.isNull()) {
00167 _writer->addText(state()->paragraph,t.data().string(),1,state()->in_pre_mode);
00168 return;
00169 }
00170
00171
00172 state()->format=_writer->currentFormat(state()->paragraph,true);
00173 state()->layout=_writer->currentLayout(state()->paragraph);
00174 pushNewState();
00175
00176 DOM::Element e=node;
00177
00178 bool go_recursive=true;
00179
00180 if (!e.isNull()) {
00181
00182 parseStyle(e);
00183
00184 go_recursive=parseTag(e);
00185 }
00186 if (go_recursive) {
00187 for (DOM::Node q=node.firstChild(); !q.isNull(); q=q.nextSibling()) {
00188 parseNode(q);
00189 }
00190 }
00191 popState();
00192
00193
00194 }
00195
00196 void KHTMLReader::parse_head(DOM::Element e) {
00197 for (DOM::Element items=e.firstChild();!items.isNull();items=items.nextSibling()) {
00198 if (items.tagName().string().lower() == "title") {
00199 DOM::Text t=items.firstChild();
00200 if (!t.isNull()) {
00201 _writer->createDocInfo("HTML import filter",t.data().string());
00202 }
00203 }
00204 }
00205 }
00206
/*
 * Tag dispatch helpers for KHTMLReader::parseTag(). All of them expect a
 * DOM::Element named `e` in scope and return from the enclosing function when
 * the lower-cased tag name equals the stringified first argument.
 */

/* _PP(x): forward the element to parse_x(e) and return its result. */
#define _PP(x) do { \
        if (e.tagName().lower() == #x) \
            return parse_##x(e); \
    } while (0)

/* _PF(x,a,b,c): set format attribute a/b to c on the current paragraph. */
#define _PF(x,a,b,c) do { \
        if (e.tagName().lower() == #x) { \
            _writer->formatAttribute(state()->paragraph, #a,#b,#c); \
            return true; \
        } \
    } while (0)

/*
 * _PL(x,a,b,c): re-apply the current layout, start a new paragraph if the
 * current one already contains text, then set layout attribute a/b to c.
 */
#define _PL(x,a,b,c) do { \
        if (e.tagName().lower() == #x) { \
            state()->layout=_writer->setLayout(state()->paragraph,state()->layout);\
            if (!(_writer->getText(state()->paragraph).isEmpty())) \
                startNewParagraph(false,false); \
            _writer->layoutAttribute(state()->paragraph, #a,#b,#c); \
            return true; \
        } \
    } while (0)
00233
00234
00235 bool KHTMLReader::parseTag(DOM::Element e) {
00236 kdDebug(30503) << "Entering parseTag for " << e.tagName().lower() << endl;
00237 _PP(a);
00238 _PP(p);
00239 _PP(br);
00240 _PP(table);
00241 _PP(pre);
00242 _PP(ul);
00243 _PP(ol);
00244 _PP(font);
00245 _PP(hr);
00246
00247
00248
00249 _PF(b,WEIGHT,value,75);
00250 _PF(strong,WEIGHT,value,75);
00251 _PF(u,UNDERLINE,value,1);
00252 _PF(i,ITALIC,value,1);
00253
00254 _PL(center,FLOW,align,center);
00255 _PL(right,FLOW,align,right);
00256 _PL(left,FLOW,align,left);
00257
00258 _PL(h1,NAME,value,h1);
00259 _PL(h2,NAME,value,h2);
00260 _PL(h3,NAME,value,h3);
00261 _PL(h4,NAME,value,h4);
00262 _PL(h5,NAME,value,h5);
00263 _PL(h6,NAME,value,h6);
00264 kdDebug(30503) << "Leaving parseTag" << endl;
00265
00266
00267 if(e.nodeType() == DOM::Node::COMMENT_NODE || e.tagName().lower() == "script") {
00268 return false;
00269 }
00270
00271 return true;
00272 }
00273
00274
00275
00276
00277
00278 void KHTMLReader::startNewParagraph(bool startnewformat, bool startnewlayout) {
00279 kdDebug() << "Entering startNewParagraph" << endl;
00280
00281 QDomElement qf=state()->format;
00282 QDomElement ql=state()->layout;
00283
00284 _writer->cleanUpParagraph(state()->paragraph);
00285
00286 if ((startnewlayout==true) || ql.isNull())
00287 {state()->paragraph=_writer->addParagraph(state()->frameset);}
00288 else
00289 {state()->paragraph=
00290 _writer->addParagraph(state()->frameset,state()->layout);}
00291
00292
00293
00294 if (qf.isNull() || (startnewformat==true)) {
00295 state()->format=_writer->startFormat(state()->paragraph);
00296 } else {
00297 state()->format=_writer->startFormat(state()->paragraph,qf);
00298 }
00299
00305 QString ct=_writer->getLayoutAttribute(state()->paragraph,"COUNTER","type");
00306 if ((!ct.isNull()) && (ct != "0")) {
00307 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","0");
00308 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","0");
00309 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
00310 int currdepth=(_writer->getLayoutAttribute(state()->paragraph,"COUNTER","depth")).toInt();
00311 _writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(currdepth+1));
00312 }
00313 }
00314
00315 KHTMLReader::~KHTMLReader(){
00316 delete _html;
00317 }
00318
00319
00320
00321
00322
00323
00324
00325
00326
00327
00328
00329 bool KHTMLReader::parse_CommonAttributes(DOM::Element e) {
00330 kdDebug(30503) << "entering KHTMLReader::parse_CommonAttributes" << endl;
00331 kdDebug(30503) << "tagName is " << e.tagName().string() << endl;
00332 QString s=e.getAttribute("align").string();
00333 if (!s.isEmpty())
00334 {
00335 _writer->formatAttribute(state()->paragraph,"FLOW","align",s);
00336 }
00337 QRegExp rx( "h[0-9]+" );
00338 if ( 0 == rx.search( e.getAttribute("class").string() ) )
00339
00340 {
00341 _writer->layoutAttribute(state()->paragraph,"NAME","value",e.getAttribute("class").string());
00342 }
00343 kdDebug(30503) << "leaving parse_CommonAttributes" << endl;
00344 return true;
00345 }
00346
00347 bool KHTMLReader::parse_a(DOM::Element e) {
00348 QString url = e.getAttribute("href").string();
00349 if (!url.isEmpty())
00350 {
00351 QString linkName;
00352 DOM::Text t = e.firstChild();
00353 if (t.isNull()) {
00354
00355 return false;
00356 }
00357 linkName = t.data().string().simplifyWhiteSpace();
00358 t.setData(DOM::DOMString("#"));
00359 _writer->createLink(state()->paragraph, linkName, url);
00360 }
00361 return true;
00362 }
00363
00364 bool KHTMLReader::parse_p(DOM::Element e)
00365 {
00366
00367
00368 kdDebug() << "entering parse_p" << endl;
00369 static bool firstparagraph=true;
00370 if (!(_writer->getText(state()->paragraph).isEmpty()))
00371 startNewParagraph(false,false);
00372 parse_CommonAttributes(e);
00373 kdDebug() << "leaving parse_p" << endl;
00374 return true;
00375 }
00376
00377 bool KHTMLReader::parse_hr(DOM::Element ) {
00378 startNewParagraph();
00379 _writer->createHR(state()->paragraph);
00380 startNewParagraph();
00381 return true;
00382 }
00383
00384 bool KHTMLReader::parse_br(DOM::Element ) {
00385 startNewParagraph(false,false);
00386 return false;
00387 }
00388
00389 static const QColor parsecolor(const QString& colorstring) {
00390 QColor color;
00391 if (colorstring[0]=='#') {
00392 color.setRgb(
00393 colorstring.mid(1,2).toInt(0,16),
00394 colorstring.mid(3,2).toInt(0,16),
00395 colorstring.mid(5,2).toInt(0,16)
00396 );
00397 } else {
00398 QString colorlower=colorstring.lower();
00399
00400 if (colorlower=="black")
00401 color.setRgb(0,0,0);
00402 else if (colorlower=="white")
00403 color.setRgb(255,255,255);
00404 else if (colorlower=="silver")
00405 color.setRgb(0xc0,0xc0,0xc0);
00406 else if (colorlower=="gray")
00407 color.setRgb(128,128,128);
00408
00409 else if (colorlower=="red")
00410 color.setRgb(255,0,0);
00411 else if (colorlower=="lime")
00412 color.setRgb(0,255,0);
00413 else if (colorlower=="blue")
00414 color.setRgb(0,0,255);
00415 else if (colorlower=="yellow")
00416 color.setRgb(255,255,0);
00417 else if (colorlower=="fuchsia")
00418 color.setRgb(255,0,255);
00419 else if (colorlower=="aqua")
00420 color.setRgb(0,255,255);
00421
00422 else if (colorlower=="maroon")
00423 color.setRgb(128,0,0);
00424 else if (colorlower=="green")
00425 color.setRgb(0,128,0);
00426 else if (colorlower=="navy")
00427 color.setRgb(0,0,128);
00428 else if (colorlower=="olive")
00429 color.setRgb(128,128,0);
00430 else if (colorlower=="purple")
00431 color.setRgb(128,0,128);
00432 else if (colorlower=="teal")
00433 color.setRgb(0,128,128);
00434 else {
00435
00436
00437 color.setNamedColor(colorstring);
00438 }
00439 }
00440 return colorstring;
00441 }
00442
00443 void KHTMLReader::parseStyle(DOM::Element e) {
00444
00445
00446
00447 kdDebug(30503) << "entering parseStyle" << endl;
00448 DOM::CSSStyleDeclaration s1=e.style();
00449 DOM::Document doc=_html->document();
00450 DOM::CSSStyleDeclaration s2=doc.defaultView().getComputedStyle(e,"");
00451
00452 kdDebug(30503) << "font-weight=" << s1.getPropertyValue("font-weight").string() << endl;
00453 if ( s1.getPropertyValue("font-weight").string() == "bolder" )
00454 {
00455 _writer->formatAttribute(state()->paragraph,"WEIGHT","value","75");
00456 }
00457 if ( s1.getPropertyValue("font-weight").string() == "bold" )
00458 {
00459 _writer->formatAttribute(state()->paragraph,"WEIGHT","value","75");
00460 }
00461
00462
00463 if ( s1.getPropertyValue("color").string() != QString() )
00464 {
00465 QColor c=parsecolor(s1.getPropertyValue("color").string());
00466 _writer->formatAttribute(state()->paragraph,"COLOR","red",QString::number(c.red()));
00467 _writer->formatAttribute(state()->paragraph,"COLOR","green",QString::number(c.green()));
00468 _writer->formatAttribute(state()->paragraph,"COLOR","blue",QString::number(c.blue()));
00469 }
00470
00471
00472 if ( s1.getPropertyValue("font-size").string() != QString() )
00473 {
00474 QString size=s1.getPropertyValue("font-size").string();
00475 if (size.endsWith("pt"))
00476 {
00477 size=size.left(size.length()-2);
00478 }
00479 _writer->formatAttribute(state()->paragraph,"SIZE","value",size);
00480 }
00481
00482
00483 if ( s1.getPropertyValue("text-align").string() != QString() )
00484 {
00485 state()->layout=_writer->setLayout(state()->paragraph,state()->layout);
00486 _writer->layoutAttribute(state()->paragraph, "FLOW","align",s1.getPropertyValue("text-align").string());
00487 }
00488
00489
00490
00491
00492
00493
00494
00495
00496
00497
00498
00499
00500
00501
00502
00503
00504 }
00505
00506 bool KHTMLReader::parse_table(DOM::Element e) {
00507 if(_writer->isInTable()) {
00508
00509
00510 for (DOM::Node rows=e.firstChild().firstChild();!rows.isNull();rows=rows.nextSibling())
00511 if (!rows.isNull() && rows.nodeName().string().lower() == "tr")
00512 for (DOM::Node cols=rows.firstChild();!cols.isNull();cols=cols.nextSibling())
00513 if (!cols.isNull())
00514 parseNode(cols);
00515 return false;
00516 }
00517
00518 DOM::Element table_body=e.firstChild();
00519 if(table_body.isNull()) {
00520
00521
00522
00523 return true;
00524 }
00525
00526 int tableno=_writer->createTable();
00527 int nrow=0;
00528 int ncol=0;
00529 bool has_borders=false;
00530 QColor bgcolor=parsecolor("#FFFFFF");
00531
00532 if (!table_body.getAttribute("bgcolor").string().isEmpty())
00533 bgcolor=parsecolor(table_body.getAttribute("bgcolor").string());
00534 if ((e.getAttribute("border").string().toInt() > 0))
00535 has_borders=true;
00536
00537
00538
00539 for (DOM::Node rowsnode=table_body.firstChild();!rowsnode.isNull();rowsnode=rowsnode.nextSibling()) {
00540 DOM::Element rows = rowsnode;
00541 if (!rows.isNull() && rows.tagName().string().lower() == "tr") {
00542 QColor obgcolor=bgcolor;
00543 if (!rows.getAttribute("bgcolor").string().isEmpty())
00544 bgcolor=parsecolor(rows.getAttribute("bgcolor").string());
00545
00546 ncol=0;
00547 for (DOM::Node colsnode=rows.firstChild();!colsnode.isNull();colsnode=colsnode.nextSibling()) {
00548 DOM::Element cols = colsnode;
00549 const QString nodename = cols.isNull() ? QString::null : cols.nodeName().string().lower();
00550 if (nodename == "td" || nodename == "th") {
00551 QColor bbgcolor=bgcolor;
00552 if (!cols.getAttribute("bgcolor").string().isEmpty())
00553 bgcolor=parsecolor(cols.getAttribute("bgcolor").string());
00554
00555 pushNewState();
00556 QRect colrect=cols.getRect();
00557 state()->frameset=_writer->createTableCell(tableno,nrow,ncol,1,colrect);
00558 state()->frameset.firstChild().toElement().setAttribute("bkRed",bgcolor.red());
00559 state()->frameset.firstChild().toElement().setAttribute("bkGreen",bgcolor.green());
00560 state()->frameset.firstChild().toElement().setAttribute("bkBlue",bgcolor.blue());
00561 if (has_borders) {
00562 state()->frameset.firstChild().toElement().setAttribute("lWidth",1);
00563 state()->frameset.firstChild().toElement().setAttribute("rWidth",1);
00564 state()->frameset.firstChild().toElement().setAttribute("bWidth",1);
00565 state()->frameset.firstChild().toElement().setAttribute("tWidth",1);
00566 }
00567
00568
00569 state()->paragraph=_writer->addParagraph(state()->frameset);
00570 parseNode(cols);
00571 _writer->cleanUpParagraph(state()->paragraph);
00572 popState();
00573 ncol++;
00574 bgcolor=bbgcolor;
00575 }
00576 }
00577 nrow++;
00578 bgcolor=obgcolor;
00579 }
00580 }
00581 _writer->finishTable(tableno);
00582 startNewParagraph(false,false);
00583 _writer->createInline(state()->paragraph,_writer->fetchTableCell(tableno,0,0));
00584 startNewParagraph(false,false);
00585 return false;
00586 }
00587
00588 bool KHTMLReader::parse_img(DOM::Element ) {
00589
00590 return true;
00591 }
00592
00593 bool KHTMLReader::parse_pre(DOM::Element e) {
00594 #if 0 // see Bug #74601 (normal): kword doesn't recognize PRE-tags in HTML
00595
00597 DOM::HTMLElement htmlelement(e);
00598 if(! htmlelement.isNull())
00599 _writer->addText(state()->paragraph,htmlelement.innerHTML().string(),1);
00600 startNewParagraph();
00601
00602 return false;
00603 #else
00604 pushNewState();
00605 state()->in_pre_mode=true;
00606 for (DOM::Node q=e.firstChild(); !q.isNull(); q=q.nextSibling()) {
00607 parseNode(q);
00608 }
00609 popState();
00610 return false;
00611 #endif
00612 }
00613
00614 bool KHTMLReader::parse_ol(DOM::Element e) {
00615 return parse_ul(e);
00616 }
00617
00618 bool KHTMLReader::parse_font(DOM::Element e) {
00619
00620 QString face=e.getAttribute("face").string();
00621 QColor color=parsecolor("#000000");
00622 if (!e.getAttribute("color").string().isEmpty())
00623 color=parsecolor(e.getAttribute("color").string());
00624 QString size=e.getAttribute("size").string();
00625 int isize=-1;
00626 if (size.startsWith("+"))
00627 isize=12+size.right(size.length()-1).toInt();
00628 else if (size.startsWith("-"))
00629 isize=12-size.right(size.length()-1).toInt();
00630 else
00631 isize=12+size.toInt();
00632
00633 _writer->formatAttribute(state()->paragraph,"FONT","name",face);
00634 if ((isize>=0) && (isize != 12))
00635 _writer->formatAttribute(state()->paragraph,"SIZE","value",QString("%1").arg(isize));
00636
00637 _writer->formatAttribute(state()->paragraph,"COLOR","red",QString("%1").arg(color.red()));
00638 _writer->formatAttribute(state()->paragraph,"COLOR","green",QString("%1").arg(color.green()));
00639 _writer->formatAttribute(state()->paragraph,"COLOR","blue",QString("%1").arg(color.blue()));
00640 return true;
00641 }
00642
00643 bool KHTMLReader::parse_ul(DOM::Element e) {
00644 _list_depth++;
00645 bool popstateneeded = false;
00646 for (DOM::Node items=e.firstChild();!items.isNull();items=items.nextSibling()) {
00647 if (items.nodeName().string().lower() == "li") {
00648 if (popstateneeded) {
00649 popState();
00650
00651 }
00652 pushNewState();
00653 startNewLayout();
00654 popstateneeded = true;
00655 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
00656 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
00657 if (e.tagName().string().lower() == "ol")
00658 {
00659 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","1");
00660 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","1");
00661 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext",".");
00662 }
00663 else
00664 {
00665 _writer->layoutAttribute(state()->paragraph,"COUNTER","type","10");
00666 _writer->layoutAttribute(state()->paragraph,"COUNTER","numberingtype","");
00667 _writer->layoutAttribute(state()->paragraph,"COUNTER","righttext","");
00668 }
00669 _writer->layoutAttribute(state()->paragraph,"COUNTER","depth",QString("%1").arg(_list_depth-1));
00670 }
00671 parseNode(items);
00672 }
00673 if (popstateneeded)
00674 popState();
00675 _list_depth--;
00676 return false;
00677 }
00678