Tuesday, March 20, 2007

tag completion for xhtml conversion using java

This Java class completes the incomplete (unclosed) tags in your HTML: pass the HTML as a string to convertHtml(String), and it returns the markup with the missing tags added. The code is free to use.

First make sure your HTML displays correctly in a browser, then use this code for tag completion.

import java.util.Stack;

/**
 * Balances the tags of an HTML fragment so that every opened element is closed.
 *
 * <p>The input is scanned once, left to right. Text is copied unchanged; each
 * complete start tag is remembered on a stack, and each end tag closes whatever
 * is still open above its matching start tag. Anything still open at the end of
 * the input is closed in reverse order.
 *
 * <p>Known limitations (kept deliberately simple):
 * <ul>
 *   <li>Tag names are compared case-sensitively.</li>
 *   <li>A {@code >} inside an attribute value or a comment ends the tag.</li>
 *   <li>Elements written without a closing slash (for example {@code <br>})
 *       are treated as open elements and receive an explicit end tag later.</li>
 *   <li>An end tag with no matching start tag closes every open element and
 *       is then paired with a synthesized start tag.</li>
 * </ul>
 */
public class HtmlToXHtml
{
    /**
     * Returns a copy of {@code fs} in which missing end tags (and missing start
     * tags for stray end tags) have been inserted.
     *
     * <p>A {@code <} that is the last character, or that is followed by
     * whitespace, is copied as plain text. A tag fragment that is cut short by
     * another {@code <} or by the end of the input is also copied as plain text.
     * Self-closing tags ({@code <br/>}) and {@code <!...>} / {@code <?...>}
     * constructs are copied through and never receive an end tag.
     *
     * @param fs the HTML text to process; must not be {@code null}
     * @return the text with balanced tags
     * @throws NullPointerException if {@code fs} is {@code null}
     */
    public String convertHtml(String fs)
    {
        int length = fs.length();
        StringBuilder out = new StringBuilder(length + 16);
        // Names of the elements that are currently open, innermost on top.
        Stack<String> openTags = new Stack<String>();

        int i = 0;
        while (i < length)
        {
            char c = fs.charAt(i);

            // Plain text, a trailing '<', or a '<' followed by whitespace:
            // copy one character and always advance so the loop terminates.
            if (c != '<' || i + 1 >= length || Character.isWhitespace(fs.charAt(i + 1)))
            {
                out.append(c);
                i++;
                continue;
            }

            int end = findTagEnd(fs, i);
            String tag = fs.substring(i, end);
            i = end;

            // The fragment was interrupted by another '<' or by the end of the
            // input: emit it exactly once and resume scanning after it.
            if (tag.charAt(tag.length() - 1) != '>')
            {
                out.append(tag);
                continue;
            }

            if (tag.startsWith("</"))
            {
                String name = tagName(tag, 2);
                // An end tag without a name (e.g. "</ b>") is passed through as text.
                if (name.length() > 0)
                {
                    closeOpenTagsFor(name, openTags, out);
                }
                out.append(tag);
            }
            else
            {
                out.append(tag);
                String name = tagName(tag, 1);
                if (name.length() > 0 && !isStandalone(tag))
                {
                    openTags.push(name);
                }
            }
        }

        // Close everything that is still open, innermost first.
        while (!openTags.isEmpty())
        {
            appendEndTag(out, openTags.pop());
        }

        return out.toString();
    }

    /**
     * Finds where the tag fragment starting at {@code start} ends.
     *
     * @return the index just past the first {@code >}, or the index of the next
     *         {@code <}, or the input length, whichever comes first
     */
    private int findTagEnd(String fs, int start)
    {
        for (int j = start + 1; j < fs.length(); j++)
        {
            char c = fs.charAt(j);
            if (c == '>')
            {
                return j + 1;
            }
            if (c == '<')
            {
                return j;
            }
        }
        return fs.length();
    }

    /**
     * Extracts the element name from a tag, reading from {@code from} up to the
     * first whitespace character, {@code /} or {@code >}.
     */
    private String tagName(String tag, int from)
    {
        int end = from;
        while (end < tag.length())
        {
            char c = tag.charAt(end);
            if (c == '>' || c == '/' || Character.isWhitespace(c))
            {
                break;
            }
            end++;
        }
        return tag.substring(from, end);
    }

    /**
     * Tells whether a complete tag can never have a matching end tag: a
     * self-closing tag, or a {@code <!...>} / {@code <?...>} construct.
     */
    private boolean isStandalone(String tag)
    {
        return tag.endsWith("/>") || tag.startsWith("<!") || tag.startsWith("<?");
    }

    /**
     * Pops open elements until the one called {@code name} has been removed,
     * writing an end tag for every other element popped on the way. If the
     * stack runs empty without a match, a start tag for {@code name} is written
     * so the end tag that follows has something to close.
     */
    private void closeOpenTagsFor(String name, Stack<String> openTags, StringBuilder out)
    {
        while (!openTags.isEmpty())
        {
            String open = openTags.pop();
            if (open.equals(name))
            {
                return;
            }
            appendEndTag(out, open);
        }
        out.append('<').append(name).append('>');
    }

    /** Appends {@code </name>} to {@code out}. */
    private void appendEndTag(StringBuilder out, String name)
    {
        out.append("</").append(name).append('>');
    }
}

No comments: