// $Id: DOCTYPEChangerStream.java,v 1.1 2000/10/03 17:13:15 nigelw Exp $
/*
 * Copyright (c) 1999-2000 by Simon St.Laurent.  All Rights Reserved.
 *
 * This program is open source software; you may use, copy, modify, and
 * redistribute it under the terms of the LICENSE with which it was
 * originally distributed.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * LICENSE for more details.
 */

//package com.simonstl.xml;

import java.io.*;

/**
 * <p>This class adds a DOCTYPE to an incoming XML document or replaces its
 * existing DOCTYPE declaration if it has one.</p>
 *
 * <p>The stream watches the document prolog. XML declarations, processing
 * instructions and comments are passed through untouched. When the first
 * element start tag is seen, a generated DOCTYPE declaration is emitted in
 * front of it. When an existing DOCTYPE declaration is seen it is either
 * replaced by the generated one or passed through, depending on
 * {@link #setReplace(boolean)}. Everything after that point is passed
 * through unchanged.</p>
 *
 * <p>Known limitations:</p>
 * <ul>
 * <li>The root element name must be set before reading; otherwise the text
 *     <code>null</code> is written as the root element name.</li>
 * <li>When skipping an existing DOCTYPE, a <code>&gt;</code>,
 *     <code>[</code> or <code>]</code> inside a quoted literal or a comment
 *     of that declaration is not recognised as such and may end the skip
 *     early.</li>
 * <li>Generated characters are returned as their UTF-16 code unit values and
 *     are not encoded; identifiers should therefore only contain characters
 *     that are single bytes in the document encoding.</li>
 * </ul>
 *
 * @author Simon St.Laurent, Nigel Whitaker
 * @version 0.01 $Date: 2000/10/03 17:13:15 $
 */
public final class DOCTYPEChangerStream extends FilterInputStream
{
    /** Prolog state: between markup, looking for the next <code>&lt;</code>. */
    private static final int SCANNING = 0;
    /** Prolog state: inside a processing instruction or XML declaration. */
    private static final int IN_PI = 1;
    /** Prolog state: inside a comment. */
    private static final int IN_COMMENT = 2;

    /**
     * Creates a DOCTYPE-changing filter on top of the given stream.
     *
     * @param in the stream delivering the raw bytes of the XML document
     */
    public DOCTYPEChangerStream (InputStream in) {
        super(in);
    }

    /** Whether an existing DOCTYPE declaration is replaced (true) or kept (false). */
    protected boolean replace=true;
    /** Root element name written into the generated DOCTYPE. */
    protected String rootElement;
    /** Public identifier; empty means "no PUBLIC part". */
    protected String publicIdentifier="";
    /** System identifier; empty means "no SYSTEM part" unless a public identifier is set. */
    protected String systemIdentifier="";
    /** Internal subset text; empty means "no internal subset". */
    protected String internalSubsetContent="";
    /** Set once the DOCTYPE has been emitted or an existing one has been accepted. */
    protected boolean docStarted=false;
    protected boolean generating=false;
    /** Loop flag used while skipping an existing DOCTYPE declaration. */
    protected boolean cycle=true;
    /** True while the skipped DOCTYPE declaration is inside its [ ... ] subset. */
    protected boolean internalSubset=false;
    /** Characters waiting to be returned by read() before more input is consumed. */
    protected StringBuffer myBuffer=new StringBuffer();

    /** Set by read(byte[], int, int) once the end of the stream has been reached. */
    private boolean endOfStream = false;
    /** One of SCANNING, IN_PI, IN_COMMENT; only relevant until docStarted is set. */
    private int prologState = SCANNING;
    /** In IN_PI: whether the previously returned byte was a question mark. */
    private boolean lastWasQuestionMark = false;
    /** In IN_COMMENT: number of consecutive '-' bytes returned immediately before now. */
    private int dashRun = 0;

    /**
     * Use setReplace to indicate whether to replace the DOCTYPE declarations
     * for documents that already have one.
     *
     * @param replaceChoice false means don't replace, true means do replace
     */
    public void setReplace(boolean replaceChoice) {
        replace=replaceChoice;
    }

    /**
     * Use setRootElement to set the root element identified by the DOCTYPE
     * declaration.
     *
     * @param elementName the root element name
     */
    public void setRootElement(String elementName){
        rootElement=elementName;
    }

    /**
     * Use setPublicIdentifier to set the public identifier identified by the
     * DOCTYPE declaration.
     * If set, the result will be &lt;!DOCTYPE <i>rootElement</i> PUBLIC
     * '<i>publicIdentifier</i>' '<i>systemIdentifier</i>'
     * <i>[internalSubset, if present]</i>&gt; .  You must also set a system
     * identifier for this to work properly.
     *
     * @param identifier the public identifier
     */
    public void setPublicIdentifier(String identifier){
        publicIdentifier=identifier;
    }

    /**
     * Use setSystemIdentifier to set the system identifier identified by the
     * DOCTYPE declaration.
     * If set without a public identifier, the result will be &lt;!DOCTYPE
     * <i>rootElement</i> SYSTEM '<i>systemIdentifier</i>'
     * <i>[internalSubset, if present]</i>&gt;
     *
     * @param identifier the system identifier
     */
    public void setSystemIdentifier(String identifier){
        systemIdentifier=identifier;
    }

    /**
     * Use this method to set the internal subset identified by the DOCTYPE
     * declaration.
     * If set, the result will be &lt;!DOCTYPE <i>rootElement</i>
     * <i>PUBLIC or SYSTEM identifiers</i> [<i>internalSubset</i>]&gt;
     *
     * @param subsetContents the text placed between the square brackets
     */
    public void setInternalSubset(String subsetContents){
        internalSubsetContent=subsetContents;
    }

    /**
     * Returns the root element. Mostly useful to see if you set it previously.
     *
     * @return the root element name, or null if none was set
     */
    public String getRootElement() {
        return rootElement;
    }

    /**
     * Returns the public identifier. Mostly useful to see if you set it
     * previously.
     *
     * @return the public identifier as last set
     */
    public String getPublicIdentifier() {
        return publicIdentifier;
    }

    /**
     * Returns the system identifier. Mostly useful to see if you set it
     * previously.
     *
     * @return the system identifier as last set
     */
    public String getSystemIdentifier() {
        return systemIdentifier;
    }

    /**
     * Returns the internal subset.
     *
     * @return the internal subset text as last set
     */
    public String getInternalSubset() {
        return internalSubsetContent;
    }

    /** Returns true for null and for the empty string. */
    private static boolean isEmpty(String s) {
        return s == null || s.length() == 0;
    }

    /**
     * Appends the value to the internal buffer wrapped in quotes. Single
     * quotes are used unless the value itself contains a single quote, in
     * which case double quotes are used so the literal stays well-formed.
     */
    private void appendQuoted(String value) {
        char quote = (value.indexOf('\'') >= 0) ? '"' : '\'';
        myBuffer.append(quote);
        myBuffer.append(value);
        myBuffer.append(quote);
    }

    /**
     * Appends the generated DOCTYPE declaration, without its leading
     * <code>&lt;</code>, to the internal buffer and marks the document as
     * started. The leading <code>&lt;</code> is supplied by the caller, which
     * returns the <code>&lt;</code> it has just read from the input.
     * Null identifiers and a null internal subset are treated as empty.
     */
    protected void addDocType(){
        myBuffer.append("!DOCTYPE ");
        myBuffer.append(rootElement);
        if (isEmpty(publicIdentifier)) {
            if (!isEmpty(systemIdentifier)) {
                myBuffer.append(" SYSTEM ");
                appendQuoted(systemIdentifier);
            }
        } else {
            myBuffer.append(" PUBLIC ");
            appendQuoted(publicIdentifier);
            myBuffer.append(" ");
            appendQuoted(systemIdentifier == null ? "" : systemIdentifier);
        }
        myBuffer.append(" ");

        if (!isEmpty(internalSubsetContent)) {
            myBuffer.append(" [\n");
            myBuffer.append(internalSubsetContent);
            myBuffer.append("\n]");
        }

        myBuffer.append(">\n");
        docStarted=true;
    }

    /**
     * Removes and returns the first character of the internal buffer.
     * The buffer must not be empty.
     *
     * @return the value of the removed character
     */
    protected int feedFromInternalBuffer() {
        int retChar=myBuffer.charAt(0);
        myBuffer.deleteCharAt(0);
        return retChar;
    }

    /**
     * Returns the next byte of the filtered document. Pending generated
     * characters are returned first; otherwise one byte is taken from the
     * underlying stream and, until the DOCTYPE has been dealt with, examined
     * to find the place where the DOCTYPE belongs.
     *
     * @return the next byte, or -1 at the end of the stream
     * @throws IOException if the underlying stream fails
     */
    public int read() throws IOException {
        if (myBuffer.length() > 0) {
            return feedFromInternalBuffer();
        }

        int c = in.read();
        if (docStarted || c == -1) {
            return c;
        }

        if (prologState == IN_PI) {
            // a processing instruction ends with "?>"
            if (c == '>' && lastWasQuestionMark) {
                prologState = SCANNING;
            }
            lastWasQuestionMark = (c == '?');
            return c;
        }

        if (prologState == IN_COMMENT) {
            // a comment ends with "-->"; any '<' inside it is plain text
            if (c == '-') {
                dashRun++;
            } else {
                if (c == '>' && dashRun >= 2) {
                    prologState = SCANNING;
                }
                dashRun = 0;
            }
            return c;
        }

        if (c != '<') {
            return c;
        }

        // figure out what kind of markup starts here
        int d = in.read();
        switch (d) {
        case -1:
            // lone '<' at the very end of the input: nothing to insert
            break;

        case '?':
            // XML declaration or processing instruction, let it go
            myBuffer.append('?');
            prologState = IN_PI;
            lastWasQuestionMark = false;
            break;

        case '!':
            // either comment or DOCTYPE
            handleDeclaration();
            break;

        default:
            // root element, need to insert the DOCTYPE in front of it
            addDocType();
            myBuffer.append('<');
            myBuffer.append((char) d);
            break;
        }
        return c;
    }

    /**
     * Handles the input following "&lt;!" in the prolog: an existing DOCTYPE
     * is replaced or accepted, a comment switches the scanner into comment
     * mode, and anything else is passed through.
     */
    private void handleDeclaration() throws IOException {
        int e = in.read();
        if (e == 'D') {
            // DOCTYPE! Bingo.
            if (replace) {
                skipExistingDocType();
                addDocType();
            } else {
                myBuffer.append("!D");
                docStarted = true;
            }
            return;
        }

        myBuffer.append('!');
        if (e == -1) {
            return;
        }
        myBuffer.append((char) e);
        if (e == '-') {
            int f = in.read();
            if (f == '-') {
                myBuffer.append('-');
                prologState = IN_COMMENT;
                dashRun = 0;
            } else if (f != -1) {
                myBuffer.append((char) f);
            }
        }
    }

    /**
     * Consumes the remainder of an existing DOCTYPE declaration.
     * INCLUDE/IGNORE are prohibited from the internal subset, so the end is
     * found by looking for a '&gt;' that is not between '[' and ']'. The end
     * of the input also terminates the skip.
     */
    private void skipExistingDocType() throws IOException {
        while (cycle) {
            int e = in.read();
            if (e == -1) {
                // truncated document: stop instead of looping forever
                cycle = false;
            } else if (e == '[') {
                internalSubset = true;
            } else if (e == ']') {
                internalSubset = false;
            } else if (e == '>' && !internalSubset) {
                // end of DOCTYPE
                cycle = false;
            }
        }
    }

    /**
     * Reads up to <code>length</code> bytes into <code>text</code> by calling
     * {@link #read()} repeatedly - all 'real' activity appears in the int
     * read() method.  This method is substantially from Java I/O by Elliotte
     * Rusty Harold, http://www.oreilly.com/catalog/javaio/.
     *
     * @param text   the destination array
     * @param offset index of the first byte to store
     * @param length maximum number of bytes to read
     * @return the number of bytes stored, 0 if <code>length</code> is 0, or
     *         -1 if no byte could be read because the stream has ended
     * @throws IOException if the underlying stream fails
     * @throws IndexOutOfBoundsException if offset/length do not fit the array
     */
    public int read(byte[] text, int offset, int length) throws IOException {
        if (text == null) {
            throw new NullPointerException("text");
        }
        if (offset < 0 || length < 0 || length > text.length - offset) {
            throw new IndexOutOfBoundsException();
        }
        if (length == 0) {
            return 0;
        }
        if (endOfStream) {
            return -1;
        }

        int numRead = 0;
        while (numRead < length) {
            int temp = this.read();
            if (temp == -1) {
                this.endOfStream = true;
                break;
            }
            text[offset + numRead] = (byte) temp;
            numRead++;
        }
        // 0 is only a legal result for length == 0; report end of stream instead
        return (numRead == 0) ? -1 : numRead;
    }

    /**
     * This class defines a main() method to test the DOCTYPEChanger. It
     * filters the file named by the single argument and prints the result.
     *
     * @param args exactly one element: the name of the file to filter
     */
    public static void main(String[] args) {
        DOCTYPEChangerStream tester = null;
        try {
            if (args.length != 1)
                throw new IllegalArgumentException("Wrong number of arguments");
            // Create a stream to read and clean the file
            tester=new DOCTYPEChangerStream(new FileInputStream(args[0]));
            tester.setRootElement("html");
            tester.setSystemIdentifier("http://www.simonstl.com/html");
            tester.setPublicIdentifier("-//SIMONSTLCOM//DTD tester//EN");
            tester.setInternalSubset("this is a test");
            tester.setReplace(false);
            BufferedReader in = new BufferedReader(new InputStreamReader(tester));
            String line;
            while((line = in.readLine()) != null)
                System.out.println(line);
        }
        catch(Exception e) {
            e.printStackTrace();
            System.err.println("Usage: java DOCTYPEChangerStream <filename>");
        }
        finally {
            // Close the stream even when reading failed part-way through.
            if (tester != null) {
                try {
                    tester.close();
                } catch (IOException ignored) {
                    // nothing useful can be done about a failed close here
                }
            }
        }
    }

/* Comments from Nigel Whitaker regarding using 
   a Stream rather than a Reader:

My main comment is that I think extending a FilterInputStream would be
a better choice than a FilterReader.  Let me try and explain my (and
xml-dev's) reasoning/research:

I am trying to use DOCTYPEChanger as a direct filter on the input to
SAX (well SAX2 as in Xerces' XMLReader).  My original code created an
org.xml.sax.InputSource and fed this into the parse method.  I then
tried to add your Filter and started getting validation exceptions,
even though I ensured your code was correctly adding a DOCTYPE.

The problem arises from the use of a Reader in this chain:

java.io.File -> java.io.FileInputStream -> 
  java.io.InputStreamReader -> DOCTYPEChanger -> 
  org.xml.sax.InputSource -> parser.parse()

My original working/validating code (with the DOCTYPE added by hand) was:

java.io.File -> java.io.FileInputStream -> 
  org.xml.sax.InputSource -> parser.parse()


Although the use of Readers and Writers is the preferred way of
doing Java IO since Java 1.1, it causes some problems with SAX parsing.
It seems that SAX prefers to handle InputStreams of raw bytes
because otherwise (with java.lang.Character based Readers) it cannot
properly handle the encoding conversion issues of the input file.

I did a bit of research and found that this issue was previously
covered in a July 1999 xml-dev discussion entitled "encoding problem
fixed".  It appears that depending on the platform-specific JVM
Readers may default to different encodings.  See this thread:

  http://lists.xml.org/archives/xml-dev/199907/msg00413.html

David Megginson also issued an "Important SAX Guideline" in this
message:

  http://lists.xml.org/archives/xml-dev/199907/msg00414.html

  Always use an InputStream in preference to a Reader when you don't
  know the XML document's character encoding in advance.

Users who use DOCTYPEChanger to write a temporary or intermediate file
may not encounter this problem, only those who apply the Filter as
part of the parsing, as I was doing.

Following the guideline seems to have solved my problems.  The
conversion to a FilterInputStream was a fairly simple change to your
code. 
*/

}
