/* * ContentParser.java February 2001 * * Copyright (C) 2001, Niall Gallagher * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.simpleframework.http.parse; import org.simpleframework.http.ContentType; import org.simpleframework.util.KeyMap; import org.simpleframework.util.parse.ParseBuffer; import org.simpleframework.util.parse.Parser; /** * This provides access to the MIME type parts, that is the primary * type, the secondary type and an optional character set parameter. * The charset parameter is one of many parameters that * can be associated with a MIME type. This however this exposes this * parameter with a typed method. *

* The getCharset will return the character encoding the * content type is encoded within. This allows the user of the content * to decode it correctly. Other parameters can be acquired from this * by simply providing the name of the parameter. * * @author Niall Gallagher */ public class ContentParser extends Parser implements ContentType { /** * Used to store the characters consumed for the subtype. */ private ParseBuffer subtype; /** * Used to store the characters for the charset. */ private ParseBuffer charset; /** * Used to store the characters consumed for the type. */ private ParseBuffer type; /** * Used to collect the name of a content type parameter. */ private ParseBuffer name; /** * Used to collect the value of the content type parameter. */ private ParseBuffer value; /** * Used to store the name value pairs of the parameters. */ private KeyMap map; /** * The default constructor will create a ContentParser * that contains no charset, type or subtype. This can be used to * extract the type, subtype and the optional charset * parameter by using the parser's parse(String) * method. */ public ContentParser(){ this.subtype = new ParseBuffer(); this.charset = new ParseBuffer(); this.value = new ParseBuffer(); this.name = new ParseBuffer(); this.type = new ParseBuffer(); this.map = new KeyMap(); } /** * This is primarily a convenience constructor. This will parse * the String given to extract the MIME type. This * could be achived by calling the default no-arg constructor * and then using the instance to invoke the parse * method on that String. * * @param header String containing a MIME type value */ public ContentParser(String header){ this(); parse(header); } /** * This sets the primary type to whatever value is in the string * provided is. If the string is null then this will contain a * null string for the primary type of the parameter, which is * likely invalid in most cases. * * @param primary the type to set for the primary type of this */ public void setPrimary(String primary) { type.reset(primary); } /** * This is used to retrieve the primary type of this MIME type. The * primary type part within the MIME type defines the generic type. * For example text/plain; charset=UTF-8. This will * return the text value. If there is no primary type then this * will return null otherwise the string value. * * @return the primary type part of this MIME type */ public String getPrimary() { return type.toString(); } /** * This sets the secondary type to whatever value is in the string * provided is. If the string is null then this will contain a * null string for the secondary type of the parameter, which is * likely invalid in most cases. * * @param type the type to set for the primary type of this */ public void setSecondary(String type) { subtype.reset(type); } /** * This is used to retrieve the secondary type of this MIME type. * The secondary type part within the MIME type defines the generic * type. For example text/html; charset=UTF-8. This * will return the HTML value. If there is no secondary type then * this will return null otherwise the string value. * * @return the primary type part of this MIME type */ public String getSecondary(){ return subtype.toString(); } /** * This will set the charset to whatever value the * string contains. If the string is null then this will not set * the parameter to any value and the toString method * will not contain any details of the parameter. * * @param enc parameter value to add to the MIME type */ public void setCharset(String enc) { charset.reset(enc); } /** * This is used to retrieve the charset of this MIME * type. This is a special parameter associated with the type, if * the parameter is not contained within the type then this will * return null, which typically means the default of ISO-8859-1. * * @return the value that this parameter contains */ public String getCharset() { return charset.toString(); } /** * This is used to retrieve an arbitrary parameter from the MIME * type header. This ensures that values for boundary * or other such parameters are not lost when the header is parsed. * This will return the value, unquoted if required, as a string. * * @param name this is the name of the parameter to be retrieved * * @return this is the value for the parameter, or null if empty */ public String getParameter(String name) { return map.get(name); } /** * This will add a named parameter to the content type header. If * a parameter of the specified name has already been added to the * header then that value will be replaced by the new value given. * Parameters such as the boundary as well as other * common parameters can be set with this method. * * @param name this is the name of the parameter to be added * @param value this is the value to associate with the name */ public void setParameter(String name, String value) { map.put(name, value); } /** * This will initialize the parser when it is ready to parse * a new String. This will reset the parser to a * ready state. The init method is invoked by the parser when * the Parser.parse method is invoked. */ protected void init(){ if(count > 0) { pack(); } clear(); } /** * This is used to clear all previously collected tokens. This * allows the parser to be reused when there are multiple source * strings to be parsed. Clearing of the tokens is performed * when the parser is initialized. */ private void clear() { type.clear(); subtype.clear(); charset.clear(); name.clear(); value.clear(); map.clear(); off = 0; } /** * Reads and parses the MIME type from the given String * object. This uses the syntax defined by RFC 2616 for the media-type * syntax. This parser is only concerned with one parameter, the * charset parameter. The syntax for the media type is *

    * media-type = token "/" token *( ";" parameter )
    * parameter = token | literal 
    *

*/ protected void parse(){ type(); off++; subtype(); parameters(); } /** * This is used to remove all whitespace characters from the * String excluding the whitespace within literals. * The definition of a literal can be found in RFC 2616. *

* The definition of a literal for RFC 2616 is anything between 2 * quotes but excluding quotes that are prefixed with the backward * slash character. */ private void pack() { char old = buf[0]; int len = count; int seek = 0; int pos = 0; while(seek < len){ char ch = buf[seek++]; if(ch == '"' && old != '\\'){ /* qd-text*/ buf[pos++] = ch; while(seek < len){ old = buf[seek-1]; ch = buf[seek++]; buf[pos++] = ch; if(ch =='"'&& old!='\\'){ /*qd-text*/ break; } } }else if(!space(ch)){ old = buf[seek - 1]; buf[pos++] = old; } } count = pos; } /** * This reads the type from the MIME type. This will fill the * type ParseBuffer. This will read all chars * upto but not including the first instance of a '/'. The type * of a media-type as defined by RFC 2616 is * type/subtype;param=val;param2=val. */ private void type(){ while(off < count){ if(buf[off] =='/'){ break; } type.append(buf[off]); off++; } } /** * This reads the subtype from the MIME type. This will fill the * subtype ParseBuffer. This will read all chars * upto but not including the first instance of a ';'. The subtype * of a media-type as defined by RFC 2616 is * type/subtype;param=val;param2=val. */ private void subtype(){ while(off < count){ if(buf[off] ==';'){ break; } subtype.append(buf[off]); off++; } } /** * This will read the parameters from the MIME type. This will search * for the charset parameter within the set of parameters * which are given to the type. The charset param is the * only parameter that this parser will tokenize. *

* This will remove any parameters that preceed the charset parameter. * Once the charset is retrived the MIME type is considered * to be parsed. */ private void parameters(){ while(skip(";")){ if(skip("charset=")){ charset(); break; }else{ parameter(); insert(); } } } /** * This will add the name and value tokens to the parameters map. * If any previous value of the given name has been inserted * into the map then this will overwrite that value. This is * used to ensure that the string value is inserted to the map. */ private void insert() { insert(name, value); } /** * This will add the given name and value to the parameters map. * If any previous value of the given name has been inserted * into the map then this will overwrite that value. This is * used to ensure that the string value is inserted to the map. * * @param name this is the name of the value to be inserted * @param value this is the value of a that is to be inserted */ private void insert(ParseBuffer name, ParseBuffer value) { map.put(name.toString(), value.toString()); } /** * This is a parameter as defined by RFC 2616. The parameter is added to a * MIME type e.g. type/subtype;param=val etc. The parameter * name and value are not stored. This is used to simply update the read * offset past the parameter. The reason for reading the parameters is to * search for the charset parameter which will indicate the * encoding. */ private void parameter(){ name(); off++; /* = */ value(); } /** * This will simply read all characters from the buffer before the first '=' * character. This represents a parameter name (see RFC 2616 for token). The * parameter name is not buffered it is simply read from the buffer. This will * not cause an IndexOutOfBoundsException as each offset * is checked before it is acccessed. */ private void name(){ while(off < count){ if(buf[off] =='='){ break; } name.append(buf[off]); off++; } } /** * This is used to read a parameters value from the buf. This will read all * char's upto but excluding the first terminal char * encountered from the off within the buf, or if the value is a literal * it will read a literal from the buffer (literal is any data between * quotes except if the quote is prefixed with a backward slash character). */ private void value(){ if(quote(buf[off])){ for(off++; off < count;){ if(quote(buf[off])){ if(buf[++off-2]!='\\'){ break; } } value.append(buf[off++]); } }else{ while(off < count){ if(buf[off] ==';') { break; } value.append(buf[off]); off++; } } } /** * This method is used to determine if the specified character is a quote * character. The quote character is typically used as a boundary for the * values within the header. This accepts a single or double quote. * * @param ch the character to determine if it is a quotation * * @return true if the character provided is a quotation character */ private boolean quote(char ch) { return ch == '\'' || ch == '"'; } /** * This is used to read the value from the charset param. * This will fill the charset ParseBuffer and with * the charset value. This will read a literal or a token as * the charset value. If the charset is a literal * then the quotes will be read as part of the charset. */ private void charset(){ if(buf[off] == '"'){ charset.append('"'); for(off++; off < count;){ charset.append(buf[off]); if(buf[off++]=='"') if(buf[off-2]!='\\'){ break; } } }else{ while(off < count){ if(buf[off]==';') { break; } charset.append(buf[off]); off++; } } } /** * This will return the value of the MIME type as a string. This * will concatenate the primary and secondary type values and * add the charset parameter to the type which will * recreate the content type. * * @return this returns the string representation of the type */ private String encode() { StringBuilder text = new StringBuilder(); if(type != null) { text.append(type); text.append("/"); text.append(subtype); } if(charset.length() > 0) { text.append("; charset="); text.append(charset); } return encode(text); } /** * This will return the value of the MIME type as a string. This * will concatenate the primary and secondary type values and * add the charset parameter to the type which will * recreate the content type. * * @param text this is the buffer to encode the parameters to * * @return this returns the string representation of the type */ private String encode(StringBuilder text) { for(String name : map) { String value = map.get(name); text.append("; "); text.append(name); if(value != null) { text.append("="); text.append(value);; } } return text.toString(); } /** * This will return the value of the MIME type as a string. This * will concatenate the primary and secondary type values and * add the charset parameter to the type which will * recreate the content type. * * @return this returns the string representation of the type */ public String toString() { return encode(); } }