File: input.c

package info (click to toggle)
msort 8.53-2.2
links: PTS
area: main
in suites: buster
size: 2,360 kB
sloc: sh: 10,138; ansic: 10,031; makefile: 51
file content (332 lines) | stat: -rw-r--r-- 8,154 bytes
parent folder | download | duplicates (5)
/* Time-stamp: <2008-11-02 12:10:41 poser> */
/*
 * Copyright (C) 2005-2007 William J. Poser.
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 3 of the GNU General Public License as
 * published by the Free Software Foundation;
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include "config.h"
#include "compdefs.h"

#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#include <wchar.h>
#ifdef LOCALE_GETTEXT
#include <libintl.h>
#define _(x) gettext(x)
#else
#define _(x) (x)
#endif
#ifdef HAVE_UNINUM_UNICODE_H
#include <uninum/unicode.h>
#else
#include "unicode.h"
#endif
#include "input.h"
#include "limits.h"
#include "utf8error.h"

/* Capacity, in bytes, of the message buffer declared below. */
#define MSGSIZE 128
/*
 * Headroom, in bytes, that GetBlockSepCharRAUTF8 keeps free in its output
 * buffer before appending a character. Presumably the longest UTF-8
 * sequence the reader can deliver -- TODO confirm against UTF8in.
 */
#define MAXUTF8LEN 6

/* Scratch buffer in which ReportReadError formats the text it hands to perror. */
static char msg [MSGSIZE];

/*
 * Report a failure signalled by the UTF-8 reader and terminate the program.
 *
 *   fp            Stream on which the diagnostic is written (the I/O error
 *                 case goes through perror and therefore always to stderr).
 *   c             Status value handed back by the reader.
 *   rp            Raw input bytes; passed to ExplicateBadUTF8 when the
 *                 input code is invalid.
 *   RecordNumber  Record number included in the message.
 *   ByteCnt       Byte position included in the message.
 *
 * Returns 0 if c is none of the error codes handled below (normal end of
 * input). For each handled error code a message is printed and exit(1)
 * is called, so the function does not return in those cases.
 */
int
ReportReadError(
	FILE *fp,
	UTF32 c, 		/* Error code */
	unsigned char *rp,	/* Pointer to raw input sequence */
	unsigned long RecordNumber,
	unsigned long ByteCnt)
{

  extern void ExplicateBadUTF8(FILE *, unsigned char *);

  /*
   * The message formats use %ld. They double as translation keys, so they
   * are left untouched and the unsigned counters are converted to long so
   * that the arguments match the conversion specifications.
   */
  long rec = (long) RecordNumber;
  long pos = (long) ByteCnt;
  int len;

  switch (c)
    {
    case UTF8_NOTENOUGHBYTES:
      fprintf(fp,_("Truncated UTF-8 sequence encountered at record %ld, byte %ld.\n"),
	      rec, pos);
      break;
    case UTF8_BADINCODE:
      fprintf(fp,_("Invalid UTF-8 code encountered at record %ld, byte %ld.\n"),
	      rec, pos);
      ExplicateBadUTF8(fp,rp);
      break;
    case UTF8_BADOUTCODE:
      fprintf(fp,_("Encountered invalid Unicode at record %ld, byte %ld.\n"),
	      rec, pos);
      break;
    case UTF8_IOERROR:
      len = snprintf(msg,MSGSIZE,_("Error reading input at record %ld, byte %ld.\n"),
		     rec, pos);
      if(len >= MSGSIZE) len = MSGSIZE - 1;	/* Output was truncated */
      /*
       * perror appends ": <system message>" to its argument, so a trailing
       * newline would push the system message onto a line of its own.
       */
      if(len > 0 && msg[len-1] == '\n') msg[len-1] = '\0';
      perror(msg);
      break;
    default:			/* Normal EOF */
      return(0);
    }

  /* Every handled error code is fatal. */
  exit(1);
}

/*
 * Read one record of bytes from fp into buf, growing the buffer as needed.
 *
 * A record ends when the terminator character t occurs twice in a row (for
 * a newline terminator: at a blank line) or when the input ends. The first
 * of the two terminators is stored as part of the record, the second is
 * not. The stored text is always null terminated.
 *
 *   fp            Stream to read from.
 *   buf           Buffer obtained from malloc/realloc, *BufferSize bytes long.
 *   status        Receives the number of bytes stored, not counting the
 *                 terminating null. It receives INPUT_BUFOVERFLOW if storage
 *                 cannot be allocated, and INPUT_ENDOFINPUT on the call that
 *                 follows the one on which the end of input was reached.
 *   BufferSize    Current buffer capacity; updated whenever the buffer grows.
 *   t             Terminator character; compared against single bytes.
 *   RecordNumber  Unused.
 *   ByteCnt       Unused.
 *
 * Returns the (possibly relocated) buffer, or NULL if it could not be
 * enlarged. In that case realloc has left the caller's original buffer
 * allocated and unchanged.
 *
 * The end-of-input flag is a single static variable and is therefore shared
 * by every stream read through this function.
 */

/* Double the buffer (minimum two bytes); returns NULL and leaves *BufferSize alone on failure. */
static UTF8 *
GrowNNBlockBuffer(UTF8 *buf, int *BufferSize)
{
  int newsize = (*BufferSize > 0) ? 2 * *BufferSize : 2;
  UTF8 *grown = realloc(buf, (size_t) newsize * sizeof *buf);

  if(grown != NULL) *BufferSize = newsize;
  return(grown);
}

UTF8 *
GetNNBlockRAUTF8(FILE *fp,
	 UTF8 *buf,
	 int *status,
	 int *BufferSize,
	 wchar_t t,
	 unsigned long RecordNumber,
	 unsigned long ByteCnt)
{
  int cnt = 0;
  int c;
  int state = 0;		/* 1 when the previous byte was the terminator */
  int eol;
  static int done = 0;		/* Set once the end of input has been seen */

  (void) RecordNumber;
  (void) ByteCnt;

  if(done){
    done = 0;
    *status =	INPUT_ENDOFINPUT;
    return(buf);
  }

  eol = (int) t;
  while( (c = getc(fp)) != EOF){
    /*
     * Keep one byte in reserve beyond the one about to be stored, so that
     * the terminating null always fits, including when the input ends
     * right after this byte.
     */
    if(cnt + 1 >= *BufferSize){
      buf = GrowNNBlockBuffer(buf, BufferSize);
      if(buf == NULL){
	*status=INPUT_BUFOVERFLOW;
	return(NULL);
      }
    }
    if(c == eol) {
      if(state == 1){		/* Second terminator in a row: record complete */
	buf[cnt]='\0';
	*status=cnt;
	return(buf);
      }
      buf[cnt++]=eol;
      state=1;
    }
    else {
      state=0;
      buf[cnt++]=c;
    }
  } /* End of while */

  /* Only reachable with cnt == 0: nothing was read into an empty buffer. */
  if(cnt >= *BufferSize){
    buf = GrowNNBlockBuffer(buf, BufferSize);
    if(buf == NULL){
      *status=INPUT_BUFOVERFLOW;
      return(NULL);
    }
  }

  buf[cnt] = '\0';
  done = 1;
  *status=cnt;
  return(buf);
}

/*
 * Copy slen bytes from src to tgt and store a terminating null after them.
 * If slen is zero or negative nothing is copied and only the terminator
 * is written, at tgt itself.
 */
void
ucstrappend(unsigned char *tgt, unsigned char *src,int slen) {
  int total = (slen > 0) ? slen : 0;
  int k = 0;

  while(k < total){
    tgt[k] = src[k];
    k++;
  }
  tgt[total] = '\0';
}

/*
 * Read one block of UTF-8 text from a stream into buf, growing the buffer
 * as needed. The block ends at the separator character t, which is not
 * stored, or at the end of input. The stored text is always null
 * terminated, also when the block is empty.
 *
 *   stream        Stream to read from; the bytes are fetched by UTF8in
 *                 through the stream's file descriptor.
 *   buf           Buffer obtained from malloc/realloc.
 *   status        Receives the number of bytes stored, not counting the
 *                 terminating null. It receives INPUT_BUFOVERFLOW if storage
 *                 cannot be allocated, and INPUT_ENDOFINPUT on the call that
 *                 follows the one on which the end of input was reached.
 *   BufferSize    Current buffer capacity; updated whenever the buffer grows.
 *   t             Separator character.
 *   RecordNumber  Record number, used only in error reports.
 *   ByteCnt       Byte position, used only in error reports.
 *
 * Returns the (possibly relocated) buffer, or NULL if it could not be
 * enlarged. In that case realloc has left the caller's original buffer
 * allocated and unchanged. Read and encoding errors are passed to
 * ReportReadError, which terminates the program.
 *
 * The end-of-input flag is a single static variable and is therefore shared
 * by every stream read through this function.
 */

UTF8 *
GetBlockSepCharRAUTF8(
      FILE *stream,
      UTF8 *buf,
      int *status,
      int *BufferSize,
      wchar_t t,
      unsigned long RecordNumber,
      unsigned long ByteCnt
)
{
  UTF32 c;
  int cnt = 0;
  int UCBytes = 0;		/* Defined even if the reader fails at once */
  unsigned char *rawptr = NULL;
  int infd;
  static int done = 0;		/* Set once the end of input has been seen */
  extern UTF32 UTF8in (int,int *,unsigned char **);

  infd = fileno(stream);
  if(done){
    done = 0;
    *status=INPUT_ENDOFINPUT;
    return(buf);
  }

  while ( (c = UTF8in(infd,&UCBytes,&rawptr)) <= UNI_MAX_UTF32){
    /*
     * Keep MAXUTF8LEN bytes of headroom for the character about to be
     * appended (assumes UTF8in never delivers more than that per
     * character -- TODO confirm).
     */
    if(cnt >= (*BufferSize -MAXUTF8LEN)){
      int newsize = *BufferSize;
      UTF8 *grown;

      /* One doubling suffices unless the buffer is smaller than the headroom. */
      do {
	newsize = (newsize > 0) ? 2 * newsize : 2 * MAXUTF8LEN;
      } while(cnt >= (newsize - MAXUTF8LEN));

      grown = realloc(buf, ((size_t) newsize + 1) * sizeof *buf);
      if(grown == NULL){
	fprintf(stderr,"null buf ptr on request for %d bytes\n",newsize);fflush(stderr);
	*status=INPUT_BUFOVERFLOW;
	return(NULL);
      }
      buf = grown;
      *BufferSize = newsize;
    }
    if(c == t){
      /* An empty block has had nothing appended, so terminate explicitly. */
      buf[cnt] = '\0';
      *status=cnt;
      return(buf);
    }
    ucstrappend(buf+cnt,rawptr,UCBytes);
    cnt += UCBytes;
  }

  /* The loop is only left on a reader status above UNI_MAX_UTF32. */
  ReportReadError(stderr,c,rawptr,RecordNumber,ByteCnt-UCBytes);

  /* Normal end of input: hand back what was collected, then stop next time. */
  done = 1;
  if(buf != NULL) buf[cnt] = '\0';
  *status=cnt;
  return(buf);
}

/*
 * Read a line into buffer from a file of UTF-8 characters, converting
 * en passant to UTF32.
 * Returns the number of characters in the line, not counting the
 * terminating null. Returns:
 *	 
 *     INPUT_ENDOFINPUT     if there is no more input
 *     INPUT_BUFOVERFLOW    if the length of the line exceeds the buffer size 
 *     	 
 */

#define DEFAULT 0
#define OVERFLOW 1
#define MSGSIZE 128

int
ugetline(FILE *fp,wchar_t *buf, int size)
{

  wchar_t c;
  int cnt = 0;
  int state = DEFAULT;
  static short GetLineDone = 0;
  char msg[MSGSIZE];
  /* These two are not used here but I don't want to include two versions of Get_UTF32...*/
  int UCBytes;		
  unsigned char *rawptr;

  extern UTF32 UTF8in (int,int *,unsigned char **);

  if(GetLineDone){
    GetLineDone = 0;
    return(INPUT_ENDOFINPUT);
  }
   
  while( (c = UTF8in(fileno(fp),&UCBytes,&rawptr)) <= UNI_MAX_UTF32){
    if(cnt == size){
      buf[cnt] = '\0';
      state = OVERFLOW;
    }
    if(c == L'\n'){
      if(state == OVERFLOW) return(INPUT_BUFOVERFLOW);
      else{
	buf[cnt] = L'\0';
	return(cnt);
      }
    }
    else if(state == DEFAULT) buf[cnt++] = c;
  }
  GetLineDone = 1;
  switch (c){ 
     case UTF8_NOTENOUGHBYTES:
       fprintf(stderr,"Truncated UTF-8 sequence encountered.\n");
       exit(1);
       break;
     case UTF8_BADINCODE:
       fprintf(stderr,"Invalid UTF-8 code encountered.\n");
       exit(1);
       break;
     case UTF8_BADOUTCODE:
       fprintf(stderr,"Encountered invalid Unicode.\n");
       exit(1);
       break;
     case UTF8_IOERROR:
       snprintf(msg,MSGSIZE-1,"Error reading input.\n");
       perror(msg);
       exit(1);
       break;
     default:			/* Normal EOF */
       break;
  }
  if(state == OVERFLOW) return(INPUT_BUFOVERFLOW);
  else{
    buf[cnt] = '\0';
    GetLineDone = 1;
    return(cnt);
  }
}


/*
 * Read one fixed-length record of *lenptr bytes from the file descriptor
 * underlying fp into buf and null terminate it. buf must have room for
 * *lenptr + 1 bytes.
 *
 * A single read() may return fewer bytes than requested even though more
 * input will follow (for instance on a pipe), so reading continues until
 * the record is complete, the input ends, or an error occurs.
 *
 * *bread receives:
 *     the number of bytes read   if a complete record was read
 *     INPUT_ENDOFINPUT           if the input ended before any byte was read
 *     INPUT_SHORTRECORD          if the input ended, or a read failed,
 *                                before the record was complete
 *
 * Returns buf.
 */
UTF8 *
GetFixedLengthRecord(
     FILE *fp,	/* Stream from which to read */
     UTF8 *buf,  /* Address of buffer */
     int *bread, /* Pointer to number of bytes actually read - return parameter */
     int *lenptr, /* Pointer to intended length of record, in bytes */
     wchar_t dummy2, /* Unused argument needed for consistency with other functions  */
     unsigned long RecordNumber, /* Dummy */
     unsigned long ByteCnt	/* Dummy */

) {
  int fd = fileno(fp);
  int wanted = *lenptr;
  int total = 0;
  int failed = 0;
  ssize_t got;

  (void) dummy2;
  (void) RecordNumber;
  (void) ByteCnt;

  while(total < wanted) {
    got = read(fd,buf + total,(size_t) (wanted - total));
    if(got < 0) {
      failed = 1;
      break;
    }
    if(got == 0) break;		/* End of input */
    total += (int) got;
  }

  if(total == 0 && !failed) {
    *bread = INPUT_ENDOFINPUT;
  }
  else if (total < wanted) {
    *bread = INPUT_SHORTRECORD;
  } else {
    *bread = total;
    buf[wanted] = '\0';
  }
  return buf;
}