File: std_romaji.c

package info (click to toggle)
lookup 1.08b-5
links: PTS
area: contrib
in suites: woody
size: 1,108 kB
ctags: 1,305
sloc: ansic: 12,634; makefile: 236; perl: 174; sh: 53
file content (267 lines) | stat: -rw-r--r-- 7,621 bytes
parent folder | download | duplicates (9)
/*
 * Jeffrey Friedl
 * Omron Corporation			オムロン（株）
 * Nagaokakyoshi, Japan			〒617長岡京市
 *
 * jfriedl@nff.ncl.omron.co.jp
 *
 * This work is placed under the terms of the GNU General Purpose License
 * (the "GNU Copyleft").
 */

#include <ctype.h>
#include "config.h"
#include "system.h"
#if defined(_HAVE_STRINGS_H_) /* might be defined in system.h */
# include <strings.h>
#else
# include <string.h>
# define index strchr
# define rindex strrchr
#endif
#include "romaji2kana.h"
#include "jreadline.h"
#include "strsave.h"
#include "xmalloc.h"

/*
 * romaji_converter
 *
 * This routine allows on-the-fly romaji->kana conversion when used
 * with my readline-ish package jreadline.  To activate, just call
 *	set_romaji_converter(std_romaji_converter);
 * once before calling readline().
 */
#ifndef MOVE_MEMORY
   /* this must be a "safe" memory copy */
   #define MOVE_MEMORY(FROM, TO, LENGTH) \
       (void)bcopy((char*)(FROM), (char*)(TO), (int)(LENGTH))
#endif

#define line_length()   (end_of_line - start_of_line)
#define eol()		(dot == end_of_line)
#define bol()		(dot == start_of_line)

/*
 * Characters other than letters and digits that are accepted as part of
 * a run of romaji when scanning leftward from the cursor.
 */
static const char *allowed_nonletters_in_romaji = "-^'";

/*
 * Install NEW as the set of non-alphanumeric characters accepted inside
 * romaji and hand back the set that was in effect before the call.
 * Only the pointer is stored; the string itself is not copied.
 */
const char *
std_romaji_allowed_nonletters(const char *new)
{
    const char *previous = allowed_nonletters_in_romaji;

    allowed_nonletters_in_romaji = new;
    return previous;
}

/*
 * When nonzero, std_romaji_converter() remembers each forced conversion,
 * and a forced call made while the cursor still sits right after the
 * text that conversion produced swaps the previous text back in rather
 * than converting again.  When zero, forced conversions are neither
 * remembered nor toggled.
 */
int std_romaji_toggled_force = 1;

/*
 * Give a chance to do romaji-to-kana conversion on the line being edited.
 *
 * The text runs from START_OF_LINE up to (not including) *EOL_P, the
 * cursor is at *DOT_P, and BUFEND marks the limit of the buffer holding
 * the line.  The run of ASCII letters, digits and
 * allowed_nonletters_in_romaji characters just to the left of the cursor
 * is handed to romaji2kana() and the result replaces it in place.  On
 * return *DOT_P and *EOL_P have been adjusted for any change in size.
 * If a change would move the end of the line to BUFEND or beyond, it is
 * not done.
 *
 * If FORCE is false, the run is converted only when it is introduced by
 * an unescaped '/' or directly follows a non-ASCII (EUC) byte, and a
 * trailing "n", "h", "ny" or "hy" is left alone for the time being.
 * If FORCE is true, the run is converted unconditionally, right up to
 * the cursor.
 *
 * If EAT_LEADING_SLASH is true and the converted text starts with a
 * non-ASCII byte, an unescaped '/' directly in front of the run is
 * replaced along with the romaji.
 *
 * A FORCE call made while the cursor still sits directly after the text
 * produced by the previous FORCE call undoes (or redoes) that conversion
 * if 'std_romaji_toggled_force' is true.
 */
void
std_romaji_converter(const unsigned char *start_of_line,
		     const unsigned char *bufend,
		     unsigned char **dot_p,
		     const unsigned char **eol_p,
		     int force,
		     int eat_leading_slash)
{
    unsigned char *dot = *dot_p;
    const unsigned char *end_of_line = *eol_p;
    unsigned char *ptr;

    /* What the most recent forced conversion replaced, and with what. */
    static struct {
	unsigned char *start;	/* where in the line the text was put    */
	unsigned char *orig;	/* text that was replaced (heap copy)    */
	unsigned char *new;	/* text that replaced it (heap copy)     */
	unsigned orig_len;
	unsigned new_len;
    } last_forced;

    /*
     * Toggle: the cursor is right after the text the last forced
     * conversion produced, and that text is still there unmodified.
     */
    if (std_romaji_toggled_force && force &&
	last_forced.start &&
	last_forced.start + last_forced.new_len == dot &&
	!strncmp((const char *)last_forced.start,
		 (const char *)last_forced.new, last_forced.new_len))
    {
	int len_delta = (int)last_forced.orig_len - (int)last_forced.new_len;

	/*
	 * The line may have grown since the conversion was recorded, so
	 * make sure putting the longer text back still fits.
	 */
	if (len_delta > 0 && len_delta >= bufend - end_of_line)
	    return;

	/* make room for (or close up after) the text being put back */
	if (len_delta && dot != end_of_line)
	{
	    MOVE_MEMORY(/* from  */ dot,
			/* to    */ dot + len_delta,
			/* count */ end_of_line - dot);
	}

	/* now put the previous text back */
	MOVE_MEMORY(/* from  */ last_forced.orig,
		    /* to    */ last_forced.start,
		    /* count */ last_forced.orig_len);

	/* adjust pointers for any text size change */
	*eol_p = end_of_line + len_delta;
	*dot_p = dot + len_delta;

	/* swap old & new so that the next toggle goes the other way */
	{
	    unsigned char *temp_ptr = last_forced.orig;
	    unsigned temp_len = last_forced.orig_len;

	    last_forced.orig = last_forced.new;
	    last_forced.orig_len = last_forced.new_len;
	    last_forced.new = temp_ptr;
	    last_forced.new_len = temp_len;
	}
	return;
    }

    /*
     * If at the beginning of a line, or right after a non-ascii,
     * obviously nothing to convert.
     */
    if (dot == start_of_line || !isascii(dot[-1]))
	return;

    /*
     * Search to the left of the cursor for a potential place to start
     * converting.  The scan stops at the beginning of the line, at a
     * non-ascii byte, or at an ascii character that is neither
     * alphanumeric nor one of allowed_nonletters_in_romaji.
     */
    ptr = dot - 1;
    while (ptr != start_of_line &&
	   isascii(*ptr) &&
	   (isalnum(*ptr) || index(allowed_nonletters_in_romaji, *ptr)))
    {
	ptr--;
    }

    /* in automatic conversions, don't convert if the '/' has been escaped */
    if (!force && ptr > start_of_line && ptr[-1] == '\\')
	return;

    /* a leading '/' or EUC means auto-convert */
    if (force || !isascii(*ptr) || *ptr == '/')
    {
	unsigned char kana[MAX_INPUT_LINE_LENGTH * 2];
	unsigned romaji_len, kana_len;
	signed int len_delta;
	unsigned char *romaji_end;

	/*
	 * Because of the special nature of 'n' in romaji, we don't want to
	 * convert it "on the fly" if it's the last thing on the line... it
	 * might well be the start of "ne", etc.  To enter "んえ", the user
	 * would have to type "n'e" or some other thing using one of the
	 * romaji2kana package's OMIT characters.
	 * Also allow for "nyu" etc. (for "にゅ").
	 *
	 * Also watch out for 'h', since maybe allowing 'h' to be used
	 * as an 'o' extender. Should probably watch out for 'm' as well.....
	 */
	if (!force && (dot[-1] == 'n' || dot[-1] == 'N' ||
		       dot[-1] == 'h' || dot[-1] == 'H'))
	    romaji_end = dot - 1;
	else if (!force &&
		 (dot[-1] == 'y' || dot[-1] == 'Y') &&
		 (&dot[-2] >= start_of_line) &&
		 (dot[-2] == 'n' || dot[-2] == 'N' ||
		  dot[-2] == 'h' || dot[-2] == 'H'))
	    romaji_end = dot - 2;
	else
	    romaji_end = dot;

	if (isascii(*ptr))
	{
	    if (*ptr == '/')
		ptr++;  /* skip it */
	    if (ptr == romaji_end)
		return;	/* nothing left to actually convert */
	} else {
	    /*
	     * If we're up against an EUC, we'll give that as well
	     * to the conversion routine. That's so if the first
	     * char after that to be converted is a long vowel
	     * indicator (such as '^' as in "To^kyo^"), the routine
	     * will know which vowel to extend (it being the EUC char
	     * we're including here).
	     */
	    ptr--; /* include the last EUC char */
	    if (ptr + 2 == romaji_end)
		return; /* nothing left to actually convert */
	}

	if (romaji2kana(ptr, romaji_end, kana, sizeof(kana), 0) < 0)
	    return; /* oops */

	kana_len = strlen((const char *)kana);

	/*
	 * Replace the leading slash as well if:
	 *     the caller asked for it (EAT_LEADING_SLASH).
	 *     the first converted character is not ascii.
	 *     there *is* a slash right before the romaji.
	 *     that slash is not itself escaped by a backslash.
	 * The slash may be the very first character of the line, in which
	 * case there is nothing before it to look at.
	 */
	if (kana_len &&
	    eat_leading_slash &&
	    !isascii(kana[0]) &&
	    ptr > start_of_line &&
	    ptr[-1] == '/' &&
	    (ptr - 1 == start_of_line || ptr[-2] != '\\'))
	{
	    ptr--;
	}

	romaji_len = (unsigned)(romaji_end - ptr);
	len_delta = (int)kana_len - (int)romaji_len;

	/* abort if resulting kana would overflow */
	if (len_delta >= bufend - end_of_line)
	    return;

	if (std_romaji_toggled_force && force)
	{
	    /*
	     * Save info about what conversion is done.  Only the
	     * romaji_len bytes being replaced are saved: the line goes
	     * on past romaji_end, so a string copy from ptr would run
	     * past the end of the allocation.
	     */
	    free(last_forced.orig);
	    last_forced.orig_len = romaji_len;
	    last_forced.orig = xmalloc(last_forced.orig_len + 1);
	    MOVE_MEMORY(/* from  */ ptr,
			/* to    */ last_forced.orig,
			/* count */ romaji_len);
	    last_forced.orig[romaji_len] = '\0';

	    free(last_forced.new);
	    last_forced.new_len = kana_len;
	    last_forced.new = xmalloc(last_forced.new_len + 1);
	    strcpy((char *)last_forced.new, (const char *)kana);
	    last_forced.start = ptr;
	}

	/*
	 * If kana is different size than romaji and there's text after
	 * the romaji, move the stuff after appropriately.
	 */
	if (len_delta && romaji_end != end_of_line)
	    MOVE_MEMORY(/* from  */romaji_end,
			/* to    */romaji_end + len_delta,
			/* count */end_of_line - romaji_end);
	/*
	 * Now put the converted stuff.
	 */
	MOVE_MEMORY(/*from*/kana, /*to*/ptr, /*len*/kana_len);

	/* adjust pointers for any text size change */
	end_of_line += len_delta;
	dot         += len_delta;
    }

    *dot_p = dot;
    *eol_p = end_of_line;
}