File: gul-string.c

package info (click to toggle)
galeon 2.0.6-2.1
links: PTS, VCS
area: main
in suites: lenny
size: 19,156 kB
ctags: 9,680
sloc: ansic: 77,798; cpp: 13,928; sh: 9,000; xml: 5,761; makefile: 901
file content (563 lines) | stat: -rw-r--r-- 13,445 bytes
parent folder | download | duplicates (3)
/*
 *  Copyright (C) 2002 Marco Pesenti Gritti
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

/* Galeon includes */
#include "gul-string.h"

/* system includes */
#include <glib/gi18n.h>
#include <libgnomevfs/gnome-vfs-mime.h>
#include <libxml/parser.h>
#include <time.h>
#include <string.h>
#include <errno.h>

/* UTF-8 byte sequence for U+2026 HORIZONTAL ELLIPSIS (decimal 8230), i.e.
 * the precomputed output of g_unichar_to_utf8 for that code point.  It is a
 * single character but three bytes long. */
#define ELLIPSIS "\xe2\x80\xa6"

/**
 * gul_string_double_underscores: returns a copy of string in which every
 * '_' is written twice, so that gtk_label_parse_uline shows a literal
 * underscore instead of treating it as an accelerator marker.
 *
 * Returns NULL when string is NULL.  Otherwise the result is newly
 * allocated and the caller is responsible for freeing it.
 */
char *
gul_string_double_underscores (const char *string)
{
        const char *src;
        char *result;
        char *dst;
        int extra = 0;

        if (string == NULL) {
                return NULL;
        }

        /* every underscore costs one additional byte in the output */
        for (src = strchr (string, '_'); src != NULL;
             src = strchr (src + 1, '_')) {
                extra++;
        }

        /* nothing to escape: a plain copy is enough */
        if (extra == 0) {
                return g_strdup (string);
        }

        result = g_new (char, strlen (string) + extra + 1);
        dst = result;
        for (src = string; *src != '\0'; src++) {
                if (*src == '_') {
                        /* emit the escaping underscore first */
                        *dst++ = '_';
                }
                *dst++ = *src;
        }
        *dst = '\0';

        return result;
}

/**
 * gul_string_new_num_accel: creates a label of the form "_N. text" whose
 * accelerator is a digit or a letter chosen from the zero-based index num:
 *
 *   num 0..8   -> "_1." .. "_9."
 *   num 9      -> "_0."
 *   num 10..35 -> "_a." .. "_z."  (only when lettersok is TRUE)
 *
 * Returns a newly allocated string the caller must free, or NULL if num
 * is outside the range of acceleratable digits/letters (including any
 * negative num).
 */
gchar *
gul_string_new_num_accel (gint num, gchar *text, gboolean lettersok)
{
	/* a negative index has no accelerator; without this check -1 would
	 * produce "_0." (clashing with num == 9) and smaller values would
	 * produce labels such as "_-4." */
	if (num < 0)
		return NULL;

	if (num < 9)
		return g_strdup_printf ("_%i. %s", num + 1, text);

	if (num == 9)
		return g_strdup_printf ("_%i. %s", 0, text);

	if (num < 36 && lettersok)
		return g_strdup_printf ("_%c. %s", 'a' + num - 10, text);

	return NULL;
}

/**
 * gul_string_strip_uline_accel: strip the accelerator underscore out of a
 * string the way gtk_label_parse_uline would.  Every "__" before the
 * accelerator collapses to a single '_'; the first lone '_' is dropped and
 * everything after it is copied unchanged (only one accel per string).
 *
 * Returns NULL when text is NULL.  Otherwise the result is newly allocated
 * and the caller is responsible for freeing it.
 */
gchar *
gul_string_strip_uline_accel (const gchar *text)
{
	GString *out;
	const gchar *u, *cur;

	if (text == NULL) return NULL;

	out = g_string_new (NULL);
	cur = text;
	while ((u = strchr (cur, '_')) != NULL)
	{
		if (u[1] == '_')
		{
			/* __ in the string is equal to _ in the output
			 * so include the first _ in the output, skip over
			 * the second _ and continue scanning.
			 * (A length-based append is used instead of a
			 * "%.*s" format: the precision argument must be an
			 * int, while a pointer difference is a ptrdiff_t.) */
			g_string_append_len (out, cur, u - cur + 1);
			cur = u + 2;
		}
		else
		{
			/* copy everything before the _ and skip over it */
			g_string_append_len (out, cur, u - cur);
			cur = u + 1;
			/* only one accel per string, so we are done now */
			break;
		}
	}

	/* attach rest of string (possibly empty) */
	g_string_append (out, cur);

	/* release the GString wrapper but hand the character data over
	 * to the caller */
	return g_string_free (out, FALSE);
}

/**
 * gul_string_shorten: returns a newly allocated shortened version of str.
 * the new string will be no longer than target_length characters (UTF-8
 * characters, not bytes), and will be of the form
 * "http://blahblah<ellipsis>blahblah.html": the beginning and the end of
 * str joined by a single ellipsis character.
 *
 * Returns NULL when str is NULL.  When str already fits, a plain copy is
 * returned.  When target_length is smaller than 1 and str does not fit,
 * an empty string is returned, since not even the ellipsis fits.
 */
gchar *
gul_string_shorten (const gchar *str, gint target_length)
{
	GString *new_str;
	const gchar *head_end, *tail_start;
	glong actual_length;
	gint first_length, second_length;

	if (!str) return NULL;

	actual_length = g_utf8_strlen (str, -1);

	/* if the string is already short enough return a new copy */
	if (actual_length <= target_length)
		return g_strdup (str);

	/* no room for anything: without this guard first_length below
	 * would become negative and the offsets would point before str */
	if (target_length < 1)
		return g_strdup ("");

	/* calc lengths to take from beginning and ending of str
	 * (1 == ellipsis is only one character) */
	second_length = (target_length - 1) / 2;
	first_length = target_length - 1 - second_length;

	head_end = g_utf8_offset_to_pointer (str, first_length);
	tail_start = g_utf8_offset_to_pointer (str,
					       actual_length - second_length);

	/* create string; target_length is only a size hint here, as the
	 * GString grows as needed */
	new_str = g_string_sized_new (target_length);
	g_string_append_len (new_str, str, head_end - str);
	g_string_append (new_str, ELLIPSIS);
	g_string_append (new_str, tail_start);

	/* keep the character data, drop the GString wrapper */
	return g_string_free (new_str, FALSE);
}

/**
 * gul_string_ascii_strcasestr: test if a string b is a substring of string
 * a, independent of case (ascii letters only).
 *
 * Returns a pointer into a at the start of the first match, or NULL when
 * there is no match or when either argument is NULL.  Nothing is allocated
 * for the caller.
 */
const gchar *
gul_string_ascii_strcasestr (const gchar *a, const gchar *b)
{
	gchar *down_a;
	gchar *down_b;
	const gchar *hit;
	const gchar *result = NULL;

	if (a == NULL || b == NULL) return NULL;

	/* g_ascii_strdown does not work in place: it returns a newly
	 * allocated lower-cased copy, which is what must be searched */
	down_a = g_ascii_strdown (a, -1);
	down_b = g_ascii_strdown (b, -1);

	/* compare */
	hit = strstr (down_a, down_b);
	if (hit != NULL)
	{
		/* the lower-cased copy has the same byte layout as a, so
		 * the offset carries over; compute it before freeing */
		result = a + (hit - down_a);
	}

	/* free allocated strings */
	g_free (down_a);
	g_free (down_b);

	return result;
}

/**
 * gul_string_strdup_replace: returns a new string (which must be freed
 * later) identical to str, but with all occurrences of a replaced by b.
 * if any of the given strings are null, or if a is empty, null is
 * returned. */
gchar *
gul_string_strdup_replace (const gchar *str, const gchar *a, const gchar *b)
{
	gchar *new_str, *start_str, *end_str, *dest_str;
	gint str_len, a_len, b_len, new_str_len;

	/* return if any of the strings are null, or if a is empty */
	if (!str || !a || !b || a[0] == '\0') return NULL;

	/* get the lengths of the user-supplied strings */
	str_len = new_str_len = strlen (str);
	a_len = strlen (a);
	b_len = strlen (b);

	/* find the length of the new string */
	start_str = (gchar *) str;
	while ((start_str = strstr (start_str, a)))
	{
		new_str_len -= a_len;
		new_str_len += b_len;
		start_str = &(start_str[a_len]);
	}

	/* allocate memory for the new string */
	new_str = g_new0 (gchar, new_str_len + 1);
	g_return_val_if_fail (new_str, NULL);

	/* walk through the string, replacing a with b */
	start_str = (gchar *) str;
	dest_str = new_str;
	while ((end_str = strstr (start_str, a)))
	{
		/* copy the stuff preceding a */
		memcpy (dest_str, start_str, end_str - start_str);
		dest_str = &(dest_str[end_str - start_str]);
		/* copy b in the place of a */
		memcpy (dest_str, b, b_len);
		dest_str = &(dest_str[b_len]);
		/* move to the end of the occurrence of a */
		start_str = &(end_str[a_len]);
	}

	/* copy the last part of the string and terminate it */
	memcpy (dest_str, start_str, strlen (start_str));
	dest_str[strlen (start_str)] = '\0';

	return new_str;
}

/**
 * gul_string_store_time_in_string: writes the date of t into str using the
 * "%x" format of g_date_strftime.  When t is zero or negative, str is set
 * to the empty string instead.
 * NOTE: str must be at least 256 chars long
 */
void
gul_string_store_time_in_string (GTime t, gchar *str)
{
	GDate *when;
	gsize written;

	/* no usable time stamp: hand back an empty string */
	if (t <= 0)
	{
		str[0] = '\0';
		return;
	}

	when = g_date_new ();
#if GLIB_CHECK_VERSION (2,9,0)
	g_date_set_time_t (when, (time_t)t);
#else
	g_date_set_time (when, t);
#endif
	/* the value returned by g_date_strftime is used as the position
	 * of the terminator */
	written = g_date_strftime (str, 255, "%x", when);
	str[written] = '\0';

	g_date_free (when);
}

/**
 * gul_string_strip_newline: meant to strip newlines from an utf8 string.
 * The stripping is not implemented (see FIXME below): the function
 * currently returns an unmodified, newly allocated copy of c, which the
 * caller must free.
 */
gchar *
gul_string_strip_newline (const gchar *c)
{
	gchar *copy;

	/* FIXME ??? */
	copy = g_strdup (c);

	return copy;
}

/**
 * gul_string_remove_outside_whitespace: removes leading and trailing
 * whitespace.  returns a newly-allocated string.
 */
gchar *
gul_string_remove_outside_whitespace (const gchar *str)
{
	gint start, end;

	for (start = 0; str[start] != '\0' && (str[start] == ' ' ||
		str[start] == '\n' || str[start] == '\t' ||
		str[start] == '\r'); start++);
	for (end = strlen (str) - 1; end > start && (str[end] == ' ' ||
		str[end] == '\n' || str[end] == '\t' ||
		str[end] == '\r'); end--);
	return g_strndup (&(str[start]), end + 1 - start);
}

/**
 * gul_string_expand_home_dir: replaces a leading ~ in str by the value of
 * g_get_home_dir ().  The ~some_user syntax is not handled: whatever
 * follows the ~ is appended to the home directory as is.  Strings that do
 * not start with ~ are simply copied.  returns a newly-allocated string,
 * or NULL when str is NULL.
 */
gchar *
gul_string_expand_home_dir (const gchar *str)
{
        if (str == NULL) {
                return NULL;
        }

        if (str[0] == '~') {
                /* everything after the tilde is kept verbatim */
                const gchar *rest = str + 1;

                return g_strconcat (g_get_home_dir (), rest, NULL);
        }

        return g_strdup (str);
}

/**
 * gul_strcasecmp: NULL-tolerant wrapper around g_ascii_strcasecmp.  A NULL
 * argument is compared as if it were the empty string.
 */
int
gul_strcasecmp (const char *string_a, const char *string_b)
{
        const char *lhs = (string_a != NULL) ? string_a : "";
        const char *rhs = (string_b != NULL) ? string_b : "";

        return g_ascii_strcasecmp (lhs, rhs);
}

/**
 * gul_strcasecmp_compare_func: gul_strcasecmp with untyped (gconstpointer)
 * arguments, so that it can be passed where a generic comparison callback
 * is expected.  Both arguments are treated as C strings.
 */
int
gul_strcasecmp_compare_func (gconstpointer string_a, gconstpointer string_b)
{
        const char *lhs = string_a;
        const char *rhs = string_b;

        return gul_strcasecmp (lhs, rhs);
}

/**
 * like strpbrk but ignores chars that are escaped, i.e. preceded by an
 * odd number of backslashes.  An even number of backslashes (including
 * none) means the backslashes escape each other and the char counts.
 *
 * Returns a pointer to the first unescaped char of s that is in accept,
 * or NULL if there is none.
 */
static char *
gul_strpbrk_unescaped (const char *s, const char *accept)
{
	const char *from = s;
	char *hit;

	while ((hit = strpbrk (from, accept)) != NULL)
	{
		const char *run = hit;

		/* walk back over the backslashes right before the match,
		 * never stepping before the start of the current search */
		while (run > from && run[-1] == '\\')
		{
			run--;
		}

		if ((hit - run) % 2 == 0)
		{
			return hit;
		}

		/* escaped match: resume the search just after it */
		from = hit + 1;
	}

	return NULL;
}

/**
 * like strstr but supports quoting, ignoring matches inside quoted text.
 * A quoted region starts at an unescaped char from quotes and ends at the
 * next unescaped char from quotes.  Returns NULL if needle is not found
 * outside quoted text, or if a quoted region that precedes the match is
 * never closed.
 */
static char *
gul_strstr_with_quotes (const char *haystack, const char *needle,
			const char *quotes)
{
	const char *scan = haystack;

	for (;;)
	{
		gchar *opening = gul_strpbrk_unescaped (scan, quotes);
		gchar *found = strstr (scan, needle);
		gchar *closing;

		/* no match at all, no quotes left, or the match comes
		 * before the next quoted region: the answer is final */
		if (found == NULL || opening == NULL || found < opening)
		{
			return found;
		}

		closing = gul_strpbrk_unescaped (opening + 1, quotes);
		if (closing == NULL)
		{
			return NULL;
		}

		/* start over right behind the quoted region */
		scan = closing + 1;
	}
}

/**
 * like strpbrk but supports quoting, ignoring matches inside quoted text.
 * A quoted region starts at an unescaped char from quotes and ends at the
 * next unescaped char from quotes.  Returns NULL if no char from needles
 * is found outside quoted text, or if a quoted region that precedes the
 * match is never closed.
 */
static char *
gul_strpbrk_with_quotes (const char *haystack, const char *needles,
			const char *quotes)
{
	const char *scan = haystack;

	for (;;)
	{
		gchar *opening = gul_strpbrk_unescaped (scan, quotes);
		gchar *found = strpbrk (scan, needles);
		gchar *closing;

		/* no match at all, no quotes left, or the match comes
		 * before the next quoted region: the answer is final */
		if (found == NULL || opening == NULL || found < opening)
		{
			return found;
		}

		closing = gul_strpbrk_unescaped (opening + 1, quotes);
		if (closing == NULL)
		{
			return NULL;
		}

		/* start over right behind the quoted region */
		scan = closing + 1;
	}
}

/**
 * Like g_strsplit, but does not split tokens between quotes. Ignores
 * quotes preceded by '\'.
 *
 * string is cut at every occurrence of delimiter that lies outside quoted
 * text, producing at most max_tokens pieces (no limit if max_tokens is
 * smaller than 1); the last piece holds the unsplit remainder.  When
 * quotes is NULL the call is forwarded to g_strsplit.  An empty string
 * yields an array without tokens.
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings, or NULL if string or delimiter is NULL or delimiter is empty.
 */
gchar **
gul_strsplit_with_quotes (const gchar *string,
			  const gchar *delimiter,
			  gint max_tokens,
			  const gchar *quotes)
{
	GSList *pieces = NULL, *node;
	gchar **result, *cut;
	guint count = 0;
	const gchar *rest;
	gsize delimiter_len;

	g_return_val_if_fail (string != NULL, NULL);
	g_return_val_if_fail (delimiter != NULL, NULL);
	g_return_val_if_fail (delimiter[0] != '\0', NULL);

	if (quotes == NULL)
	{
		return g_strsplit (string, delimiter, max_tokens);
	}

	if (max_tokens < 1)
	{
		max_tokens = G_MAXINT;
	}

	delimiter_len = strlen (delimiter);
	rest = string;

	/* cut off one piece per unquoted delimiter until either none is
	 * left or the token budget only leaves room for the remainder */
	for (cut = gul_strstr_with_quotes (rest, delimiter, quotes);
	     cut != NULL && --max_tokens != 0;
	     cut = gul_strstr_with_quotes (rest, delimiter, quotes))
	{
		pieces = g_slist_prepend (pieces,
					  g_strndup (rest, cut - rest));
		count++;
		rest = cut + delimiter_len;
	}

	/* whatever is left becomes the final piece, unless the input was
	 * empty to begin with */
	if (*string)
	{
		pieces = g_slist_prepend (pieces, g_strdup (rest));
		count++;
	}

	/* the list was built back to front, so fill the array from its
	 * end towards its start */
	result = g_new (gchar*, count + 1);
	result[count] = NULL;
	for (node = pieces; node != NULL; node = node->next)
	{
		result[--count] = node->data;
	}

	g_slist_free (pieces);

	return result;
}

/**
 * like gul_strsplit_with_quotes, but matches any char in 'delimiters' as
 * delimiter and does not return empty tokens.
 *
 * string is cut at every char from delimiters that lies outside quoted
 * text; empty pieces (adjacent, leading or trailing delimiters) are
 * dropped.  max_tokens limits the number of cuts (no limit if smaller
 * than 1); once it is reached, the rest of the string is returned as the
 * last token without further splitting.  A NULL quotes is treated as "no
 * quote chars".
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings (possibly holding no tokens at all), or NULL if string or
 * delimiters is NULL or delimiters is empty.
 */
gchar **
gul_strsplit_multiple_delimiters_with_quotes (const gchar *string,
					      const gchar *delimiters,
					      gint max_tokens,
					      const gchar *quotes)
{
	GSList *string_list = NULL, *slist;
	gchar **str_array, *s;
	guint n = 0;
	const gchar *remainder;
	const gsize delimiter_len = 1; /* only chars */

	g_return_val_if_fail (string != NULL, NULL);
	g_return_val_if_fail (delimiters != NULL, NULL);
	g_return_val_if_fail (delimiters[0] != '\0', NULL);

	if (quotes == NULL)
	{
		quotes = "";
	}

	if (max_tokens < 1)
	{
		max_tokens = G_MAXINT;
	}

	remainder = string;
	s = gul_strpbrk_with_quotes (remainder, delimiters, quotes);
	while (s != NULL && --max_tokens != 0)
	{
		gsize len = s - remainder;

		if (len > 0) /* ignore empty strings */
		{
			string_list = g_slist_prepend (string_list,
						       g_strndup (remainder, len));
			n++;
		}
		remainder = s + delimiter_len;
		s = gul_strpbrk_with_quotes (remainder, delimiters, quotes);
	}

	/* the tail is a token like any other: keep it only if it is not
	 * empty, so that a trailing delimiter (or a string made of
	 * delimiters only) does not produce an empty token */
	if (*remainder != '\0')
	{
		string_list = g_slist_prepend (string_list, g_strdup (remainder));
		n++;
	}

	/* the list was built back to front, so fill the array from its
	 * end towards its start */
	str_array = g_new (gchar*, n + 1);
	str_array[n] = NULL;
	for (slist = string_list; slist != NULL; slist = slist->next)
	{
		str_array[--n] = slist->data;
	}

	g_slist_free (string_list);

	return str_array;
}