/* sml3_string.c: verschiedene Funktionen fuer Strings */

/* Copyright 2012-2017 Kurt Nienhaus
 *
 * This file is part of libsammel3.
 * libsammel3 is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 2 of the License, or
 * (at your option) any later version.
 * libsammel3 is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with libsammel3.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>
#include <errno.h>
#include "config.h"
#include "sml3_fehler.h"
#include "sml3_gummi.h"
#include "sml3_util.h"
#include "sml3_dynfeld.h"
#include "sml3_regex.h"
#include "sml3_string.h"

/* Prototypes of the functions defined in this file */
char * SML3_string_toks(const char **, const char *, struct SML3_gummi *);
char ** SML3_string_split(const char *, int, const char *, int);
char ** SML3_string_twosplit(const char *, int, const char *, int, const char *, int);
char * SML3_string_join(char **, const char *, int, int (*)(const char *, struct SML3_gummi *));


/* SML3_string_toks [thread-safe]:
 * returns the next substring of a string; empty substrings are returned too
 * 1st arg: address of the string pointer, is updated on every call
 *          (set to NULL once the last substring has been returned)
 * 2nd arg: set of characters, each of which acts as a separator
 * 3rd arg: receives the substring
 * return: pointer to the contents of the 3rd arg
 *         or NULL = end (also returned for invalid arguments)
 * Example:
 *  struct SML3_gummi gm = SML3_GUM_INITIALIZER;
 *  const char *saveptr, *string = "a:b;c:;d";
 *  char *tstring;
 *  saveptr = string;
 *  while ((tstring = SML3_string_toks(&saveptr, ":;", &gm)) != NULL) {
 *    printf("<%s> ", tstring);
 *  }
 *  printf("\n");
 *  SML3_gumdest(&gm);
 *  ==> output: <a> <b> <c> <> <d>
 */
char *
SML3_string_toks(const char **string, const char *delim, struct SML3_gummi *gret)
{
  const char *start, *stop;
  size_t toklen;

  /* nothing to do without a string address, a result buffer or separators */
  if (string == NULL || gret == NULL) { return NULL; }
  if (delim == NULL || *delim == '\0') { return NULL; }

  /* a NULL position marks that the previous call returned the last substring */
  start = *string;
  if (start == NULL) { return NULL; }

  /* the substring ends at the next separator or at the end of the string */
  stop = strpbrk(start, delim);
  if (stop == NULL) { stop = start + strlen(start); }

  toklen = (size_t)(stop - start);
  if (toklen > 0) { SML3_gumncpy(gret, 0, start, toklen); }
  /* copying "" at offset toklen also covers the empty-substring case */
  SML3_gumcpy(gret, toklen, "");

  /* continue behind the separator, or stop if the string end was reached */
  *string = (*stop == '\0') ? NULL : stop + 1;

  return SML3_gumgetval(gret);
} /* end SML3_string_toks */


/* SML3_string_split [thread-safe]:
 * splits a string into a field
 * 1st arg: string
 * 2nd arg: >0: maximum number of field elements, empty ones included
 *           0: all, empty ones are skipped
 *          -1: all, empty ones included
 * 3rd arg: regular expression used as separator
 * 4th arg: SML3_REXFLAG_* flags: SML3_REXFLAG_Q, SML3_REXFLAG_I, SML3_REXFLAG_N
 * return: NULL-terminated allocated field
 *         or NULL = error
 * SML3-errno value: EINVAL  = invalid argument
 *                   ENOMEM  = allocation error
 *                   ENOEXEC = error compiling the regular separator expression
 *
 * Example:
 *   char **feld, **pptr;
 *   feld = SML3_string_split("SML3_string_split", -1, "[si]", SML3_REXFLAG_I);
 *   if (feld == NULL) { fprintf(stderr, "%s\n", SML3_fehlermsg()); exit(1); }
 *   for (pptr = feld; *pptr != NULL; pptr++) { printf("<%s> ", *pptr); }
 *   printf("\n");
 *   SML3_freefeld(feld, NULL);
 *  ==> output: <> <ML3_> <tr> <ng_> <pl> <t>
 */
char **
SML3_string_split(const char *string, int anzahl, const char *delim, int flag)
{
  struct SML3_dynfeld dft;
  struct SML3_rexpatt *rpatt;
  char *search_pos, *match, *elem;
  size_t elem_len;
  int mitleer;

  if (string == NULL || delim == NULL || *delim == '\0') {
    SML3_fehlernew(EINVAL, "%s", SML3_strerror(EINVAL));
    return NULL;
  }

  /* only anzahl == 0 drops empty elements; negative values mean "no limit" */
  mitleer = (anzahl != 0);
  if (anzahl < 0) { anzahl = 0; }

  rpatt = SML3_rexcomp(delim, flag);
  if (rpatt == NULL) { SML3_fehleradd(NULL); return NULL; }

  SML3_dynfeld_init(&dft, sizeof(char *), 16);

  /* 'string' always marks the start of the element currently being collected */
  search_pos = (char *)string;
  for (;;) {
    /* a positive limit reserves its last slot for the remainder added below */
    if (--anzahl == 0) { break; }

    /* presumably returns the start of the match and stores the position
     * behind the match in the 2nd argument - confirm against sml3_regex.h */
    match = SML3_rexsuche(search_pos, &search_pos, rpatt, SML3_REXFLAG_COMP, NULL);
    if (match == NULL) { break; }

    elem_len = (size_t)(match - string);
    if (elem_len > 0 || mitleer) {
      elem = SML3_malloc(elem_len + 1);
      if (elem_len > 0) { memcpy(elem, string, elem_len); }
      elem[elem_len] = '\0';
      SML3_dynfeld_add(&dft, &elem);
    }

    string = search_pos;
    if (match == search_pos) {
      /* match start equals the updated position: step one character ahead
       * for the next search, unless the end of the string is reached */
      if (*search_pos == '\0') { break; }
      search_pos++;
    }

    /* in "no limit" mode keep the counter from running further negative */
    if (anzahl < 0) { anzahl = 0; }
  }
  SML3_rexcompfree(rpatt);

  /* the rest behind the last separator becomes the final element */
  if (mitleer || *string != '\0') {
    elem = SML3_strdup(string);
    SML3_dynfeld_add(&dft, &elem);
  }

  return SML3_dynfeld_get(&dft, NULL);
} /* end SML3_string_split */


/* SML3_string_twosplit [thread-safe]:
 * splits a string into a field, taking each part enclosed between 2 separators
 * 1st arg: string
 * 2nd arg: >0: maximum number of field elements, empty ones included
 *           0: all, empty ones are skipped
 *          -1: all, empty ones included
 * 3rd arg: regular expression for the start separator
 * 4th arg: SML3_REXFLAG_* flags: SML3_REXFLAG_Q, SML3_REXFLAG_I, SML3_REXFLAG_N
 * 5th arg: regular expression for the end separator
 * 6th arg: SML3_REXFLAG_* flags: SML3_REXFLAG_Q, SML3_REXFLAG_I, SML3_REXFLAG_N
 * return: NULL-terminated allocated field
 *         or NULL = error
 * SML3-errno value: EINVAL  = invalid argument
 *                   ENOMEM  = allocation error
 *                   ENOEXEC = error compiling a regular separator expression
 *
 * Example:
 *   char **feld, **pptr;
 *   feld = SML3_string_twosplit("eins <zwei drei> vier <fuenf> sechs", -1, "<", 0, ">", 0);
 *   if (feld == NULL) { fprintf(stderr, "%s\n", SML3_fehlermsg()); exit(1); }
 *   for (pptr = feld; *pptr != NULL; pptr++) { printf("<%s> ", *pptr); }
 *   printf("\n");
 *   SML3_freefeld(feld, NULL);
 *  ==> output: <zwei drei> <fuenf>
 */
char **
SML3_string_twosplit(const char *string, int anzahl, const char *delim1, int flag1, const char *delim2, int flag2)
{
  struct SML3_rexpatt *rpatt1, *rpatt2;
  struct SML3_dynfeld dft;
  char *pt0, *pt1, *pt2;
  size_t slen, stored = 0;
  int mitleer;

  if (string == NULL || delim1 == NULL || *delim1 == '\0' || delim2 == NULL || *delim2 == '\0') {
    SML3_fehlernew(EINVAL, "%s", SML3_strerror(EINVAL));
    return NULL;
  }

  /* only anzahl == 0 drops empty elements; from here on 0 means "no limit" */
  mitleer = (anzahl != 0);
  if (anzahl < 0) { anzahl = 0; }

  rpatt1 = SML3_rexcomp(delim1, flag1);
  if (rpatt1 == NULL) { SML3_fehleradd(NULL); return NULL; }
  rpatt2 = SML3_rexcomp(delim2, flag2);
  if (rpatt2 == NULL) { SML3_fehleradd(NULL); SML3_rexcompfree(rpatt1); return NULL; }

  SML3_dynfeld_init(&dft, sizeof(char *), 16);

  /* No remainder element is appended after the loop (unlike in
   * SML3_string_split), so the loop itself has to be allowed to store up to
   * anzahl elements; the limit is therefore checked against the number of
   * elements actually stored. */
  pt0 = (char *)string;
  while ((anzahl == 0 || stored < (size_t)anzahl)
         && (pt1 = SML3_rexsuche(pt0, &pt0, rpatt1, SML3_REXFLAG_COMP, NULL)) != NULL) {
    /* the element starts behind the start separator;
     * presumably SML3_rexsuche returns the start of the match and stores the
     * position behind the match in its 2nd argument - confirm in sml3_regex.h */
    string = pt0;
    if (pt1 == pt0) {
      /* match start equals the updated position: step one character ahead */
      if (*pt0 == '\0') { break; }
      pt0++;
    }

    /* without an end separator there is no further element */
    pt1 = SML3_rexsuche(pt0, &pt0, rpatt2, SML3_REXFLAG_COMP, NULL);
    if (pt1 == NULL) { break; }

    slen = (size_t)(pt1 - string);
    if (slen > 0 || mitleer) {
      pt2 = SML3_malloc(slen + 1);
      if (slen > 0) { memcpy(pt2, string, slen); }
      pt2[slen] = '\0';
      SML3_dynfeld_add(&dft, &pt2);
      stored++;
    }

    string = pt0;
    if (pt1 == pt0) {
      if (*pt0 == '\0') { break; }
      pt0++;
    }
  }
  SML3_rexcompfree(rpatt1);
  SML3_rexcompfree(rpatt2);

  return SML3_dynfeld_get(&dft, NULL);
} /* end SML3_string_twosplit */


/* SML3_string_join [without callback function: thread-safe]:
 * joins the elements of a field into one string
 * 1st arg: NULL-terminated field
 * 2nd arg: separator to put between field elements, or NULL = none
 *          (an empty separator is treated like NULL)
 * 3rd arg: bit value:
 *          bit 0 (value 1): also put the separator (2nd arg) at start and end
 *          bit 1 (value 2): skip empty field elements
 * 4th arg: function to modify a field element before it is inserted
 *            1st parameter: field element
 *            2nd parameter: receives the modified field element
 *            return: 0 = skip field element, 1 = insert modified field element
 *          or NULL = no function
 * return: allocated string
 */
char *
SML3_string_join(char **feld, const char *delim, int bitflag, int (*func)(const char *, struct SML3_gummi *))
{
  struct SML3_gummi gret = SML3_GUM_INITIALIZER, gfunk = SML3_GUM_INITIALIZER;
  const int wrap = ((bitflag & 1) != 0);        /* separator at start and end */
  const int skip_empty = ((bitflag & 2) != 0);  /* drop empty elements */
  size_t outpos = 0;
  char **cur;
  const char *elem, *sep;
  int first = 1;

  if (feld == NULL) { return SML3_strdup(""); }
  if (delim != NULL && *delim == '\0') { delim = NULL; }

  for (cur = feld; *cur != NULL; cur++) {
    elem = *cur;

    /* the emptiness test looks at the original element, before the callback */
    if (skip_empty && *elem == '\0') { continue; }

    if (func != NULL) {
      /* hand the callback an emptied buffer, then take over its result */
      SML3_gumcpy(&gfunk, 0, "");
      if (func(elem, &gfunk) == 0) { continue; }
      elem = SML3_gumgetval(&gfunk);
    }

    /* a separator precedes every element except the first one,
     * and the first one as well if bit 0 is set */
    sep = "";
    if (delim != NULL && (!first || wrap)) { sep = delim; }

    outpos += SML3_gumprintf(&gret, outpos, "%s%s", sep, elem);
    first = 0;
  }

  /* closing separator, only if at least one element was inserted */
  if (delim != NULL && !first && wrap) {
    outpos += SML3_gumprintf(&gret, outpos, "%s", delim);
  }

  SML3_gumdest(&gfunk);
  /* NOTE(review): if no element was inserted, gret has never been written;
   * the result then depends on what SML3_gumswapstr returns for an untouched
   * buffer - confirm against sml3_gummi.h */
  return SML3_gumswapstr(&gret, NULL, 0);
} /* end SML3_string_join */
