/*
*******************************************************************************
* Copyright (C) 2007-2007, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
*/

#include "rptp_map.h"
#include "uvector.h"
/*
 * NOTE(review): the names of the four system headers below were missing from
 * the text under review; they are reconstructed from the library functions
 * this file calls (fopen/fprintf, free/strtoul, strdup/strcmp, toupper).
 * Confirm against the repository copy.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

/* Column order of one comma separated line of the history file. */
typedef enum {
    ITEM_UTS22_NAME,
    ITEM_RMAP_NAME,
    ITEM_TMAP_NAME,
    ITEM_COUNT
} LineTypes;

/*
 * Shortest map file name that generateFileName() can read safely: it looks at
 * the characters at indexes 10 and 14..16 of the name.
 */
static const size_t MAP_NAME_MIN_LENGTH = 17;

/*
 * One history entry: the generated (UTS #22 style) file name together with the
 * RMAP and TMAP names it was generated from. Each instance owns private
 * copies of all three strings.
 */
struct FilenameMapping {
    char *uts22Name;
    char *rmapName;
    char *tmapName;

    FilenameMapping(const char *uts22, const char *rmap, const char *tmap)
        : uts22Name(strdup(uts22)),
          rmapName(strdup(rmap)),
          tmapName(strdup(tmap))
    {
    }

    ~FilenameMapping() {
        free(uts22Name);
        free(rmapName);
        free(tmapName);
    }

private:
    /* Not copyable: a member-wise copy would free the same strings twice. */
    FilenameMapping(const FilenameMapping &other);
    FilenameMapping &operator=(const FilenameMapping &other);
};

U_CDECL_BEGIN

/* Element deleter handed to the UVectors that own FilenameMapping objects. */
static void U_EXPORT2
freeFilenameMapping(void *obj) {
    delete (FilenameMapping*)obj;
}

/*
 * Ordering callback on uts22Name.
 * Returns -1, 0 or 1. The strcmp() result is normalized first because
 * narrowing an arbitrary int to int8_t could change its sign or make a
 * non-zero value zero.
 */
static int8_t U_EXPORT2
sortByUTS22(const UHashTok key1, const UHashTok key2) {
    FilenameMapping* item1 = (FilenameMapping*)key1.pointer;
    FilenameMapping* item2 = (FilenameMapping*)key2.pointer;
    int cmp = strcmp(item1->uts22Name, item2->uts22Name);
    return (int8_t)((cmp > 0) - (cmp < 0));
}

/* Equality callback on uts22Name: non-zero when both names are identical. */
static int8_t U_EXPORT2
compareByUTS22(const UHashTok key1, const UHashTok key2) {
    return sortByUTS22(key1, key2) == 0;
}

/* Equality callback: true when both the RMAP and the TMAP names match. */
static UBool U_EXPORT2
compareFilenameMapping(const UHashTok key1, const UHashTok key2) {
    FilenameMapping* item1 = (FilenameMapping*)key1.pointer;
    FilenameMapping* item2 = (FilenameMapping*)key2.pointer;
    return strcmp(item1->rmapName, item2->rmapName) == 0
        && strcmp(item1->tmapName, item2->tmapName) == 0;
}

U_CDECL_END

/*
 * Simple uppercase a string, in place. Returns its argument.
 * Each character goes through unsigned char because toupper() is undefined
 * for negative values other than EOF.
 */
static char *toUpperStr(char *str) {
    char *origStr = str;
    while (*str) {
        *str = (char)toupper((unsigned char)*str);
        str++;
    }
    return origStr;
}

/*
 * Trim off all line endings, in place. Returns its argument.
 * The scan goes down to index 0 so that a line consisting only of a line
 * terminator becomes the empty string and is skipped by the caller instead of
 * being reported as a parse error.
 */
static char *trim(char *str) {
    int32_t lastIdx = (int32_t)strlen(str) - 1;
    while (lastIdx >= 0 && (str[lastIdx] == '\r' || str[lastIdx] == '\n')) {
        str[lastIdx--] = 0;
    }
    return str;
}

/*
 * Split one history line in place into its ITEM_COUNT comma separated fields.
 * Every field except the last must be followed by a comma; the last field is
 * whatever remains. Returns FALSE when a separating comma is missing.
 */
static UBool splitHistoryLine(char *line, char *fields[ITEM_COUNT]) {
    char *fieldStart = line;
    for (int32_t idx = ITEM_UTS22_NAME; idx < ITEM_COUNT - 1; idx++) {
        char *comma = strchr(fieldStart, ',');
        if (comma == NULL) {
            return FALSE;
        }
        *comma = 0;
        fields[idx] = fieldStart;
        fieldStart = comma + 1;
    }
    fields[ITEM_COUNT - 1] = fieldStart;
    return TRUE;
}

/* True when name is long enough for generateFileName() to index into it. */
static UBool hasMapNameSuffix(const char *name) {
    return strlen(name) >= MAP_NAME_MIN_LENGTH;
}

FilenameMappingHistory::FilenameMappingHistory()
    : throwAway(U_ZERO_ERROR),
      sortedByResult(NULL, compareByUTS22, throwAway),
      origFilename(NULL)
{
}

FilenameMappingHistory::~FilenameMappingHistory() {
    /* free(NULL) is a no-op, so no guard is needed. */
    free(origFilename);
}

/*
 * Register key in all three indexes: append it to vect, store vect under the
 * RMAP name in hashByRmap, and insert key into sortedByResult ordered by
 * uts22Name. vect was created with freeFilenameMapping as its deleter.
 */
void FilenameMappingHistory::addItem(UVector *vect, FilenameMapping *key, UErrorCode *status) {
    vect->addElement(key, *status);
    hashByRmap.put(key->rmapName, vect, *status);
    sortedByResult.sortedInsert((void*)key, sortByUTS22, *status);
}

/*
 * Load a history file.
 *
 * Lines that are empty or start with '#' are skipped. Every other line must be
 * "uts22Name,rmapName,tmapName"; the RMAP and TMAP names are uppercased.
 *
 * @param filename Path of the history file; remembered for writeHistoryFile().
 * @param status   Set to U_FILE_ACCESS_ERROR when the file cannot be opened and
 *                 to U_PARSE_ERROR on a malformed or duplicate line. It is
 *                 reset to U_ZERO_ERROR before each line is processed, as in
 *                 the original implementation.
 * @return A new history object, or NULL on error. On error everything
 *         allocated here is released and the file is closed.
 */
FilenameMappingHistory *FilenameMappingHistory::create(const char *filename, UErrorCode *status) {
    FILE *file = fopen(filename, "r");
    if (file == NULL) {
        *status = U_FILE_ACCESS_ERROR;
        return NULL;
    }

    /* Allocate only once the file is open, so the early return leaks nothing. */
    FilenameMappingHistory *retVal = new FilenameMappingHistory();
    retVal->origFilename = strdup(filename);

    int32_t lineNum = 1;
    UBool parseFailed = FALSE;
    char line[1024];
    char *itemStr[ITEM_COUNT];

    while (fgets(line, sizeof(line), file) != NULL) {
        *status = U_ZERO_ERROR;
        trim(line);
        if (line[0] != '#' && line[0] != 0) {
            if (!splitHistoryLine(line, itemStr)) {
                fprintf(stderr, "Parse error for history file on line %d\n", (int)lineNum);
                parseFailed = TRUE;
                break;
            }
            toUpperStr(itemStr[ITEM_RMAP_NAME]);
            toUpperStr(itemStr[ITEM_TMAP_NAME]);
            //fprintf(stderr, "%s,%s,%s\n", itemStr[ITEM_UTS22_NAME], itemStr[ITEM_RMAP_NAME], itemStr[ITEM_TMAP_NAME]);

            FilenameMapping *item = new FilenameMapping(itemStr[ITEM_UTS22_NAME],
                                                        itemStr[ITEM_RMAP_NAME],
                                                        itemStr[ITEM_TMAP_NAME]);
            const UHashElement *elem = retVal->hashByRmap.find(itemStr[ITEM_RMAP_NAME]);
            UBool isNewVect = (UBool)(elem == NULL);
            UBool duplicate = FALSE;
            UVector *vect;

            if (isNewVect) {
                // New mapping
                vect = new UVector(freeFilenameMapping, compareFilenameMapping, *status);
            } else {
                // Mapping conflict for the RPMAP. We will have to be careful about this in the future.
                vect = (UVector*)(elem->value.pointer);
                if (vect->contains(item)) {
                    fprintf(stderr, "Duplicate R?MAP/T?MAP combination in history file on line %d\n", (int)lineNum);
                    duplicate = TRUE;
                }
            }
            if (!duplicate && retVal->sortedByResult.indexOf(item) >= 0) {
                fprintf(stderr, "Duplicate result in history file on line %d\n", (int)lineNum);
                duplicate = TRUE;
            }
            if (duplicate) {
                // Nothing has taken ownership of these yet.
                delete item;
                if (isNewVect) {
                    delete vect;
                }
                parseFailed = TRUE;
                break;
            }
            retVal->addItem(vect, item, status);
        }
        lineNum++;
    }

    fclose(file);
    if (parseFailed) {
        *status = U_PARSE_ERROR;
        delete retVal;
        return NULL;
    }
    return retVal;
}

/*
 * Rewrite the file this history was loaded from: a header comment line
 * followed by one "uts22Name,rmapName,tmapName" line per entry, in
 * sortedByResult order.
 *
 * @param status Set to U_FILE_ACCESS_ERROR when no file name is known or the
 *               file cannot be opened for writing.
 */
void FilenameMappingHistory::writeHistoryFile(UErrorCode *status) {
    FILE *file = (origFilename != NULL) ? fopen(origFilename, "w") : NULL;
    if (file == NULL) {
        *status = U_FILE_ACCESS_ERROR;
        return;
    }
    fprintf(file, "# This file was machine generated by the rptp2ucm tool\n");
    for (int32_t idx = 0; idx < sortedByResult.size(); idx++) {
        FilenameMapping *item = (FilenameMapping *)sortedByResult.elementAt(idx);
        fprintf(file, "%s,%s,%s\n", item->uts22Name, item->rmapName, item->tmapName);
    }
    fclose(file);
}

/*
 * Split a 32-bit value into a Unicode CCSID half and a codepage CCSID half.
 *
 * The low 16 bits are tested first (13488 and 17584 only); otherwise the high
 * 16 bits are tested against the longer list below. The half that is not the
 * Unicode CCSID is returned in *ccsid.
 *
 * @return TRUE when one half is a recognized Unicode CCSID. On FALSE, *ccsid
 *         is 0 and *unicodeCCSID holds the high 16 bits of value.
 */
U_CFUNC UBool
getCCSIDValues(uint32_t value, uint16_t *unicodeCCSID, uint16_t *ccsid) {
    *unicodeCCSID = 0;
    *ccsid = 0;

    /* is this really a Unicode conversion table? - get the CCSID */
    *unicodeCCSID = (uint16_t)(value & 0xffff);
    if (*unicodeCCSID == 13488 || *unicodeCCSID == 17584) {
        *ccsid = (uint16_t)(value >> 16);
        return TRUE;
    }

    *unicodeCCSID = (uint16_t)(value >> 16);
    if (*unicodeCCSID == 13488 /* Unicode 2.0, UTF-16BE with IBM PUA */
        || *unicodeCCSID == 17584 /* Unicode 3.0, UTF-16BE with IBM PUA */
        || *unicodeCCSID == 1200 /* UTF-16BE with IBM PUA */
        || *unicodeCCSID == 1232 /* UTF-32BE with IBM PUA */
        || *unicodeCCSID == 21680 /* Unicode 4.0, UTF-16BE with IBM PUA */
        || *unicodeCCSID == 61956 /* UTF-16BE with Microsoft HKSCS-Big 5 PUA */
    ) {
        *ccsid = (uint16_t)(value & 0xffff);
        return TRUE;
    }
    return FALSE;
}

/* File name suffix for a Unicode CCSID; the empty string when there is none. */
static const char *getUnicodeSuffix(uint16_t unicode) {
    switch (unicode) {
    case 13488: return "_U2"; /* Unicode 2.0 */
    case 17584: return "_U3"; /* Unicode 3.0 */
    case 21680: return "_U4"; /* Unicode 4.0 */
    /*case 25776: return "_U4.1";*/ /* Not used */
    case 61956: return "_MS"; /* Microsoft PUA extensions */
    }
    return "";
}

/*
 * Build "ibm-<ccsid>_<R chars>[_<T chars>]-<year>[<unicode suffix>].ucm".
 *
 * The CCSIDs come from the leading hexadecimal number of rpmapFilename. The
 * "_<T chars>" part and the Unicode suffix are only added when useOldFormat is
 * FALSE.
 *
 * Precondition: rpmapFilename (and tpmapFilename when useOldFormat is FALSE)
 * has at least MAP_NAME_MIN_LENGTH characters; the caller checks this.
 *
 * @return A strdup()'d string that the caller must free().
 */
static char *generateFileName(const char *rpmapFilename, const char *tpmapFilename, uint16_t year, UBool useOldFormat) {
    char filename[1024];
    char *s = NULL;
    uint32_t value = strtoul(rpmapFilename, &s, 16);
    uint16_t unicode, ccsid;
    int32_t length;

    /* The result is deliberately ignored: an unrecognized value yields ccsid 0. */
    getCCSIDValues(value, &unicode, &ccsid);

    length = sprintf(filename, "ibm-%u_", ccsid);
    filename[length++] = (char)toupper((unsigned char)rpmapFilename[10]); /* P or X */
    filename[length++] = (char)toupper((unsigned char)rpmapFilename[14]); /* last 3 suffix characters */
    filename[length++] = (char)toupper((unsigned char)rpmapFilename[15]);
    filename[length++] = (char)toupper((unsigned char)rpmapFilename[16]);
    if (!useOldFormat) {
        filename[length++] = '_';
        filename[length++] = (char)toupper((unsigned char)tpmapFilename[10]); /* P or X */
        filename[length++] = (char)toupper((unsigned char)tpmapFilename[14]); /* last 3 suffix characters */
        filename[length++] = (char)toupper((unsigned char)tpmapFilename[15]);
        filename[length++] = (char)toupper((unsigned char)tpmapFilename[16]);
    }
    /* sprintf() also writes the terminating NUL that strcat() relies on. */
    length += sprintf(filename + length, "-%d", year);
    if (!useOldFormat) {
        strcat(filename, getUnicodeSuffix(unicode));
    }
    strcat(filename, ".ucm");
    return strdup(filename);
}

/*
 * Look up, or generate and record, the file name for an RMAP/TMAP pair.
 *
 * Both names are compared case-insensitively (they are uppercased first). A
 * pair that is already in the history returns its recorded name. Otherwise a
 * name is generated: in the old format when the RMAP name is new, in the new
 * format when the RMAP name is known with a different TMAP, or when the old
 * format name is already used by a different pair. The new entry is added to
 * the history.
 *
 * @param rmapFilename RMAP file name.
 * @param tmapFilename TMAP file name.
 * @param year         Year placed into a generated name.
 * @param status       Set to U_ILLEGAL_ARGUMENT_ERROR when a name has to be
 *                     generated and either input is shorter than
 *                     MAP_NAME_MIN_LENGTH characters; also receives any error
 *                     from the underlying containers.
 * @return The file name, owned by this history object; NULL on the argument
 *         error above.
 */
const char *FilenameMappingHistory::getFilename(const char *rmapFilename, const char *tmapFilename, uint16_t year, UErrorCode *status) {
    UVector *vect = NULL;
    UBool isNewVect = FALSE;
    UBool useOldNameFormat = TRUE;
    char *rmapUpper = toUpperStr(strdup(rmapFilename));
    char *tmapUpper = toUpperStr(strdup(tmapFilename));

    const UHashElement *elem = hashByRmap.find(rmapUpper);
    if (elem != NULL) {
        FilenameMapping probe("", rmapUpper, tmapUpper);
        // We already know about this mapping table. Get the old value.
        vect = (UVector*)(elem->value.pointer);
        int32_t knownIdx = vect->indexOf(&probe);
        if (knownIdx >= 0) {
            FilenameMapping *prevItem = (FilenameMapping*)vect->elementAt(knownIdx);
            free(rmapUpper);
            free(tmapUpper);
            return prevItem->uts22Name;
        }
        // else More than one TMAP is available.
        // This RPMAP has multiple choices, and it's new.
        useOldNameFormat = FALSE;
    } else {
        // New mapping table. Store information for future reference.
        vect = new UVector(freeFilenameMapping, compareFilenameMapping, *status);
        isNewVect = TRUE;
    }

    // generateFileName() indexes into both names (a conflict below may force
    // the new format, which reads the TMAP name as well), so reject short
    // names instead of reading past their end.
    if (!hasMapNameSuffix(rmapUpper) || !hasMapNameSuffix(tmapUpper)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        if (isNewVect) {
            delete vect;
        }
        free(rmapUpper);
        free(tmapUpper);
        return NULL;
    }

    // We didn't find this name. Make up a new one.
    char *generated = generateFileName(rmapUpper, tmapUpper, year, useOldNameFormat);
    FilenameMapping *item = new FilenameMapping(generated, rmapUpper, tmapUpper);
    free(generated); // The constructor made its own copy.

    // Double check that we haven't generated this name in the past.
    int32_t usedIdx = sortedByResult.indexOf(item);
    if (usedIdx >= 0) {
        FilenameMapping *foundItem = (FilenameMapping *)sortedByResult.elementAt(usedIdx);
        if (strcmp(foundItem->rmapName, rmapUpper) != 0
            || strcmp(foundItem->tmapName, tmapUpper) != 0)
        {
            // Another Unicode CCSID conflicts with this table, or some other conflict.
            // Replace the name with the new format one; item takes ownership
            // of the fresh string and the old copy is released.
            free(item->uts22Name);
            item->uts22Name = generateFileName(rmapUpper, tmapUpper, year, FALSE);
        }
    }

    addItem(vect, item, status);
    free(rmapUpper);
    free(tmapUpper);
    // item is now owned by vect, so its name stays valid after returning.
    return item->uts22Name;
}