1 /* ifiction.c common babel interface for processing ifiction metadata
\r
2 * (c) 2006 By L. Ross Raszewski
\r
4 * This code is freely usable for all purposes.
\r
6 * This work is licensed under the Creative Commons Attribution 2.5 License.
\r
7 * To view a copy of this license, visit
\r
8 * http://creativecommons.org/licenses/by/2.5/ or send a letter to
\r
10 * 543 Howard Street, 5th Floor,
\r
11 * San Francisco, California, 94105, USA.
\r
13 * This file depends on treaty.h
\r
15 * This file contains common routines for handling ifiction metadata strings
\r
17 * int32 ifiction_get_IFID(char *metadata, char *output, int32 output_extent)
\r
18 * does what the babel treaty function GET_STORY_FILE_IFID_SEL would do for ifiction
\r
20 * void ifiction_parse(char *md, IFCloseTag close_tag, void *close_ctx,
\r
21 * IFErrorHandler error_handler, void *error_ctx)
\r
22 * parses the given iFiction metadata. close_tag(struct XMLTag *xtg, close_ctx)
\r
23 * is called for each tag as it is closed, error_handler(char *error, error_ctx)
\r
24 * is called each time a structural or logical error is found in the iFiction
\r
25 * This is a very simple XML parser, and probably not as good as any "real"
\r
26 * XML parser. Its only two benefits are that (1) it's really small, and (2)
\r
27 * it strictly checks the ifiction record against the Treaty of Babel
\r
32 #include "ifiction.h"
\r
/* Allocation helper defined outside this file.  It is called in this file as
 * my_malloc(byte_count, description_string); no NULL check on its result is
 * visible in the lines shown here. */
43 void *my_malloc(int, char *);
\r
/* List of format names defined elsewhere.  The loops in this file walk it
 * until a NULL entry, and the index of the matching entry is what gets stored
 * as the story's format. */
45 extern char *format_registry[];
\r
/* The three bytes E2 80 A8 are the UTF-8 encoding of U+2028 LINE SEPARATOR;
 * getln counts this sequence as a line break in addition to '\n'. */
45 static char utfeol[3] = { 0xe2, 0x80, 0xa8 };
\r
/* getln: advance the scan cursor lnlst up to endp, incrementing the line
 * counter llp for every line break passed.  lnlst and llp are not declared in
 * the lines shown here, and neither is the return statement; the callers use
 * the result as a line number in messages, so presumably llp is returned.
 * NOTE(review): memcmp always inspects 3 bytes at lnlst, so near the end of
 * the text it can look up to 2 bytes beyond endp -- confirm the buffer allows
 * that. */
46 static int32 getln(char *endp)
\r
48 for(;lnlst<endp;lnlst++) if (*lnlst=='\n' || memcmp(lnlst,utfeol,3)==0) llp++;
\r
/* ifiction_get_first_IFID: copy the text of the first <ifid> element found in
 * metadata into output as a NUL-terminated string.
 *
 * Returns NO_REPLY_RV when no "<ifid>" or no following "</ifid>" is found,
 * INVALID_USAGE_RV when output_extent cannot hold the text plus its NUL, and
 * otherwise the offset in metadata just past the closing tag (7 is the length
 * of "</ifid>"), so a caller can resume scanning from there.
 *
 * NOTE(review): in the lines shown, ifid_begin still points at the opening
 * "<ifid>" when the copy is made; presumably a line not shown here advances it
 * past the tag -- confirm. */
53 static int32 ifiction_get_first_IFID(char *metadata, char *output, int32 output_extent)
\r
55 char *ifid_begin, *ifid_end;
\r
57 ifid_begin=strstr(metadata,"<ifid>");
\r
58 if (!ifid_begin) return NO_REPLY_RV;
\r
61 ifid_end=strstr(ifid_begin,"</ifid>");
\r
62 if (!ifid_end) return NO_REPLY_RV;
\r
/* "<=" rather than "<" leaves room for the terminating NUL written below. */
63 if (output_extent<=(ifid_end-ifid_begin)) return INVALID_USAGE_RV;
\r
65 memcpy(output,ifid_begin,ifid_end-ifid_begin);
\r
67 output[ifid_end-ifid_begin]=0;
\r
69 return ifid_end-metadata+7;
\r
/* ifiction_get_IFID: collect IFIDs from metadata into output by calling
 * ifiction_get_first_IFID repeatedly.  The loop header, the separator write
 * and the return statement are not among the lines shown here; the final
 * visible line replaces a trailing ',' with a NUL, which suggests the values
 * are comma-separated.
 *
 * NOTE(review): a non-positive result from the helper only breaks out of the
 * loop, so whether an error code such as INVALID_USAGE_RV reaches the caller
 * cannot be seen from here.
 * NOTE(review): if the last line runs when nothing has been appended, output-1
 * addresses the byte before the caller's buffer -- confirm this is guarded. */
73 int32 ifiction_get_IFID(char *metadata, char *output, int32 output_extent)
\r
/* k is the resume offset returned by the helper; <= 0 means stop. */
79 if ((k=ifiction_get_first_IFID(metadata,output,output_extent)) <= 0) break;
\r
/* Account for the text just written plus one more byte, then move output to
 * the end of that text. */
82 output_extent-=strlen(output)+1;
\r
83 output+=strlen(output);
\r
87 if (*(output-1)==',') *(output-1)=0;
\r
/* Names of tags that may not contain other tags: ifiction_validate_tag reports
 * an error for any tag whose parent is listed here.  The validator walks the
 * list until a NULL entry; only the first entry is visible in this view. */
92 static char *leaf_tags[] = { "ifid",
\r
/* Names of tags that may occur at most once inside a given parent.  The index
 * of an entry in this list is also the index used in the parent's occurences
 * array.  Only the first entry is visible in this view. */
110 static char *one_per[] = { "identification",
\r
/* Flat list of (parent, child) pairs: each child tag is required inside the
 * parent tag named just before it.  ifiction_validate_tag steps through the
 * list two entries at a time and uses the index of the parent entry as the
 * index into the rocurrences array.  The first pairs and the terminator are
 * not visible in this view. */
128 static char *required[] = {
\r
132 "resources", "auxiliary",
\r
133 "auxiliary", "leafname",
\r
134 "auxiliary", "description",
\r
135 "ifiction", "story",
\r
136 "story", "identification",
\r
137 "story", "bibliographic",
\r
138 "identification", "ifid",
\r
139 "identification", "format",
\r
140 "bibliographic", "title",
\r
141 "bibliographic", "author",
\r
142 "colophon", "generator",
\r
143 "colophon", "originated",
\r
/* Accepted values for the <forgiveness> tag.  The validator compares the tag
 * content against each entry by prefix and, when none matches, warns that the
 * value should be one of Merciful, Polite, Tough, Cruel.  The entries
 * themselves are not visible in this view. */
146 static char *zarfian[] = {
\r
/* State carried across tags during one parse.  The member list is not visible
 * in this view; the validator reads and writes members named format, width and
 * height through a pointer to this struct. */
155 struct ifiction_info {
\r
/* ifiction_validate_tag: check one just-closed tag against the rules encoded
 * in this file.
 *
 *   xtg   - the tag being closed.  xtg->next is its enclosing (parent) tag,
 *           xtg->begin and xtg->end delimit its content, and xtg->beginl is
 *           the line number quoted in every message.
 *   xti   - per-parse state: the story's format index and the cover width and
 *           height seen so far.
 *   err_h, ectx - error callback and its context.  NOTE(review): the lines
 *           that invoke err_h are not among those shown here; presumably each
 *           sprintf into ebuf is followed by err_h(ebuf,ectx) -- confirm.
 *
 * Checks visible in this block, in order:
 *   - a tag nested inside a parent listed in leaf_tags is an error;
 *   - a (parent, child) pair found in required[] is recorded in
 *     parent->rocurrences, and any pair still unrecorded when the parent
 *     itself is validated is reported as a missing required tag;
 *   - a tag listed in one_per may occur only once per parent (tracked in
 *     parent->occurences);
 *   - under <identification>, <format> content is matched by prefix against
 *     format_registry; a match is stored in xti->format, and the
 *     "Unknown format" warning is presumably the no-match branch (the
 *     branching lines are not shown);
 *   - under <cover>, <width> and <height> are read with sscanf; the messages
 *     warn about values below 120, above 1200, and aspect ratios beyond 2:1;
 *     a <format> whose content starts with neither "jpg" nor "png" draws a
 *     warning;
 *   - under <bibliographic>: leading or trailing whitespace, whitespace
 *     characters other than ' ', content longer than 240 bytes (2400 for
 *     <description>), <firstpublished> not shaped like YYYY or YYYY-MM-DD
 *     (month limited to 00-12 by its two digits, day only checked for being
 *     digits), <seriesnumber> with a leading zero or a non-digit, and
 *     <forgiveness> not matching zarfian[] by prefix;
 *   - a tag whose name is a registered format but whose index differs from
 *     xti->format draws a warning;
 *   - a closing <story> tag is tested for last; what happens then is not
 *     shown here.
 *
 * NOTE(review): every message is built with unbounded sprintf into ebuf, and
 * tag content is copied into bf with memcpy; neither buffer's declaration is
 * visible here, so confirm both are large enough for arbitrary input.
 * NOTE(review): isspace and isdigit receive plain char values; bytes of 0x80
 * and above in UTF-8 text should be converted to unsigned char first.
 * NOTE(review): the comment opened at the "Obsoleted by Revision 6" line has
 * no visible terminator in this view; confirm where it ends in the real file.
 */
160 static void ifiction_validate_tag(struct XMLTag *xtg, struct ifiction_info *xti, IFErrorHandler err_h, void *ectx)
\r
/* The open-tag stack is linked through ->next, so the next entry is the tag
 * that encloses xtg.  NOTE(review): parent is dereferenced just below while
 * later checks test "parent &&"; a guarding if (parent) may sit on a line not
 * shown -- confirm. */
164 struct XMLTag *parent=xtg->next;
\r
167 for(i=0;leaf_tags[i];i++)
\r
168 if (strcmp(parent->tag,leaf_tags[i])==0)
\r
170 sprintf(ebuf, "Error: (line %d) Tag <%s> is not permitted within tag <%s>",
\r
171 xtg->beginl,xtg->tag,parent->tag);
\r
/* Mark this child as seen for every required (parent, child) pair it fills. */
174 for(i=0;required[i];i+=2)
\r
175 if (strcmp(required[i],parent->tag)==0 && strcmp(required[i+1],xtg->tag)==0)
\r
176 parent->rocurrences[i]=1;
\r
/* Enforce the once-per-parent rule. */
177 for(i=0;one_per[i];i++)
\r
178 if (strcmp(one_per[i],xtg->tag)==0)
\r
179 if (parent->occurences[i]) {
\r
180 sprintf(ebuf,"Error: (line %d) Found more than one <%s> within <%s>",xtg->beginl,xtg->tag,
\r
184 else parent->occurences[i]=1;
\r
/* Now treat xtg as a parent: any required child not marked while its own
 * children were being closed is missing. */
186 for(i=0;required[i];i+=2)
\r
187 if (strcmp(required[i],xtg->tag)==0 && !xtg->rocurrences[i])
\r
189 sprintf(ebuf,"Error: (line %d) Tag <%s> is required within <%s>",xtg->beginl, required[i+1],xtg->tag);
\r
192 if (parent && strcmp(parent->tag,"identification")==0)
\r
194 if (strcmp(xtg->tag,"format")==0)
\r
/* Prefix match: content only has to start with a registered format name. */
197 for(i=0;format_registry[i];i++) if (memcmp(xtg->begin,format_registry[i],strlen(format_registry[i]))==0) break;
\r
198 if (format_registry[i]) xti->format=i;
\r
202 memcpy(bf,xtg->begin,xtg->end-xtg->begin);
\r
203 bf[xtg->end-xtg->begin]=0;
\r
205 sprintf(ebuf,"Warning: (line %d) Unknown format %s.",xtg->beginl,bf);
\r
210 if (parent && strcmp(parent->tag,"cover")==0)
\r
212 if (strcmp(xtg->tag,"width")==0)
\r
215 sscanf(xtg->begin,"%d",&i);
\r
218 sprintf(ebuf,"Warning: (line %d) Cover art width should not be less than 120.",xtg->beginl);
\r
223 sprintf(ebuf,"Warning: (line %d) Cover art width should not exceed 1200.",xtg->beginl);
\r
226 if (!xti->width) xti->width=i;
\r
227 if (xti->height && (xti->width> 2 * xti->height || xti->height > 2 * xti->width))
\r
229 sprintf(ebuf,"Warning: (line %d) Cover art aspect ratio exceeds 2:1.",xtg->beginl);
\r
234 if (strcmp(xtg->tag,"height")==0)
\r
237 sscanf(xtg->begin,"%d",&i);
\r
240 sprintf(ebuf,"Warning: (line %d) Cover art height should not be less than 120.",xtg->beginl);
\r
245 sprintf(ebuf,"Warning: (line %d) Cover art height should not exceed 1200.",xtg->beginl);
\r
248 if (!xti->height) xti->height=i;
\r
249 if (xti->width && (xti->width> 2 * xti->height || xti->height > 2 * xti->width))
\r
251 sprintf(ebuf,"Warning: (line %d) Cover art aspect ratio exceeds 2:1.",xtg->beginl);
\r
256 if (strcmp(xtg->tag,"format")==0 && memcmp(xtg->begin,"jpg",3) && memcmp(xtg->begin,"png",3))
\r
258 sprintf(ebuf,"Warning: (line %d) <format> should be one of: png, jpg.",xtg->beginl);
\r
262 if (parent && strcmp(parent->tag,"bibliographic")==0)
\r
265 if (isspace(*xtg->begin)|| isspace(*(xtg->end-1)))
\r
267 sprintf(ebuf,"Warning: (line %d) Extraneous spaces at beginning or end of tag <%s>.",xtg->beginl,xtg->tag);
\r
270 for(p=xtg->begin;p<xtg->end-1;p++)
\r
271 /* Obsoleted by Revision 6
\r
272 if (isspace(*p) && isspace(*(p+1)))
\r
274 sprintf(ebuf,"Warning: (line %d) Extraneous spaces found in tag <%s>.",xtg->beginl, xtg->tag);
\r
277 else if (isspace(*p) && *p!=' ')
\r
279 sprintf(ebuf,"Warning: (line %d) Improper whitespace character found in tag <%s>.",xtg->beginl, xtg->tag);
\r
284 if (strcmp(xtg->tag, "description") && xtg->end-xtg->begin > 240)
\r
286 sprintf(ebuf,"Warning: (line %d) Tag <%s> length exceeds treaty guidelines",xtg->beginl, xtg->tag);
\r
289 if (strcmp(xtg->tag, "description")==0 && xtg->end-xtg->begin > 2400)
\r
291 sprintf(ebuf,"Warning: (line %d) Tag <%s> length exceeds treaty guidelines",xtg->beginl, xtg->tag);
\r
294 if (strcmp(xtg->tag,"firstpublished")==0)
\r
296 int l=xtg->end-xtg->begin;
\r
297 if ((l!=4 && l!=10) ||
\r
298 (!isdigit(xtg->begin[0]) ||
\r
299 !isdigit(xtg->begin[1]) ||
\r
300 !isdigit(xtg->begin[2]) ||
\r
301 !isdigit(xtg->begin[3])) ||
\r
302 (l==10 && ( xtg->begin[4]!='-' ||
\r
303 xtg->begin[7]!='-' ||
\r
304 !isdigit(xtg->begin[5]) ||
\r
305 !isdigit(xtg->begin[6]) ||
\r
306 !(xtg->begin[5]=='0' || xtg->begin[5]=='1') ||
\r
307 !(xtg->begin[5]=='0' || xtg->begin[6]<='2') ||
\r
308 !isdigit(xtg->begin[8]) ||
\r
309 !isdigit(xtg->begin[9]))))
\r
311 sprintf(ebuf,"Warning: (line %d) Tag <%s> should be format YYYY or YYYY-MM-DD",xtg->beginl, xtg->tag);
\r
315 if (strcmp(xtg->tag,"seriesnumber")==0)
\r
318 if (*xtg->begin=='0' && xtg->end!=xtg->begin+1)
\r
320 sprintf(ebuf,"Warning: (line %d) Tag <%s> should not use leading zeroes",xtg->beginl, xtg->tag);
\r
324 for(l=xtg->begin;l<xtg->end;l++) if (!isdigit(*l))
\r
326 sprintf(ebuf,"Warning: (line %d) Tag <%s> should be a positive number",xtg->beginl, xtg->tag);
\r
330 if (strcmp(xtg->tag,"forgiveness")==0)
\r
333 for(l=0;zarfian[l];l++) if (memcmp(xtg->begin,zarfian[l],strlen(zarfian[l]))==0) break;
\r
336 sprintf(ebuf,"Warning: (line %d) <forgiveness> should be one of: Merciful, Polite, Tough, Cruel",xtg->beginl);
\r
343 for(i=0;format_registry[i];i++) if (strcmp(xtg->tag,format_registry[i])==0) break;
\r
344 if (format_registry[i] && xti->format !=i)
\r
346 sprintf(ebuf,"Warning: (line %d) Found <%s> tag, but story is identified as %s.",xtg->beginl, xtg->tag, format_registry[xti->format]);
\r
350 if (strcmp(xtg->tag,"story")==0)
\r
/* ifiction_parse: scan the iFiction XML text in md, keeping a stack of open
 * tags (linked through ->next, with the innermost one in parse).
 *
 *   md          - NUL-terminated metadata text.
 *   close_tag   - called as close_tag(xtg, close_ctx) for each tag as it is
 *                 closed, after the tag has been validated.
 *   error_handler - called as error_handler(message, error_ctx) for each
 *                 problem found.
 *
 * Visible steps: skip leading whitespace and an optional UTF-8 byte order
 * mark, require the literal XML 1.0 header with encoding "UTF-8" or "utf-8",
 * require "<ifindex" somewhere in the text, then walk the text one '<' at a
 * time handling end tags, tags ending in "/" or starting with "!", and start
 * tags.  Tags still open at the end are reported, validated and closed.
 * What follows each reported error (early return or continue) is on lines not
 * shown here.
 *
 * NOTE(review): tag text is copied with strncpy into buffer[2400],
 * parse->fulltag and xtg->tag using lengths taken from the input, with no
 * visible check against the destination size; messages containing buffer are
 * formatted with sprintf into ebuffer[512].  Confirm oversized tags cannot
 * overflow these. */
361 void ifiction_parse(char *md, IFCloseTag close_tag, void *close_ctx, IFErrorHandler error_handler, void *error_ctx)
\r
363 char *xml, buffer[2400], *aep, *mda=md, ebuffer[512];
\r
364 struct XMLTag *parse=NULL, *xtg;
\r
365 struct ifiction_info xti;
\r
/* UTF-8 byte order mark. */
366 char BOM[3]={ 0xEF, 0xBB, 0xBF};
\r
373 while(*mda && isspace(*mda)) mda++;
\r
374 if (memcmp(mda,BOM,3)==0)
\r
376 while(*mda && isspace(*mda)) mda++;
\r
/* Both spellings have the same length, so reusing the "UTF-8" literal for the
 * second strlen is harmless. */
380 if (strncmp("<?xml version=\"1.0\" encoding=\"UTF-8\"?>",mda,
\r
381 strlen("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"))
\r
383 strncmp("<?xml version=\"1.0\" encoding=\"utf-8\"?>",mda,
\r
384 strlen("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"))
\r
387 error_handler("Error: XML header not found.",error_ctx);
\r
391 xml=strstr(md,"<ifindex");
\r
393 error_handler("Error: <ifindex> not found",error_ctx);
\r
/* bp: start of the current tag, ep: its closing '>', tp: the next '<' after
 * bp.  The assignment of bp and the test that leads to the "continue" below
 * are on lines not shown here. */
398 char *bp, *ep, *tp;
\r
399 while(*xml&&*xml!='<') xml++;
\r
402 tp=strchr(bp+1,'<');
\r
403 ep=strchr(bp+1,'>');
\r
406 { xml=tp; continue; }
\r
408 if (bp[1]=='/') /* end tag */
\r
/* Copy the name between "</" and ">" into buffer. */
410 strncpy(buffer,bp+2,(ep-bp)-2);
\r
411 buffer[(ep-bp)-2]=0;
\r
412 if (parse && strcmp(buffer,parse->tag)==0)
\r
413 { /* copasetic. Close the tag */
\r
/* Content ends where the "</name" text begins. */
416 xtg->end=ep-strlen(buffer)-2;
\r
417 ifiction_validate_tag(xtg,&xti,error_handler, error_ctx);
\r
418 close_tag(xtg,close_ctx);
\r
/* Not the innermost tag: look further down the stack for a matching name. */
423 for(xtg=parse;xtg && strcmp(buffer,xtg->tag);xtg=xtg->next);
\r
424 if (xtg) /* Intervening unclosed tags */
\r
/* Close every tag above the match, reporting each as unclosed. */
425 { for(xtg=parse;xtg && strcmp(buffer,parse->tag);xtg=parse)
\r
429 sprintf(ebuffer,"Error: (line %d) unclosed <%s> tag",xtg->beginl,xtg->tag);
\r
430 error_handler(ebuffer,error_ctx);
\r
431 ifiction_validate_tag(xtg,&xti,error_handler, error_ctx);
\r
432 close_tag(xtg,close_ctx);
\r
/* Then close the matching tag itself. */
440 ifiction_validate_tag(xtg,&xti, error_handler, error_ctx);
\r
441 close_tag(xtg,close_ctx);
\r
/* No open tag has this name at all. */
447 sprintf(ebuffer,"Error: (line %d) saw </%s> without <%s>",getln(xml), buffer,buffer);
\r
448 error_handler(ebuffer,error_ctx);
\r
453 else if(*(ep-1)=='/' || bp[1]=='!') /* unterminated tag */
\r
457 else /* Terminated tag beginning */
\r
460 xtg=(struct XMLTag *)my_malloc(sizeof(struct XMLTag),"XML Tag");
\r
462 xtg->beginl=getln(bp);
\r
/* The tag name is the run of '_', '-' and alphanumeric characters after '<'. */
463 for(i=0;bp[i+1]=='_' || bp[i+1]=='-' || isalnum(bp[i+1]);i++)
\r
464 xtg->tag[i]=bp[i+1];
\r
/* fulltag keeps everything between '<' and '>'. */
472 strncpy(parse->fulltag,bp+1,ep-bp-1);
\r
473 parse->fulltag[ep-bp-1]=0;
\r
/* End of input: whatever is still on the stack was never closed. */
483 sprintf(ebuffer,"Error: (line %d) Unclosed tag <%s>",xtg->beginl,xtg->tag);
\r
484 ifiction_validate_tag(xtg,&xti,error_handler, error_ctx);
\r
485 close_tag(xtg,close_ctx);
\r
/* Error handler that ifiction_get_tag hands to ifiction_parse.  The body is
 * not visible in this view; going by the name it presumably discards the
 * message -- confirm. */
498 static void ifiction_null_eh(char *e, void *c)
\r
/* ifiction_find_value: close-tag callback used by ifiction_get_tag to capture
 * the content of a named tag.
 *
 * xti is the search record (struct get_tag) with these members:
 *   tag    - name of the tag wanted;
 *   parent - name its enclosing tag must have, or NULL to accept only a tag
 *            that has no enclosing tag;
 *   target - when non-NULL, a value that must be seen first; the value
 *            captured after it is the one kept;
 *   output - heap copy of the most recent match, or NULL.
 *
 * NOTE(review): the line that NUL-terminates the copy is not among those shown
 * here; the buffer is sized l+1, so presumably output[l] is set to 0. */
504 static void ifiction_find_value(struct XMLTag *xtg, void *xti)
\r
506 struct get_tag *gt=(struct get_tag *)xti;
\r
/* A result is already held and nothing is being skipped: search finished. */
508 if (gt->output && !gt->target) return;
\r
/* The held value is the one to skip past: discard it and stop skipping, so
 * the next match becomes the result. */
509 if (gt->target && gt->output && strcmp(gt->output,gt->target)==0) { gt->target=NULL; free(gt->output); gt->output=NULL; }
\r
510 if (((!xtg->next && !gt->parent) || (xtg->next && gt->parent && strcmp(xtg->next->tag,gt->parent)==0)) &&
\r
511 strcmp(xtg->tag,gt->tag)==0)
\r
513 int32 l = xtg->end-xtg->begin;
\r
/* Replace any earlier candidate with a fresh copy of this tag's content. */
515 if (gt->output) free(gt->output);
\r
516 gt->output=(char *)my_malloc(l+1, "ifiction tag buffer");
\r
517 memcpy(gt->output, xtg->begin, l);
\r
/* ifiction_get_tag: parse md and return a heap-allocated copy of the content
 * of a tag, located by the ifiction_find_value callback; errors found while
 * parsing go to ifiction_null_eh.  Returns NULL (after freeing any partial
 * result) when the search record's target is still set after the parse, i.e.
 * the value to search from was never seen.
 * The set-up of the search record gt from p, t and from, and the final return,
 * are on lines not shown in this view. */
524 char *ifiction_get_tag(char *md, char *p, char *t, char *from)
\r
/* Pass the address of the search record as the close-tag context;
 * ifiction_find_value casts that context back to struct get_tag *.  The
 * argument had been corrupted to a bare ">" (a mis-decoded "&gt"), which is
 * not valid C. */
531 ifiction_parse(md,ifiction_find_value,&gt,ifiction_null_eh,NULL);
\r
532 if (gt.target){ if (gt.output) free(gt.output); return NULL; }
\r