MythTV  master
xine_demux_sputext.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2000-2003 the xine project
3  *
4  * This file is part of xine, a free video player.
5  *
6  * xine is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * xine is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19  *
20  * code based on old libsputext/xine_decoder.c
21  *
22  * code based on mplayer module:
23  *
24  * Subtitle reader with format autodetection
25  *
26  * Written by laaz
27  * Some code cleanup & realloc() by A'rpi/ESP-team
28  * dunnowhat sub format by szabi
29  */
30 
31 #ifdef HAVE_CONFIG_H
32 #include "config.h"
33 #endif
34 
35 #include <cctype>
36 #include <cstdio>
37 #include <cstdlib>
38 #include <cstring>
39 #include <fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/types.h>
42 #include <unistd.h>
43 #include "xine_demux_sputext.h"
44 
45 #define LOG_MODULE "demux_sputext"
46 #define LOG_VERBOSE
47 /*
48 #define LOG
49 */
50 
51 #define ERR ((void *)-1)
52 #define LINE_LEN 1000
53 #define LINE_LEN_QUOT "1000"
54 
55 /*
56  * Demuxer code start
57  */
58 
59 #define FORMAT_UNKNOWN (-1)
60 #define FORMAT_MICRODVD 0
61 #define FORMAT_SUBRIP 1
62 #define FORMAT_SUBVIEWER 2
63 #define FORMAT_SAMI 3
64 #define FORMAT_VPLAYER 4
65 #define FORMAT_RT 5
66 #define FORMAT_SSA 6 /* Sub Station Alpha */
67 #define FORMAT_PJS 7
68 #define FORMAT_MPSUB 8
69 #define FORMAT_AQTITLE 9
70 #define FORMAT_JACOBSUB 10
71 #define FORMAT_SUBVIEWER2 11
72 #define FORMAT_SUBRIP09 12
73 #define FORMAT_MPL2 13 /*Mplayer sub 2 ?*/
74 
/*
 * Tells whether a character terminates a line.
 *
 * Returns 1 for carriage return, line feed or the terminating NUL,
 * 0 for every other character.
 */
static int eol(char p) {
  switch (p) {
    case '\r':
    case '\n':
    case '\0':
      return 1;
    default:
      return 0;
  }
}
78 
/*
 * Strips leading and trailing whitespace from s, in place.
 *
 * Leading whitespace is removed by moving the remaining text (including
 * its terminator) to the front of the buffer in one step. Trailing
 * whitespace is overwritten with NUL bytes. Index 0 is never cleared by
 * the trailing pass: after the leading pass it is either a non-space
 * character or already the terminator.
 */
static inline void trail_space(char *s) {
  char *first = s;
  size_t len;

  /* <ctype.h> functions are only defined for unsigned char values and EOF */
  while (isspace((unsigned char)*first))
    first++;

  len = strlen(first);
  if (first != s)
    memmove(s, first, len + 1);

  while (len > 1 && isspace((unsigned char)s[len - 1]))
    s[--len] = '\0';
}
92 
93 /*
94  * Reimplementation of fgets() using the input->read() method.
95  */
96 static char *read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len) {
97  off_t nread = 0;
98  char *s;
99  int linelen;
100 
101  // Since our RemoteFile code sleeps 200ms whenever we get back less data
102  // than requested, but this code just keeps trying to read until it gets
103  // an error back, we check for empty reads so that we can stop reading
104  // when there is no more data to read
105  if (demuxstr->emptyReads == 0 && (len - demuxstr->buflen) > 512) {
106  nread = len - demuxstr->buflen;
107  if (nread > demuxstr->rbuffer_len - demuxstr->rbuffer_cur)
108  nread = demuxstr->rbuffer_len - demuxstr->rbuffer_cur;
109  if (nread < 0) {
110  printf("read failed.\n");
111  return nullptr;
112  }
113  memcpy(&demuxstr->buf[demuxstr->buflen],
114  &demuxstr->rbuffer_text[demuxstr->rbuffer_cur],
115  nread);
116  demuxstr->rbuffer_cur += nread;
117  }
118 
119  if (!nread)
120  demuxstr->emptyReads++;
121 
122  demuxstr->buflen += nread;
123  demuxstr->buf[demuxstr->buflen] = '\0';
124 
125  s = strchr(demuxstr->buf, '\n');
126 
127  if (line && (s || demuxstr->buflen)) {
128 
129  linelen = s ? (s - demuxstr->buf) + 1 : demuxstr->buflen;
130 
131  memcpy(line, demuxstr->buf, linelen);
132  line[linelen] = '\0';
133 
134  memmove(demuxstr->buf, &demuxstr->buf[linelen], SUB_BUFSIZE - linelen);
135  demuxstr->buflen -= linelen;
136 
137  return line;
138  }
139 
140  return nullptr;
141 }
142 
143 
145 
146  static char line[LINE_LEN + 1];
147  static char *s = nullptr;
148  char text[LINE_LEN + 1], *p, *q;
149  int state;
150 
151  p = nullptr;
152  current->lines = current->start = 0;
153  current->end = -1;
154  state = 0;
155 
156  /* read the first line */
157  if (!s)
158  if (!(s = read_line_from_input(demuxstr, line, LINE_LEN))) return nullptr;
159 
160  do {
161  switch (state) {
162 
163  case 0: /* find "START=" */
164  s = strstr (s, "Start=");
165  if (s) {
166  current->start = strtol (s + 6, &s, 0) / 10;
167  state = 1; continue;
168  }
169  break;
170 
171  case 1: /* find "<P" */
172  if ((s = strstr (s, "<P"))) { s += 2; state = 2; continue; }
173  break;
174 
175  case 2: /* find ">" */
176  if ((s = strchr (s, '>'))) { s++; state = 3; p = text; continue; }
177  break;
178 
179  case 3: /* get all text until '<' appears */
180  if (*s == '\0') { break; }
181  else if (*s == '<') { state = 4; }
182  else if (!strncasecmp (s, "&nbsp;", 6)) { *p++ = ' '; s += 6; }
183  else if (*s == '\r') { s++; }
184  else if (!strncasecmp (s, "<br>", 4) || *s == '\n') {
185  *p = '\0'; p = text; trail_space (text);
186  if (text[0] != '\0')
187  current->text[current->lines++] = strdup (text);
188  if (*s == '\n') s++; else s += 4;
189  }
190  else *p++ = *s++;
191  continue;
192 
193  case 4: /* get current->end or skip <TAG> */
194  q = strstr (s, "Start=");
195  if (q) {
196  current->end = strtol (q + 6, &q, 0) / 10 - 1;
197  *p = '\0'; trail_space (text);
198  if (text[0] != '\0')
199  current->text[current->lines++] = strdup (text);
200  if (current->lines > 0) { state = 99; break; }
201  state = 0; continue;
202  }
203  s = strchr (s, '>');
204  if (s) { s++; state = 3; continue; }
205  break;
206  }
207 
208  /* read next line */
209  if (state != 99 && !(s = read_line_from_input (demuxstr, line, LINE_LEN)))
210  return nullptr;
211 
212  } while (state != 99);
213 
214  return current;
215 }
216 
217 
218 static char *sub_readtext(char *source, char **dest) {
219  int len=0;
220  char *p=source;
221 
222  while ( !eol(*p) && *p!= '|' ) {
223  p++,len++;
224  }
225 
226  if (!dest)
227  return (char*)ERR;
228 
229  *dest= (char *)malloc (len+1);
230  if (!(*dest))
231  return (char*)ERR;
232 
233  strncpy(*dest, source, len);
234  (*dest)[len]=0;
235 
236  while (*p=='\r' || *p=='\n' || *p=='|')
237  p++;
238 
239  if (*p) return p; /* not-last text field */
240  else return (char*)nullptr; /* last text field */
241 }
242 
244 
245  char line[LINE_LEN + 1];
246  char line2[LINE_LEN + 1];
247  char *p, *next;
248  int i;
249 
250  memset (current, 0, sizeof(subtitle_t));
251 
252  current->end=-1;
253  do {
254  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
255  } while ((sscanf (line, "{%ld}{}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), line2) !=2) &&
256  (sscanf (line, "{%ld}{%ld}%" LINE_LEN_QUOT "[^\r\n]", &(current->start), &(current->end),line2) !=3)
257  );
258 
259  p=line2;
260 
261  next=p, i=0;
262  while ((next =sub_readtext (next, &(current->text[i])))) {
263  if (current->text[i]==ERR) return (subtitle_t *)ERR;
264  i++;
265  if (i>=SUB_MAX_TEXT) {
266  printf ("Too many lines in a subtitle\n");
267  current->lines=i;
268  return current;
269  }
270  }
271  current->lines= ++i;
272 
273  return current;
274 }
275 
277 
278  char line[LINE_LEN + 1];
279  int a1,a2,a3,a4,b1,b2,b3,b4;
280  char *p=nullptr, *q=nullptr;
281  int len;
282 
283  memset (current, 0, sizeof(subtitle_t));
284 
285  while (true) {
286  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
287  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8) {
288  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4) < 8)
289  continue;
290  }
291  current->start = a1*360000+a2*6000+a3*100+a4;
292  current->end = b1*360000+b2*6000+b3*100+b4;
293 
294  if (!read_line_from_input(demuxstr, line, LINE_LEN))
295  return nullptr;
296 
297  p=q=line;
298  for (current->lines=1; current->lines <= SUB_MAX_TEXT; current->lines++) {
299  for (q=p,len=0; *p && *p!='\r' && *p!='\n' && *p!='|' &&
300  (strncasecmp(p,"[br]",4) != 0); p++,len++);
301  current->text[current->lines-1]=(char *)malloc (len+1);
302  if (!current->text[current->lines-1]) return (subtitle_t *)ERR;
303  strncpy (current->text[current->lines-1], q, len);
304  current->text[current->lines-1][len]='\0';
305  if (!*p || *p=='\r' || *p=='\n') break;
306  if (*p=='[') while (*p++!=']');
307  if (*p=='|') p++;
308  }
309  if (current->lines > SUB_MAX_TEXT) current->lines = SUB_MAX_TEXT;
310  break;
311  }
312  return current;
313 }
314 
316  char line[LINE_LEN + 1];
317  int a1,a2,a3,a4,b1,b2,b3,b4;
318  int i,end_sub;
319 
320  memset(current,0,sizeof(subtitle_t));
321  do {
322  if(!read_line_from_input(demuxstr,line,LINE_LEN))
323  return nullptr;
324  i = sscanf(line,"%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4);
325  } while(i < 8);
326  current->start = a1*360000+a2*6000+a3*100+a4/10;
327  current->end = b1*360000+b2*6000+b3*100+b4/10;
328  i=0;
329  end_sub=0;
330  do {
331  char *p; /* pointer to the curently read char */
332  char temp_line[SUB_BUFSIZE]; /* subtitle line that will be transfered to current->text[i] */
333  int temp_index; /* ... and its index wich 'points' to the first EMPTY place -> last read char is at temp_index-1 if temp_index>0 */
334  temp_line[SUB_BUFSIZE-1]='\0'; /* just in case... */
335  if(!read_line_from_input(demuxstr,line,LINE_LEN)) {
336  if(i)
337  break; /* if something was read, transmit it */
338  else
339  return nullptr; /* if not, repport EOF */
340  }
341  for(temp_index=0,p=line;*p!='\0' && !end_sub && temp_index<SUB_BUFSIZE && i<SUB_MAX_TEXT;p++) {
342  switch(*p) {
343  case '\\':
344  if(*(p+1)=='N' || *(p+1)=='n') {
345  temp_line[temp_index++]='\0'; /* end of curent line */
346  p++;
347  } else
348  temp_line[temp_index++]=*p;
349  break;
350  case '{':
351 #if 0 /* italic not implemented in renderer, ignore them for now */
352  if(!strncmp(p,"{\\i1}",5) && temp_index+3<SUB_BUFSIZE) {
353  temp_line[temp_index++]='<';
354  temp_line[temp_index++]='i';
355  temp_line[temp_index++]='>';
356 #else
357  if(!strncmp(p,"{\\i1}",5)) {
358 #endif
359  p+=4;
360  }
361 #if 0 /* italic not implemented in renderer, ignore them for now */
362  else if(!strncmp(p,"{\\i0}",5) && temp_index+4<SUB_BUFSIZE) {
363  temp_line[temp_index++]='<';
364  temp_line[temp_index++]='/';
365  temp_line[temp_index++]='i';
366  temp_line[temp_index++]='>';
367 #else
368  else if(!strncmp(p,"{\\i0}",5)) {
369 #endif
370  p+=4;
371  }
372  else
373  temp_line[temp_index++]=*p;
374  break;
375  case '\r': /* just ignore '\r's */
376  break;
377  case '\n':
378  temp_line[temp_index++]='\0';
379  break;
380  default:
381  temp_line[temp_index++]=*p;
382  break;
383  }
384  if(temp_index>0) {
385  if(temp_index==SUB_BUFSIZE)
386  printf("Too many characters in a subtitle line\n");
387  if(temp_line[temp_index-1]=='\0' || temp_index==SUB_BUFSIZE) {
388  if(temp_index>1) { /* more than 1 char (including '\0') -> that is a valid one */
389  current->text[i]=(char *)malloc(temp_index);
390  if(!current->text[i])
391  return (subtitle_t *)ERR;
392  strncpy(current->text[i],temp_line,temp_index); /* temp_index<=SUB_BUFSIZE is always true here */
393  i++;
394  temp_index=0;
395  } else
396  end_sub=1;
397  }
398  }
399  }
400  } while(i<SUB_MAX_TEXT && !end_sub);
401  if(i>=SUB_MAX_TEXT)
402  printf("Too many lines in a subtitle\n");
403  current->lines=i;
404  return current;
405 }
406 
408  char line[LINE_LEN + 1];
409  int a1,a2,a3,b1,b2,b3;
410  char *p=nullptr, *next, *p2;
411  int i;
412 
413  memset (current, 0, sizeof(subtitle_t));
414 
415  while (!current->text[0]) {
416  if( demuxstr->next_line[0] == '\0' ) { /* if the buffer is empty.... */
417  if( !read_line_from_input(demuxstr, line, LINE_LEN) ) return nullptr;
418  } else {
419  /* ... get the current line from buffer. */
420  strncpy( line, demuxstr->next_line, LINE_LEN);
421  line[LINE_LEN] = '\0'; /* I'm scared. This makes me feel better. */
422  demuxstr->next_line[0] = '\0'; /* mark the buffer as empty. */
423  }
424  /* Initialize buffer with next line */
425  if( ! read_line_from_input( demuxstr, demuxstr->next_line, LINE_LEN) ) {
426  demuxstr->next_line[0] = '\0';
427  return nullptr;
428  }
429  if( (sscanf( line, "%d:%d:%d:", &a1, &a2, &a3) < 3) ||
430  (sscanf( demuxstr->next_line, "%d:%d:%d:", &b1, &b2, &b3) < 3) )
431  continue;
432  current->start = a1*360000+a2*6000+a3*100;
433  current->end = b1*360000+b2*6000+b3*100;
434  if ((current->end - current->start) > LINE_LEN)
435  current->end = current->start + LINE_LEN; /* not too long though. */
436  /* teraz czas na wkopiowanie stringu */
437  p=line;
438  /* finds the body of the subtitle_t */
439  for (i=0; i<3; i++){
440  p2=strchr( p, ':');
441  if( p2 == nullptr ) break;
442  p=p2+1;
443  }
444 
445  next=p;
446  i=0;
447  while( (next = sub_readtext( next, &(current->text[i]))) ) {
448  if (current->text[i]==ERR)
449  return (subtitle_t *)ERR;
450  i++;
451  if (i>=SUB_MAX_TEXT) {
452  printf("Too many lines in a subtitle\n");
453  current->lines=i;
454  return current;
455  }
456  }
457  current->lines=++i;
458  }
459  return current;
460 }
461 
463  /*
464  * TODO: This format uses quite rich (sub/super)set of xhtml
465  * I couldn't check it since DTD is not included.
466  * WARNING: full XML parses can be required for proper parsing
467  */
468  char line[LINE_LEN + 1];
469  int a1,a2,a3,a4,b1,b2,b3,b4;
470  char *p=nullptr,*next=nullptr;
471  int i,len,plen;
472 
473  memset (current, 0, sizeof(subtitle_t));
474 
475  while (!current->text[0]) {
476  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
477  /*
478  * TODO: it seems that format of time is not easily determined, it may be 1:12, 1:12.0 or 0:1:12.0
479  * to describe the same moment in time. Maybe there are even more formats in use.
480  */
481  if ((len=sscanf (line, "<Time Begin=\"%d:%d:%d.%d\" End=\"%d:%d:%d.%d\"",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4)) < 8)
482 
483  plen=a1=a2=a3=a4=b1=b2=b3=b4=0;
484  if (
485  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&plen)) < 4) &&
486  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&b2,&b3,&b4,&plen)) < 5) &&
487  /* ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&plen)) < 5) && */
488  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d.%d\" %*[Ee]nd=\"%d:%d.%d\"%*[^<]<clear/>%n",&a2,&a3,&a4,&b2,&b3,&b4,&plen)) < 6) &&
489  ((len=sscanf (line, "<%*[tT]ime %*[bB]egin=\"%d:%d:%d.%d\" %*[Ee]nd=\"%d:%d:%d.%d\"%*[^<]<clear/>%n",&a1,&a2,&a3,&a4,&b1,&b2,&b3,&b4,&plen)) < 8)
490  )
491  continue;
492  current->start = a1*360000+a2*6000+a3*100+a4/10;
493  current->end = b1*360000+b2*6000+b3*100+b4/10;
494  p=line; p+=plen;i=0;
495  /* TODO: I don't know what kind of convention is here for marking multiline subs, maybe <br/> like in xml? */
496  next = strstr(line,"<clear/>")+8;i=0;
497  while ((next =sub_readtext (next, &(current->text[i])))) {
498  if (current->text[i]==ERR)
499  return (subtitle_t *)ERR;
500  i++;
501  if (i>=SUB_MAX_TEXT) {
502  printf("Too many lines in a subtitle\n");
503  current->lines=i;
504  return current;
505  }
506  }
507  current->lines=i+1;
508  }
509  return current;
510 }
511 
513  int comma;
514  static int max_comma = 32; /* let's use 32 for the case that the */
515  /* amount of commas increase with newer SSA versions */
516 
517  int hour1, min1, sec1, hunsec1, hour2, min2, sec2, hunsec2, nothing;
518  int num;
519  char line[LINE_LEN + 1], line3[LINE_LEN + 1], *line2;
520  char *tmp;
521 
522  do {
523  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
524  } while (sscanf (line, "Dialogue: Marked=%d,%d:%d:%d.%d,%d:%d:%d.%d,"
525  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
526  &hour1, &min1, &sec1, &hunsec1,
527  &hour2, &min2, &sec2, &hunsec2,
528  line3) < 9
529  &&
530  sscanf (line, "Dialogue: %d,%d:%d:%d.%d,%d:%d:%d.%d,"
531  "%" LINE_LEN_QUOT "[^\n\r]", &nothing,
532  &hour1, &min1, &sec1, &hunsec1,
533  &hour2, &min2, &sec2, &hunsec2,
534  line3) < 9 );
535 
536  line2=strchr(line3, ',');
537  if (!line2)
538  return nullptr;
539 
540  for (comma = 4; comma < max_comma; comma ++)
541  {
542  tmp = line2;
543  if(!(tmp=strchr(++tmp, ','))) break;
544  if(*(++tmp) == ' ') break;
545  /* a space after a comma means we're already in a sentence */
546  line2 = tmp;
547  }
548 
549  if(comma < max_comma)max_comma = comma;
550  /* eliminate the trailing comma */
551  if(*line2 == ',') line2++;
552 
553  current->lines=0;num=0;
554  current->start = 360000*hour1 + 6000*min1 + 100*sec1 + hunsec1;
555  current->end = 360000*hour2 + 6000*min2 + 100*sec2 + hunsec2;
556 
557  while (((tmp=strstr(line2, "\\n")) != nullptr) || ((tmp=strstr(line2, "\\N")) != nullptr) ){
558  current->text[num]=(char *)malloc(tmp-line2+1);
559  strncpy (current->text[num], line2, tmp-line2);
560  current->text[num][tmp-line2]='\0';
561  line2=tmp+2;
562  num++;
563  current->lines++;
564  if (current->lines >= SUB_MAX_TEXT) return current;
565  }
566 
567  current->text[num]=strdup(line2);
568  current->lines++;
569 
570  return current;
571 }
572 
573 /* Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
574  * From MPlayer subreader.c :
575  *
576  * PJS subtitles reader.
577  * That's the "Phoenix Japanimation Society" format.
578  * I found some of them in http://www.scriptsclub.org/ (used for anime).
579  * The time is in tenths of second.
580  *
581  * by set, based on code by szabi (dunnowhat sub format ;-)
582  */
583 
585  char line[LINE_LEN + 1];
586  char text[LINE_LEN + 1];
587  char *s, *d;
588 
589  memset (current, 0, sizeof(subtitle_t));
590 
591  if (!read_line_from_input(demuxstr, line, LINE_LEN))
592  return nullptr;
593  for (s = line; *s && isspace(*s); s++);
594  if (*s == 0)
595  return nullptr;
596  if (sscanf (line, "%ld,%ld,", &(current->start),
597  &(current->end)) <2)
598  return (subtitle_t *)ERR;
599  /* the files I have are in tenths of second */
600  current->start *= 10;
601  current->end *= 10;
602 
603  /* walk to the beggining of the string */
604  for (; *s; s++) if (*s==',') break;
605  if (*s) {
606  for (s++; *s; s++) if (*s==',') break;
607  if (*s) s++;
608  }
609  if (*s!='"') {
610  return (subtitle_t *)ERR;
611  }
612  /* copy the string to the text buffer */
613  for (s++, d=text; *s && *s!='"'; s++, d++)
614  *d=*s;
615  *d=0;
616  current->text[0] = strdup(text);
617  current->lines = 1;
618 
619  return current;
620 }
621 
623  char line[LINE_LEN + 1];
624  float a,b;
625  int num=0;
626  char *p, *q;
627 
628  do {
629  if (!read_line_from_input(demuxstr, line, LINE_LEN))
630  return nullptr;
631  } while (sscanf (line, "%f %f", &a, &b) !=2);
632 
633  demuxstr->mpsub_position += (a*100.0f);
634  current->start = (int) demuxstr->mpsub_position;
635  demuxstr->mpsub_position += (b*100.0f);
636  current->end = (int) demuxstr->mpsub_position;
637 
638  while (num < SUB_MAX_TEXT) {
639  if (!read_line_from_input(demuxstr, line, LINE_LEN))
640  return nullptr;
641 
642  p=line;
643  while (isspace(*p))
644  p++;
645 
646  if (eol(*p) && num > 0)
647  return current;
648 
649  if (eol(*p))
650  return nullptr;
651 
652  for (q=p; !eol(*q); q++);
653  *q='\0';
654  if (strlen(p)) {
655  current->text[num]=strdup(p);
656  printf(">%s<\n",p);
657  current->lines = ++num;
658  } else {
659  if (num)
660  return current;
661  else
662  return nullptr;
663  }
664  }
665 
666  return nullptr;
667 }
668 
670  char line[LINE_LEN + 1];
671 
672  memset (current, 0, sizeof(subtitle_t));
673 
674  while (true) {
675  /* try to locate next subtitle_t */
676  if (!read_line_from_input(demuxstr, line, LINE_LEN))
677  return nullptr;
678  if (!(sscanf (line, "-->> %ld", &(current->start)) <1))
679  break;
680  }
681 
682  if (!read_line_from_input(demuxstr, line, LINE_LEN))
683  return nullptr;
684 
685  sub_readtext((char *) &line,&current->text[0]);
686  current->lines = 1;
687  current->end = -1;
688 
689  if (!read_line_from_input(demuxstr, line, LINE_LEN))
690  return current;;
691 
692  sub_readtext((char *) &line,&current->text[1]);
693  current->lines = 2;
694 
695  if ((current->text[0][0]==0) && (current->text[1][0]==0)) {
696  return nullptr;
697  }
698 
699  return current;
700 }
701 
703  char line1[LINE_LEN+1], line2[LINE_LEN+1], directive[LINE_LEN+1], *p, *q;
704  unsigned a1, a2, a3, a4, b1, b2, b3, b4, comment = 0;
705  static unsigned jacoTimeres = 30;
706  static int jacoShift = 0;
707 
708  memset(current, 0, sizeof(subtitle_t));
709  memset(line1, 0, LINE_LEN+1);
710  memset(line2, 0, LINE_LEN+1);
711  memset(directive, 0, LINE_LEN+1);
712  while (!current->text[0]) {
713  if (!read_line_from_input(demuxstr, line1, LINE_LEN)) {
714  return nullptr;
715  }
716  if (sscanf
717  (line1, "%u:%u:%u.%u %u:%u:%u.%u %" LINE_LEN_QUOT "[^\n\r]", &a1, &a2, &a3, &a4,
718  &b1, &b2, &b3, &b4, line2) < 9) {
719  if (sscanf(line1, "@%u @%u %" LINE_LEN_QUOT "[^\n\r]", &a4, &b4, line2) < 3) {
720  if (line1[0] == '#') {
721  int hours = 0, minutes = 0, seconds, delta, inverter =
722  1;
723  unsigned units = jacoShift;
724  switch (toupper(line1[1])) {
725  case 'S':
726  if (isalpha(line1[2])) {
727  delta = 6;
728  } else {
729  delta = 2;
730  }
731  if (sscanf(&line1[delta], "%d", &hours)) {
732  if (hours < 0) {
733  hours *= -1;
734  inverter = -1;
735  }
736  if (sscanf(&line1[delta], "%*d:%d", &minutes)) {
737  if (sscanf
738  (&line1[delta], "%*d:%*d:%d",
739  &seconds)) {
740  sscanf(&line1[delta], "%*d:%*d:%*d.%u",
741  &units);
742  } else {
743  hours = 0;
744  sscanf(&line1[delta], "%d:%d.%u",
745  &minutes, &seconds, &units);
746  minutes *= inverter;
747  }
748  } else {
749  hours = minutes = 0;
750  sscanf(&line1[delta], "%d.%u", &seconds,
751  &units);
752  seconds *= inverter;
753  }
754  jacoShift =
755  ((hours * 3600 + minutes * 60 +
756  seconds) * jacoTimeres +
757  units) * inverter;
758  }
759  break;
760  case 'T':
761  if (isalpha(line1[2])) {
762  delta = 8;
763  } else {
764  delta = 2;
765  }
766  sscanf(&line1[delta], "%u", &jacoTimeres);
767  break;
768  }
769  }
770  continue;
771  } else {
772  current->start =
773  (unsigned long) ((a4 + jacoShift) * 100.0 /
774  jacoTimeres);
775  current->end =
776  (unsigned long) ((b4 + jacoShift) * 100.0 /
777  jacoTimeres);
778  }
779  } else {
780  current->start =
781  (unsigned
782  long) (((a1 * 3600 + a2 * 60 + a3) * jacoTimeres + a4 +
783  jacoShift) * 100.0 / jacoTimeres);
784  current->end =
785  (unsigned
786  long) (((b1 * 3600 + b2 * 60 + b3) * jacoTimeres + b4 +
787  jacoShift) * 100.0 / jacoTimeres);
788  }
789  current->lines = 0;
790  p = line2;
791  while ((*p == ' ') || (*p == '\t')) {
792  ++p;
793  }
794  if (isalpha(*p)||*p == '[') {
795  int cont, jLength;
796 
797  if (sscanf(p, "%" LINE_LEN_QUOT "s %" LINE_LEN_QUOT "[^\n\r]", directive, line1) < 2)
798  return (subtitle_t *)ERR;
799  jLength = strlen(directive);
800  for (cont = 0; cont < jLength; ++cont) {
801  if (isalpha(*(directive + cont)))
802  *(directive + cont) = toupper(*(directive + cont));
803  }
804  if ((strstr(directive, "RDB") != nullptr)
805  || (strstr(directive, "RDC") != nullptr)
806  || (strstr(directive, "RLB") != nullptr)
807  || (strstr(directive, "RLG") != nullptr)) {
808  continue;
809  }
810  /* no alignment */
811 #if 0
812  if (strstr(directive, "JL") != nullptr) {
813  current->alignment = SUB_ALIGNMENT_HLEFT;
814  } else if (strstr(directive, "JR") != nullptr) {
815  current->alignment = SUB_ALIGNMENT_HRIGHT;
816  } else {
817  current->alignment = SUB_ALIGNMENT_HCENTER;
818  }
819 #endif
820  strcpy(line2, line1);
821  p = line2;
822  }
823  for (q = line1; (!eol(*p)) && (current->lines < SUB_MAX_TEXT); ++p) {
824  switch (*p) {
825  case '{':
826  comment++;
827  break;
828  case '}':
829  if (comment) {
830  --comment;
831  /* the next line to get rid of a blank after the comment */
832  if ((*(p + 1)) == ' ')
833  p++;
834  }
835  break;
836  case '~':
837  if (!comment) {
838  *q = ' ';
839  ++q;
840  }
841  break;
842  case ' ':
843  case '\t':
844  if ((*(p + 1) == ' ') || (*(p + 1) == '\t'))
845  break;
846  if (!comment) {
847  *q = ' ';
848  ++q;
849  }
850  break;
851  case '\\':
852  if (*(p + 1) == 'n') {
853  *q = '\0';
854  q = line1;
855  current->text[current->lines++] = strdup(line1);
856  ++p;
857  break;
858  }
859  if ((toupper(*(p + 1)) == 'C')
860  || (toupper(*(p + 1)) == 'F')) {
861  ++p,++p;
862  break;
863  }
864  if ((*(p + 1) == 'B') || (*(p + 1) == 'b') ||
865  /* actually this means "insert current date here" */
866  (*(p + 1) == 'D') ||
867  (*(p + 1) == 'I') || (*(p + 1) == 'i') ||
868  (*(p + 1) == 'N') ||
869  /* actually this means "insert current time here" */
870  (*(p + 1) == 'T') ||
871  (*(p + 1) == 'U') || (*(p + 1) == 'u')) {
872  ++p;
873  break;
874  }
875  if ((*(p + 1) == '\\') ||
876  (*(p + 1) == '~') || (*(p + 1) == '{')) {
877  ++p;
878  } else if (eol(*(p + 1))) {
879  if (!read_line_from_input(demuxstr, directive, LINE_LEN))
880  return nullptr;
881  trail_space(directive);
882  strncat(line2, directive,
883  ((LINE_LEN > 511) ? LINE_LEN-1 : 511)
884  - strlen(line2));
885  break;
886  }
887  // Checked xine-lib-1.2.8. No fix there. Seems like it
888  // should be a break.
889  break;
890  default:
891  if (!comment) {
892  *q = *p;
893  ++q;
894  }
895  }
896  }
897  *q = '\0';
898  if (current->lines < SUB_MAX_TEXT)
899  current->text[current->lines] = strdup(line1);
900  else
901  printf ("Too many lines in a subtitle\n");
902  }
903  current->lines++;
904  return current;
905 }
906 
908  char line[LINE_LEN+1];
909  int a1,a2,a3,a4;
910  char *p=nullptr;
911  int i,len;
912 
913  while (!current->text[0]) {
914  if (!read_line_from_input(demuxstr, line, LINE_LEN)) return nullptr;
915  if (line[0]!='{')
916  continue;
917  if ((len=sscanf (line, "{T %d:%d:%d:%d",&a1,&a2,&a3,&a4)) < 4)
918  continue;
919  current->start = a1*360000+a2*6000+a3*100+a4/10;
920  for (i=0; i<SUB_MAX_TEXT;) {
921  if (!read_line_from_input(demuxstr, line, LINE_LEN)) break;
922  if (line[0]=='}') break;
923  len=0;
924  for (p=line; *p!='\n' && *p!='\r' && *p; ++p,++len);
925  if (len) {
926  current->text[i]=(char *)malloc (len+1);
927  if (!current->text[i]) return (subtitle_t *)ERR;
928  strncpy (current->text[i], line, len); current->text[i][len]='\0';
929  ++i;
930  } else {
931  break;
932  }
933  }
934  current->lines=i;
935  }
936  return current;
937 }
938 
940  char line[LINE_LEN + 1];
941  char *next;
942  int h, m, s;
943  int i;
944 
945  memset (current, 0, sizeof(subtitle_t));
946 
947  do {
948  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
949  } while (sscanf (line, "[%d:%d:%d]", &h, &m, &s) != 3);
950 
951  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
952 
953  current->start = 360000 * h + 6000 * m + 100 * s;
954  current->end = -1;
955 
956  next=line;
957  i=0;
958  while ((next = sub_readtext (next, &(current->text[i])))) {
959  if (current->text[i]==ERR) return (subtitle_t *)ERR;
960  i++;
961  if (i>=SUB_MAX_TEXT) {
962  printf("Too many lines in a subtitle\n");
963  current->lines=i;
964  return current;
965  }
966  }
967  current->lines= ++i;
968 
969  return current;
970 }
971 
972 /* Code from subreader.c of MPlayer
973 ** Sylvain "Skarsnik" Colinet <scolinet@gmail.com>
974 */
975 
977  char line[LINE_LEN+1];
978  char line2[LINE_LEN+1];
979  char *p, *next;
980  int i;
981 
982  memset (current, 0, sizeof(subtitle_t));
983  do {
984  if (!read_line_from_input (demuxstr, line, LINE_LEN)) return nullptr;
985  } while ((sscanf (line,
986  "[%ld][%ld]%" LINE_LEN_QUOT "[^\r\n]",
987  &(current->start), &(current->end), line2) < 3));
988  current->start *= 10;
989  current->end *= 10;
990  p=line2;
991 
992  next=p, i=0;
993  while ((next = sub_readtext (next, &(current->text[i])))) {
994  if (current->text[i] == ERR) {return (subtitle_t *)ERR;}
995  i++;
996  if (i >= SUB_MAX_TEXT) {
997  printf("Too many lines in a subtitle\n");
998  current->lines = i;
999  return current;
1000  }
1001  }
1002  current->lines= ++i;
1003 
1004  return current;
1005 }
1006 
1007 
1008 static int sub_autodetect (demux_sputext_t *demuxstr) {
1009 
1010  char line[LINE_LEN + 1];
1011  int i, j=0;
1012  char p;
1013 
1014  while (j < 100) {
1015  j++;
1016  if (!read_line_from_input(demuxstr, line, LINE_LEN))
1017  return FORMAT_UNKNOWN;
1018 
1019  if ((sscanf (line, "{%d}{}", &i)==1) ||
1020  (sscanf (line, "{%d}{%d}", &i, &i)==2)) {
1021  demuxstr->uses_time=0;
1022  return FORMAT_MICRODVD;
1023  }
1024 
1025  if (sscanf (line, "%d:%d:%d%*[,.]%d --> %d:%d:%d%*[,.]%d", &i, &i, &i, &i, &i, &i, &i, &i)==8) {
1026  demuxstr->uses_time=1;
1027  return FORMAT_SUBRIP;
1028  }
1029 
1030  if (sscanf (line, "%d:%d:%d.%d,%d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1031  demuxstr->uses_time=1;
1032  return FORMAT_SUBVIEWER;
1033  }
1034 
1035  if (sscanf (line, "%d:%d:%d,%d,%d:%d:%d,%d", &i, &i, &i, &i, &i, &i, &i, &i)==8){
1036  demuxstr->uses_time=1;
1037  return FORMAT_SUBVIEWER;
1038  }
1039 
1040  if (strstr (line, "<SAMI>")) {
1041  demuxstr->uses_time=1;
1042  return FORMAT_SAMI;
1043  }
1044  if (sscanf (line, "%d:%d:%d:", &i, &i, &i )==3) {
1045  demuxstr->uses_time=1;
1046  return FORMAT_VPLAYER;
1047  }
1048  /*
1049  * A RealText format is a markup language, starts with <window> tag,
1050  * options (behaviour modifiers) are possible.
1051  */
1052  if ( !strcasecmp(line, "<window") ) {
1053  demuxstr->uses_time=1;
1054  return FORMAT_RT;
1055  }
1056  if ((!memcmp(line, "Dialogue: Marked", 16)) || (!memcmp(line, "Dialogue: ", 10))) {
1057  demuxstr->uses_time=1;
1058  return FORMAT_SSA;
1059  }
1060  if (sscanf (line, "%d,%d,\"%c", &i, &i, (char *) &i) == 3) {
1061  demuxstr->uses_time=0;
1062  return FORMAT_PJS;
1063  }
1064  if (sscanf (line, "FORMAT=%d", &i) == 1) {
1065  demuxstr->uses_time=0;
1066  return FORMAT_MPSUB;
1067  }
1068  if (sscanf (line, "FORMAT=TIM%c", &p)==1 && p=='E') {
1069  demuxstr->uses_time=1;
1070  return FORMAT_MPSUB;
1071  }
1072  if (strstr (line, "-->>")) {
1073  demuxstr->uses_time=0;
1074  return FORMAT_AQTITLE;
1075  }
1076  if (sscanf(line, "@%d @%d", &i, &i) == 2 ||
1077  sscanf(line, "%d:%d:%d.%d %d:%d:%d.%d", &i, &i, &i, &i, &i, &i, &i, &i) == 8) {
1078  demuxstr->uses_time = 1;
1079  return FORMAT_JACOBSUB;
1080  }
1081  if (sscanf(line, "{T %d:%d:%d:%d",&i, &i, &i, &i) == 4) {
1082  demuxstr->uses_time = 1;
1083  return FORMAT_SUBVIEWER2;
1084  }
1085  if (sscanf(line, "[%d:%d:%d]", &i, &i, &i) == 3) {
1086  demuxstr->uses_time = 1;
1087  return FORMAT_SUBRIP09;
1088  }
1089 
1090  if (sscanf (line, "[%d][%d]", &i, &i) == 2) {
1091  demuxstr->uses_time = 1;
1092  return FORMAT_MPL2;
1093  }
1094  }
1095  return FORMAT_UNKNOWN; /* too many bad lines */
1096 }
1097 
1099 
1100  int n_max;
1101  int timeout;
1102  subtitle_t *first;
1103  subtitle_t * (*func[])(demux_sputext_t *demuxstr,subtitle_t *dest)=
1104  {
1119  };
1120 
1121  /* Rewind (sub_autodetect() needs to read input from the beginning) */
1122  demuxstr->rbuffer_cur = 0;
1123  demuxstr->buflen = 0;
1124  demuxstr->emptyReads = 0;
1125 
1126  demuxstr->format=sub_autodetect (demuxstr);
1127  if (demuxstr->format==FORMAT_UNKNOWN) {
1128  return nullptr;
1129  }
1130 
1131  /*printf("Detected subtitle file format: %d\n", demuxstr->format);*/
1132 
1133  /* Rewind */
1134  demuxstr->rbuffer_cur = 0;
1135  demuxstr->buflen = 0;
1136  demuxstr->emptyReads = 0;
1137 
1138  demuxstr->num=0;n_max=32;
1139  first = (subtitle_t *) malloc(n_max*sizeof(subtitle_t));
1140  if(!first) return nullptr;
1141  timeout = MAX_TIMEOUT;
1142 
1143  if (demuxstr->uses_time) timeout *= 100;
1144  else timeout *= 10;
1145 
1146  while(true) {
1147  subtitle_t *sub;
1148 
1149  if(demuxstr->num>=n_max){
1150  n_max+=16;
1151  subtitle_t *new_first=(subtitle_t *)realloc(first,n_max*sizeof(subtitle_t));
1152  if (new_first == nullptr) {
1153  free(first);
1154  return nullptr;
1155  }
1156  first = new_first;
1157  }
1158 
1159  sub = func[demuxstr->format] (demuxstr, &first[demuxstr->num]);
1160 
1161  if (!sub) {
1162  break; /* EOF */
1163  } else {
1164  demuxstr->emptyReads = 0;
1165  }
1166 
1167  if (sub==ERR)
1168  ++demuxstr->errs;
1169  else {
1170  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1) {
1171  /* end time not defined in the subtitle */
1172  if (timeout > 0) {
1173  /* timeout */
1174  if (timeout > sub->start - first[demuxstr->num-1].start) {
1175  first[demuxstr->num-1].end = sub->start;
1176  } else
1177  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1178  } else {
1179  /* no timeout */
1180  first[demuxstr->num-1].end = sub->start;
1181  }
1182  }
1183  ++demuxstr->num; /* Error vs. Valid */
1184  }
1185  }
1186  /* timeout of last subtitle */
1187  if (demuxstr->num > 0 && first[demuxstr->num-1].end == -1)
1188  if (timeout > 0) {
1189  first[demuxstr->num-1].end = first[demuxstr->num-1].start + timeout;
1190  }
1191 
1192 #ifdef DEBUG_XINE_DEMUX_SPUTEXT
1193  {
1194  char buffer[1024];
1195 
1196  sprintf(buffer, "Read %i subtitles", demuxstr->num);
1197 
1198  if(demuxstr->errs)
1199  sprintf(buffer + strlen(buffer), ", %i bad line(s).\n", demuxstr->errs);
1200  else
1201  strcat(buffer, "\n");
1202 
1203  printf("%s", buffer);
1204  }
1205 #endif
1206 
1207  return first;
1208 }
#define MAX_TIMEOUT
#define LINE_LEN
static int eol(char p)
#define SUB_MAX_TEXT
static subtitle_t * sub_read_line_subviewer2(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_AQTITLE
long end
Ending time in msec or ending frame.
static subtitle_t * sub_read_line_microdvd(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_MPSUB
subtitle_t * sub_read_file(demux_sputext_t *demuxstr)
#define FORMAT_SSA
#define SUB_BUFSIZE
#define FORMAT_SUBVIEWER2
static subtitle_t * sub_read_line_subrip(demux_sputext_t *demuxstr, subtitle_t *current)
long long copy(QFile &dst, QFile &src, uint block_size)
Copies src file to dst file.
static guint32 * tmp
Definition: goom_core.c:35
char buf[SUB_BUFSIZE]
#define off_t
unsigned char b
Definition: ParseText.cpp:340
static subtitle_t * sub_read_line_vplayer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_sami(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_UNKNOWN
static subtitle_t * sub_read_line_jacobsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP09
static const uint16_t * d
QDateTime current(bool stripped)
Returns current Date and Time in UTC.
Definition: mythdate.cpp:10
static subtitle_t * sub_read_line_subrip09(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBVIEWER
#define FORMAT_SAMI
static char * read_line_from_input(demux_sputext_t *demuxstr, char *line, off_t len)
#define FORMAT_VPLAYER
static int sub_autodetect(demux_sputext_t *demuxstr)
static subtitle_t * sub_read_line_rt(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_PJS
PictureAttribute next(PictureAttributeSupported supported, PictureAttribute attribute)
static subtitle_t * sub_read_line_mpl2(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_ssa(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_pjs(demux_sputext_t *demuxstr, subtitle_t *current)
static char * sub_readtext(char *source, char **dest)
#define FORMAT_JACOBSUB
static guint32 * p2
Definition: goom_core.c:35
static subtitle_t * sub_read_line_subviewer(demux_sputext_t *demuxstr, subtitle_t *current)
static subtitle_t * sub_read_line_aqt(demux_sputext_t *demuxstr, subtitle_t *current)
static void trail_space(char *s)
static subtitle_t * sub_read_line_mpsub(demux_sputext_t *demuxstr, subtitle_t *current)
#define FORMAT_SUBRIP
#define ERR
long start
Starting time in msec or starting frame.
#define FORMAT_MPL2
#define FORMAT_RT
#define FORMAT_MICRODVD
char next_line[SUB_BUFSIZE]
#define LINE_LEN_QUOT