//
//  mythframe.cpp
//  MythTV
//
//  Created by Jean-Yves Avenard on 10/06/2014.
//  Copyright (c) 2014 Bubblestuff Pty Ltd. All rights reserved.
//
// derived from copy.c: Fast YV12/NV12 copy from VLC project
// portion of SSE Code Copyright (C) 2010 Laurent Aimar

/******************************************************************************
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
 *****************************************************************************/

#include "mythtimer.h"
#include "mythconfig.h"
#include "mythframe.h"
#include "mythcorecontext.h"
#include "mythlogging.h"

extern "C" {
#include "libavcodec/avcodec.h"
}

#ifndef __MAX
# define __MAX(a, b) ( ((a) > (b)) ? (a) : (b) )
#endif
#ifndef __MIN
# define __MIN(a, b) ( ((a) < (b)) ? (a) : (b) )
#endif

#if ARCH_X86

static int has_sse2  = -1;
static int has_sse3  = -1;
static int has_ssse3 = -1;
static int has_sse4  = -1;

#if defined _WIN32 && !defined __MINGW32__
// Windows
#define cpuid __cpuid

#else
inline void cpuid(int CPUInfo[4], int InfoType)
{
    __asm__ __volatile__ (
        // PIC requires ebx/rbx to be preserved
#if ARCH_X86_32
        "push %%ebx\n"
#endif
        "cpuid\n"
        "movl %%ebx, %[ebx]\n"
#if ARCH_X86_32
        "pop %%ebx\n"
#endif
        : "=a" (CPUInfo[0]),
          [ebx] "=r" (CPUInfo[1]),
          "=c" (CPUInfo[2]),
          "=d" (CPUInfo[3])
        : "a" (InfoType)
    );
}
#endif
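
// CPUID leaf 1 reports the SIMD feature bits tested below (architectural
// facts from the Intel/AMD manuals): EDX bit 26 = SSE2, ECX bit 0 = SSE3,
// ECX bit 9 = SSSE3, ECX bit 19 = SSE4.1. sse2_check() runs CPUID once and
// caches all four answers in the has_sse* flags; the other *_check()
// helpers simply piggyback on that cached result.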

static inline bool sse2_check()
{
    if (has_sse2 != -1)
    {
        return has_sse2;
    }

    int info[4];
    cpuid(info, 0);
    int nIds = info[0];

    // Detect Features
    if (nIds >= 0x00000001)
    {
        cpuid(info, 0x00000001);
        has_sse2  = (info[3] & (1 << 26)) != 0;
        has_sse3  = (info[2] & (1 << 0))  != 0;
        has_ssse3 = (info[2] & (1 << 9))  != 0;
        has_sse4  = (info[2] & (1 << 19)) != 0;
    }
    else
    {
        has_sse2  = 0;
        has_sse3  = 0;
        has_ssse3 = 0;
        has_sse4  = 0;
    }
    return has_sse2;
}

static inline bool sse3_check()
{
    if (has_sse3 != -1)
    {
        return has_sse3;
    }

    sse2_check();

    return has_sse3;
}

static inline bool ssse3_check()
{
    if (has_ssse3 != -1)
    {
        return has_ssse3;
    }

    sse2_check();

    return has_ssse3;
}

static inline bool sse4_check()
{
    if (has_sse4 != -1)
    {
        return has_sse4;
    }

    sse2_check();

    return has_sse4;
}

static inline void SSE_splitplanes(uint8_t* dstu, int dstu_pitch,
                                   uint8_t* dstv, int dstv_pitch,
                                   const uint8_t* src, int src_pitch,
                                   int width, int height)
{
    const uint8_t shuffle[] = { 0, 2, 4, 6, 8, 10, 12, 14,
                                1, 3, 5, 7, 9, 11, 13, 15 };
    const uint8_t mask[] = { 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
                             0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00 };
    const bool sse3  = sse3_check();
    const bool ssse3 = ssse3_check();

    asm volatile ("mfence");

#define LOAD64A \
    "movdqa  0(%[src]), %%xmm0\n" \
    "movdqa 16(%[src]), %%xmm1\n" \
    "movdqa 32(%[src]), %%xmm2\n" \
    "movdqa 48(%[src]), %%xmm3\n"

#define LOAD64U \
    "movdqu  0(%[src]), %%xmm0\n" \
    "movdqu 16(%[src]), %%xmm1\n" \
    "movdqu 32(%[src]), %%xmm2\n" \
    "movdqu 48(%[src]), %%xmm3\n"

#define STORE2X32 \
    "movq   %%xmm0,  0(%[dst1])\n" \
    "movq   %%xmm1,  8(%[dst1])\n" \
    "movhpd %%xmm0,  0(%[dst2])\n" \
    "movhpd %%xmm1,  8(%[dst2])\n" \
    "movq   %%xmm2, 16(%[dst1])\n" \
    "movq   %%xmm3, 24(%[dst1])\n" \
    "movhpd %%xmm2, 16(%[dst2])\n" \
    "movhpd %%xmm3, 24(%[dst2])\n"
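
// Two strategies for splitting the interleaved UV bytes, 32 output pixels
// per iteration: with SSSE3, one pshufb per register gathers the even (U)
// bytes into the low quadword and the odd (V) bytes into the high quadword,
// and STORE2X32 then writes each half to its own plane. With plain SSE2,
// the even bytes are isolated with pand against the 0x00ff word mask, the
// odd bytes are shifted down with psrlw $8, and packuswb re-packs both
// sets of words back into bytes.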

    for (int y = 0; y < height; y++)
    {
        int x = 0;

        if (((uintptr_t)src & 0xf) == 0)
        {
            if (sse3 && ssse3)
            {
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[shuffle]), %%xmm7\n"
                        LOAD64A
                        "pshufb %%xmm7, %%xmm0\n"
                        "pshufb %%xmm7, %%xmm1\n"
                        "pshufb %%xmm7, %%xmm2\n"
                        "pshufb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]),
                            [src]"r"(&src[2*x]), [shuffle]"r"(shuffle)
                        : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
                }
            }
            else
            {
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[mask]), %%xmm7\n"
                        LOAD64A
                        "movdqa %%xmm0, %%xmm4\n"
                        "movdqa %%xmm1, %%xmm5\n"
                        "movdqa %%xmm2, %%xmm6\n"
                        "psrlw $8, %%xmm0\n"
                        "psrlw $8, %%xmm1\n"
                        "pand %%xmm7, %%xmm4\n"
                        "pand %%xmm7, %%xmm5\n"
                        "pand %%xmm7, %%xmm6\n"
                        "packuswb %%xmm4, %%xmm0\n"
                        "packuswb %%xmm5, %%xmm1\n"
                        "pand %%xmm3, %%xmm7\n"
                        "psrlw $8, %%xmm2\n"
                        "psrlw $8, %%xmm3\n"
                        "packuswb %%xmm6, %%xmm2\n"
                        "packuswb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]),
                            [src]"r"(&src[2*x]), [mask]"r"(mask)
                        : "memory", "xmm0", "xmm1", "xmm2", "xmm3",
                          "xmm4", "xmm5", "xmm6", "xmm7");
                }
            }
        }
        else
        {
            if (sse3 && ssse3)
            {
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[shuffle]), %%xmm7\n"
                        LOAD64U
                        "pshufb %%xmm7, %%xmm0\n"
                        "pshufb %%xmm7, %%xmm1\n"
                        "pshufb %%xmm7, %%xmm2\n"
                        "pshufb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst1]"r"(&dstu[x]), [dst2]"r"(&dstv[x]),
                            [src]"r"(&src[2*x]), [shuffle]"r"(shuffle)
                        : "memory", "xmm0", "xmm1", "xmm2", "xmm3", "xmm7");
                }
            }
            else
            {
                for (; x < (width & ~31); x += 32)
                {
                    asm volatile (
                        "movdqu (%[mask]), %%xmm7\n"
                        LOAD64U
                        "movdqu %%xmm0, %%xmm4\n"
                        "movdqu %%xmm1, %%xmm5\n"
                        "movdqu %%xmm2, %%xmm6\n"
                        "psrlw $8, %%xmm0\n"
                        "psrlw $8, %%xmm1\n"
                        "pand %%xmm7, %%xmm4\n"
                        "pand %%xmm7, %%xmm5\n"
                        "pand %%xmm7, %%xmm6\n"
                        "packuswb %%xmm4, %%xmm0\n"
                        "packuswb %%xmm5, %%xmm1\n"
                        "pand %%xmm3, %%xmm7\n"
                        "psrlw $8, %%xmm2\n"
                        "psrlw $8, %%xmm3\n"
                        "packuswb %%xmm6, %%xmm2\n"
                        "packuswb %%xmm7, %%xmm3\n"
                        STORE2X32
                        : : [dst2]"r"(&dstu[x]), [dst1]"r"(&dstv[x]),
                            [src]"r"(&src[2*x]), [mask]"r"(mask)
                        : "memory", "xmm0", "xmm1", "xmm2", "xmm3",
                          "xmm4", "xmm5", "xmm6", "xmm7");
                }
            }
        }

        for (; x < width; x++)
        {
            dstu[x] = src[2*x+0];
            dstv[x] = src[2*x+1];
        }
        src  += src_pitch;
        dstu += dstu_pitch;
        dstv += dstv_pitch;
    }
    asm volatile ("mfence");

#undef STORE2X32
#undef LOAD64U
#undef LOAD64A
}
#endif /* ARCH_X86 */

static inline void copyplane(uint8_t* dst, int dst_pitch,
                             const uint8_t* src, int src_pitch,
                             int width, int height)
{
    for (int y = 0; y < height; y++)
    {
        memcpy(dst, src, width);
        src += src_pitch;
        dst += dst_pitch;
    }
}

static void splitplanes(uint8_t* dstu, int dstu_pitch,
                        uint8_t* dstv, int dstv_pitch,
                        const uint8_t* src, int src_pitch,
                        int width, int height)
{
    for (int y = 0; y < height; y++)
    {
        for (int x = 0; x < width; x++)
        {
            dstu[x] = src[2*x+0];
            dstv[x] = src[2*x+1];
        }
        src  += src_pitch;
        dstu += dstu_pitch;
        dstv += dstv_pitch;
    }
}
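
// NV12 stores chroma as a single half-resolution plane of interleaved
// U/V byte pairs, while YV12 keeps U and V in separate planes, so an
// NV12 -> YV12 copy is a plain copy of the luma plane plus one of the
// split loops above for the chroma. framecopy() only accepts same-format
// copies and this one NV12 -> YV12 conversion.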

void framecopy(VideoFrame* dst, const VideoFrame* src, bool useSSE)
{
    VideoFrameType codec = dst->codec;
    if (!(dst->codec == src->codec ||
          (src->codec == FMT_NV12 && dst->codec == FMT_YV12)))
        return;

    dst->interlaced_frame = src->interlaced_frame;
    dst->repeat_pict      = src->repeat_pict;
    dst->top_field_first  = src->top_field_first;

    if (FMT_YV12 == codec)
    {
        int width   = src->width;
        int height  = src->height;
        int dwidth  = dst->width;
        int dheight = dst->height;

        if (src->codec == FMT_NV12 &&
            height == dheight && width == dwidth)
        {
            copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                      src->buf + src->offsets[0], src->pitches[0],
                      width, height);
#if ARCH_X86
            if (useSSE && sse2_check())
            {
                SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                                dst->buf + dst->offsets[2], dst->pitches[2],
                                src->buf + src->offsets[1], src->pitches[1],
                                (width+1) / 2, (height+1) / 2);
                asm volatile ("emms");
                return;
            }
#else
            Q_UNUSED(useSSE);
#endif
            splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                        dst->buf + dst->offsets[2], dst->pitches[2],
                        src->buf + src->offsets[1], src->pitches[1],
                        (width+1) / 2, (height+1) / 2);
            return;
        }

        if (dst->pitches[0] != src->pitches[0] ||
            dst->pitches[1] != src->pitches[1] ||
            dst->pitches[2] != src->pitches[2])
        {
            // The two frames have different strides:
            // copy row by row so the padding garbage is dropped
            height = (dst->height < src->height) ? dst->height : src->height;
            width  = (dst->width  < src->width)  ? dst->width  : src->width;
            copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                      src->buf + src->offsets[0], src->pitches[0],
                      width, height);
            copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
                      src->buf + src->offsets[1], src->pitches[1],
                      (width+1) / 2, (height+1) / 2);
            copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
                      src->buf + src->offsets[2], src->pitches[2],
                      (width+1) / 2, (height+1) / 2);
            return;
        }

        int height0 = (dst->height < src->height) ? dst->height : src->height;
        int height1 = (height0+1) >> 1;
        int height2 = (height0+1) >> 1;
        int pitch0  = ((dst->pitches[0] < src->pitches[0]) ?
                       dst->pitches[0] : src->pitches[0]);
        int pitch1  = ((dst->pitches[1] < src->pitches[1]) ?
                       dst->pitches[1] : src->pitches[1]);
        int pitch2  = ((dst->pitches[2] < src->pitches[2]) ?
                       dst->pitches[2] : src->pitches[2]);

        memcpy(dst->buf + dst->offsets[0],
               src->buf + src->offsets[0], pitch0 * height0);
        memcpy(dst->buf + dst->offsets[1],
               src->buf + src->offsets[1], pitch1 * height1);
        memcpy(dst->buf + dst->offsets[2],
               src->buf + src->offsets[2], pitch2 * height2);
    }
}

/***************************************
 * USWC Fast Copy
 *
 * https://software.intel.com/en-us/articles/copying-accelerated-video-decode-frame-buffers:
 ***************************************/
#if ARCH_X86
#define COPY16(dstp, srcp, load, store) \
    asm volatile (                      \
        load "  0(%[src]), %%xmm1\n"    \
        store " %%xmm1, 0(%[dst])\n"    \
        : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1")

#define COPY64(dstp, srcp, load, store) \
    asm volatile (                      \
        load "  0(%[src]), %%xmm1\n"    \
        load " 16(%[src]), %%xmm2\n"    \
        load " 32(%[src]), %%xmm3\n"    \
        load " 48(%[src]), %%xmm4\n"    \
        store " %%xmm1,  0(%[dst])\n"   \
        store " %%xmm2, 16(%[dst])\n"   \
        store " %%xmm3, 32(%[dst])\n"   \
        store " %%xmm4, 48(%[dst])\n"   \
        : : [dst]"r"(dstp), [src]"r"(srcp) : "memory", "xmm1", "xmm2", "xmm3", "xmm4")
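
// COPY16/COPY64 move 16 or 64 bytes per invocation through xmm registers,
// with the load and store mnemonics chosen by the caller. On SSE4.1 parts,
// "movntdqa" loads stream whole cache lines out of USWC memory through the
// write-combining buffers instead of issuing slow uncached reads, and
// "movntdq" stores write back out without polluting the cache.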

/*
 * Optimized copy from "Uncacheable Speculative Write Combining" memory
 * as used by some hardware accelerated decoders (VAAPI and DXVA2).
 */
static void CopyFromUswc(uint8_t *dst, int dst_pitch,
                         const uint8_t *src, int src_pitch,
                         int width, int height)
{
    const bool sse4 = sse4_check();

    asm volatile ("mfence");

    for (int y = 0; y < height; y++)
    {
        const int unaligned = (-(uintptr_t)src) & 0x0f;
        int x = unaligned;

        if (sse4)
        {
            if (!unaligned)
            {
                for (; x+63 < width; x += 64)
                {
                    COPY64(&dst[x], &src[x], "movntdqa", "movdqa");
                }
            }
            else
            {
                COPY16(dst, src, "movdqu", "movdqa");
                for (; x+63 < width; x += 64)
                {
                    COPY64(&dst[x], &src[x], "movntdqa", "movdqu");
                }
            }
        }
        else
        {
            if (!unaligned)
            {
                for (; x+63 < width; x += 64)
                {
                    COPY64(&dst[x], &src[x], "movdqa", "movdqa");
                }
            }
            else
            {
                COPY16(dst, src, "movdqu", "movdqa");
                for (; x+63 < width; x += 64)
                {
                    COPY64(&dst[x], &src[x], "movdqa", "movdqu");
                }
            }
        }

        for (; x < width; x++)
        {
            dst[x] = src[x];
        }

        src += src_pitch;
        dst += dst_pitch;
    }
    asm volatile ("mfence");
}
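
// When src is not 16-byte aligned, the leading COPY16 copies the first 16
// bytes with an unaligned load, and the main loop then starts at
// x = unaligned (the first aligned offset), re-copying the small overlap;
// this keeps every movntdqa/movdqa load aligned, as those instructions
// require. The mfence pair orders the non-temporal accesses against
// surrounding memory operations.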

static void Copy2d(uint8_t *dst, int dst_pitch,
                   const uint8_t *src, int src_pitch,
                   int width, int height)
{
    for (int y = 0; y < height; y++)
    {
        int x = 0;

        bool unaligned = ((intptr_t)dst & 0x0f) != 0;
        if (!unaligned)
        {
            for (; x+63 < width; x += 64)
            {
                COPY64(&dst[x], &src[x], "movdqa", "movntdq");
            }
        }
        else
        {
            for (; x+63 < width; x += 64)
            {
                COPY64(&dst[x], &src[x], "movdqa", "movdqu");
            }
        }

        for (; x < width; x++)
        {
            dst[x] = src[x];
        }

        src += src_pitch;
        dst += dst_pitch;
    }
}

static void SSE_copyplane(uint8_t *dst, int dst_pitch,
                          const uint8_t *src, int src_pitch,
                          uint8_t *cache, int cache_size,
                          int width, int height)
{
    const int w16   = (width+15) & ~15;
    const int hstep = cache_size / w16;

    for (int y = 0; y < height; y += hstep)
    {
        const int hblock = __MIN(hstep, height - y);

        /* Copy a bunch of lines into our cache */
        CopyFromUswc(cache, w16,
                     src, src_pitch,
                     width, hblock);

        /* Copy from our cache to the destination */
        Copy2d(dst, dst_pitch,
               cache, w16,
               width, hblock);

        src += src_pitch * hblock;
        dst += dst_pitch * hblock;
    }
}
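
// The copy from USWC memory runs in horizontal bands: each band of up to
// hstep rows is streamed into the small cacheable bounce buffer with
// CopyFromUswc(), then written out from that buffer with Copy2d() (or the
// UV split below). Rounding the row length up to a 16-byte multiple (w16)
// keeps every row of the cache SSE-aligned.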

static void SSE_splitplanes(uint8_t *dstu, int dstu_pitch,
                            uint8_t *dstv, int dstv_pitch,
                            const uint8_t *src, int src_pitch,
                            uint8_t *cache, int cache_size,
                            int width, int height)
{
    const int w16   = (2*width+15) & ~15;
    const int hstep = cache_size / w16;

    for (int y = 0; y < height; y += hstep)
    {
        const int hblock = __MIN(hstep, height - y);

        /* Copy a bunch of lines into our cache */
        CopyFromUswc(cache, w16, src, src_pitch,
                     2*width, hblock);

        /* Split our cache into the two destination planes */
        SSE_splitplanes(dstu, dstu_pitch, dstv, dstv_pitch,
                        cache, w16, width, hblock);

        src  += src_pitch * hblock;
        dstu += dstu_pitch * hblock;
        dstv += dstv_pitch * hblock;
    }
}
#endif // ARCH_X86

MythUSWCCopy::MythUSWCCopy(int width, bool nocache)
  : m_cache(nullptr), m_size(0), m_uswc(-1)
{
#if ARCH_X86
    if (!nocache)
    {
        allocateCache(width);
    }
#else
    Q_UNUSED(width);
    Q_UNUSED(nocache);
#endif
}

MythUSWCCopy::~MythUSWCCopy()
{
    m_size = 0;
#if ARCH_X86
    av_freep(&m_cache);
#endif
}
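
// m_uswc is tri-state: -1 means undetermined (copy() benchmarks the USWC
// path against the conventional one on its next call and keeps the winner),
// 0 means the USWC path is in use, and 1 means the conventional path is.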

void MythUSWCCopy::copy(VideoFrame *dst, const VideoFrame *src)
{
    dst->interlaced_frame = src->interlaced_frame;
    dst->repeat_pict      = src->repeat_pict;
    dst->top_field_first  = src->top_field_first;

    int width  = src->width;
    int height = src->height;

    if (src->codec == FMT_NV12)
    {
#if ARCH_X86
        if (sse2_check())
        {
            MythTimer *timer = nullptr;

            if (m_uswc <= 0 && m_cache)
            {
                if (m_uswc < 0)
                {
                    timer = new MythTimer(MythTimer::kStartRunning);
                }
                SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                              src->buf + src->offsets[0], src->pitches[0],
                              m_cache, m_size,
                              width, height);
                SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                                dst->buf + dst->offsets[2], dst->pitches[2],
                                src->buf + src->offsets[1], src->pitches[1],
                                m_cache, m_size,
                                (width+1) / 2, (height+1) / 2);
                if (m_uswc < 0)
                {
                    // Measure how long the standard method takes;
                    // if it's shorter, use it in the future
                    int64_t duration = timer->nsecsElapsed();
                    timer->restart();
                    copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                              src->buf + src->offsets[0], src->pitches[0],
                              width, height);
                    SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                                    dst->buf + dst->offsets[2], dst->pitches[2],
                                    src->buf + src->offsets[1], src->pitches[1],
                                    (width+1) / 2, (height+1) / 2);
                    m_uswc = timer->nsecsElapsed() < duration;
                    if (m_uswc == 0)
                    {
                        LOG(VB_GENERAL, LOG_DEBUG,
                            "Enabling USWC code acceleration");
                    }
                    delete timer;
                }
            }
            else
            {
                copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                          src->buf + src->offsets[0], src->pitches[0],
                          width, height);
                SSE_splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                                dst->buf + dst->offsets[2], dst->pitches[2],
                                src->buf + src->offsets[1], src->pitches[1],
                                (width+1) / 2, (height+1) / 2);
            }
            asm volatile ("emms");
            return;
        }
#endif
        copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                  src->buf + src->offsets[0], src->pitches[0],
                  width, height);
        splitplanes(dst->buf + dst->offsets[1], dst->pitches[1],
                    dst->buf + dst->offsets[2], dst->pitches[2],
                    src->buf + src->offsets[1], src->pitches[1],
                    (width+1) / 2, (height+1) / 2);
        return;
    }

#if ARCH_X86
    if (sse2_check() && m_uswc <= 0 && m_cache)
    {
        MythTimer *timer = nullptr;

        if (m_uswc < 0)
        {
            timer = new MythTimer(MythTimer::kStartRunning);
        }
        SSE_copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                      src->buf + src->offsets[0], src->pitches[0],
                      m_cache, m_size,
                      width, height);
        SSE_copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
                      src->buf + src->offsets[1], src->pitches[1],
                      m_cache, m_size,
                      (width+1) / 2, (height+1) / 2);
        SSE_copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
                      src->buf + src->offsets[2], src->pitches[2],
                      m_cache, m_size,
                      (width+1) / 2, (height+1) / 2);
        if (m_uswc < 0)
        {
            // Measure how long the standard method takes;
            // if it's shorter, use it in the future
            int64_t duration = timer->nsecsElapsed();
            timer->restart();
            copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
                      src->buf + src->offsets[0], src->pitches[0],
                      width, height);
            copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
                      src->buf + src->offsets[1], src->pitches[1],
                      (width+1) / 2, (height+1) / 2);
            copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
                      src->buf + src->offsets[2], src->pitches[2],
                      (width+1) / 2, (height+1) / 2);
            m_uswc = timer->nsecsElapsed() < duration;
            if (m_uswc == 0)
            {
                LOG(VB_GENERAL, LOG_DEBUG, "Enabling USWC code acceleration");
            }
            delete timer;
        }
        asm volatile ("emms");
        return;
    }
#endif
    copyplane(dst->buf + dst->offsets[0], dst->pitches[0],
              src->buf + src->offsets[0], src->pitches[0],
              width, height);
    copyplane(dst->buf + dst->offsets[1], dst->pitches[1],
              src->buf + src->offsets[1], src->pitches[1],
              (width+1) / 2, (height+1) / 2);
    copyplane(dst->buf + dst->offsets[2], dst->pitches[2],
              src->buf + src->offsets[2], src->pitches[2],
              (width+1) / 2, (height+1) / 2);
}

/**
 * reset USWC detection.
 */
void MythUSWCCopy::resetUSWCDetection(void)
{
    m_uswc = -1;
}

void MythUSWCCopy::allocateCache(int width)
{
    av_freep(&m_cache);
    m_size  = __MAX((width + 63) & ~63, 4096);
    m_cache = (uint8_t*)av_malloc(m_size);
}
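
// The bounce buffer holds at least one full row rounded up to a 64-byte
// cache line, with a 4 KiB floor so narrow frames are still copied in
// reasonably sized bands; av_malloc() returns SIMD-aligned memory, as the
// SSE loops above require.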

/**
 * disable USWC detection: the USWC copy path is forced on (true) or
 * off (false) instead of being benchmarked on the next copy().
 */
void MythUSWCCopy::setUSWC(bool uswc)
{
    m_uswc = !uswc;
}

/**
 * Will reset the cache for a frame with "width" and reset USWC detection.
 */
void MythUSWCCopy::reset(int width)
{
#if ARCH_X86
    allocateCache(width);
#else
    Q_UNUSED(width);
#endif
    resetUSWCDetection();
}
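
// Typical usage, as a sketch (frame allocation and setup elided; the
// variable names here are illustrative only):
//
//     MythUSWCCopy copier(frame_width);
//     ...
//     copier.copy(&dst, &src);   // first call benchmarks USWC vs. the
//                                // standard copy, later calls reuse the
//                                // faster method
//     copier.reset(new_width);   // after a stream/geometry change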