A bundled STM32F10x Std Periph and CMSIS library
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

287 lines
10 KiB

  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 12. March 2014
  5. * $Revision: V1.4.4
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_biquad_cascade_df1_fast_q15.c
  9. *
  10. * Description: Fast processing function for the
  11. * Q15 Biquad cascade filter.
  12. *
  13. * Target Processor: Cortex-M4/Cortex-M3
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * - Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in
  22. * the documentation and/or other materials provided with the
  23. * distribution.
  24. * - Neither the name of ARM LIMITED nor the names of its contributors
  25. * may be used to endorse or promote products derived from this
  26. * software without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39. * POSSIBILITY OF SUCH DAMAGE.
  40. * -------------------------------------------------------------------- */
  41. #include "arm_math.h"
  42. /**
  43. * @ingroup groupFilters
  44. */
  45. /**
  46. * @addtogroup BiquadCascadeDF1
  47. * @{
  48. */
  49. /**
  50. * @details
  51. * @param[in] *S points to an instance of the Q15 Biquad cascade structure.
  52. * @param[in] *pSrc points to the block of input data.
  53. * @param[out] *pDst points to the block of output data.
  54. * @param[in] blockSize number of samples to process per call.
  55. * @return none.
  56. *
  57. * <b>Scaling and Overflow Behavior:</b>
  58. * \par
  59. * This fast version uses a 32-bit accumulator with 2.30 format.
  60. * The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
  61. * Thus, if the accumulator result overflows it wraps around and distorts the result.
  62. * In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).
  63. * The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits.
  64. *
  65. * \par
  66. * Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure.
  67. * Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.
  68. *
  69. */
  70. void arm_biquad_cascade_df1_fast_q15(
  71. const arm_biquad_casd_df1_inst_q15 * S,
  72. q15_t * pSrc,
  73. q15_t * pDst,
  74. uint32_t blockSize)
  75. {
  76. q15_t *pIn = pSrc; /* Source pointer */
  77. q15_t *pOut = pDst; /* Destination pointer */
  78. q31_t in; /* Temporary variable to hold input value */
  79. q31_t out; /* Temporary variable to hold output value */
  80. q31_t b0; /* Temporary variable to hold bo value */
  81. q31_t b1, a1; /* Filter coefficients */
  82. q31_t state_in, state_out; /* Filter state variables */
  83. q31_t acc; /* Accumulator */
  84. int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */
  85. q15_t *pState = S->pState; /* State pointer */
  86. q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  87. uint32_t sample, stage = S->numStages; /* Stage loop counter */
  88. do
  89. {
  90. /* Read the b0 and 0 coefficients using SIMD */
  91. b0 = *__SIMD32(pCoeffs)++;
  92. /* Read the b1 and b2 coefficients using SIMD */
  93. b1 = *__SIMD32(pCoeffs)++;
  94. /* Read the a1 and a2 coefficients using SIMD */
  95. a1 = *__SIMD32(pCoeffs)++;
  96. /* Read the input state values from the state buffer: x[n-1], x[n-2] */
  97. state_in = *__SIMD32(pState)++;
  98. /* Read the output state values from the state buffer: y[n-1], y[n-2] */
  99. state_out = *__SIMD32(pState)--;
  100. /* Apply loop unrolling and compute 2 output values simultaneously. */
  101. /* The variable acc hold output values that are being computed:
  102. *
  103. * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  104. * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  105. */
  106. sample = blockSize >> 1u;
  107. /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
  108. ** a second loop below computes the remaining 1 sample. */
  109. while(sample > 0u)
  110. {
  111. /* Read the input */
  112. in = *__SIMD32(pIn)++;
  113. /* out = b0 * x[n] + 0 * 0 */
  114. out = __SMUAD(b0, in);
  115. /* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */
  116. acc = __SMLAD(b1, state_in, out);
  117. /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
  118. acc = __SMLAD(a1, state_out, acc);
  119. /* The result is converted from 3.29 to 1.31 and then saturation is applied */
  120. out = __SSAT((acc >> shift), 16);
  121. /* Every time after the output is computed state should be updated. */
  122. /* The states should be updated as: */
  123. /* Xn2 = Xn1 */
  124. /* Xn1 = Xn */
  125. /* Yn2 = Yn1 */
  126. /* Yn1 = acc */
  127. /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
  128. /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
  129. #ifndef ARM_MATH_BIG_ENDIAN
  130. state_in = __PKHBT(in, state_in, 16);
  131. state_out = __PKHBT(out, state_out, 16);
  132. #else
  133. state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
  134. state_out = __PKHBT(state_out >> 16, (out), 16);
  135. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  136. /* out = b0 * x[n] + 0 * 0 */
  137. out = __SMUADX(b0, in);
  138. /* acc0 = b1 * x[n-1] , acc0 += b2 * x[n-2] + out */
  139. acc = __SMLAD(b1, state_in, out);
  140. /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
  141. acc = __SMLAD(a1, state_out, acc);
  142. /* The result is converted from 3.29 to 1.31 and then saturation is applied */
  143. out = __SSAT((acc >> shift), 16);
  144. /* Store the output in the destination buffer. */
  145. #ifndef ARM_MATH_BIG_ENDIAN
  146. *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
  147. #else
  148. *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
  149. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  150. /* Every time after the output is computed state should be updated. */
  151. /* The states should be updated as: */
  152. /* Xn2 = Xn1 */
  153. /* Xn1 = Xn */
  154. /* Yn2 = Yn1 */
  155. /* Yn1 = acc */
  156. /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
  157. /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
  158. #ifndef ARM_MATH_BIG_ENDIAN
  159. state_in = __PKHBT(in >> 16, state_in, 16);
  160. state_out = __PKHBT(out, state_out, 16);
  161. #else
  162. state_in = __PKHBT(state_in >> 16, in, 16);
  163. state_out = __PKHBT(state_out >> 16, out, 16);
  164. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  165. /* Decrement the loop counter */
  166. sample--;
  167. }
  168. /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
  169. ** No loop unrolling is used. */
  170. if((blockSize & 0x1u) != 0u)
  171. {
  172. /* Read the input */
  173. in = *pIn++;
  174. /* out = b0 * x[n] + 0 * 0 */
  175. #ifndef ARM_MATH_BIG_ENDIAN
  176. out = __SMUAD(b0, in);
  177. #else
  178. out = __SMUADX(b0, in);
  179. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  180. /* acc = b1 * x[n-1], acc += b2 * x[n-2] + out */
  181. acc = __SMLAD(b1, state_in, out);
  182. /* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
  183. acc = __SMLAD(a1, state_out, acc);
  184. /* The result is converted from 3.29 to 1.31 and then saturation is applied */
  185. out = __SSAT((acc >> shift), 16);
  186. /* Store the output in the destination buffer. */
  187. *pOut++ = (q15_t) out;
  188. /* Every time after the output is computed state should be updated. */
  189. /* The states should be updated as: */
  190. /* Xn2 = Xn1 */
  191. /* Xn1 = Xn */
  192. /* Yn2 = Yn1 */
  193. /* Yn1 = acc */
  194. /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
  195. /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
  196. #ifndef ARM_MATH_BIG_ENDIAN
  197. state_in = __PKHBT(in, state_in, 16);
  198. state_out = __PKHBT(out, state_out, 16);
  199. #else
  200. state_in = __PKHBT(state_in >> 16, in, 16);
  201. state_out = __PKHBT(state_out >> 16, out, 16);
  202. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  203. }
  204. /* The first stage goes from the input buffer to the output buffer. */
  205. /* Subsequent (numStages - 1) occur in-place in the output buffer */
  206. pIn = pDst;
  207. /* Reset the output pointer */
  208. pOut = pDst;
  209. /* Store the updated state variables back into the state array */
  210. *__SIMD32(pState)++ = state_in;
  211. *__SIMD32(pState)++ = state_out;
  212. /* Decrement the loop counter */
  213. stage--;
  214. } while(stage > 0u);
  215. }
  216. /**
  217. * @} end of BiquadCascadeDF1 group
  218. */