A bundled STM32F10x Std Periph and CMSIS library
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

412 lines
14 KiB

  1. /* ----------------------------------------------------------------------
  2. * Copyright (C) 2010-2014 ARM Limited. All rights reserved.
  3. *
  4. * $Date: 12. March 2014
  5. * $Revision: V1.4.4
  6. *
  7. * Project: CMSIS DSP Library
  8. * Title: arm_biquad_cascade_df1_q15.c
  9. *
  10. * Description: Processing function for the
  11. * Q15 Biquad cascade DirectFormI(DF1) filter.
  12. *
  13. * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. * - Redistributions of source code must retain the above copyright
  19. * notice, this list of conditions and the following disclaimer.
  20. * - Redistributions in binary form must reproduce the above copyright
  21. * notice, this list of conditions and the following disclaimer in
  22. * the documentation and/or other materials provided with the
  23. * distribution.
  24. * - Neither the name of ARM LIMITED nor the names of its contributors
  25. * may be used to endorse or promote products derived from this
  26. * software without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  29. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  30. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  31. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  32. * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  33. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  34. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  35. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  36. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  37. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  38. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  39. * POSSIBILITY OF SUCH DAMAGE.
  40. * -------------------------------------------------------------------- */
  41. #include "arm_math.h"
  42. /**
  43. * @ingroup groupFilters
  44. */
  45. /**
  46. * @addtogroup BiquadCascadeDF1
  47. * @{
  48. */
  49. /**
  50. * @brief Processing function for the Q15 Biquad cascade filter.
  51. * @param[in] *S points to an instance of the Q15 Biquad cascade structure.
  52. * @param[in] *pSrc points to the block of input data.
  53. * @param[out] *pDst points to the location where the output result is written.
  54. * @param[in] blockSize number of samples to process per call.
  55. * @return none.
  56. *
  57. *
  58. * <b>Scaling and Overflow Behavior:</b>
  59. * \par
  60. * The function is implemented using a 64-bit internal accumulator.
  61. * Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
  62. * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
  63. * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
  64. * The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits.
  65. * Finally, the result is saturated to 1.15 format.
  66. *
  67. * \par
  68. * Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
  69. */
  70. void arm_biquad_cascade_df1_q15(
  71. const arm_biquad_casd_df1_inst_q15 * S,
  72. q15_t * pSrc,
  73. q15_t * pDst,
  74. uint32_t blockSize)
  75. {
  76. #ifndef ARM_MATH_CM0_FAMILY
  77. /* Run the below code for Cortex-M4 and Cortex-M3 */
  78. q15_t *pIn = pSrc; /* Source pointer */
  79. q15_t *pOut = pDst; /* Destination pointer */
  80. q31_t in; /* Temporary variable to hold input value */
  81. q31_t out; /* Temporary variable to hold output value */
  82. q31_t b0; /* Temporary variable to hold bo value */
  83. q31_t b1, a1; /* Filter coefficients */
  84. q31_t state_in, state_out; /* Filter state variables */
  85. q31_t acc_l, acc_h;
  86. q63_t acc; /* Accumulator */
  87. int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
  88. q15_t *pState = S->pState; /* State pointer */
  89. q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  90. uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
  91. int32_t uShift = (32 - lShift);
  92. do
  93. {
  94. /* Read the b0 and 0 coefficients using SIMD */
  95. b0 = *__SIMD32(pCoeffs)++;
  96. /* Read the b1 and b2 coefficients using SIMD */
  97. b1 = *__SIMD32(pCoeffs)++;
  98. /* Read the a1 and a2 coefficients using SIMD */
  99. a1 = *__SIMD32(pCoeffs)++;
  100. /* Read the input state values from the state buffer: x[n-1], x[n-2] */
  101. state_in = *__SIMD32(pState)++;
  102. /* Read the output state values from the state buffer: y[n-1], y[n-2] */
  103. state_out = *__SIMD32(pState)--;
  104. /* Apply loop unrolling and compute 2 output values simultaneously. */
  105. /* The variable acc hold output values that are being computed:
  106. *
  107. * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  108. * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  109. */
  110. sample = blockSize >> 1u;
  111. /* First part of the processing with loop unrolling. Compute 2 outputs at a time.
  112. ** a second loop below computes the remaining 1 sample. */
  113. while(sample > 0u)
  114. {
  115. /* Read the input */
  116. in = *__SIMD32(pIn)++;
  117. /* out = b0 * x[n] + 0 * 0 */
  118. out = __SMUAD(b0, in);
  119. /* acc += b1 * x[n-1] + b2 * x[n-2] + out */
  120. acc = __SMLALD(b1, state_in, out);
  121. /* acc += a1 * y[n-1] + a2 * y[n-2] */
  122. acc = __SMLALD(a1, state_out, acc);
  123. /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
  124. /* Calc lower part of acc */
  125. acc_l = acc & 0xffffffff;
  126. /* Calc upper part of acc */
  127. acc_h = (acc >> 32) & 0xffffffff;
  128. /* Apply shift for lower part of acc and upper part of acc */
  129. out = (uint32_t) acc_l >> lShift | acc_h << uShift;
  130. out = __SSAT(out, 16);
  131. /* Every time after the output is computed state should be updated. */
  132. /* The states should be updated as: */
  133. /* Xn2 = Xn1 */
  134. /* Xn1 = Xn */
  135. /* Yn2 = Yn1 */
  136. /* Yn1 = acc */
  137. /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
  138. /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
  139. #ifndef ARM_MATH_BIG_ENDIAN
  140. state_in = __PKHBT(in, state_in, 16);
  141. state_out = __PKHBT(out, state_out, 16);
  142. #else
  143. state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
  144. state_out = __PKHBT(state_out >> 16, (out), 16);
  145. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  146. /* out = b0 * x[n] + 0 * 0 */
  147. out = __SMUADX(b0, in);
  148. /* acc += b1 * x[n-1] + b2 * x[n-2] + out */
  149. acc = __SMLALD(b1, state_in, out);
  150. /* acc += a1 * y[n-1] + a2 * y[n-2] */
  151. acc = __SMLALD(a1, state_out, acc);
  152. /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
  153. /* Calc lower part of acc */
  154. acc_l = acc & 0xffffffff;
  155. /* Calc upper part of acc */
  156. acc_h = (acc >> 32) & 0xffffffff;
  157. /* Apply shift for lower part of acc and upper part of acc */
  158. out = (uint32_t) acc_l >> lShift | acc_h << uShift;
  159. out = __SSAT(out, 16);
  160. /* Store the output in the destination buffer. */
  161. #ifndef ARM_MATH_BIG_ENDIAN
  162. *__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
  163. #else
  164. *__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
  165. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  166. /* Every time after the output is computed state should be updated. */
  167. /* The states should be updated as: */
  168. /* Xn2 = Xn1 */
  169. /* Xn1 = Xn */
  170. /* Yn2 = Yn1 */
  171. /* Yn1 = acc */
  172. /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
  173. /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
  174. #ifndef ARM_MATH_BIG_ENDIAN
  175. state_in = __PKHBT(in >> 16, state_in, 16);
  176. state_out = __PKHBT(out, state_out, 16);
  177. #else
  178. state_in = __PKHBT(state_in >> 16, in, 16);
  179. state_out = __PKHBT(state_out >> 16, out, 16);
  180. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  181. /* Decrement the loop counter */
  182. sample--;
  183. }
  184. /* If the blockSize is not a multiple of 2, compute any remaining output samples here.
  185. ** No loop unrolling is used. */
  186. if((blockSize & 0x1u) != 0u)
  187. {
  188. /* Read the input */
  189. in = *pIn++;
  190. /* out = b0 * x[n] + 0 * 0 */
  191. #ifndef ARM_MATH_BIG_ENDIAN
  192. out = __SMUAD(b0, in);
  193. #else
  194. out = __SMUADX(b0, in);
  195. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  196. /* acc = b1 * x[n-1] + b2 * x[n-2] + out */
  197. acc = __SMLALD(b1, state_in, out);
  198. /* acc += a1 * y[n-1] + a2 * y[n-2] */
  199. acc = __SMLALD(a1, state_out, acc);
  200. /* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
  201. /* Calc lower part of acc */
  202. acc_l = acc & 0xffffffff;
  203. /* Calc upper part of acc */
  204. acc_h = (acc >> 32) & 0xffffffff;
  205. /* Apply shift for lower part of acc and upper part of acc */
  206. out = (uint32_t) acc_l >> lShift | acc_h << uShift;
  207. out = __SSAT(out, 16);
  208. /* Store the output in the destination buffer. */
  209. *pOut++ = (q15_t) out;
  210. /* Every time after the output is computed state should be updated. */
  211. /* The states should be updated as: */
  212. /* Xn2 = Xn1 */
  213. /* Xn1 = Xn */
  214. /* Yn2 = Yn1 */
  215. /* Yn1 = acc */
  216. /* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
  217. /* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
  218. #ifndef ARM_MATH_BIG_ENDIAN
  219. state_in = __PKHBT(in, state_in, 16);
  220. state_out = __PKHBT(out, state_out, 16);
  221. #else
  222. state_in = __PKHBT(state_in >> 16, in, 16);
  223. state_out = __PKHBT(state_out >> 16, out, 16);
  224. #endif /* #ifndef ARM_MATH_BIG_ENDIAN */
  225. }
  226. /* The first stage goes from the input wire to the output wire. */
  227. /* Subsequent numStages occur in-place in the output wire */
  228. pIn = pDst;
  229. /* Reset the output pointer */
  230. pOut = pDst;
  231. /* Store the updated state variables back into the state array */
  232. *__SIMD32(pState)++ = state_in;
  233. *__SIMD32(pState)++ = state_out;
  234. /* Decrement the loop counter */
  235. stage--;
  236. } while(stage > 0u);
  237. #else
  238. /* Run the below code for Cortex-M0 */
  239. q15_t *pIn = pSrc; /* Source pointer */
  240. q15_t *pOut = pDst; /* Destination pointer */
  241. q15_t b0, b1, b2, a1, a2; /* Filter coefficients */
  242. q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
  243. q15_t Xn; /* temporary input */
  244. q63_t acc; /* Accumulator */
  245. int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */
  246. q15_t *pState = S->pState; /* State pointer */
  247. q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
  248. uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
  249. do
  250. {
  251. /* Reading the coefficients */
  252. b0 = *pCoeffs++;
  253. pCoeffs++; // skip the 0 coefficient
  254. b1 = *pCoeffs++;
  255. b2 = *pCoeffs++;
  256. a1 = *pCoeffs++;
  257. a2 = *pCoeffs++;
  258. /* Reading the state values */
  259. Xn1 = pState[0];
  260. Xn2 = pState[1];
  261. Yn1 = pState[2];
  262. Yn2 = pState[3];
  263. /* The variables acc holds the output value that is computed:
  264. * acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
  265. */
  266. sample = blockSize;
  267. while(sample > 0u)
  268. {
  269. /* Read the input */
  270. Xn = *pIn++;
  271. /* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
  272. /* acc = b0 * x[n] */
  273. acc = (q31_t) b0 *Xn;
  274. /* acc += b1 * x[n-1] */
  275. acc += (q31_t) b1 *Xn1;
  276. /* acc += b[2] * x[n-2] */
  277. acc += (q31_t) b2 *Xn2;
  278. /* acc += a1 * y[n-1] */
  279. acc += (q31_t) a1 *Yn1;
  280. /* acc += a2 * y[n-2] */
  281. acc += (q31_t) a2 *Yn2;
  282. /* The result is converted to 1.31 */
  283. acc = __SSAT((acc >> shift), 16);
  284. /* Every time after the output is computed state should be updated. */
  285. /* The states should be updated as: */
  286. /* Xn2 = Xn1 */
  287. /* Xn1 = Xn */
  288. /* Yn2 = Yn1 */
  289. /* Yn1 = acc */
  290. Xn2 = Xn1;
  291. Xn1 = Xn;
  292. Yn2 = Yn1;
  293. Yn1 = (q15_t) acc;
  294. /* Store the output in the destination buffer. */
  295. *pOut++ = (q15_t) acc;
  296. /* decrement the loop counter */
  297. sample--;
  298. }
  299. /* The first stage goes from the input buffer to the output buffer. */
  300. /* Subsequent stages occur in-place in the output buffer */
  301. pIn = pDst;
  302. /* Reset to destination pointer */
  303. pOut = pDst;
  304. /* Store the updated state variables back into the pState array */
  305. *pState++ = Xn1;
  306. *pState++ = Xn2;
  307. *pState++ = Yn1;
  308. *pState++ = Yn2;
  309. } while(--stage);
  310. #endif /* #ifndef ARM_MATH_CM0_FAMILY */
  311. }
  312. /**
  313. * @} end of BiquadCascadeDF1 group
  314. */