Difference between revisions of "GPU610/TeamLean"
(→Assignment 1) |
|||
Line 7: | Line 7: | ||
== Progress == | == Progress == | ||
=== Assignment 1 === | === Assignment 1 === | ||
+ | |||
+ | '''For the first assignment, we each profiled an open-source library: LAME, an audio compression utility, and Squish, an image compression utility.''' | ||
+ | |||
+ | ''' | ||
+ | ''' | ||
+ | |||
+ | '''Alex - results for Lame''' | ||
+ | |||
+ | I have examined the LAME mp3 encoder to see if the process of encoding a wav file into an mp3 can be parallelized. | ||
+ | |||
+ | The source code below, taken from the psymodel.c file, could potentially be parallelized. | ||
+ | |||
+ | From the sample runs and the profile results, as the WAV file gets larger, the share of total run time spent in the most time-consuming function actually goes down. This might mean that it is not worth parallelizing. If my group chooses this project, we will have to examine this carefully. | ||
+ | |||
+ | '''<u>SOURCE CODE</u>''' | ||
+ | int L3psycho_anal_vbr(lame_internal_flags * gfc, const sample_t * const buffer[2], int gr_out, | ||
+ | III_psy_ratio masking_ratio[2][2], | ||
+ | III_psy_ratio masking_MS_ratio[2][2], | ||
+ | FLOAT percep_entropy[2], FLOAT percep_MS_entropy[2], | ||
+ | FLOAT energy[4], int blocktype_d[2]){ | ||
+ | SessionConfig_t const *const cfg = &gfc->cfg; | ||
+ | PsyStateVar_t *const psv = &gfc->sv_psy; | ||
+ | PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l; | ||
+ | PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s; | ||
+ | plotting_data *plt = cfg->analysis ? gfc->pinfo : 0; | ||
+ | III_psy_xmin last_thm[4]; | ||
+ | /* fft and energy calculation */ | ||
+ | FLOAT(*wsamp_l)[BLKSIZE]; | ||
+ | FLOAT(*wsamp_s)[3][BLKSIZE_s]; | ||
+ | FLOAT fftenergy[HBLKSIZE]; | ||
+ | FLOAT fftenergy_s[3][HBLKSIZE_s]; | ||
+ | FLOAT wsamp_L[2][BLKSIZE]; | ||
+ | FLOAT wsamp_S[2][3][BLKSIZE_s]; | ||
+ | FLOAT eb[4][CBANDS], thr[4] [CBANDS]; | ||
+ | FLOAT sub_short_factor[4][3]; | ||
+ | FLOAT thmm; | ||
+ | FLOAT const pcfact = 0.6f; | ||
+ | FLOAT const ath_factor = (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f; | ||
+ | const FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb; | ||
+ | const FLOAT(*const_fftenergy_s) [HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s; | ||
+ | /* block type */ | ||
+ | int ns_attacks[4] [4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} }; | ||
+ | int uselongblock[2]; | ||
+ | /* usual variables like loop indices, etc.. */ | ||
+ | int chn, sb, sblock; | ||
+ | /* chn=2 and 3 = Mid and Side channels */ | ||
+ | int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : cfg->channels_out; | ||
+ | memcpy(&last_thm[0], &psv->thm[0], sizeof(last_thm)); | ||
+ | vbrpsy_attack_detection(gfc, buffer, gr_out, masking_ratio, masking_MS_ratio, energy, | ||
+ | sub_short_factor, ns_attacks, uselongblock); | ||
+ | vbrpsy_compute_block_type(cfg, uselongblock); | ||
+ | /* LONG BLOCK CASE */ | ||
+ | { | ||
+ | for (chn = 0; chn < n_chn_psy; chn++) { | ||
+ | int const ch01 = chn & 0x01; | ||
+ | wsamp_l = wsamp_L + ch01; | ||
+ | vbrpsy_compute_fft_l(gfc, buffer, chn, gr_out, fftenergy, wsamp_l); | ||
+ | vbrpsy_compute_loudness_approximation_l(gfc, gr_out, chn, fftenergy); | ||
+ | vbrpsy_compute_masking_l(gfc, fftenergy, eb[chn], thr[chn], chn); | ||
+ | } | ||
+ | if (cfg->mode == JOINT_STEREO) { | ||
+ | if ((uselongblock[0] + uselongblock[1]) == 2) { | ||
+ | vbrpsy_compute_MS_thresholds(const_eb, thr, gdl->mld_cb, gfc->ATH->cb_l, | ||
+ | ath_factor, cfg->msfix, gdl->npart); | ||
+ | } | ||
+ | } | ||
+ | /* TODO: apply adaptive ATH masking here ?? */ | ||
+ | for (chn = 0; chn < n_chn_psy; chn++) { | ||
+ | convert_partition2scalefac_l(gfc, eb[chn], thr[chn], chn); | ||
+ | convert_partition2scalefac_l_to_s (gfc, eb[chn], thr[chn], chn); | ||
+ | } | ||
+ | } | ||
+ | /* SHORT BLOCKS CASE */ | ||
+ | { | ||
+ | int const force_short_block_calc = gfc->cd_psy->force_short_block_calc; | ||
+ | for (sblock = 0; sblock < 3; sblock++) { | ||
+ | for (chn = 0; chn < n_chn_psy; ++chn) { | ||
+ | int const ch01 = chn & 0x01; | ||
+ | if (uselongblock[ch01] && !force_short_block_calc) { | ||
+ | vbrpsy_skip_masking_s(gfc, chn, sblock); | ||
+ | } | ||
+ | else { | ||
+ | /* compute masking thresholds for short blocks */ | ||
+ | wsamp_s = wsamp_S + ch01; | ||
+ | vbrpsy_compute_fft_s(gfc, buffer, chn, sblock, fftenergy_s, wsamp_s); | ||
+ | vbrpsy_compute_masking_s(gfc, const_fftenergy_s, eb[chn], thr[chn], chn, | ||
+ | sblock); | ||
+ | } | ||
+ | } | ||
+ | if (cfg->mode == JOINT_STEREO) { | ||
+ | if ((uselongblock[0] + uselongblock[1]) == 0) { | ||
+ | vbrpsy_compute_MS_thresholds (const_eb, thr, gds->mld_cb, gfc->ATH->cb_s, | ||
+ | ath_factor, cfg->msfix, gds->npart); | ||
+ | } | ||
+ | } | ||
+ | /* TODO: apply adaptive ATH masking here ?? */ | ||
+ | for (chn = 0; chn < n_chn_psy; ++chn) { | ||
+ | int const ch01 = chn & 0x01; | ||
+ | if (!uselongblock[ch01] || force_short_block_calc) { | ||
+ | convert_partition2scalefac_s(gfc, eb[chn], thr[chn], chn, sblock); | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | /**** short block pre-echo control ****/ | ||
+ | for (chn = 0; chn < n_chn_psy; chn++) { | ||
+ | for (sb = 0; sb < SBMAX_s; sb++) { | ||
+ | FLOAT new_thmm[3], prev_thm, t1, t2; | ||
+ | for (sblock = 0; sblock < 3; sblock++) { | ||
+ | thmm = psv->thm[chn].s[sb][sblock]; | ||
+ | thmm *= NS_PREECHO_ATT0; | ||
+ | t1 = t2 = thmm; | ||
+ | if (sblock > 0) { | ||
+ | prev_thm = new_thmm[sblock - 1]; | ||
+ | } | ||
+ | else { | ||
+ | prev_thm = last_thm[chn].s[sb][2]; | ||
+ | } | ||
+ | if (ns_attacks[chn][sblock] >= 2 || ns_attacks[chn][sblock + 1] == 1) { | ||
+ | t1 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT1 * pcfact); | ||
+ | } | ||
+ | thmm = Min(t1, thmm); | ||
+ | if (ns_attacks[chn][sblock] == 1) { | ||
+ | t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact); | ||
+ | } | ||
+ | else if ((sblock == 0 && psv->last_attacks[chn] == 3) | ||
+ | || (sblock > 0 && ns_attacks [chn][sblock - 1] == 3)) { /* 2nd preceeding block */ | ||
+ | switch (sblock) { | ||
+ | case 0: | ||
+ | prev_thm = last_thm[chn].s[sb][1]; | ||
+ | break; | ||
+ | case 1: | ||
+ | prev_thm = last_thm[chn].s[sb][2]; | ||
+ | break; | ||
+ | case 2: | ||
+ | prev_thm = new_thmm[0]; | ||
+ | break; | ||
+ | } | ||
+ | t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact); | ||
+ | } | ||
+ | thmm = Min (t1, thmm); | ||
+ | thmm = Min(t2, thmm); | ||
+ | /* pulse like signal detection for fatboy.wav and so on */ | ||
+ | thmm *= sub_short_factor[chn][sblock]; | ||
+ | new_thmm[sblock] = thmm; | ||
+ | } | ||
+ | for (sblock = 0; sblock < 3; sblock++) { | ||
+ | psv->thm[chn].s[sb][sblock] = new_thmm[sblock]; | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | } | ||
+ | for (chn = 0; chn < n_chn_psy; chn++) { | ||
+ | psv->last_attacks[chn] = ns_attacks[chn][2]; | ||
+ | } | ||
+ | /*************************************************************** | ||
+ | * determine final block type | ||
+ | ***************************************************************/ | ||
+ | vbrpsy_apply_block_type(psv, cfg->channels_out, uselongblock, blocktype_d); | ||
+ | /********************************************************************* | ||
+ | * compute the value of PE to return ... no delay and advance | ||
+ | *********************************************************************/ | ||
+ | for (chn = 0; chn < n_chn_psy; chn++) { | ||
+ | FLOAT *ppe; | ||
+ | int type; | ||
+ | III_psy_ratio const *mr; | ||
+ | if (chn > 1) { | ||
+ | ppe = percep_MS_entropy - 2; | ||
+ | type = NORM_TYPE; | ||
+ | if (blocktype_d[0] == SHORT_TYPE || blocktype_d[1] == SHORT_TYPE) | ||
+ | type = SHORT_TYPE; | ||
+ | mr = &masking_MS_ratio[gr_out][chn - 2]; | ||
+ | } | ||
+ | else { | ||
+ | ppe = percep_entropy; | ||
+ | type = blocktype_d[chn]; | ||
+ | mr = &masking_ratio[gr_out][chn]; | ||
+ | } | ||
+ | if (type == SHORT_TYPE) { | ||
+ | ppe[chn] = pecalc_s(mr, gfc->sv_qnt.masking_lower); | ||
+ | } | ||
+ | else { | ||
+ | ppe[chn] = pecalc_l(mr, gfc->sv_qnt.masking_lower); | ||
+ | } | ||
+ | if (plt) { | ||
+ | plt->pe [gr_out][chn] = ppe[chn]; | ||
+ | } | ||
+ | } | ||
+ | return 0; | ||
+ | } |
Revision as of 17:49, 12 April 2013
Team Members
Progress
Assignment 1
For the first assignment, we each profiled an open-source library: LAME, an audio compression utility, and Squish, an image compression utility.
Alex - results for Lame
I have examined the LAME mp3 encoder to see if the process of encoding a wav file into an mp3 can be parallelized.
The source code below, taken from the psymodel.c file, could potentially be parallelized.
From the sample runs and the profile results, as the WAV file gets larger, the share of total run time spent in the most time-consuming function actually goes down. This might mean that it is not worth parallelizing. If my group chooses this project, we will have to examine this carefully.
SOURCE CODE
/*
 * L3psycho_anal_vbr
 *
 * Runs the attack detection, long-block and short-block masking passes and
 * the short-block pre-echo control for every analysed channel, then picks the
 * final block types and writes one perceptual-entropy (PE) value per channel.
 *
 * Parameters (as used by the code below):
 *   gfc               - encoder state; cfg, sv_psy, cd_psy, ATH, sv_qnt and
 *                       pinfo are read from it, sv_psy is also updated.
 *   buffer            - input samples, forwarded to the attack-detection and
 *                       FFT helpers.
 *   gr_out            - row index into masking_ratio / masking_MS_ratio and
 *                       into plt->pe.
 *   masking_ratio     - per-channel ratios for channels 0 and 1.
 *   masking_MS_ratio  - ratios for channels 2 and 3 (Mid and Side).
 *   percep_entropy    - receives the PE of channels 0 and 1.
 *   percep_MS_entropy - receives the PE of channels 2 and 3.
 *   energy            - forwarded to vbrpsy_attack_detection.
 *   blocktype_d       - filled in by vbrpsy_apply_block_type, then read to
 *                       choose between the short and long PE calculation.
 *
 * Returns 0 unconditionally.
 */
int L3psycho_anal_vbr(lame_internal_flags * gfc, const sample_t * const buffer[2], int gr_out,
                      III_psy_ratio masking_ratio[2][2],
                      III_psy_ratio masking_MS_ratio[2][2],
                      FLOAT percep_entropy[2], FLOAT percep_MS_entropy[2],
                      FLOAT energy[4], int blocktype_d[2])
{
    SessionConfig_t const *const cfg = &gfc->cfg;
    PsyStateVar_t *const psv = &gfc->sv_psy;
    PsyConst_CB2SB_t const *const gdl = &gfc->cd_psy->l;
    PsyConst_CB2SB_t const *const gds = &gfc->cd_psy->s;
    plotting_data *plt = cfg->analysis ? gfc->pinfo : 0;

    /* snapshot of psv->thm taken before this call overwrites it */
    III_psy_xmin last_thm[4];

    /* fft and energy calculation */
    FLOAT(*wsamp_l)[BLKSIZE];
    FLOAT(*wsamp_s)[3][BLKSIZE_s];
    FLOAT   fftenergy[HBLKSIZE];
    FLOAT   fftenergy_s[3][HBLKSIZE_s];
    FLOAT   wsamp_L[2][BLKSIZE];
    FLOAT   wsamp_S[2][3][BLKSIZE_s];
    FLOAT   eb[4][CBANDS], thr[4][CBANDS];

    FLOAT   sub_short_factor[4][3];
    FLOAT   thmm;
    FLOAT const pcfact = 0.6f;
    FLOAT const ath_factor =
        (cfg->msfix > 0.f) ? (cfg->ATH_offset_factor * gfc->ATH->adjust_factor) : 1.f;

    /* const-qualified views of eb / fftenergy_s for helpers taking const arrays */
    const   FLOAT(*const_eb)[CBANDS] = (const FLOAT(*)[CBANDS]) eb;
    const   FLOAT(*const_fftenergy_s)[HBLKSIZE_s] = (const FLOAT(*)[HBLKSIZE_s]) fftenergy_s;

    /* block type */
    int     ns_attacks[4][4] = { {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0}, {0, 0, 0, 0} };
    int     uselongblock[2];

    /* usual variables like loop indices, etc.. */
    int     chn, sb, sblock;

    /* chn=2 and 3 = Mid and Side channels */
    int const n_chn_psy = (cfg->mode == JOINT_STEREO) ? 4 : cfg->channels_out;

    memcpy(&last_thm[0], &psv->thm[0], sizeof(last_thm));

    vbrpsy_attack_detection(gfc, buffer, gr_out, masking_ratio, masking_MS_ratio, energy,
                            sub_short_factor, ns_attacks, uselongblock);

    vbrpsy_compute_block_type(cfg, uselongblock);

    /* LONG BLOCK CASE */
    {
        for (chn = 0; chn < n_chn_psy; chn++) {
            /* channels 2/3 reuse the work buffers of channels 0/1 */
            int const ch01 = chn & 0x01;

            wsamp_l = wsamp_L + ch01;
            vbrpsy_compute_fft_l(gfc, buffer, chn, gr_out, fftenergy, wsamp_l);
            vbrpsy_compute_loudness_approximation_l(gfc, gr_out, chn, fftenergy);
            vbrpsy_compute_masking_l(gfc, fftenergy, eb[chn], thr[chn], chn);
        }
        if (cfg->mode == JOINT_STEREO) {
            /* only when both channels use long blocks */
            if ((uselongblock[0] + uselongblock[1]) == 2) {
                vbrpsy_compute_MS_thresholds(const_eb, thr, gdl->mld_cb, gfc->ATH->cb_l,
                                             ath_factor, cfg->msfix, gdl->npart);
            }
        }
        /* TODO: apply adaptive ATH masking here ?? */
        for (chn = 0; chn < n_chn_psy; chn++) {
            convert_partition2scalefac_l(gfc, eb[chn], thr[chn], chn);
            convert_partition2scalefac_l_to_s(gfc, eb[chn], thr[chn], chn);
        }
    }

    /* SHORT BLOCKS CASE */
    {
        int const force_short_block_calc = gfc->cd_psy->force_short_block_calc;

        for (sblock = 0; sblock < 3; sblock++) {
            for (chn = 0; chn < n_chn_psy; ++chn) {
                int const ch01 = chn & 0x01;

                if (uselongblock[ch01] && !force_short_block_calc) {
                    vbrpsy_skip_masking_s(gfc, chn, sblock);
                }
                else {
                    /* compute masking thresholds for short blocks */
                    wsamp_s = wsamp_S + ch01;
                    vbrpsy_compute_fft_s(gfc, buffer, chn, sblock, fftenergy_s, wsamp_s);
                    vbrpsy_compute_masking_s(gfc, const_fftenergy_s, eb[chn], thr[chn], chn,
                                             sblock);
                }
            }
            if (cfg->mode == JOINT_STEREO) {
                /* only when both channels use short blocks */
                if ((uselongblock[0] + uselongblock[1]) == 0) {
                    vbrpsy_compute_MS_thresholds(const_eb, thr, gds->mld_cb, gfc->ATH->cb_s,
                                                 ath_factor, cfg->msfix, gds->npart);
                }
            }
            /* TODO: apply adaptive ATH masking here ?? */
            for (chn = 0; chn < n_chn_psy; ++chn) {
                int const ch01 = chn & 0x01;

                if (!uselongblock[ch01] || force_short_block_calc) {
                    convert_partition2scalefac_s(gfc, eb[chn], thr[chn], chn, sblock);
                }
            }
        }

        /**** short block pre-echo control ****/
        for (chn = 0; chn < n_chn_psy; chn++) {
            for (sb = 0; sb < SBMAX_s; sb++) {
                FLOAT   new_thmm[3], prev_thm, t1, t2;

                for (sblock = 0; sblock < 3; sblock++) {
                    thmm = psv->thm[chn].s[sb][sblock];
                    thmm *= NS_PREECHO_ATT0;

                    t1 = t2 = thmm;

                    /* previous threshold: earlier sub-block of this call, or
                     * the last sub-block saved in last_thm */
                    if (sblock > 0) {
                        prev_thm = new_thmm[sblock - 1];
                    }
                    else {
                        prev_thm = last_thm[chn].s[sb][2];
                    }
                    if (ns_attacks[chn][sblock] >= 2 || ns_attacks[chn][sblock + 1] == 1) {
                        t1 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT1 * pcfact);
                    }
                    thmm = Min(t1, thmm);
                    if (ns_attacks[chn][sblock] == 1) {
                        t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
                    }
                    else if ((sblock == 0 && psv->last_attacks[chn] == 3)
                             || (sblock > 0 && ns_attacks[chn][sblock - 1] == 3)) {
                        /* 2nd preceding block */
                        switch (sblock) {
                        case 0:
                            prev_thm = last_thm[chn].s[sb][1];
                            break;
                        case 1:
                            prev_thm = last_thm[chn].s[sb][2];
                            break;
                        case 2:
                            prev_thm = new_thmm[0];
                            break;
                        }
                        t2 = NS_INTERP(prev_thm, thmm, NS_PREECHO_ATT2 * pcfact);
                    }

                    thmm = Min(t1, thmm);
                    thmm = Min(t2, thmm);

                    /* pulse like signal detection for fatboy.wav and so on */
                    thmm *= sub_short_factor[chn][sblock];

                    new_thmm[sblock] = thmm;
                }
                /* write back only after all three sub-blocks are computed */
                for (sblock = 0; sblock < 3; sblock++) {
                    psv->thm[chn].s[sb][sblock] = new_thmm[sblock];
                }
            }
        }
    }

    /* keep the attack state of the last sub-block for the next call */
    for (chn = 0; chn < n_chn_psy; chn++) {
        psv->last_attacks[chn] = ns_attacks[chn][2];
    }

    /***************************************************************
     * determine final block type
     ***************************************************************/
    vbrpsy_apply_block_type(psv, cfg->channels_out, uselongblock, blocktype_d);

    /*********************************************************************
     * compute the value of PE to return ... no delay and advance
     *********************************************************************/
    for (chn = 0; chn < n_chn_psy; chn++) {
        FLOAT  *ppe;         /* output slot for this channel's PE */
        int     type;
        III_psy_ratio const *mr;

        if (chn > 1) {
            /* Channels 2 and 3 map to percep_MS_entropy[0] and [1].  Point at
             * the slot directly rather than biasing the array base by -2,
             * which would form a pointer before the start of the array. */
            ppe = &percep_MS_entropy[chn - 2];
            type = NORM_TYPE;
            if (blocktype_d[0] == SHORT_TYPE || blocktype_d[1] == SHORT_TYPE) {
                type = SHORT_TYPE;
            }
            mr = &masking_MS_ratio[gr_out][chn - 2];
        }
        else {
            ppe = &percep_entropy[chn];
            type = blocktype_d[chn];
            mr = &masking_ratio[gr_out][chn];
        }
        if (type == SHORT_TYPE) {
            *ppe = pecalc_s(mr, gfc->sv_qnt.masking_lower);
        }
        else {
            *ppe = pecalc_l(mr, gfc->sv_qnt.masking_lower);
        }
        if (plt) {
            plt->pe[gr_out][chn] = *ppe;
        }
    }
    return 0;
}