00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 #include "twod/hybrid.hpp"
00038
00039 template<typename T, typename Pr>
00040 void twod::Hybrid<T, Pr>::setLevel1(twod::PartBase<T,Pr> * l1)
00041 {
00042 algoLevel1 = std::auto_ptr<twod::PartBase<T,Pr> > (l1);
00043 }
00044
00045 template<typename T, typename Pr>
00046 void twod::Hybrid<T, Pr>::setProcLevel1(int s1)
00047 {
00048 this->s1=s1;
00049 }
00050
00051 template<typename T, typename Pr>
00052 void twod::Hybrid<T, Pr>::setLevel2(twod::PartBase<T, sPr > * l2)
00053 {
00054 algoLevel2 = std::auto_ptr<twod::PartBase<T, sPr > > (l2);
00055 }
00056
00057 template<typename T, typename Pr>
00058 void twod::Hybrid<T, Pr>::setLevel2Slow(twod::PartBase<T, sPr > * l2)
00059 {
00060 algoLevel2Slow = std::auto_ptr<twod::PartBase<T, sPr > > (l2);
00061 }
00062
00063 template<typename T, typename Pr>
00064 T twod::Hybrid<T, Pr>::part (int procCount, const Pr & prefixSumArray, util::RectList< T, Pr > & parts)
00065 {
00066 util::RectList<T, Pr > l1(prefixSumArray);
00067
00068 if (s1 >=0)
00069 phase1(procCount, prefixSumArray, l1);
00070 else
00071 probephase1(procCount, prefixSumArray, l1);
00072
00073 phase2(procCount, prefixSumArray, parts, l1);
00074
00075 return parts.get_maximum_load();
00076 }
00077
00078
00079 template<typename T, typename Pr>
00080 void twod::Hybrid<T, Pr>::probephase1(int procCount,const Pr & prefixSumArray, util::RectList<T, Pr > & l1)
00081 {
00082 int first_level = (int)(floor(sqrt(procCount)));
00083 first_level = better_guess(first_level, procCount);
00084
00085 double expectedload = ((double)prefixSumArray[prefixSumArray.prefixsizeX()-1][prefixSumArray.prefixsizeY()-1]+1);
00086
00087
00088 while (first_level <= procCount / 2)
00089 {
00090 assert (first_level>0);
00091 assert (first_level<procCount);
00092
00093 util::RectList<T, Pr > temp(prefixSumArray);
00094
00095
00096 algoLevel1->part (first_level, prefixSumArray, temp);
00097
00098 assert (temp.valid_part());
00099
00100
00101 std::vector<int> alloc;
00102 util::distribute_processors(prefixSumArray, temp, alloc, procCount);
00103
00104 double max_local_expected = 0.;
00105 int i=0;
00106 for(typename util::RectList<T,Pr>::container::const_iterator list_iter1 = temp.rectangles.begin();
00107 list_iter1 != temp.rectangles.end();
00108 list_iter1++, i++)
00109 {
00110 const util::rectangle &r = *list_iter1;
00111
00112 assert (alloc[i] > 0);
00113
00114 double local_expected = (double)r.get_load<T,Pr>(prefixSumArray)/(double)alloc[i];
00115
00116 assert (local_expected > 0);
00117
00118 if (local_expected > max_local_expected)
00119 max_local_expected = local_expected;
00120 }
00121
00122
00123
00124 if (expectedload > max_local_expected)
00125 {
00126 expectedload = max_local_expected;
00127 l1.clear();
00128 l1.copy_into(temp);
00129 }
00130
00131
00132 first_level = better_guess(first_level+1, procCount);
00133 }
00134 }
00135
00136 template<typename T, typename Pr>
00137 int twod::Hybrid<T, Pr>::better_guess(int p, int m)
00138 {
00139 int pprime = p;
00140 assert (p>=0);
00141 while ((int)ceil((double)(m-pprime)/(double)pprime) == (int)ceil((double)(m-p)/(double)p))
00142 pprime++;
00143
00144 assert (pprime>=p);
00145 return pprime;
00146 }
00147
00148 template<typename T, typename Pr>
00149 void twod::Hybrid<T, Pr>::phase1(int procCount,const Pr & prefixSumArray,
00150 util::RectList<T, Pr > & l1)
00151 {
00152 int first_level;
00153
00154
00155 {
00156 int log = (int)log2(procCount);
00157
00158 if (s1 == 0)
00159 {
00160
00161 int c = (int)ceil(sqrt(pow(2,log/2)));
00162 int f = (int)floor(sqrt(pow(2,log/2)));
00163
00164 c=c*c;
00165 f=f*f;
00166
00167 if (c >= procCount)
00168 first_level = f;
00169 else
00170 first_level = c;
00171
00172 first_level = (int)sqrt(procCount);
00173
00174
00175 }
00176 else
00177 first_level = s1;
00178
00179 }
00180
00181 assert (first_level>0);
00182 assert (first_level<procCount);
00183
00184
00185
00186 algoLevel1->part (first_level, prefixSumArray, l1);
00187
00188 assert (l1.valid_part());
00189
00190 }
00191
00192 template<typename T, typename Pr>
00193 void twod::Hybrid<T, Pr>::phase2(int procCount,const Pr & prefixSumArray,
00194 util::RectList< T, Pr > & parts, const util::RectList<T, Pr > & l1)
00195 {
00196 std::vector<int> alloc;
00197 util::distribute_processors(prefixSumArray, l1, alloc, procCount);
00198
00199 std::vector< util::RectList< T, Pr >* > temp;
00200
00201 std::vector< T> local_bottleneck;
00202 std::vector< bool > exec_fast;
00203
00204
00205
00206 unsigned int i=0;
00207 for(typename util::RectList<T,Pr>::container::const_iterator list_iter1 = l1.rectangles.begin();
00208 list_iter1 != l1.rectangles.end();
00209 list_iter1++)
00210 {
00211 const util::rectangle &r = *list_iter1;
00212 sPr spsa (prefixSumArray, r);
00213
00214 util::RectList<T, sPr > l2(spsa);
00215
00216 algoLevel2->part(alloc[i], spsa, l2);
00217
00218
00219 util::RectList<T, Pr>* l1l = new util::RectList<T, Pr>(prefixSumArray);
00220
00221 for(typename util::RectList<T,Pr>::container::iterator list_iter2 = l2.rectangles.begin();
00222 list_iter2 != l2.rectangles.end();
00223 list_iter2++)
00224 {
00225 const util::rectangle &r2 = *list_iter2;
00226 l1l->add_rect(spsa.inOriginalSpace(r2));
00227 }
00228
00229 temp.push_back(l1l);
00230 local_bottleneck.push_back(l2.get_maximum_load());
00231 exec_fast.push_back(false);
00232
00233 assert (l2.valid_part());
00234 i++;
00235 }
00236
00237
00238 if (algoLevel2Slow.get() != NULL)
00239 {
00240 bool cont = true;
00241 while (cont)
00242 {
00243 int max_index = 0;
00244 for (i=0; i<local_bottleneck.size(); i++)
00245 {
00246 if (local_bottleneck[i] > local_bottleneck[max_index])
00247 max_index = i;
00248 }
00249
00250 if (exec_fast[max_index])
00251 {
00252 cont = false;
00253 }
00254 else
00255 {
00256 exec_fast[max_index] = true;
00257
00258 const util::rectangle &r = l1.rectangles[max_index];
00259
00260 sPr spsa (prefixSumArray, r);
00261
00262 util::RectList<T, sPr > l2(spsa);
00263
00264 algoLevel2Slow->part(alloc[max_index], spsa, l2);
00265
00266
00267 if (l2.get_maximum_load() < local_bottleneck[max_index])
00268 {
00269
00270 local_bottleneck[max_index] = l2.get_maximum_load();
00271
00272 temp[max_index]->clear();
00273 for(typename util::RectList<T,Pr>::container::iterator list_iter2 = l2.rectangles.begin();
00274 list_iter2 != l2.rectangles.end();
00275 list_iter2++)
00276 {
00277 const util::rectangle &r2 = *list_iter2;
00278
00279 temp[max_index]->add_rect(spsa.inOriginalSpace(r2));
00280 }
00281 }
00282
00283 }
00284 }
00285 }
00286
00287
00288 {
00289 for (unsigned int j=0; j<temp.size(); j++)
00290 {
00291 parts.add_into(*temp[j]);
00292 delete temp[j];
00293 }
00294 }
00295 }
00296
00297 template<typename T, typename Pr>
00298 twod::Hybrid<T, Pr>::~Hybrid(){}