Previous 199869 Revisions Next

r31038 Thursday 19th June, 2014 at 11:44:56 UTC by Couriersud
Moved solver templates into separate header files.
[src/emu/netlist/analog]nld_bjt.h nld_ms_direct.h* nld_ms_direct1.h* nld_ms_direct2.h* nld_ms_gauss_seidel.h* nld_solver.c nld_solver.h nld_twoterm.h

trunk/src/emu/netlist/analog/nld_ms_direct.h
r0r31038
1/*
2 * nld_ms_direct.h
3 *
4 */
5
6#ifndef NLD_MS_DIRECT_H_
7#define NLD_MS_DIRECT_H_
8
9#include "nld_solver.h"
10
11template <int m_N, int _storage_N>
12class netlist_matrix_solver_direct_t: public netlist_matrix_solver_t
13{
14public:
15
16    netlist_matrix_solver_direct_t(const netlist_solver_parameters_t &params, int size);
17
18    virtual ~netlist_matrix_solver_direct_t();
19
20    ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets);
21    ATTR_COLD virtual void reset() { netlist_matrix_solver_t::reset(); }
22
23    ATTR_HOT inline const int N() const { if (m_N == 0) return m_dim; else return m_N; }
24
25    ATTR_HOT inline int vsolve_non_dynamic();
26
27protected:
28    ATTR_COLD virtual void add_term(int net_idx, netlist_terminal_t *term);
29
30    ATTR_HOT virtual double vsolve();
31
32    ATTR_HOT int solve_non_dynamic();
33    ATTR_HOT void build_LE();
34    ATTR_HOT void gauss_LE(double (* RESTRICT x));
35    ATTR_HOT double delta(const double (* RESTRICT V));
36    ATTR_HOT void store(const double (* RESTRICT V), const bool store_RHS);
37
38    /* bring the whole system to the current time
39     * Don't schedule a new calculation time. The recalculation has to be
40     * triggered by the caller after the netlist element was changed.
41     */
42    ATTR_HOT double compute_next_timestep();
43
44    double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
45    double m_RHS[_storage_N];
46    double m_last_RHS[_storage_N]; // right hand side - contains currents
47    double m_Vdelta[_storage_N];
48    double m_last_V[_storage_N];
49
50    terms_t **m_terms;
51    terms_t *m_rails_temp;
52
53private:
54    vector_ops_t *m_row_ops[_storage_N + 1];
55
56    int m_dim;
57    double m_lp_fact;
58};
59
60// ----------------------------------------------------------------------------------------
61// netlist_matrix_solver_direct
62// ----------------------------------------------------------------------------------------
63
64template <int m_N, int _storage_N>
65netlist_matrix_solver_direct_t<m_N, _storage_N>::~netlist_matrix_solver_direct_t()
66{
67    for (int k=0; k<_storage_N; k++)
68    {
69        //delete[] m_A[k];
70    }
71    //delete[] m_last_RHS;
72    //delete[] m_RHS;
73    delete[] m_terms;
74    delete[] m_rails_temp;
75    //delete[] m_row_ops;
76
77}
78
79template <int m_N, int _storage_N>
80ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::compute_next_timestep()
81{
82    double new_solver_timestep = m_params.m_max_timestep;
83
84    if (m_params.m_dynamic)
85    {
86        /*
87         * FIXME: We should extend the logic to use either all nets or
88         *        only output nets.
89         */
90#if 0
91        for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p))
92        {
93            netlist_analog_net_t *n = (*p)->m_proxied_net;
94#else
95        for (int k = 0; k < N(); k++)
96        {
97            netlist_analog_net_t *n = m_nets[k];
98#endif
99            const double DD_n = (n->m_cur_Analog - m_last_V[k]);
100            const double hn = current_timestep();
101
102            double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1);
103            double new_net_timestep;
104
105            n->m_h_n_m_1 = hn;
106            n->m_DD_n_m_1 = DD_n;
107            if (fabs(DD2) > 1e-50) // avoid div-by-zero
108                new_net_timestep = sqrt(m_params.m_lte / fabs(0.5*DD2));
109            else
110                new_net_timestep = m_params.m_max_timestep;
111
112            if (new_net_timestep < new_solver_timestep)
113                new_solver_timestep = new_net_timestep;
114        }
115        if (new_solver_timestep < m_params.m_min_timestep)
116            new_solver_timestep = m_params.m_min_timestep;
117    }
118    //if (new_solver_timestep > 10.0 * hn)
119    //    new_solver_timestep = 10.0 * hn;
120    return new_solver_timestep;
121}
122
123template <int m_N, int _storage_N>
124ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::add_term(int k, netlist_terminal_t *term)
125{
126    if (term->m_otherterm->net().isRailNet())
127    {
128        m_rails_temp[k].add(term, -1);
129    }
130    else
131    {
132        int ot = get_net_idx(&term->m_otherterm->net());
133        if (ot>=0)
134        {
135            m_terms[k]->add(term, ot);
136            SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go));
137        }
138        /* Should this be allowed ? */
139        else // if (ot<0)
140        {
141           m_rails_temp[k].add(term, ot);
142           netlist().error("found term with missing othernet %s\n", term->name().cstr());
143        }
144    }
145}
146
147
148template <int m_N, int _storage_N>
149ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_analog_net_t::list_t &nets)
150{
151
152    if (m_dim < nets.count())
153        netlist().error("Dimension %d less than %d", m_dim, nets.count());
154
155    for (int k = 0; k < N(); k++)
156    {
157        m_terms[k]->clear();
158        m_rails_temp[k].clear();
159    }
160
161    netlist_matrix_solver_t::setup(nets);
162
163    for (int k = 0; k < N(); k++)
164    {
165        m_terms[k]->m_railstart = m_terms[k]->count();
166        for (int i = 0; i < m_rails_temp[k].count(); i++)
167            this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i]);
168
169        m_rails_temp[k].clear(); // no longer needed
170        m_terms[k]->set_pointers();
171    }
172
173#if 1
174
175    /* Sort in descending order by number of connected matrix voltages.
176     * The idea is, that for Gauss-Seidel algo the first voltage computed
177     * depends on the greatest number of previous voltages thus taking into
178     * account the maximum amout of information.
179     *
180     * This actually improves performance on popeye slightly. Average
181     * GS computations reduce from 2.509 to 2.370
182     *
183     * Smallest to largest : 2.613
184     * Unsorted            : 2.509
185     * Largest to smallest : 2.370
186     *
187     * Sorting as a general matrix pre-conditioning is mentioned in
188     * literature but I have found no articles about Gauss Seidel.
189     *
190     */
191
192
193    for (int k = 0; k < N() / 2; k++)
194        for (int i = 0; i < N() - 1; i++)
195        {
196            if (m_terms[i]->m_railstart < m_terms[i+1]->m_railstart)
197            {
198                std::swap(m_terms[i],m_terms[i+1]);
199                m_nets.swap(i, i+1);
200            }
201        }
202
203    for (int k = 0; k < N(); k++)
204    {
205        int *other = m_terms[k]->net_other();
206        for (int i = 0; i < m_terms[k]->count(); i++)
207            if (other[i] != -1)
208                other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
209    }
210
211#endif
212
213}
214
215template <int m_N, int _storage_N>
216ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE()
217{
218#if 0
219    for (int k=0; k < N(); k++)
220        for (int i=0; i < N(); i++)
221            m_A[k][i] = 0.0;
222#endif
223
224    for (int k = 0; k < N(); k++)
225    {
226        for (int i=0; i < N(); i++)
227            m_A[k][i] = 0.0;
228
229        double rhsk = 0.0;
230        double akk  = 0.0;
231        {
232            const int terms_count = m_terms[k]->count();
233            const double * RESTRICT gt = m_terms[k]->gt();
234            const double * RESTRICT go = m_terms[k]->go();
235            const double * RESTRICT Idr = m_terms[k]->Idr();
236#if VECTALT
237
238            for (int i = 0; i < terms_count; i++)
239            {
240                rhsk = rhsk + Idr[i];
241                akk = akk + gt[i];
242            }
243#else
244            m_terms[k]->ops()->sum2(Idr, gt, rhsk, akk);
245#endif
246            double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog();
247            for (int i = m_terms[k]->m_railstart; i < terms_count; i++)
248            {
249                //rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
250                rhsk = rhsk + go[i] * *other_cur_analog[i];
251            }
252        }
253#if 0
254        /*
255         * Matrix preconditioning with 1.0 / Akk
256         *
257         * will save a number of calculations during elimination
258         *
259         */
260        akk = 1.0 / akk;
261        m_RHS[k] = rhsk * akk;
262        m_A[k][k] += 1.0;
263        {
264            const int *net_other = m_terms[k]->net_other();
265            const double *go = m_terms[k]->go();
266            const int railstart =  m_terms[k]->m_railstart;
267
268            for (int i = 0; i < railstart; i++)
269            {
270                m_A[k][net_other[i]] += -go[i] * akk;
271            }
272        }
273#else
274        m_RHS[k] = rhsk;
275        m_A[k][k] += akk;
276        {
277            const int * RESTRICT net_other = m_terms[k]->net_other();
278            const double * RESTRICT go = m_terms[k]->go();
279            const int railstart =  m_terms[k]->m_railstart;
280
281            for (int i = 0; i < railstart; i++)
282            {
283                m_A[k][net_other[i]] += -go[i];
284            }
285        }
286#endif
287    }
288}
289
290template <int m_N, int _storage_N>
291ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::gauss_LE(
292        double (* RESTRICT x))
293{
294#if 0
295    for (int i = 0; i < N(); i++)
296    {
297        for (int k = 0; k < N(); k++)
298            printf("%f ", m_A[i][k]);
299        printf("| %f = %f \n", x[i], m_RHS[i]);
300    }
301    printf("\n");
302#endif
303
304    const int kN = N();
305
306    for (int i = 0; i < kN; i++) {
307        // FIXME: use a parameter to enable pivoting?
308        if (USE_PIVOT_SEARCH)
309        {
310            /* Find the row with the largest first value */
311            int maxrow = i;
312            for (int j = i + 1; j < kN; j++)
313            {
314                if (fabs(m_A[j][i]) > fabs(m_A[maxrow][i]))
315                    maxrow = j;
316            }
317
318            if (maxrow != i)
319            {
320                /* Swap the maxrow and ith row */
321                for (int k = i; k < kN; k++) {
322                    std::swap(m_A[i][k], m_A[maxrow][k]);
323                }
324                std::swap(m_RHS[i], m_RHS[maxrow]);
325            }
326        }
327
328        /* FIXME: Singular matrix? */
329        const double f = 1.0 / m_A[i][i];
330
331        /* Eliminate column i from row j */
332
333        for (int j = i + 1; j < kN; j++)
334        {
335            const double f1 = - m_A[j][i] * f;
336            if (f1 != 0.0)
337            {
338#if 0 && VECTALT
339                for (int k = i + 1; k < kN; k++)
340                    m_A[j][k] += m_A[i][k] * f1;
341#else
342                // addmult gives some performance increase here...
343                m_row_ops[kN - (i + 1)]->addmult(&m_A[j][i+1], &m_A[i][i+1], f1) ;
344#endif
345                m_RHS[j] += m_RHS[i] * f1;
346            }
347        }
348    }
349    /* back substitution */
350    for (int j = kN - 1; j >= 0; j--)
351    {
352        double tmp = 0;
353
354        for (int k = j + 1; k < kN; k++)
355            tmp += m_A[j][k] * x[k];
356
357        x[j] = (m_RHS[j] - tmp) / m_A[j][j];
358    }
359#if 0
360    printf("Solution:\n");
361    for (int i = 0; i < N(); i++)
362    {
363        for (int k = 0; k < N(); k++)
364            printf("%f ", m_A[i][k]);
365        printf("| %f = %f \n", x[i], m_RHS[i]);
366    }
367    printf("\n");
368#endif
369
370}
371
372template <int m_N, int _storage_N>
373ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::delta(
374        const double (* RESTRICT V))
375{
376    double cerr = 0;
377    double cerr2 = 0;
378    for (int i = 0; i < this->N(); i++)
379    {
380        const double e = (V[i] - this->m_nets[i]->m_cur_Analog);
381        const double e2 = (m_RHS[i] - this->m_last_RHS[i]);
382        cerr = (fabs(e) > cerr ? fabs(e) : cerr);
383        cerr2 = (fabs(e2) > cerr2 ? fabs(e2) : cerr2);
384    }
385    // FIXME: Review
386    return cerr + cerr2*100000.0;
387}
388
389template <int m_N, int _storage_N>
390ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::store(
391        const double (* RESTRICT V), const bool store_RHS)
392{
393    for (int i = 0; i < this->N(); i++)
394    {
395        this->m_nets[i]->m_cur_Analog = V[i];
396    }
397    if (store_RHS)
398    {
399        for (int i = 0; i < this->N(); i++)
400        {
401            this->m_last_RHS[i] = m_RHS[i];
402        }
403    }
404}
405
406template <int m_N, int _storage_N>
407ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve()
408{
409    solve_base<netlist_matrix_solver_direct_t>(this);
410    return this->compute_next_timestep();
411}
412
413
414template <int m_N, int _storage_N>
415ATTR_HOT int netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic()
416{
417    double new_v[_storage_N] = { 0.0 };
418
419    this->gauss_LE(new_v);
420
421    if (this->is_dynamic())
422    {
423        double err = delta(new_v);
424
425        store(new_v, true);
426
427        if (err > this->m_params.m_accuracy)
428        {
429            return 2;
430        }
431        return 1;
432    }
433    store(new_v, false);  // ==> No need to store RHS
434    return 1;
435}
436
437template <int m_N, int _storage_N>
438ATTR_HOT inline int netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic()
439{
440    this->build_LE();
441
442    return this->solve_non_dynamic();
443}
444
445template <int m_N, int _storage_N>
446netlist_matrix_solver_direct_t<m_N, _storage_N>::netlist_matrix_solver_direct_t(const netlist_solver_parameters_t &params, int size)
447: netlist_matrix_solver_t(params)
448, m_dim(size)
449, m_lp_fact(0)
450{
451    m_terms = new terms_t *[N()];
452    m_rails_temp = new terms_t[N()];
453
454    for (int k = 0; k < N(); k++)
455    {
456        m_terms[k] = new terms_t;
457        m_row_ops[k] = vector_ops_t::create_ops(k);
458    }
459    m_row_ops[N()] = vector_ops_t::create_ops(N());
460}
461
462
463
464#endif /* NLD_MS_DIRECT_H_ */
Property changes on: trunk/src/emu/netlist/analog/nld_ms_direct.h
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native
trunk/src/emu/netlist/analog/nld_ms_direct1.h
r0r31038
1/*
2 * nld_ms_direct1.h
3 *
4 */
5
6#ifndef NLD_MS_DIRECT1_H_
7#define NLD_MS_DIRECT1_H_
8
9#include "nld_solver.h"
10#include "nld_ms_direct.h"
11
12class ATTR_ALIGNED(64) netlist_matrix_solver_direct1_t: public netlist_matrix_solver_direct_t<1,1>
13{
14public:
15
16    netlist_matrix_solver_direct1_t(const netlist_solver_parameters_t &params)
17      : netlist_matrix_solver_direct_t<1, 1>(params, 1)
18      {}
19    ATTR_HOT inline int vsolve_non_dynamic();
20protected:
21    ATTR_HOT virtual double vsolve();
22private:
23};
24
25// ----------------------------------------------------------------------------------------
26// netlist_matrix_solver - Direct1
27// ----------------------------------------------------------------------------------------
28
29ATTR_HOT double netlist_matrix_solver_direct1_t::vsolve()
30{
31    solve_base<netlist_matrix_solver_direct1_t>(this);
32    return this->compute_next_timestep();
33}
34
35ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic()
36{
37
38    netlist_analog_net_t *net = m_nets[0];
39    this->build_LE();
40    //NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);
41
42    double new_val =  m_RHS[0] / m_A[0][0];
43
44    double e = (new_val - net->m_cur_Analog);
45    double cerr = fabs(e);
46
47    net->m_cur_Analog = new_val;
48
49    if (is_dynamic() && (cerr  > m_params.m_accuracy))
50    {
51        return 2;
52    }
53    else
54        return 1;
55
56}
57
58
59
60#endif /* NLD_MS_DIRECT1_H_ */
Property changes on: trunk/src/emu/netlist/analog/nld_ms_direct1.h
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native
trunk/src/emu/netlist/analog/nld_solver.h
r31037r31038
1111
1212//#define ATTR_ALIGNED(N) __attribute__((aligned(N)))
1313#define ATTR_ALIGNED(N) ATTR_ALIGN
14//#undef RESTRICT
15//#define RESTRICT
1614
15#define USE_PIVOT_SEARCH (0)
16#define VECTALT 1
17#define USE_GABS 0
18#define USE_MATRIX_GS 0
19// savings are eaten up by effort
20#define USE_LINEAR_PREDICTION (0)
1721
1822// ----------------------------------------------------------------------------------------
1923// Macros
r31037r31038
3741   double m_lte;
3842   double m_min_timestep;
3943   double m_max_timestep;
44   double m_sor;
4045   bool m_dynamic;
4146   int m_gs_loops;
4247   int m_nr_loops;
r31037r31038
5459
5560    virtual ~vector_ops_t() {}
5661
57    ATTR_ALIGNED(64) double * RESTRICT  m_V;
58
5962    virtual const double sum(const double * v) = 0;
6063    virtual void sum2(const double * RESTRICT v1, const double * RESTRICT v2, double & RESTRICT  s1, double & RESTRICT s2) = 0;
6164    virtual void addmult(double * RESTRICT v1, const double * RESTRICT v2, const double &mult) = 0;
r31037r31038
6366
6467    virtual const double sumabs(const double * v) = 0;
6568
69    static vector_ops_t *create_ops(const int size);
70
6671protected:
6772    int m_dim;
6873
r31037r31038
150155    NETLIST_PREVENT_COPYING(terms_t)
151156
152157    public:
153    ATTR_COLD terms_t() {}
158    ATTR_COLD terms_t() : m_railstart(0), m_ops(NULL)
159    {}
154160
155161    ATTR_COLD void clear()
156162    {
r31037r31038
178184private:
179185    plinearlist_t<netlist_terminal_t *> m_term;
180186    plinearlist_t<int> m_net_other;
181    plinearlist_t<double> m_gt;
182187    plinearlist_t<double> m_go;
188    plinearlist_t<double> m_gt;
183189    plinearlist_t<double> m_Idr;
184190    plinearlist_t<double *> m_other_curanalog;
185191    vector_ops_t * m_ops;
r31037r31038
191197   typedef plinearlist_t<netlist_matrix_solver_t *> list_t;
192198   typedef netlist_core_device_t::list_t dev_list_t;
193199
194   ATTR_COLD netlist_matrix_solver_t();
200   ATTR_COLD netlist_matrix_solver_t(const netlist_solver_parameters_t &params);
195201   ATTR_COLD virtual ~netlist_matrix_solver_t();
196202
197203    ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets) = 0;
r31037r31038
215221    ATTR_COLD virtual void start();
216222    ATTR_COLD virtual void reset();
217223
218   netlist_solver_parameters_t m_params;
219
220224    ATTR_COLD int get_net_idx(netlist_net_t *net);
221225   ATTR_COLD virtual void log_stats() {};
222226
r31037r31038
234238    plinearlist_t<netlist_analog_output_t *> m_inps;
235239
236240    int m_calculations;
241    const netlist_solver_parameters_t &m_params;
237242
238243    ATTR_HOT inline const double current_timestep() { return m_cur_ts; }
239244private:
r31037r31038
252257
253258};
254259
255template <int m_N, int _storage_N>
256class netlist_matrix_solver_direct_t: public netlist_matrix_solver_t
257{
258public:
259260
260   netlist_matrix_solver_direct_t(int size);
261261
262   virtual ~netlist_matrix_solver_direct_t();
263
264   ATTR_COLD virtual void vsetup(netlist_analog_net_t::list_t &nets);
265   ATTR_COLD virtual void reset() { netlist_matrix_solver_t::reset(); }
266
267   ATTR_HOT inline const int N() const { if (m_N == 0) return m_dim; else return m_N; }
268
269    ATTR_HOT inline int vsolve_non_dynamic();
270
271protected:
272    ATTR_COLD virtual void add_term(int net_idx, netlist_terminal_t *term);
273
274    ATTR_HOT virtual double vsolve();
275
276    ATTR_HOT int solve_non_dynamic();
277   ATTR_HOT void build_LE();
278   ATTR_HOT void gauss_LE(double (* RESTRICT x));
279   ATTR_HOT double delta(const double (* RESTRICT V));
280   ATTR_HOT void store(const double (* RESTRICT V), const bool store_RHS);
281
282    /* bring the whole system to the current time
283     * Don't schedule a new calculation time. The recalculation has to be
284     * triggered by the caller after the netlist element was changed.
285     */
286    ATTR_HOT double compute_next_timestep();
287
288    double m_A[_storage_N][((_storage_N + 7) / 8) * 8];
289    double m_RHS[_storage_N];
290    double m_last_RHS[_storage_N]; // right hand side - contains currents
291    double m_Vdelta[_storage_N];
292    double m_last_V[_storage_N];
293
294    terms_t **m_terms;
295
296    terms_t *m_rails_temp;
297
298private:
299    vector_ops_t *m_row_ops[_storage_N + 1];
300
301   int m_dim;
302   double m_lp_fact;
303};
304
305template <int m_N, int _storage_N>
306class ATTR_ALIGNED(64) netlist_matrix_solver_gauss_seidel_t: public netlist_matrix_solver_direct_t<m_N, _storage_N>
307{
308public:
309
310   netlist_matrix_solver_gauss_seidel_t(int size)
311      : netlist_matrix_solver_direct_t<m_N, _storage_N>(size)
312      , m_lp_fact(0)
313      , m_gs_fail(0)
314      , m_gs_total(0)
315      {}
316
317   virtual ~netlist_matrix_solver_gauss_seidel_t() {}
318
319    ATTR_COLD virtual void log_stats();
320
321    ATTR_HOT inline int vsolve_non_dynamic();
322protected:
323    ATTR_HOT virtual double vsolve();
324
325private:
326    double m_lp_fact;
327    int m_gs_fail;
328    int m_gs_total;
329
330};
331
332class ATTR_ALIGNED(64) netlist_matrix_solver_direct1_t: public netlist_matrix_solver_direct_t<1,1>
333{
334public:
335
336    netlist_matrix_solver_direct1_t()
337      : netlist_matrix_solver_direct_t<1, 1>(1)
338      {}
339    ATTR_HOT inline int vsolve_non_dynamic();
340protected:
341    ATTR_HOT virtual double vsolve();
342private:
343};
344
345class ATTR_ALIGNED(64) netlist_matrix_solver_direct2_t: public netlist_matrix_solver_direct_t<2,2>
346{
347public:
348
349    netlist_matrix_solver_direct2_t()
350      : netlist_matrix_solver_direct_t<2, 2>(2)
351      {}
352    ATTR_HOT inline int vsolve_non_dynamic();
353protected:
354    ATTR_HOT virtual double vsolve();
355private:
356};
357
358262class ATTR_ALIGNED(64) NETLIB_NAME(solver) : public netlist_device_t
359263{
360264public:
r31037r31038
381285    netlist_param_double_t m_accuracy;
382286    netlist_param_double_t m_gmin;
383287    netlist_param_double_t m_lte;
288    netlist_param_double_t m_sor;
384289    netlist_param_logic_t  m_dynamic;
385290    netlist_param_double_t m_min_timestep;
386291
trunk/src/emu/netlist/analog/nld_ms_direct2.h
r0r31038
1/*
2 * nld_ms_direct1.h
3 *
4 */
5
6#ifndef NLD_MS_DIRECT2_H_
7#define NLD_MS_DIRECT2_H_
8
9#include "nld_solver.h"
10#include "nld_ms_direct.h"
11
12
13
14class ATTR_ALIGNED(64) netlist_matrix_solver_direct2_t: public netlist_matrix_solver_direct_t<2,2>
15{
16public:
17
18    netlist_matrix_solver_direct2_t(const netlist_solver_parameters_t &params)
19      : netlist_matrix_solver_direct_t<2, 2>(params, 2)
20      {}
21    ATTR_HOT inline int vsolve_non_dynamic();
22protected:
23    ATTR_HOT virtual double vsolve();
24private:
25};
26
27// ----------------------------------------------------------------------------------------
28// netlist_matrix_solver - Direct2
29// ----------------------------------------------------------------------------------------
30
31ATTR_HOT double netlist_matrix_solver_direct2_t::vsolve()
32{
33    solve_base<netlist_matrix_solver_direct2_t>(this);
34    return this->compute_next_timestep();
35}
36
37ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic()
38{
39
40    build_LE();
41
42    const double a = m_A[0][0];
43    const double b = m_A[0][1];
44    const double c = m_A[1][0];
45    const double d = m_A[1][1];
46
47    double new_val[2];
48    new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c);
49    new_val[0] = (m_RHS[0] - b * new_val[1]) / a;
50
51    if (is_dynamic())
52    {
53        double err = this->delta(new_val);
54        store(new_val, true);
55        if (err > m_params.m_accuracy )
56            return 2;
57        else
58            return 1;
59    }
60    store(new_val, false);
61    return 1;
62}
63
64
65
66#endif /* NLD_MS_DIRECT2_H_ */
Property changes on: trunk/src/emu/netlist/analog/nld_ms_direct2.h
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native
trunk/src/emu/netlist/analog/nld_bjt.h
r31037r31038
9898
9999   NETLIB_UPDATE_TERMINALS()
100100   {
101      double m = (is_qtype( BJT_NPN) ? 1 : -1);
101       const double m = (is_qtype( BJT_NPN) ? 1 : -1);
102102
103      int new_state = (m_RB.deltaV() * m > m_V ) ? 1 : 0;
103      const int new_state = (m_RB.deltaV() * m > m_V ) ? 1 : 0;
104104      if (m_state_on ^ new_state)
105105      {
106#if 0
106107         double gb = m_gB;
107108         double gc = m_gC;
108109         double v  = m_V * m;
r31037r31038
113114            v = 0;
114115            gc = netlist().gmin();
115116         }
117#else
118         const double gb = new_state ? m_gB : netlist().gmin();
119         const double gc = new_state ? m_gC : netlist().gmin();
120         const double v  = new_state ? m_V * m : 0;
121#endif
116122         m_RB.set(gb, v,   0.0);
117123         m_RC.set(gc, 0.0, 0.0);
118124         //m_RB.update_dev();
r31037r31038
163169
164170   NETLIB_UPDATE_TERMINALS()
165171   {
166      double polarity = (qtype() == BJT_NPN ? 1.0 : -1.0);
172      const double polarity = (qtype() == BJT_NPN ? 1.0 : -1.0);
167173
168174      m_gD_BE.update_diode(-m_D_EB.deltaV() * polarity);
169175      m_gD_BC.update_diode(-m_D_CB.deltaV() * polarity);
170176
171      double gee = m_gD_BE.G();
172      double gcc = m_gD_BC.G();
173      double gec =  m_alpha_r * gcc;
174      double gce =  m_alpha_f * gee;
175      double sIe = -m_gD_BE.I() + m_alpha_r * m_gD_BC.I();
176      double sIc = m_alpha_f * m_gD_BE.I() - m_gD_BC.I();
177      double Ie = (sIe + gee * m_gD_BE.Vd() - gec * m_gD_BC.Vd()) * polarity;
178      double Ic = (sIc - gce * m_gD_BE.Vd() + gcc * m_gD_BC.Vd()) * polarity;
179      //double Ie = sIe + gee * -m_D_EB.deltaV() - gec * -m_D_CB.deltaV();
180      //double Ic = sIc - gce * -m_D_EB.deltaV() + gcc * -m_D_CB.deltaV();
181      //printf("EB %f sIe %f sIc %f\n", m_D_BE.deltaV(), sIe, sIc);
177      const double gee = m_gD_BE.G();
178      const double gcc = m_gD_BC.G();
179      const double gec =  m_alpha_r * gcc;
180      const double gce =  m_alpha_f * gee;
181      const double sIe = -m_gD_BE.I() + m_alpha_r * m_gD_BC.I();
182      const double sIc = m_alpha_f * m_gD_BE.I() - m_gD_BC.I();
183      const double Ie = (sIe + gee * m_gD_BE.Vd() - gec * m_gD_BC.Vd()) * polarity;
184      const double Ic = (sIc - gce * m_gD_BE.Vd() + gcc * m_gD_BC.Vd()) * polarity;
182185
183186      m_D_EB.set_mat(gee, gec - gee, gce - gee, gee - gec, Ie, -Ie);
184187      m_D_CB.set_mat(gcc, gce - gcc, gec - gcc, gcc - gce, Ic, -Ic);
trunk/src/emu/netlist/analog/nld_twoterm.h
r31037r31038
103103      m_N.set( G,  G, ( -V) * G + I);
104104   }
105105
106   ATTR_HOT inline double deltaV()
106   ATTR_HOT inline double deltaV() const
107107   {
108108       return m_P.net().as_analog().Q_Analog() - m_N.net().as_analog().Q_Analog();
109109   }
r31037r31038
172172
173173   ATTR_HOT void step_time(const double st)
174174   {
175      double G = m_C.Value() / st;
176      double I = -G * deltaV();
175      const double G = m_C.Value() / st;
176      const double I = -G * deltaV();
177177      set(G, 0.0, I);
178178   }
179179
trunk/src/emu/netlist/analog/nld_solver.c
r31037r31038
33 *
44 */
55
6#include <algorithm>
7
8#include "nld_solver.h"
9#include "nld_twoterm.h"
10#include "../nl_lists.h"
11
12#if HAS_OPENMP
13#include "omp.h"
14#endif
15
16#define USE_PIVOT_SEARCH (0)
17#define VECTALT 1
18#define USE_GABS 0
19#define USE_MATRIX_GS 0
20//#define SORP 1.059
21#define SORP 1.059
22// savings are eaten up by effort
23#define USE_LINEAR_PREDICTION (0)
24
25#define SOLVER_VERBOSE_OUT(x) do {} while (0)
26//#define SOLVER_VERBOSE_OUT(x) printf x
27
28/* Commented out for now. Relatively low number of terminals / nes makes
29 * the vectorizations this enables pretty expensive
6/* Commented out for now. Relatively low number of terminals / nets make
7 * the vectorizations fast-math enables pretty expensive
308 */
319
3210#if 0
r31037r31038
3513#pragma GCC optimize "-funswitch-loops"
3614#pragma GCC optimize "-fvariable-expansion-in-unroller"
3715#pragma GCC optimize "-funsafe-loop-optimizations"
16#pragma GCC optimize "-fvect-cost-model"
17#pragma GCC optimize "-fvariable-expansion-in-unroller"
3818#pragma GCC optimize "-ftree-loop-if-convert-stores"
3919#pragma GCC optimize "-ftree-loop-distribution"
4020#pragma GCC optimize "-ftree-loop-im"
4121#pragma GCC optimize "-ftree-loop-ivcanon"
4222#pragma GCC optimize "-fivopts"
4323#pragma GCC optimize "-ftree-parallelize-loops=4"
44#pragma GCC optimize "-fvect-cost-model"
45#pragma GCC optimize "-fvariable-expansion-in-unroller"
4624#endif
4725
48static vector_ops_t *create_ops(const int size)
26#define SOLVER_VERBOSE_OUT(x) do {} while (0)
27//#define SOLVER_VERBOSE_OUT(x) printf x
28
29#include <algorithm>
30#include "nld_solver.h"
31#include "nld_ms_direct.h"
32#include "nld_ms_direct1.h"
33#include "nld_ms_direct2.h"
34#include "nld_ms_gauss_seidel.h"
35#include "nld_twoterm.h"
36#include "../nl_lists.h"
37
38#if HAS_OPENMP
39#include "omp.h"
40#endif
41
42vector_ops_t *vector_ops_t::create_ops(const int size)
4943{
5044    switch (size)
5145    {
r31037r31038
9892        m_other_curanalog[i] = &m_term[i]->m_otherterm->net().as_analog().m_cur_Analog;
9993    }
10094
101    m_ops = create_ops(m_gt.count());
95    m_ops = vector_ops_t::create_ops(m_gt.count());
10296}
10397
10498// ----------------------------------------------------------------------------------------
10599// netlist_matrix_solver
106100// ----------------------------------------------------------------------------------------
107101
108ATTR_COLD netlist_matrix_solver_t::netlist_matrix_solver_t()
109: m_calculations(0), m_cur_ts(0)
102ATTR_COLD netlist_matrix_solver_t::netlist_matrix_solver_t(const netlist_solver_parameters_t &params)
103: m_calculations(0), m_params(params), m_cur_ts(0)
110104{
111105}
112106
r31037r31038
197191   }
198192}
199193
200// ----------------------------------------------------------------------------------------
201// netlist_matrix_solver_direct
202// ----------------------------------------------------------------------------------------
203194
204template <int m_N, int _storage_N>
205netlist_matrix_solver_direct_t<m_N, _storage_N>::netlist_matrix_solver_direct_t(int size)
206: netlist_matrix_solver_t()
207, m_dim(size)
208, m_lp_fact(0)
209{
210    m_terms = new terms_t *[N()];
211    m_rails_temp = new terms_t[N()];
212
213    for (int k = 0; k < N(); k++)
214    {
215        m_terms[k] = new terms_t;
216        m_row_ops[k] = create_ops(k);
217    }
218    m_row_ops[N()] = create_ops(N());
219}
220
221template <int m_N, int _storage_N>
222netlist_matrix_solver_direct_t<m_N, _storage_N>::~netlist_matrix_solver_direct_t()
223{
224    for (int k=0; k<_storage_N; k++)
225    {
226        //delete[] m_A[k];
227    }
228    //delete[] m_last_RHS;
229    //delete[] m_RHS;
230    delete[] m_terms;
231    delete[] m_rails_temp;
232    //delete[] m_row_ops;
233
234}
235
236template <int m_N, int _storage_N>
237ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::compute_next_timestep()
238{
239    double new_solver_timestep = m_params.m_max_timestep;
240
241    if (m_params.m_dynamic)
242    {
243        /*
244         * FIXME: We should extend the logic to use either all nets or
245         *        only output nets.
246         */
247#if 0
248        for (netlist_analog_output_t * const *p = m_inps.first(); p != NULL; p = m_inps.next(p))
249        {
250            netlist_analog_net_t *n = (*p)->m_proxied_net;
251#else
252        for (int k = 0; k < N(); k++)
253        {
254            netlist_analog_net_t *n = m_nets[k];
255#endif
256            const double DD_n = (n->m_cur_Analog - m_last_V[k]);
257            const double hn = current_timestep();
258
259            double DD2 = (DD_n / hn - n->m_DD_n_m_1 / n->m_h_n_m_1) / (hn + n->m_h_n_m_1);
260            double new_net_timestep;
261
262            n->m_h_n_m_1 = hn;
263            n->m_DD_n_m_1 = DD_n;
264            if (fabs(DD2) > 1e-50) // avoid div-by-zero
265                new_net_timestep = sqrt(m_params.m_lte / fabs(0.5*DD2));
266            else
267                new_net_timestep = m_params.m_max_timestep;
268
269            if (new_net_timestep < new_solver_timestep)
270                new_solver_timestep = new_net_timestep;
271        }
272        if (new_solver_timestep < m_params.m_min_timestep)
273            new_solver_timestep = m_params.m_min_timestep;
274    }
275    //if (new_solver_timestep > 10.0 * hn)
276    //    new_solver_timestep = 10.0 * hn;
277   return new_solver_timestep;
278}
279
280195ATTR_HOT void netlist_matrix_solver_t::update_inputs()
281196{
282197    // avoid recursive calls. Inputs are updated outside this call
r31037r31038
386301   return next_time_step;
387302}
388303
389template <int m_N, int _storage_N>
390void netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::log_stats()
391{
392#if 1
393    printf("==============================================\n");
394    printf("Solver %s\n", this->name().cstr());
395    printf("       ==> %d nets\n", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
396    printf("       has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic");
397    printf("       has %s elements\n", this->is_timestep() ? "timestep" : "no timestep");
398    printf("       %10d invocations (%6d Hz)  %10d gs fails (%6.2f%%) %6.3f average\n",
399            this->m_calculations,
400            this->m_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
401            this->m_gs_fail,
402            100.0 * (double) this->m_gs_fail / (double) this->m_calculations,
403            (double) this->m_gs_total / (double) this->m_calculations);
404#endif
405}
406304
407305// ----------------------------------------------------------------------------------------
408306// netlist_matrix_solver - Direct base
r31037r31038
416314   return -1;
417315}
418316
419template <int m_N, int _storage_N>
420ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::add_term(int k, netlist_terminal_t *term)
421{
422    if (term->m_otherterm->net().isRailNet())
423    {
424        m_rails_temp[k].add(term, -1);
425    }
426    else
427    {
428        int ot = get_net_idx(&term->m_otherterm->net());
429        if (ot>=0)
430        {
431            m_terms[k]->add(term, ot);
432            SOLVER_VERBOSE_OUT(("Net %d Term %s %f %f\n", k, terms[i]->name().cstr(), terms[i]->m_gt, terms[i]->m_go));
433        }
434        /* Should this be allowed ? */
435        else // if (ot<0)
436        {
437           m_rails_temp[k].add(term, ot);
438           netlist().error("found term with missing othernet %s\n", term->name().cstr());
439        }
440    }
441}
442317
443318
444template <int m_N, int _storage_N>
445ATTR_COLD void netlist_matrix_solver_direct_t<m_N, _storage_N>::vsetup(netlist_analog_net_t::list_t &nets)
446{
447319
448    if (m_dim < nets.count())
449        netlist().error("Dimension %d less than %d", m_dim, nets.count());
450320
451    for (int k = 0; k < N(); k++)
452    {
453        m_terms[k]->clear();
454        m_rails_temp[k].clear();
455    }
456321
457    netlist_matrix_solver_t::setup(nets);
458322
459    for (int k = 0; k < N(); k++)
460    {
461        m_terms[k]->m_railstart = m_terms[k]->count();
462        for (int i = 0; i < m_rails_temp[k].count(); i++)
463            this->m_terms[k]->add(m_rails_temp[k].terms()[i], m_rails_temp[k].net_other()[i]);
464
465        m_rails_temp[k].clear(); // no longer needed
466        m_terms[k]->set_pointers();
467    }
468
469#if 1
470
471    /* Sort in descending order by number of connected matrix voltages.
472     * The idea is, that for Gauss-Seidel algo the first voltage computed
473     * depends on the greatest number of previous voltages thus taking into
474     * account the maximum amout of information.
475     *
476     * This actually improves performance on popeye slightly. Average
477     * GS computations reduce from 2.509 to 2.370
478     *
479     * Smallest to largest : 2.613
480     * Unsorted            : 2.509
481     * Largest to smallest : 2.370
482     *
483     * Sorting as a general matrix pre-conditioning is mentioned in
484     * literature but I have found no articles about Gauss Seidel.
485     *
486     */
487
488
489    for (int k = 0; k < N() / 2; k++)
490        for (int i = 0; i < N() - 1; i++)
491        {
492            if (m_terms[i]->m_railstart < m_terms[i+1]->m_railstart)
493            {
494                std::swap(m_terms[i],m_terms[i+1]);
495                m_nets.swap(i, i+1);
496            }
497        }
498
499    for (int k = 0; k < N(); k++)
500    {
501        int *other = m_terms[k]->net_other();
502        for (int i = 0; i < m_terms[k]->count(); i++)
503            if (other[i] != -1)
504                other[i] = get_net_idx(&m_terms[k]->terms()[i]->m_otherterm->net());
505    }
506
507#endif
508
509}
510
511template <int m_N, int _storage_N>
512ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::build_LE()
513{
514#if 0
515    for (int k=0; k < N(); k++)
516        for (int i=0; i < N(); i++)
517            m_A[k][i] = 0.0;
518#endif
519
520    for (int k = 0; k < N(); k++)
521   {
522        for (int i=0; i < N(); i++)
523            m_A[k][i] = 0.0;
524
525        double rhsk = 0.0;
526        double akk  = 0.0;
527        {
528            const int terms_count = m_terms[k]->count();
529            const double * RESTRICT gt = m_terms[k]->gt();
530            const double * RESTRICT go = m_terms[k]->go();
531            const double * RESTRICT Idr = m_terms[k]->Idr();
532#if VECTALT
533
534            for (int i = 0; i < terms_count; i++)
535            {
536                rhsk = rhsk + Idr[i];
537                akk = akk + gt[i];
538            }
539#else
540            m_terms[k]->ops()->sum2(Idr, gt, rhsk, akk);
541#endif
542            double * const * RESTRICT other_cur_analog = m_terms[k]->other_curanalog();
543            for (int i = m_terms[k]->m_railstart; i < terms_count; i++)
544            {
545                //rhsk = rhsk + go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
546                rhsk = rhsk + go[i] * *other_cur_analog[i];
547            }
548        }
549#if 0
550        /*
551         * Matrix preconditioning with 1.0 / Akk
552         *
553         * will save a number of calculations during elimination
554         *
555         */
556        akk = 1.0 / akk;
557        m_RHS[k] = rhsk * akk;
558        m_A[k][k] += 1.0;
559        {
560            const int *net_other = m_terms[k]->net_other();
561            const double *go = m_terms[k]->go();
562            const int railstart =  m_terms[k]->m_railstart;
563
564            for (int i = 0; i < railstart; i++)
565            {
566                m_A[k][net_other[i]] += -go[i] * akk;
567            }
568        }
569#else
570        m_RHS[k] = rhsk;
571        m_A[k][k] += akk;
572        {
573            const int * RESTRICT net_other = m_terms[k]->net_other();
574            const double * RESTRICT go = m_terms[k]->go();
575            const int railstart =  m_terms[k]->m_railstart;
576
577            for (int i = 0; i < railstart; i++)
578            {
579                m_A[k][net_other[i]] += -go[i];
580            }
581        }
582#endif
583    }
584}
585
586template <int m_N, int _storage_N>
587ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::gauss_LE(
588      double (* RESTRICT x))
589{
590#if 0
591   for (int i = 0; i < N(); i++)
592   {
593      for (int k = 0; k < N(); k++)
594         printf("%f ", m_A[i][k]);
595      printf("| %f = %f \n", x[i], m_RHS[i]);
596   }
597   printf("\n");
598#endif
599
600    const int kN = N();
601
602   for (int i = 0; i < kN; i++) {
603       // FIXME: use a parameter to enable pivoting?
604       if (USE_PIVOT_SEARCH)
605       {
606           /* Find the row with the largest first value */
607           int maxrow = i;
608           for (int j = i + 1; j < kN; j++)
609           {
610               if (fabs(m_A[j][i]) > fabs(m_A[maxrow][i]))
611                   maxrow = j;
612           }
613
614           if (maxrow != i)
615           {
616               /* Swap the maxrow and ith row */
617               for (int k = i; k < kN; k++) {
618                   std::swap(m_A[i][k], m_A[maxrow][k]);
619               }
620               std::swap(m_RHS[i], m_RHS[maxrow]);
621           }
622       }
623
624       /* FIXME: Singular matrix? */
625      const double f = 1.0 / m_A[i][i];
626
627      /* Eliminate column i from row j */
628
629      for (int j = i + 1; j < kN; j++)
630      {
631            const double f1 = - m_A[j][i] * f;
632         if (f1 != 0.0)
633         {
634#if 0 && VECTALT
635                for (int k = i + 1; k < kN; k++)
636                    m_A[j][k] += m_A[i][k] * f1;
637#else
638                // addmult gives some performance increase here...
639                m_row_ops[kN - (i + 1)]->addmult(&m_A[j][i+1], &m_A[i][i+1], f1) ;
640#endif
641               m_RHS[j] += m_RHS[i] * f1;
642         }
643      }
644   }
645   /* back substitution */
646   for (int j = kN - 1; j >= 0; j--)
647   {
648      double tmp = 0;
649
650      for (int k = j + 1; k < kN; k++)
651            tmp += m_A[j][k] * x[k];
652
653        x[j] = (m_RHS[j] - tmp) / m_A[j][j];
654   }
655#if 0
656   printf("Solution:\n");
657   for (int i = 0; i < N(); i++)
658   {
659      for (int k = 0; k < N(); k++)
660         printf("%f ", m_A[i][k]);
661      printf("| %f = %f \n", x[i], m_RHS[i]);
662   }
663   printf("\n");
664#endif
665
666}
667
668template <int m_N, int _storage_N>
669ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::delta(
670      const double (* RESTRICT V))
671{
672   double cerr = 0;
673   double cerr2 = 0;
674   for (int i = 0; i < this->N(); i++)
675   {
676      const double e = (V[i] - this->m_nets[i]->m_cur_Analog);
677      const double e2 = (m_RHS[i] - this->m_last_RHS[i]);
678      cerr = (fabs(e) > cerr ? fabs(e) : cerr);
679        cerr2 = (fabs(e2) > cerr2 ? fabs(e2) : cerr2);
680   }
681   // FIXME: Review
682   return cerr + cerr2*100000.0;
683}
684
685template <int m_N, int _storage_N>
686ATTR_HOT void netlist_matrix_solver_direct_t<m_N, _storage_N>::store(
687      const double (* RESTRICT V), const bool store_RHS)
688{
689   for (int i = 0; i < this->N(); i++)
690   {
691      this->m_nets[i]->m_cur_Analog = V[i];
692   }
693   if (store_RHS)
694   {
695      for (int i = 0; i < this->N(); i++)
696      {
697         this->m_last_RHS[i] = m_RHS[i];
698      }
699   }
700}
701
702template <int m_N, int _storage_N>
703ATTR_HOT double netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve()
704{
705    solve_base<netlist_matrix_solver_direct_t>(this);
706    return this->compute_next_timestep();
707}
708
709
710template <int m_N, int _storage_N>
711ATTR_HOT int netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic()
712{
713    double new_v[_storage_N] = { 0.0 };
714
715    this->gauss_LE(new_v);
716
717    if (this->is_dynamic())
718    {
719        double err = delta(new_v);
720
721        store(new_v, true);
722
723        if (err > this->m_params.m_accuracy)
724        {
725            return 2;
726        }
727        return 1;
728    }
729    store(new_v, false);  // ==> No need to store RHS
730    return 1;
731}
732
733template <int m_N, int _storage_N>
734ATTR_HOT inline int netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic()
735{
736   this->build_LE();
737
738   return this->solve_non_dynamic();
739}
740
741
742323// ----------------------------------------------------------------------------------------
743// netlist_matrix_solver - Direct1
744// ----------------------------------------------------------------------------------------
745
746ATTR_HOT double netlist_matrix_solver_direct1_t::vsolve()
747{
748    solve_base<netlist_matrix_solver_direct1_t>(this);
749    return this->compute_next_timestep();
750}
751
752ATTR_HOT inline int netlist_matrix_solver_direct1_t::vsolve_non_dynamic()
753{
754
755    netlist_analog_net_t *net = m_nets[0];
756   this->build_LE();
757   //NL_VERBOSE_OUT(("%f %f\n", new_val, m_RHS[0] / m_A[0][0]);
758
759   double new_val =  m_RHS[0] / m_A[0][0];
760
761   double e = (new_val - net->m_cur_Analog);
762   double cerr = fabs(e);
763
764   net->m_cur_Analog = new_val;
765
766   if (is_dynamic() && (cerr  > m_params.m_accuracy))
767   {
768      return 2;
769   }
770   else
771      return 1;
772
773}
774
775
776
777// ----------------------------------------------------------------------------------------
778// netlist_matrix_solver - Direct2
779// ----------------------------------------------------------------------------------------
780
781ATTR_HOT double netlist_matrix_solver_direct2_t::vsolve()
782{
783    solve_base<netlist_matrix_solver_direct2_t>(this);
784    return this->compute_next_timestep();
785}
786
787ATTR_HOT inline int netlist_matrix_solver_direct2_t::vsolve_non_dynamic()
788{
789
790   build_LE();
791
792   const double a = m_A[0][0];
793   const double b = m_A[0][1];
794   const double c = m_A[1][0];
795   const double d = m_A[1][1];
796
797   double new_val[2];
798   new_val[1] = (a * m_RHS[1] - c * m_RHS[0]) / (a * d - b * c);
799   new_val[0] = (m_RHS[0] - b * new_val[1]) / a;
800
801   if (is_dynamic())
802   {
803      double err = this->delta(new_val);
804      store(new_val, true);
805      if (err > m_params.m_accuracy )
806         return 2;
807      else
808         return 1;
809   }
810   store(new_val, false);
811   return 1;
812}
813
814// ----------------------------------------------------------------------------------------
815// netlist_matrix_solver - Gauss - Seidel
816// ----------------------------------------------------------------------------------------
817
818template <int m_N, int _storage_N>
819ATTR_HOT double netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve()
820{
821    /*
822     * enable linear prediction on first newton pass
823     */
824
825    if (USE_LINEAR_PREDICTION)
826        for (int k = 0; k < this->N(); k++)
827        {
828            this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
829            this->m_nets[k]->m_cur_Analog = this->m_nets[k]->m_cur_Analog + this->m_Vdelta[k] * this->current_timestep() * m_lp_fact;
830        }
831    else
832        for (int k = 0; k < this->N(); k++)
833        {
834            this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
835        }
836
837    this->solve_base(this);
838
839    if (USE_LINEAR_PREDICTION)
840    {
841        double sq = 0;
842        double sqo = 0;
843        for (int k = 0; k < this->N(); k++)
844        {
845            netlist_analog_net_t *n = this->m_nets[k];
846            double nv = (n->m_cur_Analog - this->m_last_V[k]) / this->current_timestep();
847            sq += nv * nv;
848            sqo += this->m_Vdelta[k] * this->m_Vdelta[k];
849            this->m_Vdelta[k] = nv;
850        }
851        if (sqo > 1e-90)
852            m_lp_fact = sqrt(sq/sqo);
853        else
854            m_lp_fact = 0.0;
855        if (m_lp_fact > 2.0)
856            m_lp_fact = 2.0;
857        //printf("fact %f\n", fact);
858    }
859
860
861    return this->compute_next_timestep();
862}
863
864template <int m_N, int _storage_N>
865ATTR_HOT inline int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_dynamic()
866{
867    /* The matrix based code looks a lot nicer but actually is 30% slower than
868     * the optimized code which works directly on the data structures.
869     * Need something like that for gaussian elimination as well.
870     */
871
872#if USE_MATRIX_GS
873    static double ws = 1.0;
874    ATTR_ALIGN double new_v[_storage_N] = { 0.0 };
875    const int iN = this->N();
876
877    bool resched = false;
878
879    int  resched_cnt = 0;
880
881    this->build_LE();
882
883    {
884        double frob;
885        frob = 0;
886        for (int k = 0; k < iN; k++)
887        {
888            new_v[k] = this->m_nets[k]->m_cur_Analog;
889            for (int i = 0; i < iN; i++)
890            {
891                frob += this->m_A[k][i] * this->m_A[k][i];
892            }
893
894        }
895        double frobA = sqrt(frob /(iN));
896        if (1 &&frobA < 1.0)
897            //ws = 2.0 / (1.0 + sqrt(1.0-frobA));
898            ws = 2.0 / (2.0 - frobA);
899        else
900            ws = 1.0;
901        ws = 0.9;
902    }
903
904    // Frobenius norm for (D-L)^(-1)U
905    //double frobU;
906    //double frobL;
907    //double norm;
908    do {
909        resched = false;
910        double cerr = 0.0;
911        //frobU = 0;
912        //frobL = 0;
913        //norm = 0;
914
915        for (int k = 0; k < iN; k++)
916        {
917            double Idrive = 0;
918            //double norm_t = 0;
919            // Reduction loops need -ffast-math
920            for (int i = 0; i < iN; i++)
921                Idrive += this->m_A[k][i] * new_v[i];
922
923            for (int i = 0; i < iN; i++)
924            {
925                //if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k];
926                //if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k];
927                //norm_t += fabs(this->m_A[k][i]);
928            }
929
930            //if (norm_t > norm) norm = norm_t;
931            const double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
932
933            const double e = fabs(new_val - new_v[k]);
934            cerr = (e > cerr ? e : cerr);
935            new_v[k] = new_val;
936        }
937
938        if (cerr > this->m_params.m_accuracy)
939        {
940            resched = true;
941        }
942        resched_cnt++;
943        //ATTR_UNUSED double frobUL = sqrt((frobU + frobL) / (double) (iN) / (double) (iN));
944    } while (resched && (resched_cnt < this->m_params.m_gs_loops));
945    //printf("Frobenius %f %f %f %f %f\n", sqrt(frobU), sqrt(frobL), frobUL, frobA, norm);
946    //printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + sqrt(1-frobUL)), 2.0 / (1.0 + sqrt(1-frobA)) ); //        printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
947    //printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); //        printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
948
949
950    this->store(new_v, false);
951
952    this->m_gs_total += resched_cnt;
953    if (resched)
954    {
955        //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
956        this->m_gs_fail++;
957        int tmp = netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic();
958        this->m_calculations++;
959        return tmp;
960    }
961    else {
962        this->m_calculations++;
963
964        return resched_cnt;
965    }
966
967#else
968    const int iN = this->N();
969   bool resched = false;
970   int  resched_cnt = 0;
971
972   /* ideally, we could get an estimate for the spectral radius of
973    * Inv(D - L) * U
974    *
975    * and estimate using
976    *
977    * omega = 2.0 / (1.0 + sqrt(1-rho))
978    */
979
980    const double ws = SORP; //1.045; //2.0 / (1.0 + /*sin*/(3.14159 * 5.5 / (double) (m_nets.count()+1)));
981
982   ATTR_ALIGN double w[_storage_N];
983   ATTR_ALIGN double one_m_w[_storage_N];
984   ATTR_ALIGN double RHS[_storage_N];
985   ATTR_ALIGN double new_V[_storage_N];
986
987    for (int k = 0; k < iN; k++)
988    {
989        new_V[k] = this->m_nets[k]->m_cur_Analog;
990    }
991   for (int k = 0; k < iN; k++)
992   {
993      double gtot_t = 0.0;
994      double gabs_t = 0.0;
995      double RHS_t = 0.0;
996
997      {
998            const int term_count = this->m_terms[k]->count();
999            const double * RESTRICT gt = this->m_terms[k]->gt();
1000            const double * RESTRICT go = this->m_terms[k]->go();
1001            const double * RESTRICT Idr = this->m_terms[k]->Idr();
1002#if VECTALT
1003            for (int i = 0; i < term_count; i++)
1004            {
1005                gtot_t += gt[i];
1006                if (USE_GABS) gabs_t += fabs(go[i]);
1007                RHS_t += Idr[i];
1008            }
1009#else
1010            if (USE_GABS)
1011                this->m_terms[k]->ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t);
1012            else
1013                this->m_terms[k]->ops()->sum2(gt, Idr, gtot_t, RHS_t);
1014#endif
1015            double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
1016            for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
1017                //RHS_t += go[i] * terms[i]->m_otherterm->net().as_analog().Q_Analog();
1018                RHS_t += go[i] * *other_cur_analog[i];
1019      }
1020
1021        RHS[k] = RHS_t;
1022
1023        //if (fabs(gabs_t - fabs(gtot_t)) > 1e-20)
1024        //    printf("%d %e abs: %f tot: %f\n",k, gabs_t / gtot_t -1.0, gabs_t, gtot_t);
1025
1026        gabs_t *= 0.5; // avoid rounding issues
1027        if (!USE_GABS || gabs_t <= gtot_t)
1028        {
1029            w[k] = ws / gtot_t;
1030            one_m_w[k] = 1.0 - ws;
1031        }
1032        else
1033        {
1034            //printf("abs: %f tot: %f\n", gabs_t, gtot_t);
1035            w[k] = 1.0 / (gtot_t + gabs_t);
1036            one_m_w[k] = 1.0 - 1.0 * gtot_t / (gtot_t + gabs_t);
1037        }
1038
1039   }
1040
1041   do {
1042      resched = false;
1043      //double cerr = 0.0;
1044
1045      for (int k = 0; k < iN; k++)
1046      {
1047            const int * RESTRICT net_other = this->m_terms[k]->net_other();
1048         const int railstart = this->m_terms[k]->m_railstart;
1049            const double * RESTRICT go = this->m_terms[k]->go();
1050
1051         double Idrive = 0.0;
1052            for (int i = 0; i < railstart; i++)
1053                Idrive = Idrive + go[i] * new_V[net_other[i]];
1054
1055            //double new_val = (net->m_cur_Analog * gabs[k] + iIdr) / (gtot[k]);
1056         const double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
1057
1058         resched = resched || (fabs(new_val - new_V[k]) > this->m_params.m_accuracy);
1059            new_V[k] = new_val;
1060      }
1061
1062      resched_cnt++;
1063   } while (resched && (resched_cnt < this->m_params.m_gs_loops));
1064
1065    for (int k = 0; k < iN; k++)
1066        this->m_nets[k]->m_cur_Analog = new_V[k];
1067
1068   this->m_gs_total += resched_cnt;
1069
1070   if (resched)
1071   {
1072       //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
1073       this->m_gs_fail++;
1074       int tmp = netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic();
1075       this->m_calculations++;
1076        return tmp;
1077   }
1078   else {
1079       this->m_calculations++;
1080
1081       return resched_cnt;
1082   }
1083#endif
1084}
1085
1086// ----------------------------------------------------------------------------------------
1087324// solver
1088325// ----------------------------------------------------------------------------------------
1089326
r31037r31038
1099336
1100337   register_param("ACCURACY", m_accuracy, 1e-7);
1101338    register_param("GS_LOOPS", m_gs_loops, 9);              // Gauss-Seidel loops
1102    register_param("GS_THRESHOLD", m_gs_threshold, 5);          // below this value, gaussian elimination is used
339    register_param("GS_THRESHOLD", m_gs_threshold, 5);      // below this value, gaussian elimination is used
1103340    register_param("NR_LOOPS", m_nr_loops, 25);             // Newton-Raphson loops
1104341   register_param("PARALLEL", m_parallel, 0);
342    register_param("SOR_FACTOR", m_sor, 1.059);
1105343    register_param("GMIN", m_gmin, NETLIST_GMIN_DEFAULT);
1106344    register_param("DYNAMIC_TS", m_dynamic, 0);
1107345    register_param("LTE", m_lte, 5e-5);                     // diff/timestep
r31037r31038
1190428netlist_matrix_solver_t * NETLIB_NAME(solver)::create_solver(int size, const int gs_threshold, const bool use_specific)
1191429{
1192430    if (use_specific && m_N == 1)
1193        return new netlist_matrix_solver_direct1_t();
431        return new netlist_matrix_solver_direct1_t(m_params);
1194432    else if (use_specific && m_N == 2)
1195        return new netlist_matrix_solver_direct2_t();
433        return new netlist_matrix_solver_direct2_t(m_params);
1196434    else
1197435    {
1198436        if (size >= gs_threshold)
1199            return new netlist_matrix_solver_gauss_seidel_t<m_N,_storage_N>(size);
437            return new netlist_matrix_solver_gauss_seidel_t<m_N,_storage_N>(m_params, size);
1200438        else
1201            return new netlist_matrix_solver_direct_t<m_N, _storage_N>(size);
439            return new netlist_matrix_solver_direct_t<m_N, _storage_N>(m_params, size);
1202440    }
1203441}
1204442
r31037r31038
1214452    m_params.m_nr_loops = m_nr_loops.Value();
1215453    m_params.m_nt_sync_delay = m_sync_delay.Value();
1216454    m_params.m_lte = m_lte.Value();
455    m_params.m_sor = m_sor.Value();
456
1217457    m_params.m_min_timestep = m_min_timestep.Value();
1218458    m_params.m_dynamic = (m_dynamic.Value() == 1 ? true : false);
1219459    m_params.m_max_timestep = netlist_time::from_hz(m_freq.Value()).as_double();
r31037r31038
1302542            break;
1303543      }
1304544
1305      ms->m_params = m_params;
1306
1307545      register_sub(*ms, pstring::sprintf("Solver %d",m_mat_solvers.count()));
1308546
1309547        ms->vsetup(groups[i]);
trunk/src/emu/netlist/analog/nld_ms_gauss_seidel.h
r0r31038
1/*
2 * nld_ms_direct1.h
3 *
4 */
5
6#ifndef NLD_MS_GAUSS_SEIDEL_H_
7#define NLD_MS_GAUSS_SEIDEL_H_
8
9#include <cmath>
10
11#include "nld_solver.h"
12#include "nld_ms_direct.h"
13
14template <int m_N, int _storage_N>
15class ATTR_ALIGNED(64) netlist_matrix_solver_gauss_seidel_t: public netlist_matrix_solver_direct_t<m_N, _storage_N>
16{
17public:
18
19    netlist_matrix_solver_gauss_seidel_t(const netlist_solver_parameters_t &params, int size)
20      : netlist_matrix_solver_direct_t<m_N, _storage_N>(params, size)
21      , m_lp_fact(0)
22      , m_gs_fail(0)
23      , m_gs_total(0)
24      {}
25
26    virtual ~netlist_matrix_solver_gauss_seidel_t() {}
27
28    ATTR_COLD virtual void log_stats();
29
30    ATTR_HOT inline int vsolve_non_dynamic();
31protected:
32    ATTR_HOT virtual double vsolve();
33
34private:
35    double m_lp_fact;
36    int m_gs_fail;
37    int m_gs_total;
38
39};
40
41// ----------------------------------------------------------------------------------------
42// netlist_matrix_solver - Gauss - Seidel
43// ----------------------------------------------------------------------------------------
44
45template <int m_N, int _storage_N>
46void netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::log_stats()
47{
48#if 1
49    printf("==============================================\n");
50    printf("Solver %s\n", this->name().cstr());
51    printf("       ==> %d nets\n", this->N()); //, (*(*groups[i].first())->m_core_terms.first())->name().cstr());
52    printf("       has %s elements\n", this->is_dynamic() ? "dynamic" : "no dynamic");
53    printf("       has %s elements\n", this->is_timestep() ? "timestep" : "no timestep");
54    printf("       %10d invocations (%6d Hz)  %10d gs fails (%6.2f%%) %6.3f average\n",
55            this->m_calculations,
56            this->m_calculations * 10 / (int) (this->netlist().time().as_double() * 10.0),
57            this->m_gs_fail,
58            100.0 * (double) this->m_gs_fail / (double) this->m_calculations,
59            (double) this->m_gs_total / (double) this->m_calculations);
60#endif
61}
62
63template <int m_N, int _storage_N>
64ATTR_HOT double netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve()
65{
66    /*
67     * enable linear prediction on first newton pass
68     */
69
70    if (USE_LINEAR_PREDICTION)
71        for (int k = 0; k < this->N(); k++)
72        {
73            this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
74            this->m_nets[k]->m_cur_Analog = this->m_nets[k]->m_cur_Analog + this->m_Vdelta[k] * this->current_timestep() * m_lp_fact;
75        }
76    else
77        for (int k = 0; k < this->N(); k++)
78        {
79            this->m_last_V[k] = this->m_nets[k]->m_cur_Analog;
80        }
81
82    this->solve_base(this);
83
84    if (USE_LINEAR_PREDICTION)
85    {
86        double sq = 0;
87        double sqo = 0;
88        const double rez_cts = 1.0 / this->current_timestep();
89        for (int k = 0; k < this->N(); k++)
90        {
91            const netlist_analog_net_t *n = this->m_nets[k];
92            const double nv = (n->m_cur_Analog - this->m_last_V[k]) * rez_cts ;
93            sq += nv * nv;
94            sqo += this->m_Vdelta[k] * this->m_Vdelta[k];
95            this->m_Vdelta[k] = nv;
96        }
97        if (sqo > 1e-90)
98            m_lp_fact = std::min(sqrt(sq/sqo), 2.0);
99        else
100            m_lp_fact = 0.0;
101    }
102
103
104    return this->compute_next_timestep();
105}
106
107template <int m_N, int _storage_N>
108ATTR_HOT inline int netlist_matrix_solver_gauss_seidel_t<m_N, _storage_N>::vsolve_non_dynamic()
109{
110    /* The matrix based code looks a lot nicer but actually is 30% slower than
111     * the optimized code which works directly on the data structures.
112     * Need something like that for gaussian elimination as well.
113     */
114
115#if 0 || USE_MATRIX_GS
116    static double ws = 1.0;
117    ATTR_ALIGN double new_v[_storage_N] = { 0.0 };
118    const int iN = this->N();
119
120    bool resched = false;
121
122    int  resched_cnt = 0;
123
124    this->build_LE();
125
126    {
127        double frob;
128        frob = 0;
129        double rmin = 1e99, rmax = -1e99;
130        for (int k = 0; k < iN; k++)
131        {
132            new_v[k] = this->m_nets[k]->m_cur_Analog;
133            double s=0.0;
134            for (int i = 0; i < iN; i++)
135            {
136                frob += this->m_A[k][i] * this->m_A[k][i];
137                s = s + fabs(this->m_A[k][i]);
138            }
139
140            if (s<rmin)
141                rmin = s;
142            if (s>rmax)
143                rmax = s;
144        }
145        //if (fabs(rmax) > 0.01)
146        //    printf("rmin %f rmax %f\n", rmin, rmax);
147#if 0
148        double frobA = sqrt(frob /(iN));
149        if (1 &&frobA < 1.0)
150            //ws = 2.0 / (1.0 + sqrt(1.0-frobA));
151            ws = 2.0 / (2.0 - frobA);
152        else
153            ws = 1.0;
154        ws = 0.9;
155#else
156        // calculate an estimate for rho.
157        // This is based on the Perron–Frobenius theorem for positive matrices.
158        // No mathematical proof here. The following estimates the
159        // optimal relaxation parameter pretty well. Unfortunately, the
160        // overhead is bigger than the gain. Consequently the fast GS below
161        // uses a fixed GS. One can however use this here to determine a
162        // suitable parameter.
163        double rm = (rmax + rmin) * 0.5;
164        if (rm < 1.0)
165            ws = 2.0 / (1.0 + sqrt(1.0-rm));
166        else
167            ws = 1.0;
168#endif
169    }
170
171    // Frobenius norm for (D-L)^(-1)U
172    //double frobU;
173    //double frobL;
174    //double norm;
175    do {
176        resched = false;
177        double cerr = 0.0;
178        //frobU = 0;
179        //frobL = 0;
180        //norm = 0;
181
182        for (int k = 0; k < iN; k++)
183        {
184            double Idrive = 0;
185            //double norm_t = 0;
186            // Reduction loops need -ffast-math
187            for (int i = 0; i < iN; i++)
188                Idrive += this->m_A[k][i] * new_v[i];
189
190            for (int i = 0; i < iN; i++)
191            {
192                //if (i < k) frobL += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] /this-> m_A[k][k];
193                //if (i > k) frobU += this->m_A[k][i] * this->m_A[k][i] / this->m_A[k][k] / this->m_A[k][k];
194                //norm_t += fabs(this->m_A[k][i]);
195            }
196
197            //if (norm_t > norm) norm = norm_t;
198            const double new_val = (1.0-ws) * new_v[k] + ws * (this->m_RHS[k] - Idrive + this->m_A[k][k] * new_v[k]) / this->m_A[k][k];
199
200            const double e = fabs(new_val - new_v[k]);
201            cerr = (e > cerr ? e : cerr);
202            new_v[k] = new_val;
203        }
204
205        if (cerr > this->m_params.m_accuracy)
206        {
207            resched = true;
208        }
209        resched_cnt++;
210        //ATTR_UNUSED double frobUL = sqrt((frobU + frobL) / (double) (iN) / (double) (iN));
211    } while (resched && (resched_cnt < this->m_params.m_gs_loops));
212    //printf("Frobenius %f %f %f %f %f\n", sqrt(frobU), sqrt(frobL), frobUL, frobA, norm);
213    //printf("Omega Estimate1 %f %f\n", 2.0 / (1.0 + sqrt(1-frobUL)), 2.0 / (1.0 + sqrt(1-frobA)) ); //        printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
214    //printf("Omega Estimate2 %f %f\n", 2.0 / (2.0 - frobUL), 2.0 / (2.0 - frobA) ); //        printf("Frobenius %f\n", sqrt(frob / (double) (iN * iN) ));
215
216
217    this->store(new_v, false);
218
219    this->m_gs_total += resched_cnt;
220    if (resched)
221    {
222        //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
223        this->m_gs_fail++;
224        int tmp = netlist_matrix_solver_direct_t<m_N, _storage_N>::solve_non_dynamic();
225        this->m_calculations++;
226        return tmp;
227    }
228    else {
229        this->m_calculations++;
230
231        return resched_cnt;
232    }
233
234#else
235    const int iN = this->N();
236    bool resched = false;
237    int  resched_cnt = 0;
238
239    /* ideally, we could get an estimate for the spectral radius of
240     * Inv(D - L) * U
241     *
242     * and estimate using
243     *
244     * omega = 2.0 / (1.0 + sqrt(1-rho))
245     */
246
247    const double ws = this->m_params.m_sor; //1.045; //2.0 / (1.0 + /*sin*/(3.14159 * 5.5 / (double) (m_nets.count()+1)));
248    //const double ws = 2.0 / (1.0 + sin(3.14159 * 4 / (double) (this->N())));
249
250    ATTR_ALIGN double w[_storage_N];
251    ATTR_ALIGN double one_m_w[_storage_N];
252    ATTR_ALIGN double RHS[_storage_N];
253    ATTR_ALIGN double new_V[_storage_N];
254
255    for (int k = 0; k < iN; k++)
256    {
257        double gtot_t = 0.0;
258        double gabs_t = 0.0;
259        double RHS_t = 0.0;
260
261        new_V[k] = this->m_nets[k]->m_cur_Analog;
262
263        {
264            const int term_count = this->m_terms[k]->count();
265            const double * const RESTRICT gt = this->m_terms[k]->gt();
266            const double * const RESTRICT go = this->m_terms[k]->go();
267            const double * const RESTRICT Idr = this->m_terms[k]->Idr();
268            const double * const *other_cur_analog = this->m_terms[k]->other_curanalog();
269#if VECTALT
270            for (int i = 0; i < term_count; i++)
271            {
272                gtot_t = gtot_t + gt[i];
273                RHS_t = RHS_t + Idr[i];
274            }
275            if (USE_GABS)
276                for (int i = 0; i < term_count; i++)
277                    gabs_t = gabs_t + fabs(go[i]);
278#else
279            if (USE_GABS)
280                this->m_terms[k]->ops()->sum2a(gt, Idr, go, gtot_t, RHS_t, gabs_t);
281            else
282                this->m_terms[k]->ops()->sum2(gt, Idr, gtot_t, RHS_t);
283#endif
284            for (int i = this->m_terms[k]->m_railstart; i < term_count; i++)
285                RHS_t = RHS_t  + go[i] * *other_cur_analog[i];
286        }
287
288        RHS[k] = RHS_t;
289
290        //if (fabs(gabs_t - fabs(gtot_t)) > 1e-20)
291        //    printf("%d %e abs: %f tot: %f\n",k, gabs_t / gtot_t -1.0, gabs_t, gtot_t);
292
293        gabs_t *= 0.5; // avoid rounding issues
294        if (!USE_GABS || gabs_t <= gtot_t)
295        {
296            w[k] = ws / gtot_t;
297            one_m_w[k] = 1.0 - ws;
298        }
299        else
300        {
301            //printf("abs: %f tot: %f\n", gabs_t, gtot_t);
302            w[k] = 1.0 / (gtot_t + gabs_t);
303            one_m_w[k] = 1.0 - 1.0 * gtot_t / (gtot_t + gabs_t);
304        }
305
306    }
307
308    const double accuracy = this->m_params.m_accuracy;
309
310    do {
311        resched = false;
312
313        for (int k = 0; k < iN; k++)
314        {
315            const int * RESTRICT net_other = this->m_terms[k]->net_other();
316            const int railstart = this->m_terms[k]->m_railstart;
317            const double * RESTRICT go = this->m_terms[k]->go();
318
319            double Idrive = 0.0;
320            for (int i = 0; i < railstart; i++)
321                Idrive = Idrive + go[i] * new_V[net_other[i]];
322
323            //double new_val = (net->m_cur_Analog * gabs[k] + iIdr) / (gtot[k]);
324            const double new_val = new_V[k] * one_m_w[k] + (Idrive + RHS[k]) * w[k];
325
326            resched = resched || (std::abs(new_val - new_V[k]) > accuracy);
327            new_V[k] = new_val;
328        }
329
330        resched_cnt++;
331    } while (resched && (resched_cnt < this->m_params.m_gs_loops));
332
333    for (int k = 0; k < iN; k++)
334        this->m_nets[k]->m_cur_Analog = new_V[k];
335
336    this->m_gs_total += resched_cnt;
337    this->m_calculations++;
338
339    if (resched)
340    {
341        //this->netlist().warning("Falling back to direct solver .. Consider increasing RESCHED_LOOPS");
342        this->m_gs_fail++;
343        return netlist_matrix_solver_direct_t<m_N, _storage_N>::vsolve_non_dynamic();
344    }
345    else {
346        return resched_cnt;
347    }
348#endif
349}
350
351
352#endif /* NLD_MS_GAUSS_SEIDEL_H_ */
Property changes on: trunk/src/emu/netlist/analog/nld_ms_gauss_seidel.h
Added: svn:mime-type
   + text/plain
Added: svn:eol-style
   + native

Previous 199869 Revisions Next


© 1997-2024 The MAME Team